# Grade NBA Predictions

### Module Imports and Settings

In [111]:
from bs4 import BeautifulSoup as Soup
from os import path
from sys import exit
import requests
from pandas import DataFrame
import pandas as pd
import numpy as np
from time import sleep
from IPython.core.interactiveshell import InteractiveShell

In [85]:
# Do not truncate wide DataFrames: a limit of None shows every column.
pd.set_option('display.max_columns', None)
# Render the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

In [130]:
# Folder that holds one predictions CSV per day, named <YYYYMMDD>.csv.
DATA_DIR = 'C:\\Users\\Harry\\Documents\\LTCWFF\\ltcwff_files\\data\\daily_predictions'

# Team name as it appears in the 'Team' columns -> three-letter code used in
# the prediction columns. Several spellings of the two Los Angeles teams are
# accepted; a bare 'LA' is treated as the Clippers.
teams = {
    'Atlanta': 'ATL',
    'Boston': 'BOS',
    'Brooklyn': 'BRK',
    'Charlotte': 'CHO',
    'Chicago': 'CHI',
    'Cleveland': 'CLE',
    'Dallas': 'DAL',
    'Denver': 'DEN',
    'Detroit': 'DET',
    'Golden State': 'GSW',
    'Houston': 'HOU',
    'Indiana': 'IND',
    'LA': 'LAC',
    'L.A. Lakers': 'LAL',
    'LA Lakers': 'LAL',
    'LA Clippers': 'LAC',
    'L.A. Clippers': 'LAC',
    'Memphis': 'MEM',
    'Miami': 'MIA',
    'Milwaukee': 'MIL',
    'Minnesota': 'MIN',
    'New Orleans': 'NOP',
    'New York': 'NYK',
    'Oklahoma City': 'OKC',
    'Orlando': 'ORL',
    'Philadelphia': 'PHI',
    'Phoenix': 'PHO',
    'Portland': 'POR',
    'Sacramento': 'SAC',
    'San Antonio': 'SAS',
    'Toronto': 'TOR',
    'Utah': 'UTA',
    'Washington': 'WAS',
}

### Helper Functions

In [87]:
def get_url_from_date(month, day, year, prefix = 'https://www.basketball-reference.com/boxscores'):
    """Build the box-score index URL for a single date.

    Args:
        month, day, year: Date parts, inserted into the query string as given
            (no padding or validation is applied).
        prefix: Base address the query string is appended to.

    Returns:
        The string ``<prefix>/?month=<month>&day=<day>&year=<year>``.
    """
    query = f'?month={month}&day={day}&year={year}'
    return f'{prefix}/{query}'

In [88]:
def get_soup(month, day, year):
    """Download and parse the box-score index page for one date.

    The URL is built with get_url_from_date and printed before the request
    is sent.

    Args:
        month, day, year: Date parts forwarded to get_url_from_date.

    Returns:
        The response body parsed with the 'html.parser' backend.

    Raises:
        SystemExit: If the response status is outside the 2xx range.
        requests.exceptions.RequestException: If the request fails or does
            not answer within the timeout.
    """
    url = get_url_from_date(month, day, year)
    print(url)
    # Without a timeout requests waits forever on a stalled connection,
    # which would freeze the notebook.
    response = requests.get(url, timeout = 30)
    if not 200 <= response.status_code < 300:
        # Include status and URL: the cause may be rate limiting or an
        # outage rather than a bad date.
        exit(f'Invalid Date (HTTP {response.status_code} for {url})')
    return Soup(response.content, 'html.parser')

In [89]:
def parse_row(row):
    """Return the text of every <td> cell in a table row.

    For a cell that contains a link, the link's text is used; otherwise the
    cell's own ``.string`` is used (which may be None).

    Args:
        row: Parsed <tr> element exposing ``find_all``.

    Returns:
        List with one entry per <td>; empty when the row has no <td> cells.
    """
    values = []
    for cell in row.find_all('td'):
        # Look the link up once and test identity against None.
        link = cell.find('a')
        values.append(cell.string if link is None else link.string)
    return values

In [90]:
def table_to_df(table, overheader = 0):
    """Convert a parsed HTML table into a DataFrame of cell strings.

    Column names come from the <th> cells of one <thead> row; a header cell
    without a single string becomes ''. Body rows are parsed with parse_row
    and rows without any <td> cell are skipped. When body rows carry a <th>
    label, the labels become the first column.

    Args:
        table: Parsed <table> element with <thead> and <tbody> children.
        overheader: Index of the <tr> inside <thead> that supplies the
            column names.

    Returns:
        DataFrame with one row per kept body row; an empty DataFrame with
        the header columns when no body row has <td> cells.

    Raises:
        ValueError: If the number of header cells differs from the number
            of columns in the parsed rows (raised by pandas).
    """
    header_cells = table.find('thead').find_all('tr')[overheader].find_all('th')
    cols = [ cell.string if cell.string is not None else '' for cell in header_cells ]

    # Collect label and cells from the same pass so they cannot drift apart:
    # a <th>-only row inside <tbody> contributes neither a label nor a data
    # row.
    labels = []
    parsed_rows = []
    any_label = False
    for row in table.find('tbody').find_all('tr'):
        cells = parse_row(row)
        if not cells:
            continue
        label_cell = row.find('th')
        if label_cell is not None:
            any_label = True
        labels.append(None if label_cell is None else label_cell.string)
        parsed_rows.append(cells)

    if not parsed_rows:
        # Assigning column names to a zero-column frame would raise.
        return DataFrame(columns = cols)

    df = DataFrame(parsed_rows)
    if any_label:
        df.insert(0, '', labels)
    df.columns = cols

    return df

In [132]:
def compress_table(predictions, results):
    """Combine paired prediction rows and final scores into one row per game.

    ``predictions`` must hold two consecutive rows per game. The first row of
    each pair fills the 'Home ...' fields and the second row the 'Away ...'
    fields; the prediction, over and under fields are taken from the first
    row.
    NOTE(review): whether the first row of a pair really is the home side
    depends on how the predictions file is ordered - confirm.

    Args:
        predictions: DataFrame with columns 'Team', 'ML', 'ML Prediction',
            'Spread', 'Spread Odds', 'Spread Prediction', 'Over',
            'Over Odds', 'Under', 'Under Odds' and 'Total Prediction'.
        results: DataFrame with a 'Team' column and a final-score column 'T'.

    Returns:
        DataFrame with one row per game.

    Raises:
        IndexError: If ``predictions`` has an odd number of rows, or a
            predicted team has no row in ``results``.
        KeyError: If a team name is missing from the ``teams`` mapping.
    """
    n_rows = len(predictions.index)
    if n_rows % 2 != 0:
        raise IndexError(f'predictions must hold two rows per game, got {n_rows} rows')

    # Final score by team name; the first occurrence wins if a name repeats.
    totals = {}
    for team, total in zip(results['Team'], results['T']):
        totals.setdefault(team, total)

    def final_score(team):
        # Look each team up by its own name instead of assuming the opponent
        # sits on the next row of results.
        if team not in totals:
            raise IndexError(f'No result found for team {team!r}')
        return totals[team]

    def cell(position, column):
        # Positional access keeps each column's own dtype.
        return predictions[column].iloc[position]

    rows = []
    for first in range(0, n_rows, 2):
        second = first + 1
        home_name = cell(first, 'Team')
        away_name = cell(second, 'Team')

        rows.append({
            'Home Team': teams[home_name],
            'Away Team': teams[away_name],
            'Home T': final_score(home_name),
            'Away T': final_score(away_name),
            'Home ML': cell(first, 'ML'),
            'Away ML': cell(second, 'ML'),
            'ML Prediction': cell(first, 'ML Prediction'),
            'Home Spread': cell(first, 'Spread'),
            'Home Spread Odds': cell(first, 'Spread Odds'),
            'Away Spread': cell(second, 'Spread'),
            'Away Spread Odds': cell(second, 'Spread Odds'),
            'Spread Prediction': cell(first, 'Spread Prediction'),
            'Over': cell(first, 'Over'),
            'Over Odds': cell(first, 'Over Odds'),
            'Under': cell(first, 'Under'),
            'Under Odds': cell(first, 'Under Odds'),
            'Total Prediction': cell(first, 'Total Prediction'),
        })

    return pd.DataFrame(rows)

### Get Games DataFrame

In [91]:
def get_all_games(month, day, year):
    """Scrape the line scores of every game played on one date.

    Args:
        month, day, year: Date parts forwarded to get_soup; they are also
            concatenated as ``<year><month><day>`` into the 'Date' field.

    Returns:
        List of dicts, two per game (one per team, in page order), with the
        keys 'Date', 'Team', '1', '2', '3', '4' and 'T'. The period values
        are the cell strings; 'T' is the integer sum of all period scores.
        The list is empty when the page lists no games.
    """
    soup = get_soup(month, day, year)

    # find_all returns an empty result set (never None) when nothing matches,
    # so the comprehension simply yields no tables.
    boxscores = soup.find_all('div', {'class': 'game_summary expanded nohover'})
    tables = [ table_to_df(boxscore.find_all('table')[1]) for boxscore in boxscores ]

    date = f'{year}{month}{day}'
    rows = []

    for table in tables:
        for team_row in range(2):
            # Sum every period so overtime points count toward the total.
            # NOTE(review): assumes all columns after the team name are
            # period scores (no total column) - confirm against the page.
            period_scores = [ int(score) for score in table.iloc[team_row, 1:] if pd.notna(score) ]
            rows.append({
                'Date': date,
                'Team': table.iloc[team_row, 0],
                '1': table.iloc[team_row, 1],
                '2': table.iloc[team_row, 2],
                '3': table.iloc[team_row, 3],
                '4': table.iloc[team_row, 4],
                'T': sum(period_scores),
            })

    return rows

In [92]:
# Ask which day to grade. The value is reused in the box-score URL and in the
# predictions file name, so strip stray whitespace and reject anything that
# is not eight digits with a plausible month and day.
date = input("Date to grade (YYYYMMDD format)").strip()
if len(date) != 8 or not date.isdigit():
    raise ValueError(f"Expected a date in YYYYMMDD format, got {date!r}")

year = date[0:4]
month = date[4:6]
day = date[6:8]

if not (1 <= int(month) <= 12 and 1 <= int(day) <= 31):
    raise ValueError(f"Month or day out of range in {date!r}")

In [109]:
# Scrape the day's line scores, then keep every column except the first one.
rows = get_all_games(month, day, year)
games = pd.DataFrame(rows)
results = games.drop(columns = games.columns[0])
results

https://www.basketball-reference.com/boxscores/?month=01&day=31&year=2021


Unnamed: 0,Team,1,2,3,4,T
0,Utah,29,25,37,26,117
1,Denver,43,36,24,25,128
2,Philadelphia,18,36,28,37,119
3,Indiana,28,35,32,15,110
4,Cleveland,35,26,25,18,104
5,Minnesota,30,25,33,21,109
6,LA Clippers,29,37,35,28,129
7,New York,31,34,26,24,115
8,Orlando,15,29,29,29,102
9,Toronto,27,31,26,31,115


TypeError: compress_table() missing 1 required positional argument: 'results'

In [100]:
# Load the predictions saved for the chosen date. Fail loudly when the file
# is missing: continuing would either raise NameError below or silently
# grade a stale `predictions` left in the kernel from an earlier run.
predictions_file = path.join(DATA_DIR, f"{date}.csv")
if not path.isfile(predictions_file):
    raise FileNotFoundError(f"No predictions file found for {date}: {predictions_file}")

predictions = pd.read_csv(predictions_file)

# The first two columns of the file are not needed for grading.
predictions = predictions.drop([predictions.columns[0], predictions.columns[1]], axis = 1)
predictions

Unnamed: 0,Team,Spread,Spread Odds,ML,Over,Over Odds,Under,Under Odds,Spread Prediction,Total Prediction,ML Prediction
0,Utah,-3.0,-102,-140,221.0,-103,221.0,-107,UTA,Under,UTA
1,Denver,3.0,-108,130,221.0,-103,221.0,-107,UTA,Under,UTA
2,Philadelphia,-1.5,-109,-118,223.0,-107,223.0,-103,PHI,Under,PHI
3,Indiana,1.5,-101,108,223.0,-107,223.0,-103,PHI,Under,PHI
4,Orlando,5.5,-103,200,215.5,-105,215.5,-105,TOR,Over,TOR
5,Toronto,-5.5,-107,-220,215.5,-105,215.5,-105,TOR,Over,TOR
6,Brooklyn,-6.0,-104,-250,245.5,-105,245.5,-105,BRK,Over,BRK
7,Washington,6.0,-106,230,245.5,-105,245.5,-105,BRK,Over,BRK
8,Cleveland,-3.0,-105,-143,218.0,-103,218.0,-107,CLE,Over,CLE
9,Minnesota,3.0,-105,133,218.0,-103,218.0,-107,CLE,Over,CLE


In [142]:
compressed = compress_table(predictions, results)


def _mark_correct(label, hit):
    """Return the label with ' - Correct' appended when the pick was right."""
    return f"{label} - Correct" if hit else label


def _graded_labels(ind):
    """Grade the three picks of the game stored at index label ``ind``."""
    def value(column):
        return compressed.loc[ind, column]

    home_team, away_team = value('Home Team'), value('Away Team')
    home_pts, away_pts = value('Home T'), value('Away T')

    # Moneyline: the picked side must win outright.
    ml_pick = value('ML Prediction')
    ml_hit = (ml_pick == home_team and home_pts > away_pts) or (ml_pick == away_team and away_pts > home_pts)

    # Spread: the picked side's margin must beat the negated spread.
    spread_pick = value('Spread Prediction')
    spread_hit = (spread_pick == home_team and home_pts - away_pts > -value('Home Spread')) or (spread_pick == away_team and away_pts - home_pts > -value('Away Spread'))

    # Total: combined score strictly above the over line or below the under line.
    total_pick = value('Total Prediction')
    total_hit = (total_pick == 'Over' and home_pts + away_pts > value('Over')) or (total_pick == 'Under' and home_pts + away_pts < value('Under'))

    return {
        'ML Prediction': _mark_correct(ml_pick, ml_hit),
        'Spread Prediction': _mark_correct(spread_pick, spread_hit),
        'Total Prediction': _mark_correct(total_pick, total_hit),
    }


def _highlight_correct(labels):
    """Green background for every label that contains 'Correct'."""
    return ["background: green" if "Correct" in label else "" for label in labels]


for ind in compressed.index:
    for column, label in _graded_labels(ind).items():
        compressed.loc[ind, column] = label

prediction_cols = ['ML Prediction', 'Spread Prediction', 'Total Prediction']
compressed.style.apply(_highlight_correct, axis = 1, subset = prediction_cols)

Unnamed: 0,Home Team,Away Team,Home T,Away T,Home ML,Away ML,ML Prediction,Home Spread,Home Spread Odds,Away Spread,Away Spread Odds,Spread Prediction,Over,Over Odds,Under,Under Odds,Total Prediction
0,UTA,DEN,117,128,-140,130,UTA,-3.0,-102,3.0,-108,UTA,221.0,-103,221.0,-107,Under
1,PHI,IND,119,110,-118,108,PHI - Correct,-1.5,-109,1.5,-101,PHI - Correct,223.0,-107,223.0,-103,Under
2,ORL,TOR,102,115,200,-220,TOR - Correct,5.5,-103,-5.5,-107,TOR - Correct,215.5,-105,215.5,-105,Over - Correct
3,BRK,WAS,146,149,-250,230,BRK,-6.0,-104,6.0,-106,BRK,245.5,-105,245.5,-105,Over - Correct
4,CLE,MIN,104,109,-143,133,CLE,-3.0,-105,3.0,-105,CLE,218.0,-103,218.0,-107,Over
