# Analyze Player Props

## Module Imports

In [1]:
from bs4 import BeautifulSoup as Soup
from bs4 import Comment
from sys import exit
from os import path
import requests
import pandas as pd
from time import sleep
from datetime import date
from unidecode import unidecode
from IPython.core.interactiveshell import InteractiveShell

## Global Variables and Settings

In [2]:
# Display all columns of DataFrames
pd.options.display.max_columns = None

# Print all output in a cell not just the last piece of output
InteractiveShell.ast_node_interactivity = "all"

# Location of sports data
DATA_DIR = 'C:\\Users\\Harry\\Documents\\LTCWFF\\ltcwff_files\\data\\player_props'

# Lists of columns relating to numeric stats
num_cols = ['MP', 'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%', 'ORB', 'DRB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF', 'PTS', 'GmSc', '+/-']

# Dictionary relating prop names to stats
stat_dict = {'Points': 'PTS', 'Assists': 'AST', 'Rebounds': 'TRB', 'Reb + Ast': 'TRB + AST', 'Made Threes': '3P', 'Pts + Ast': 'PTS + AST', 'Pts + Reb': 'PTS + TRB', 'Pts + Reb + Ast': 'PTS + TRB + AST'}

## Helper Functions

In [3]:
def get_url_from_player(player, prefix = 'https://www.basketball-reference.com/players'):
    first_name = player.split(' ')[0].lower()
    last_name = player.split(' ')[1].lower()
    num = 1
    return f'{prefix}/{last_name}[0]/{last_name[:5]}{first_name[:2]}{num:02d}.html'

In [4]:
def get_soup(player, year = '2021'):
    prefix = 'https://www.basketball-reference.com/players'
    player_name = player.split(' ')
    first_name = player_name[0].lower().replace('-', '').replace("'", '')
    adjuster = 1
    if player_name[len(player_name) - adjuster] == 'Jr.' or player_name[len(player_name) - adjuster] == 'Sr.':
        adjuster += 1
    last_name = player_name[len(player_name) - adjuster].lower().replace('-', '').replace("'", '')
    num = 1
    
    while num != 0:
        #print(num)
        url = f'{prefix}/{last_name[0]}/{last_name[:5]}{first_name[:2]}{num:02d}/gamelog/{year}'
        print(url)
        response = requests.get(url)
        if not 200 <= response.status_code < 300:
            print('Broken')
            exit('Invalid Date')
        else:
            soup = Soup(response.content, 'html.parser')
            #num = 0
            try:
                name = soup.find('h1', itemprop = 'name').find('span').string
                name = unidecode(name.split(f' {int(year) - 1}-{year[2:]} Game Log')[0])
                print(f'{name}, {player}')
                if name.lower().replace('-', ' ').replace("'", '') != player.lower().replace('-', ' ').replace("'", ''):
                    #print(f'Adding to {num}')
                    num = num + 1
                    #print(num)
                else:
                    num = 0
            except:
                num = 0
    return soup

In [5]:
def parse_row(row):
    return [ x.string for x in row.find_all('td') ]

In [6]:
def table_to_df(table, overheader = 0):
    cols = table.find('thead').find_all('tr')[overheader].find_all('th')
    cols = [ col.string if col.string != None else '' for col in cols[1:] ]
    #print(cols)
    
    stat_table = table.find('tbody')
        
    rows = stat_table.find_all('tr')
    
    #headers = [ row.find('th').string for row in rows if row.find('th') != None ]
    
    list_of_parsed_rows = [ parse_row(row) for row in rows[0:len(rows)] ]
    list_of_parsed_rows = [ row for row in list_of_parsed_rows if row != [] ]
    list_of_parsed_rows
    
    df = pd.DataFrame(list_of_parsed_rows)
    #if len(headers) != 0:
    #    df.insert(0, '', headers)
    df.columns = cols
    
    return df

In [7]:
def get_player_df(player, postseason = False):
    soup = get_soup(player)
    table = soup.find('table', id = 'pgl_basic')
    if postseason:
        comments = soup.find_all(string = lambda text: isinstance(text, Comment))
        for comment in comments:
            table = Soup(comment, 'html.parser')
            table = table.find('table', id = 'pgl_basic_playoffs')
            if table != None:
                break
    if table == None:
        return None
    df = table_to_df(table)
    
    df = df.dropna(axis = 0, subset = ['G'])
    
    df = df.drop(df.columns[0], axis = 1)
    df['Date'] = df['Date'].apply(lambda s: s.replace('-', ''))
    df = df.reset_index()
    return df

## Analyze Props

In [8]:
today = date.today()
datef = today.strftime("%Y%m%d")
print(datef)
#datef = '20210301'

20210717


In [9]:
try:
    props = pd.read_csv(path.join(DATA_DIR, f'{datef}.csv'))
except:
    os.system('python new_get_player_props.py')
    props = pd.read_csv(path.join(DATA_DIR, f'{datef}.csv'))
props = props.drop(props.columns[0], axis = 1)
players = list(props['Player'].unique())
props

Unnamed: 0,Player,Prop,Over,Over Odds,Under,Under Odds
0,Giannis Antetokounmpo,Points,33.5,-102,33.5,-120
1,Khris Middleton,Points,25.5,-104,25.5,-118
2,Jrue Holiday,Points,18.5,-116,18.5,-106
3,Brook Lopez,Points,10.5,-118,10.5,-102
4,P.J Tucker,Points,4.5,-124,4.5,102
...,...,...,...,...,...,...
77,Devin Booker,Pts + Reb + Ast,38.5,-106,38.5,-120
78,Chris Paul,Pts + Reb + Ast,34.5,-102,34.5,-125
79,Deandre Ayton,Pts + Reb + Ast,30.5,-106,30.5,-120
80,Jae Crowder,Pts + Reb + Ast,20.5,-111,20.5,-115


In [10]:
def get_player_prop_df(player, postseason = False, recent = 7):

    # Get DataFrame of player stats
    df = get_player_df(player, postseason)
    
    # If player stats were found, add them to the props table
    if not df is None:

        # Convert possible rows to numeric values
        for col in num_cols:
            try:
                df[col] = pd.to_numeric(df[col])
            except:
                continue

        # Set recent length to series length if postseason
        if postseason:
            series_opp = df.loc[len(df) - 1, 'Opp']
            series_df = df.loc[df['Opp'] == series_opp]
            recent = len(series_df)

        # Get overall and recent statistics means
        mean = df.mean()
        recent_mean = df[len(df) - recent: len(df)].mean()
        
        # Add the means to the dataframe and set the index to the date
        df = df.append(mean, ignore_index = True)
        df.loc[df.index[len(df) - 1], 'Date'] = 'Avg' if not postseason else 'Playoff Avg'
        df = df.append(recent_mean, ignore_index = True)
        df.loc[df.index[len(df) - 1], 'Date'] = 'Recent Avg' if not postseason else 'Series Avg'
        df = df.set_index('Date')

        # Subframe of props for this player
        player_props = props.loc[props['Player'] == player]

        # Append averages for each prop for this player
        for ind in player_props.index:

            # Get individual components of combo props to sum
            try:
                prop_parts = stat_dict[player_props.loc[ind, 'Prop']].split(' + ')
            except:
                continue
            
            # Set averages to 0 by default
            avg = 0
            recent_avg = 0

            # Loop through each prop part and add it to the average
            for prop_part in prop_parts:
                avg += df.loc['Avg', prop_part] if not postseason else df.loc['Playoff Avg', prop_part]
                recent_avg += df.loc['Recent Avg', prop_part] if not postseason else df.loc['Series Avg', prop_part]
            
            # Create average and differential columns
            if not postseason:
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Avg'] = avg
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Recent Avg'] = recent_avg
                over = props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Over']
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Avg +/-'] = avg - over
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Recent Avg +/-'] = recent_avg - over
            else:
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Playoff Avg'] = avg
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Series Avg'] = recent_avg
                over = props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Over']
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Playoff Avg +/-'] = avg - over
                props.loc[(props['Player'] == player) & (props['Prop'] == player_props.loc[ind, 'Prop']), 'Series Avg +/-'] = recent_avg - over

        # Print out player props and highlight extreme values
        player_props.style.apply(lambda x: [""] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = ['Over', 'Avg', 'Recent Avg'])

In [11]:
for player in players:
    get_player_prop_df(player)
    get_player_prop_df(player, True)

https://www.basketball-reference.com/players/a/antetgi01/gamelog/2021
Giannis Antetokounmpo, Giannis Antetokounmpo
https://www.basketball-reference.com/players/a/antetgi01/gamelog/2021
Giannis Antetokounmpo, Giannis Antetokounmpo
https://www.basketball-reference.com/players/m/middlkh01/gamelog/2021
Khris Middleton, Khris Middleton
https://www.basketball-reference.com/players/m/middlkh01/gamelog/2021
Khris Middleton, Khris Middleton
https://www.basketball-reference.com/players/h/holidjr01/gamelog/2021
Jrue Holiday, Jrue Holiday
https://www.basketball-reference.com/players/h/holidjr01/gamelog/2021
Jrue Holiday, Jrue Holiday
https://www.basketball-reference.com/players/l/lopezbr01/gamelog/2021
Brook Lopez, Brook Lopez
https://www.basketball-reference.com/players/l/lopezbr01/gamelog/2021
Brook Lopez, Brook Lopez
https://www.basketball-reference.com/players/t/tuckep.01/gamelog/2021
https://www.basketball-reference.com/players/t/tuckep.01/gamelog/2021
https://www.basketball-reference.com/pla

In [12]:
# Postseason extremes
edges = props.loc[(abs(props['Playoff Avg +/-']) > 3) | (abs(props['Series Avg +/-']) > 3)].sort_values(['Series Avg +/-', 'Player'])
edges.style.apply(lambda x: [ "background: green" if v > 3 else "background: red" if v < -3 else "" for v in x ], axis = 1, subset = ['Playoff Avg +/-', 'Series Avg +/-']) 

# Regular season extremes
'''edges = props.loc[(abs(props['Avg +/-']) > 3) | (abs(props['Recent Avg +/-']) > 3)].sort_values(['Recent Avg +/-', 'Player'])
edges.style.apply(lambda x: [ "background: green" if v > 3 else "background: red" if v < -3 else "" for v in x ], axis = 1, subset = ['Avg +/-', 'Recent Avg +/-'])'''

Unnamed: 0,Player,Prop,Over,Over Odds,Under,Under Odds,Avg,Recent Avg,Avg +/-,Recent Avg +/-,Playoff Avg,Series Avg,Playoff Avg +/-,Series Avg +/-
77,Devin Booker,Pts + Reb + Ast,38.5,-106,38.5,-120,34.059701,33.428571,-4.440299,-5.071429,37.55,35.0,-0.95,-3.5
2,Jrue Holiday,Points,18.5,-116,18.5,-106,17.728814,21.0,-0.771186,2.5,17.142857,15.25,-1.357143,-3.25
44,Jrue Holiday,Pts + Ast,26.5,-118,26.5,-108,23.779661,29.0,-2.720339,2.5,25.47619,23.25,-1.02381,-3.25
54,Jrue Holiday,Pts + Reb,24.5,-104,24.5,-122,22.271186,25.571429,-2.228814,1.071429,22.809524,21.25,-1.690476,-3.25
74,Jrue Holiday,Pts + Reb + Ast,32.5,-104,32.5,-122,28.322034,33.571429,-4.177966,1.071429,31.142857,29.25,-1.357143,-3.25
78,Chris Paul,Pts + Reb + Ast,34.5,-102,34.5,-125,29.757143,32.0,-4.742857,-2.5,31.0,32.25,-3.5,-2.25
42,Giannis Antetokounmpo,Pts + Ast,39.5,-102,39.5,-125,34.0,32.0,-5.5,-7.5,34.315789,37.75,-5.184211,-1.75
58,Chris Paul,Pts + Reb,25.5,-104,25.5,-122,20.871429,22.0,-4.628571,-3.5,22.388889,24.0,-3.111111,-1.5
0,Giannis Antetokounmpo,Points,33.5,-102,33.5,-120,28.147541,26.571429,-5.352459,-6.928571,29.052632,32.25,-4.447368,-1.25
52,Giannis Antetokounmpo,Pts + Reb,46.5,-125,46.5,-102,39.147541,36.714286,-7.352459,-9.785714,42.0,46.25,-4.5,-0.25


'edges = props.loc[(abs(props[\'Avg +/-\']) > 3) | (abs(props[\'Recent Avg +/-\']) > 3)].sort_values([\'Recent Avg +/-\', \'Player\'])\nedges.style.apply(lambda x: [ "background: green" if v > 3 else "background: red" if v < -3 else "" for v in x ], axis = 1, subset = [\'Avg +/-\', \'Recent Avg +/-\'])'

In [13]:
# Ask user for a list of players to research
interest_players = input('List of players to analyze (comma separated):')
interest_players = interest_players.split(', ')
interest_players

# Get a subtable of only relevant players
interest_df = props[props['Player'].isin(interest_players)].sort_values('Player')

# Print the relevant prop data and highlight extremes

# Postseason
interest_df.style.apply(lambda x: ["background: blue"] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = ['Over', 'Playoff Avg', 'Series Avg'])
interest_df.style.apply(lambda x: [ "background: green" if v > 1 else "background: red" if v < -1 else "" for v in x ], axis = 1, subset = ['Playoff Avg +/-', 'Series Avg +/-'])

# Regular Season
'''interest_df.style.apply(lambda x: [""] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = ['Over', 'Avg', 'Recent Avg'])
interest_df.style.apply(lambda x: [ "background: green" if v > 1 else "background: red" if v < -1 else "" for v in x ], axis = 1, subset = ['Avg +/-', 'Recent Avg +/-'])'''

['']

Unnamed: 0,Player,Prop,Over,Over Odds,Under,Under Odds,Avg,Recent Avg,Avg +/-,Recent Avg +/-,Playoff Avg,Series Avg,Playoff Avg +/-,Series Avg +/-


Unnamed: 0,Player,Prop,Over,Over Odds,Under,Under Odds,Avg,Recent Avg,Avg +/-,Recent Avg +/-,Playoff Avg,Series Avg,Playoff Avg +/-,Series Avg +/-


'interest_df.style.apply(lambda x: [""] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = [\'Over\', \'Avg\', \'Recent Avg\'])\ninterest_df.style.apply(lambda x: [ "background: green" if v > 1 else "background: red" if v < -1 else "" for v in x ], axis = 1, subset = [\'Avg +/-\', \'Recent Avg +/-\'])'

In [14]:
# Print all prop differences

# Postseason
props.style.apply(lambda x: [""] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = ['Over', 'Playoff Avg', 'Series Avg'])

# Regular Season
#props.style.apply(lambda x: [""] + [ "background: green" if v > x[0] else "background: red" for v in x[1:] ], axis = 1, subset = ['Over', 'Avg', 'Recent Avg'])

Unnamed: 0,Player,Prop,Over,Over Odds,Under,Under Odds,Avg,Recent Avg,Avg +/-,Recent Avg +/-,Playoff Avg,Series Avg,Playoff Avg +/-,Series Avg +/-
0,Giannis Antetokounmpo,Points,33.5,-102,33.5,-120,28.147541,26.571429,-5.352459,-6.928571,29.052632,32.25,-4.447368,-1.25
1,Khris Middleton,Points,25.5,-104,25.5,-118,20.367647,21.285714,-5.132353,-4.214286,23.619048,24.5,-1.880952,-1.0
2,Jrue Holiday,Points,18.5,-116,18.5,-106,17.728814,21.0,-0.771186,2.5,17.142857,15.25,-1.357143,-3.25
3,Brook Lopez,Points,10.5,-118,10.5,-102,12.271429,15.857143,1.771429,5.357143,13.285714,12.5,2.785714,2.0
4,P.J Tucker,Points,4.5,-124,4.5,102,,,,,,,,
5,Devin Booker,Points,28.5,-108,28.5,-114,25.552239,25.285714,-2.947761,-3.214286,27.1,27.5,-1.4,-1.0
6,Chris Paul,Points,20.5,-122,20.5,100,16.414286,18.285714,-4.085714,-2.214286,18.722222,21.0,-1.777778,0.5
7,Deandre Ayton,Points,15.5,-118,15.5,-104,14.449275,11.142857,-1.050725,-4.357143,15.75,14.0,0.25,-1.5
8,Jae Crowder,Points,11.5,-104,11.5,-116,10.05,10.857143,-1.45,-0.642857,10.6,11.25,-0.9,-0.25
9,Mikal Bridges,Points,10.5,-110,10.5,-110,13.472222,14.428571,2.972222,3.928571,11.2,13.0,0.7,2.5
