In [231]:
import statistics

import numpy as np
import pandas as pd

In [232]:
# Load the per-player game log. get_player_position_dict reads this frame
# through the notebook-level name `df`, so this cell must run before it is called.
# NOTE(review): the preview shows 'Unnamed: 0.1' / 'Unnamed: 0' columns, presumably
# saved indexes from earlier to_csv calls - confirm they are not needed.
df = pd.read_csv('clean_df.csv')

In [233]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,Rk,Player,Age,Pos,Date,Tm,Location,Opp,Result,...,PF,PTS,GmSc,scoring_rate,3p_scoring_rate,assist_rate,orb_rate,drb_rate,steal_rate,block_rate
0,0,1,Devin Booker\bookede01,20.397,G,2017-03-24,PHO,Away,BOS,L,...,3,70,54.5,1.555556,0.088889,0.133333,0.044444,0.133333,0.066667,0.022222
1,1,2,Carmelo Anthony\anthoca01,29.658,F,2014-01-24,NYK,Home,CHA,W,...,1,62,50.6,1.589744,0.153846,0.0,0.025641,0.307692,0.0,0.0
2,2,3,LeBron James\jamesle01,29.173,F-G,2014-03-03,MIA,Home,CHA,W,...,2,61,48.8,1.487805,0.195122,0.097561,0.073171,0.097561,0.0,0.0
3,3,4,Kobe Bryant\bryanko01,37.641,G-F,2016-04-13,LAL,Home,UTA,W,...,1,60,36.3,1.428571,0.142857,0.095238,0.0,0.095238,0.02381,0.02381
4,4,5,James Harden\hardeja01,28.43,G,2018-01-30,HOU,Home,ORL,W,...,2,60,56.6,1.304348,0.108696,0.23913,0.043478,0.173913,0.086957,0.021739


In [234]:
def get_player_position_dict(team, season_start='2017-10-17', data=None):
    """Map each of a team's players to the position listed for them.

    Parameters
    ----------
    team : str
        Team code matched against the ``Tm`` column (e.g. ``'GSW'``).
    season_start : str, optional
        Earliest ``Date`` value (inclusive) a row may have to be considered.
        Defaults to ``'2017-10-17'``.
    data : pandas.DataFrame, optional
        Table with ``Tm``, ``Date``, ``Player`` and ``Pos`` columns. When
        omitted, the notebook-level ``df`` is used.

    Returns
    -------
    dict
        ``{player: position}`` with keys inserted in ascending player order.
        If a player appears with more than one position only one entry
        survives (the row that comes last after sorting by player).
    """
    games = df if data is None else data

    # The cutoff is a date-only string on purpose: when ``Date`` holds plain
    # 'YYYY-MM-DD' strings (as the CSV preview suggests), comparing against
    # '2017-10-17 00:00:00' is lexicographic and silently excludes rows dated
    # exactly '2017-10-17', because the shorter string sorts first.
    in_scope = (games['Tm'] == team) & (games['Date'] >= season_start)

    roster = (
        games.loc[in_scope, ['Player', 'Pos']]
        .drop_duplicates()
        .sort_values(by='Player')
    )
    return dict(zip(roster['Player'], roster['Pos']))


In [236]:
# Build the player -> position mapping for one team and display it.
player_dict = get_player_position_dict('GSW')
player_dict

{'Andre Iguodala\\iguodan01': 'G-F',
 'Chris Boucher\\bouchch01': 'F',
 'Damian Jones\\jonesda03': 'F-C',
 'David West\\westda01': 'F-C',
 'Draymond Green\\greendr01': 'F',
 'JaVale McGee\\mcgeeja01': 'C',
 'Jordan Bell\\belljo01': 'F',
 'Kevin Durant\\duranke01': 'F-G',
 'Kevon Looney\\looneke01': 'F-C',
 'Klay Thompson\\thompkl01': 'G-F',
 'Nick Young\\youngni01': 'G-F',
 'Omri Casspi\\casspom01': 'F',
 'Patrick McCaw\\mccawpa01': 'G',
 'Quinn Cook\\cookqu01': 'G',
 'Shaun Livingston\\livinsh01': 'G',
 'Stephen Curry\\curryst01': 'G',
 'Zaza Pachulia\\pachuza01': 'C'}

In [238]:
def score_sim(mp_mean, scoring_rate_mean, mp_std, scoring_rate_std, away_PTS_conceded_mean, away_PTS_conceded_std, ns):
    """Simulate ``ns`` point totals for one player.

    Each simulated total averages two independent estimates: the player's own
    output (minutes played * scoring rate) and the points the opponent
    concedes to that position.

    Parameters
    ----------
    mp_mean, mp_std : float
        Mean / standard deviation of the normal draw for minutes played.
    scoring_rate_mean, scoring_rate_std : float
        Mean / standard deviation of the normal draw for the scoring rate.
    away_PTS_conceded_mean, away_PTS_conceded_std : float
        Mean / standard deviation of the normal draw for points conceded by
        the opponent.
    ns : int
        Number of simulations to run.

    Returns
    -------
    list
        ``ns`` entries. Each is an ``int`` (truncated, since fractional points
        cannot be scored) or the string ``'NA'`` when the draw could not be
        converted to an integer (NaN or infinite inputs).

    Notes
    -----
    Draws come from the global ``np.random`` state, so results are only
    reproducible if the caller seeds it beforehand.
    """
    score_list = []

    for _ in range(ns):
        # Draw order (minutes, scoring rate, conceded points) is kept fixed so
        # that a seeded run produces the same sequence of values.
        minutes_played = np.random.normal(mp_mean, mp_std)
        scoring_effectiveness = np.random.normal(scoring_rate_mean, scoring_rate_std)
        positional_conceding = np.random.normal(away_PTS_conceded_mean, away_PTS_conceded_std)

        # Negative draws are meaningless here, so clamp them to 0.
        # NaN is left untouched because ``nan <= 0`` is False.
        minutes_played, scoring_effectiveness, positional_conceding = (
            0 if draw <= 0 else draw
            for draw in (minutes_played, scoring_effectiveness, positional_conceding)
        )

        try:
            player_score = int((minutes_played * scoring_effectiveness + positional_conceding) / 2)
        except (ValueError, OverflowError):
            # int() raises ValueError on NaN and OverflowError on infinity.
            # Anything else (e.g. KeyboardInterrupt) should propagate.
            player_score = 'NA'

        score_list.append(player_score)

    return score_list

In [241]:
def convert_to_line(score):
    """Return the median of a list of simulated scores.

    Parameters
    ----------
    score : list
        Simulated scores. String placeholders (the ``'NA'`` entries that
        ``score_sim`` emits for unusable draws) are ignored.

    Returns
    -------
    int or float
        The median of the numeric entries (the mean of the two middle values
        when their count is even), or ``nan`` if there are none.

    Notes
    -----
    The input list is not modified.
    """
    # Rebuilding the values from a frequency table yields the same multiset,
    # so the median can be taken directly from the numeric entries.
    numeric_scores = [value for value in score if not isinstance(value, str)]

    if not numeric_scores:
        return float('nan')

    return statistics.median(numeric_scores)

In [242]:
def _player_score_lines(df, team, opponent, location, ns):
    """Simulate a median score line for each player of ``team`` at ``location`` against ``opponent``."""
    # The roster lookup reads the notebook-level ``df``, not the frame passed here.
    position_by_player = get_player_position_dict(team)

    # Per-player mean / std of minutes and scoring rate at this location.
    # Only the numeric columns that are needed get aggregated: averaging the
    # whole frame raises TypeError on pandas >= 2.0 because of text columns.
    team_games = df.loc[(df['Tm'] == team) & (df['Location'] == location)]
    player_stats = team_games.groupby('Player')[['MP', 'scoring_rate']].agg(['mean', 'std'])
    player_stats.columns = ['MP_mean', 'MP_std', 'scoring_rate_mean', 'scoring_rate_std']
    player_stats = player_stats.reset_index()
    player_stats['Pos'] = player_stats['Player'].map(position_by_player)
    # Drops players missing from the roster mapping and players with an
    # undefined statistic (e.g. the std of a single game is NaN).
    player_stats = player_stats.dropna()

    # Points scored against the opponent by players at this location, per position.
    games_vs_opponent = df.loc[(df['Opp'] == opponent) & (df['Location'] == location)]
    conceded = games_vs_opponent.groupby('Pos')['PTS'].agg(['mean', 'std']).reset_index()
    conceded.columns = ['Pos', 'PTS_conceded_mean', 'PTS_conceded_std']

    # Inner join: players whose position has no conceded-points data are dropped.
    lines = pd.merge(player_stats, conceded, on='Pos')

    # A list comprehension (rather than DataFrame.apply with axis=1) also
    # works when no rows survive the joins.
    lines['score_line'] = [
        convert_to_line(
            score_sim(
                row.MP_mean,
                row.scoring_rate_mean,
                row.MP_std,
                row.scoring_rate_std,
                row.PTS_conceded_mean,
                row.PTS_conceded_std,
                ns,
            )
        )
        for row in lines.itertuples(index=False)
    ]

    return lines[['Player', 'score_line']]


def simulation(df, home_team, away_team, ns):
    """Simulate per-player score lines for both teams of a matchup.

    Parameters
    ----------
    df : pandas.DataFrame
        Game log with ``Tm``, ``Opp``, ``Location``, ``Player``, ``Pos``,
        ``MP``, ``scoring_rate`` and ``PTS`` columns.
    home_team, away_team : str
        Team codes matched against ``Tm`` / ``Opp``.
    ns : int
        Number of simulations per player.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(home, away)``; each frame has the columns ``Player`` and
        ``score_line`` (the median simulated score).

    Notes
    -----
    Home players are simulated from their ``'Home'`` games and the points the
    away team concedes to home players; away players from their ``'Away'``
    games and the points the home team concedes to away players. The home
    side is simulated first, then the away side.
    """
    home_lines = _player_score_lines(df, home_team, away_team, 'Home', ns)
    away_lines = _player_score_lines(df, away_team, home_team, 'Away', ns)
    return home_lines, away_lines
      

In [245]:
%%time
# Run 10,000 simulations per player with GSW as the home team and TOR as the away team.
# score_sim draws from np.random and no seed is set in this notebook, so the
# resulting lines can differ from run to run.
eg1 = simulation(df,'GSW', 'TOR', 10000)

CPU times: user 37.8 s, sys: 294 ms, total: 38.1 s
Wall time: 42.6 s


In [246]:
# simulation returns (home, away); show the home team's score lines.
eg1[0]

Unnamed: 0,Player,score_line
0,Andre Iguodala\iguodan01,8.0
1,Klay Thompson\thompkl01,15.0
2,Nick Young\youngni01,8.0
3,Damian Jones\jonesda03,4.0
4,David West\westda01,6.0
5,Kevon Looney\looneke01,5.0
6,Draymond Green\greendr01,9.0
7,Jordan Bell\belljo01,7.0
8,Omri Casspi\casspom01,7.0
9,JaVale McGee\mcgeeja01,7.0
