In [1]:
#initial match predictor for NBA games

In [None]:
#lets start with individual player metrics. 
from sqlalchemy import create_engine, text
import pandas as pd

#create our engine for creating sql entries - this wont work right now 
from dotenv import load_dotenv
import os

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

engine = create_engine(DATABASE_URL)

team_name = 'Atlanta Hawks'

#retreive players we want stats for 
#take those players stats from the advanced table
with engine.connect() as conn:
    with conn.begin():
        queries = {
            'hawks_roster_estimates': """
            	select t."TEAM_NAME", p.*
            	from "all_teams_misc_stats" t
            	join "all_players_estimated_stats" p
            	on t."TEAM_ID" = p."TEAM_ID"
            	where t."TEAM_NAME" = :team_name
            """,
            'hawks_roster_advanced': """
            	select t."TEAM_NAME", p.*
                from "all_teams_misc_stats" t
                join "all_players_advanced_stats" p
                on t."TEAM_ID" = p."TEAM_ID"
                where t."TEAM_NAME" = :team_name
            """
        }
        #starters
        dfs = {name: pd.read_sql_query(text(query), engine, params={'team_name': team_name})
              for name, query in queries.items()}
        #players with over 25 minutes of game time
        dfs['all_star_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 25)]
        
        #rotation players 
        dfs['key_rotation_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 15) & 
            (dfs['hawks_roster_advanced']['MIN'] < 25)]


#get key stats for players we want 
player_weights = {
    'PIE' : 0.075,
    'USG' : 0.075,
    'PlusMinus' : .1,
    'TrueShooting' : .15,
    'TurnoverRate' : .15,
    'RecentGames' : .1,
    'MinutesConsistency' : .1
}

#get high impact stats - player impact rating, net rating, usg pct, plus/minus adjsuted for minutes
#get efficiency stats - true shooting, turnover rate 
#get availability - least important


#stats for all star players
dfs['all_star_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['all_star_player'][['PLAYER_ID','PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP']]#these are the columns we are adding from all_star_player
)[['PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP']] #these are the columns we are keeping from the final dataframe

#stats for key rotation players
dfs['key_rotation_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['key_rotation_player'][['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP']]
)[['PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP']]

#apply weights to key points 
#star players 1x weight, key rotation .75 weight 
#bench .5 weight 

In [148]:
pd.set_option('display.max_columns', 90)
dfs['hawks_roster_estimates']

print(dfs['hawks_roster_estimates'].shape)
print(dfs['hawks_roster_estimates'].columns)

print(dfs['all_star_player'].shape)
print(dfs['all_star_player'].columns)

(17, 35)
Index(['TEAM_NAME', 'index', 'PLAYER_ID', 'PLAYER_NAME', 'GP', 'W', 'L',
       'W_PCT', 'MIN', 'E_OFF_RATING', 'E_DEF_RATING', 'E_NET_RATING',
       'E_AST_RATIO', 'E_OREB_PCT', 'E_DREB_PCT', 'E_REB_PCT', 'E_TOV_PCT',
       'E_USG_PCT', 'E_PACE', 'GP_RANK', 'W_RANK', 'L_RANK', 'W_PCT_RANK',
       'MIN_RANK', 'E_OFF_RATING_RANK', 'E_DEF_RATING_RANK',
       'E_NET_RATING_RANK', 'E_AST_RATIO_RANK', 'E_OREB_PCT_RANK',
       'E_DREB_PCT_RANK', 'E_REB_PCT_RANK', 'E_TOV_PCT_RANK', 'E_USG_PCT_RANK',
       'E_PACE_RANK', 'TEAM_ID'],
      dtype='object')
(4, 80)
Index(['TEAM_NAME', 'index', 'PLAYER_ID', 'PLAYER_NAME', 'NICKNAME', 'TEAM_ID',
       'TEAM_ABBREVIATION', 'AGE', 'GP', 'W', 'L', 'W_PCT', 'MIN',
       'E_OFF_RATING', 'OFF_RATING', 'sp_work_OFF_RATING', 'E_DEF_RATING',
       'DEF_RATING', 'sp_work_DEF_RATING', 'E_NET_RATING', 'NET_RATING',
       'sp_work_NET_RATING', 'AST_PCT', 'AST_TO', 'AST_RATIO', 'OREB_PCT',
       'DREB_PCT', 'REB_PCT', 'TM_TOV_PCT', 'E_TOV_PCT

In [150]:
pd.set_option('display.max_columns', 90)
dfs['all_star_player']

Unnamed: 0,TEAM_NAME,index,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM,FGA,FGM_PG,FGA_PG,FG_PCT,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK
4,Atlanta Hawks,158,1630700,Dyson Daniels,Dyson,1610612737,ATL,21.0,50,24,26,0.48,34.0,110.4,111.3,111.3,113.4,114.6,114.6,-2.9,-3.3,-3.3,0.16,2.16,21.1,0.046,0.113,0.078,9.8,9.7,0.52,0.531,0.174,0.178,105.59,104.62,87.19,104.62,0.091,3695,290,612,5.8,12.2,0.474,82,134,434,290,39,229,213,213,391,412,412,314,317,317,192,154,171,211,295,277,269,270,288,363,248,252,54,107,107,244,64,60,87,90,178
7,Atlanta Hawks,223,1630552,Jalen Johnson,Jalen,1610612737,ATL,23.0,36,18,18,0.5,35.7,111.3,111.8,111.8,110.1,110.2,110.2,1.2,1.6,1.6,0.203,1.71,20.5,0.046,0.216,0.132,12.0,12.0,0.541,0.569,0.214,0.22,106.98,106.68,88.9,106.68,0.135,2858,271,542,7.5,15.1,0.5,265,227,280,249,17,207,200,200,244,239,239,211,195,195,115,266,178,212,50,92,394,391,219,237,141,142,26,43,43,59,71,81,43,53,121
12,Atlanta Hawks,407,1630168,Onyeka Okongwu,Onyeka,1610612737,ATL,24.0,48,22,26,0.458,25.5,111.7,112.2,112.2,112.6,113.2,113.2,-0.9,-1.0,-1.0,0.12,2.06,16.4,0.105,0.204,0.153,8.0,8.0,0.599,0.628,0.166,0.17,106.16,105.65,88.04,105.65,0.123,2694,232,405,4.8,8.4,0.573,114,172,434,314,165,193,186,186,348,356,356,258,258,258,275,181,293,47,67,48,135,136,89,74,277,276,42,71,71,95,98,141,120,158,49
14,Atlanta Hawks,489,1629027,Trae Young,Trae,1610612737,ATL,26.0,51,24,27,0.471,36.2,112.1,113.1,113.1,112.1,112.9,112.9,-0.1,0.2,0.2,0.452,2.38,30.8,0.013,0.072,0.042,12.9,12.9,0.489,0.561,0.281,0.288,107.08,106.21,88.51,106.21,0.124,4091,373,917,7.3,18.0,0.407,64,134,447,297,12,182,158,158,325,347,347,242,228,228,2,122,33,475,478,503,430,429,395,257,35,30,25,54,54,90,33,11,48,25,396


In [152]:
pd.set_option('display.max_columns', 90)
dfs['key_rotation_player']

Unnamed: 0,TEAM_NAME,index,PLAYER_ID,PLAYER_NAME,NICKNAME,TEAM_ID,TEAM_ABBREVIATION,AGE,GP,W,L,W_PCT,MIN,E_OFF_RATING,OFF_RATING,sp_work_OFF_RATING,E_DEF_RATING,DEF_RATING,sp_work_DEF_RATING,E_NET_RATING,NET_RATING,sp_work_NET_RATING,AST_PCT,AST_TO,AST_RATIO,OREB_PCT,DREB_PCT,REB_PCT,TM_TOV_PCT,E_TOV_PCT,EFG_PCT,TS_PCT,USG_PCT,E_USG_PCT,E_PACE,PACE,PACE_PER40,sp_work_PACE,PIE,POSS,FGM,FGA,FGM_PG,FGA_PG,FG_PCT,GP_RANK,W_RANK,L_RANK,W_PCT_RANK,MIN_RANK,E_OFF_RATING_RANK,OFF_RATING_RANK,sp_work_OFF_RATING_RANK,E_DEF_RATING_RANK,DEF_RATING_RANK,sp_work_DEF_RATING_RANK,E_NET_RATING_RANK,NET_RATING_RANK,sp_work_NET_RATING_RANK,AST_PCT_RANK,AST_TO_RANK,AST_RATIO_RANK,OREB_PCT_RANK,DREB_PCT_RANK,REB_PCT_RANK,TM_TOV_PCT_RANK,E_TOV_PCT_RANK,EFG_PCT_RANK,TS_PCT_RANK,USG_PCT_RANK,E_USG_PCT_RANK,E_PACE_RANK,PACE_RANK,sp_work_PACE_RANK,PIE_RANK,FGM_RANK,FGA_RANK,FGM_PG_RANK,FGA_PG_RANK,FG_PCT_RANK
0,Atlanta Hawks,77,1627747,Caris LeVert,Caris,1610612737,ATL,30.0,41,35,6,0.854,24.1,121.9,122.0,122.0,110.1,111.1,111.1,11.8,10.9,10.9,0.191,2.81,26.0,0.028,0.095,0.063,9.2,9.3,0.561,0.588,0.179,0.183,103.54,103.03,85.86,103.03,0.092,2125,151,328,3.7,8.0,0.46,210,20,96,21,193,18,18,18,246,275,275,44,45,45,134,71,80,347,373,376,234,237,168,175,233,240,114,153,153,236,186,184,177,181,223
1,Atlanta Hawks,88,203991,Clint Capela,Clint,1610612737,ATL,30.0,45,22,23,0.489,22.4,107.3,108.1,108.1,109.3,109.6,109.6,-2.0,-1.5,-1.5,0.074,1.23,11.6,0.13,0.242,0.184,9.5,9.5,0.573,0.577,0.16,0.164,106.08,105.56,87.97,105.56,0.12,2218,189,330,4.2,7.3,0.573,161,172,383,282,217,340,311,311,216,217,217,288,274,274,416,388,426,16,35,20,250,250,130,209,302,308,45,75,75,102,139,182,152,210,50
2,Atlanta Hawks,105,1631342,Daeqwon Plowden,Daeqwon,1610612737,ATL,26.0,2,1,1,0.5,16.4,96.7,97.2,97.2,84.5,80.6,80.6,12.2,16.7,16.7,0.0,0.0,0.0,0.075,0.057,0.067,8.3,8.4,1.1,1.103,0.15,0.15,103.43,105.63,88.03,105.63,0.18,72,8,10,4.0,5.0,0.8,516,501,8,249,314,493,485,485,11,15,15,41,19,19,513,502,513,100,511,352,159,166,2,2,347,374,123,72,72,9,461,484,161,303,5
5,Atlanta Hawks,171,1629726,Garrison Mathews,Garrison,1610612737,ATL,28.0,43,18,25,0.419,17.7,108.0,107.3,107.3,111.5,111.2,111.2,-3.5,-3.9,-3.9,0.104,1.93,16.0,0.016,0.082,0.048,8.3,8.2,0.553,0.601,0.154,0.162,105.42,105.9,88.25,105.9,0.067,1684,92,236,2.1,5.5,0.39,188,227,410,349,287,312,337,337,302,284,284,324,335,335,321,212,306,453,440,476,157,154,188,128,327,318,58,62,62,398,276,245,319,278,428
6,Atlanta Hawks,175,1627777,Georges Niang,Georges,1610612737,ATL,31.0,54,43,11,0.796,21.1,119.0,118.4,118.4,109.1,110.1,110.1,9.9,8.3,8.3,0.087,1.48,14.2,0.032,0.135,0.085,9.6,9.6,0.608,0.619,0.164,0.168,101.82,101.64,84.7,101.64,0.076,2423,182,381,3.4,7.1,0.478,10,4,164,45,231,41,50,50,209,237,237,58,76,76,371,340,364,314,221,244,257,260,77,95,284,287,232,234,234,343,145,156,206,222,167
8,Atlanta Hawks,306,1630811,Keaton Wallace,Keaton,1610612737,ATL,25.0,24,10,14,0.417,17.3,106.5,104.9,104.9,109.0,109.0,109.0,-2.6,-4.1,-4.1,0.186,1.96,26.1,0.011,0.067,0.04,13.3,13.3,0.516,0.54,0.153,0.16,102.69,103.46,86.22,103.46,0.069,898,52,123,2.2,5.1,0.423,363,347,207,351,296,361,395,395,207,201,201,307,344,344,140,207,79,482,493,511,439,439,304,324,329,326,156,138,138,387,334,328,310,296,351
10,Atlanta Hawks,341,1626204,Larry Nance Jr.,Larry,1610612737,ATL,32.0,24,10,14,0.417,19.3,107.7,107.5,107.5,116.1,116.5,116.5,-8.4,-9.0,-9.0,0.125,2.38,17.7,0.046,0.174,0.108,7.4,7.5,0.626,0.632,0.155,0.16,104.86,104.76,87.3,104.76,0.106,1009,80,155,3.3,6.5,0.516,363,347,207,351,262,327,330,330,464,468,468,436,447,447,267,123,252,205,117,161,110,114,50,70,317,325,73,103,103,162,287,305,210,247,100
11,Atlanta Hawks,386,1631243,Mouhamed Gueye,Mouhamed,1610612737,ATL,22.0,8,2,6,0.25,15.2,99.2,98.8,98.8,96.8,95.3,95.3,2.5,3.5,3.5,0.064,2.5,7.8,0.053,0.2,0.124,3.1,3.1,0.407,0.44,0.205,0.214,100.77,101.75,84.79,101.75,0.114,257,20,54,2.5,6.8,0.37,470,483,96,451,330,483,474,474,32,32,32,181,154,154,465,103,488,177,73,108,27,27,474,473,159,157,332,227,227,120,418,405,274,235,449
13,Atlanta Hawks,479,1629611,Terance Mann,Terance,1610612737,ATL,28.0,40,23,17,0.575,19.7,107.4,107.8,107.8,112.7,112.7,112.7,-5.3,-4.9,-4.9,0.117,2.5,20.6,0.044,0.103,0.073,8.2,8.2,0.517,0.535,0.133,0.136,100.18,99.99,83.32,99.99,0.065,1636,93,208,2.3,5.2,0.447,221,150,265,169,254,337,322,322,351,335,335,367,368,368,285,103,177,222,334,307,153,155,301,346,419,425,374,363,363,410,272,260,296,294,263
15,Atlanta Hawks,518,1630249,Vít Krejčí,Vít,1610612737,ATL,24.0,39,18,21,0.462,20.8,106.1,106.6,106.6,110.2,110.1,110.1,-4.1,-3.5,-3.5,0.181,3.0,28.5,0.017,0.113,0.064,9.5,9.5,0.577,0.587,0.135,0.137,103.52,103.31,86.09,103.31,0.077,1737,96,215,2.5,5.5,0.447,230,227,348,306,237,373,365,365,253,236,236,342,323,323,149,48,46,443,292,370,252,252,122,177,414,420,115,144,144,337,263,255,279,277,267


In [154]:
pd.set_option('display.max_columns', 90)
dfs['all_star_estimates']

Unnamed: 0,PLAYER_NAME,PIE,E_NET_RATING,E_USG_PCT,TS_PCT,E_TOV_PCT,MIN,NET_RATING,GP
0,Trae Young,0.124,-0.1,0.288,0.561,12.924,36.2,0.2,51
1,Jalen Johnson,0.135,1.2,0.22,0.569,11.985,35.7,1.6,36
2,Onyeka Okongwu,0.123,-0.9,0.17,0.628,7.981,25.5,-1.0,48
3,Dyson Daniels,0.091,-2.9,0.178,0.531,9.742,34.0,-3.3,50


In [156]:
pd.set_option('display.max_columns', 90)
dfs['key_rotation_estimates']

Unnamed: 0,PLAYER_NAME,PIE,E_NET_RATING,E_USG_PCT,TS_PCT,E_TOV_PCT,MIN,NET_RATING,GP
0,Clint Capela,0.12,-2.0,0.164,0.577,9.454,22.4,-1.5,45
1,Garrison Mathews,0.067,-3.5,0.162,0.601,8.182,17.7,-3.9,43
2,Larry Nance Jr.,0.106,-8.4,0.16,0.632,7.452,19.3,-9.0,24
3,Vít Krejčí,0.077,-4.1,0.137,0.587,9.506,20.8,-3.5,39
4,Daeqwon Plowden,0.18,12.2,0.15,1.103,8.418,16.4,16.7,2
5,Keaton Wallace,0.069,-2.6,0.16,0.54,13.28,17.3,-4.1,24
6,Mouhamed Gueye,0.114,2.5,0.214,0.44,3.079,15.2,3.5,8
7,Zaccharie Risacher,0.063,-3.7,0.203,0.526,8.073,23.8,-3.7,48


In [None]:
#lets start with individual player metrics. 
from sqlalchemy import create_engine, text
import pandas as pd

#create our engine for creating sql entries - this wont work right now 
from dotenv import load_dotenv
import os

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

engine = create_engine(DATABASE_URL)

team_name = 'Atlanta Hawks'

#retreive players we want stats for 
#take those players stats from the advanced table
with engine.connect() as conn:
    with conn.begin():
        queries = {
            'hawks_roster_estimates': """
            	SELECT 
                	t."TEAM_NAME",
                	p.*,
                	c."CLUTCH_SCORE_PCT",
                	c."CLUTCH_USAGE_RATE",
                	c."CLUTCH_NET_RATING",
                	m."GP" as TOTAL_GAMES,
                	m."MIN" as TOTAL_MINUTES,
                	m."PTS" as AVG_POINTS,
                	m."AST" as AVG_ASSISTS
                FROM "all_teams_misc_stats" t
                JOIN "all_players_estimated_stats" p
                	ON t."TEAM_ID" = p."TEAM_ID"
                LEFT JOIN "player_clutch_stats" c
                	ON p."PLAYER_ID" = c."PLAYER_ID"
                LEFT JOIN "all_players_misc_stats" m
                	ON p."PLAYER_ID" = m."PLAYER_ID"
                WHERE t."TEAM_NAME" = :team_name

            """,
            'hawks_roster_advanced': """
            	SELECT 
                	t."TEAM_NAME",
                	p.*,
                	c."CLUTCH_SCORE_PCT",
                	c."CLUTCH_USAGE_RATE",
                	c."CLUTCH_NET_RATING",
                	m."GP" as TOTAL_GAMES,
                	m."MIN" as TOTAL_MINUTES
                FROM "all_teams_misc_stats" t
                JOIN "all_players_advanced_stats" p
                	ON t."TEAM_ID" = p."TEAM_ID"
                LEFT JOIN "player_clutch_stats" c
                	ON p."PLAYER_ID" = c."PLAYER_ID"
                LEFT JOIN "all_players_misc_stats" m
                	ON p."PLAYER_ID" = m."PLAYER_ID"
                WHERE t."TEAM_NAME" = :team_name
            """
        }
        #starters
        dfs = {name: pd.read_sql_query(text(query), engine, params={'team_name': team_name})
              for name, query in queries.items()}
        #players with over 25 minutes of game time
        dfs['all_star_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 25)]
        
        #rotation players 
        dfs['key_rotation_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 15) & 
            (dfs['hawks_roster_advanced']['MIN'] < 25)]

#get high impact stats - player impact rating, net rating, usg pct, plus/minus adjsuted for minutes
#get efficiency stats - true shooting, turnover rate 
#get availability - least important

#stats for all star players
dfs['all_star_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['all_star_player'][['PLAYER_ID','PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]#these are the columns we are adding from all_star_player
)[['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']] #these are the columns we are keeping from the final dataframe

#stats for key rotation players
dfs['key_rotation_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['key_rotation_player'][['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]
)[['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]

#apply weights to key points 
#star players 1x weight, key rotation .75 weight 
#bench .5 weight 

In [None]:
#lets start with individual player metrics. 
from sqlalchemy import create_engine, text
import pandas as pd
from dotenv import load_dotenv
import os

load_dotenv()

star_players = dfs['all_star_estimates']
key_rotation_players = dfs['key_rotation_estimates']

def calculate_recent_scores(player_data):
    player_id = player_data['PLAYER_ID']
    name = player_data['PLAYER_NAME']
        
    #create our engine for creating sql entries - this wont work right now 


    DATABASE_URL = os.getenv("DATABASE_URL")

    engine = create_engine(DATABASE_URL)
        
    with engine.connect() as conn:
        with conn.begin():
            query10 = text("""
                with recent_games as (
                	select *,
                		ROW_NUMBER() OVER (
                			PARTITION BY "PLAYER_ID"
                			ORDER BY "GAME_DATE" DESC
                		) as game_number
                	from "all_player_game_stats"
                )
                select
                	"PLAYER_ID",
                    AVG("PTS") as "AVG_PTS",
                    AVG("PLUS_MINUS") as "AVG_PLUS_MINUS",
                    AVG("MIN") as "AVG_MIN",
                    AVG("FGA") as "AVG_FGA", 
                    AVG("FTA") as "AVG_FTA",
                    AVG("TOV") as "AVG_TOV",
                    AVG("OREB") as "AVG_OREB",
                    COUNT(*) as "GAMES_PLAYED"
                from recent_games
                where "PLAYER_ID" = :player_id
                and game_number <= 10
                GROUP BY "PLAYER_ID"
            """)
            query5 = text("""
                with recent_games as (
                	select *,
                		ROW_NUMBER() OVER (
                			PARTITION BY "PLAYER_ID"
                			ORDER BY "GAME_DATE" DESC
                		) as game_number
                	from "all_player_game_stats"
                )
                select
                	"PLAYER_ID",
                    AVG("PTS") as "AVG_PTS",
                    AVG("PLUS_MINUS") as "AVG_PLUS_MINUS",
                    AVG("MIN") as "AVG_MIN",
                    AVG("FGA") as "AVG_FGA", 
                    AVG("FTA") as "AVG_FTA",
                    AVG("TOV") as "AVG_TOV",
                    AVG("OREB") as "AVG_OREB",
                    COUNT(*) as "GAMES_PLAYED"
                from recent_games
                where "PLAYER_ID" = :player_id
                and game_number <= 5
                GROUP BY "PLAYER_ID"
            """)
            recent_games = pd.read_sql_query(query10, engine, params={'player_id': player_id})
            more_recent_games = pd.read_sql_query(query5, engine, params={'player_id': player_id})

    #now that we have the recent games for this player, calculate net rating
    possessions = 0.96 * recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] - recent_games['AVG_OREB'].values[0] + recent_games['AVG_TOV'].values[0]

    offensive_rating = (recent_games['AVG_PTS'].values[0] / possessions) 
    defensive_rating = (recent_games['AVG_PTS'].values[0] - recent_games['AVG_PLUS_MINUS'].values[0]) / possessions
    net_ratingTen = offensive_rating - defensive_rating

    #-----------------------------
    possessionsFive = 0.96 * more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] - more_recent_games['AVG_OREB'].values[0] + more_recent_games['AVG_TOV'].values[0]

    offensive_ratingFive = (more_recent_games['AVG_PTS'].values[0] / possessions) 
    defensive_ratingFive = (more_recent_games['AVG_PTS'].values[0] - more_recent_games['AVG_PLUS_MINUS'].values[0]) / possessions
    net_ratingFive= offensive_rating - defensive_rating

    #calculate true shooting pct
    true_shootingTen = recent_games['AVG_PTS'].values[0] /  (2 * (recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0]))
    #-----------------------------
    true_shootingFive = more_recent_games['AVG_PTS'].values[0] /  (2 * (more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0]))
    
    #calculate usage rate
    usage_rateTen = ((recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] + recent_games['AVG_TOV'].values[0]) / recent_games['AVG_MIN'].values[0])
    #-----------------------------
    usage_rateFive = ((more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] + more_recent_games['AVG_TOV'].values[0]) / more_recent_games['AVG_MIN'].values[0])

    #calculate turnover rate
    turnover_rateTen = (recent_games['AVG_TOV'].values[0] / (recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] + recent_games['AVG_TOV'].values[0]))
    #-----------------------------
    turnover_rateFive = (more_recent_games['AVG_TOV'].values[0] / (more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] + more_recent_games['AVG_TOV'].values[0]))

    #calculate minutes per game
    minutes_per_gameTen = recent_games['AVG_MIN'].values[0] / recent_games['GAMES_PLAYED'].values[0]
    #-----------------------------
    minutes_per_gameFive = more_recent_games['AVG_MIN'].values[0] / more_recent_games['GAMES_PLAYED'].values[0]

    # print(net_ratingTen)
    # print(true_shootingTen)
    # print(usage_rateTen)
    # print(turnover_rateTen)
    # print(minutes_per_gameTen)
    # print()
    recentFormTen = (0.4 * net_ratingTen) + (0.2 * true_shootingTen) + (0.15 * usage_rateTen) + (0.1 * turnover_rateTen) + (0.15 * minutes_per_gameTen)

    # print(net_ratingFive)
    # print(true_shootingFive)
    # print(usage_rateFive)
    # print(turnover_rateFive)
    # print(minutes_per_gameFive)
    # print()
    recentFormFive = (0.4 * net_ratingFive) + (0.2 * true_shootingFive) + (0.15 * usage_rateFive) + (0.1 * turnover_rateFive) + (0.15 * minutes_per_gameFive)

    finalScore = (0.6 * recentFormTen) + (0.4 * recentFormFive)
    # print(finalScore)
    # print()
    return finalScore

# for index, player in star_players.iterrows():
#     calculate_recent_scores(player)

In [106]:
import math
#now we want to create functions that calculate the final scores for each playerb based on their respective stats 

star_players = dfs['all_star_estimates']
key_rotation_players = dfs['key_rotation_estimates']

# player_weights = {
#     'PIE' : 0.075,
#     'USG' : 0.075,
#     'PlusMinus' : .1,
#     'TrueShooting' : .15,
#     'TurnoverRate' : .15,
#     'Availability' : .1,
#     'MinutesConsistency' : .1
# }
def calculate_player_metrics(star_players, key_rotation_players):
    primary_weights = {
        'PIE' : 0.175,
        'USG' : 0.125,
        'NetRating' : .1
    }
    
    secondary_weights = {
        'TrueShooting' : .125,
        'TurnoverRate' : .05,
        'Availability' : .1,
        'Mins' : .15 #this can be adjusted to chnage to minutes consistency 
    }
    clutch_weights = {
        'CLUTCH_SCORE_PCT' : .5,
        'CLUTCH_USAGE_RATE' : .25,
        'CLUTCH_NET_RATING' : .25
    }

    def calculate_primary_scores(player_data):
        score = 0
        score += float(player_data['PIE']) * primary_weights['PIE']
        score += float(player_data['E_USG_PCT']) * primary_weights['USG']
        
        #adjust the net rating based on how many games have been played
        netRating = player_data['NET_RATING'] * primary_weights['NetRating']
        GP = player_data['GP']
        a = 0.75
        #this is the adjusted net rating based on number of games played
        #note that when we make this for a full team performance, we need to get the number of games the team has played
        netRating = netRating * (GP/54) ** a
        score += netRating
        return score
    
    def calculate_secondary_scores(player_data):
        score = 0
        score += float(player_data['TS_PCT']) * secondary_weights['TrueShooting']
        score += float(player_data['E_TOV_PCT']) * secondary_weights['TurnoverRate']
        #player minutes should be split between trends in recent games and season averages
        score += player_data['MIN'] * secondary_weights['Mins'] #in the future we want to make thier average just part of it
        
        # score += availability * secondary_weights['AVAILABILITY']
        return score

    def calculate_clutch_scores(player_data):
        score = 0
        #get raw stats
        scorePct = float(player_data['CLUTCH_SCORE_PCT']) * clutch_weights['CLUTCH_SCORE_PCT']
        usgRate = float(player_data['CLUTCH_USAGE_RATE']) * clutch_weights['CLUTCH_USAGE_RATE']
        netRating = float(player_data['CLUTCH_NET_RATING']) * clutch_weights['CLUTCH_NET_RATING']
        #setup adjustment constants 
        GP = player_data['GP']
        a = 0.75
        #scale scores to avoid small sample outliers 
        score += scorePct * (GP/54) ** a
        score += usgRate * (GP/54) ** a
        score += netRating * (GP/54) ** a

        if math.isnan(score):
            return 0
        else:
            return score
    
    def calculate_player_scores(players_df, baseWeight = 0.5, clutchWeight = 0.3, recentWeight = 0.2):
        scores = []
        for index, player in players_df.iterrows():
            
            
            primaryScore = calculate_primary_scores(player)
            secondaryScore = calculate_secondary_scores(player)
            clutchScore = calculate_clutch_scores(player)
            recentScore = calculate_recent_scores(player)
    
            final_score = (primaryScore + secondaryScore) * baseWeight
            final_score += clutchScore * clutchWeight
            final_score += recentScore * recentWeight
    
            scores.append({
                'PLAYER_NAME': player['PLAYER_NAME'],
                'PLAYER_ID': player['PLAYER_ID'],
                'SCORE': final_score,
                'PRIMARY_CONTRIBUTION': primaryScore * baseWeight,
                'SECONDARY_CONTRIBUTION': secondaryScore * baseWeight,
                'CLUTCH_CONTRIBUTION': clutchScore * clutchWeight,
                'RECENT_CONTRIBUTION': recentScore * recentWeight,
                'CATEGORY': 'Star' if player['MIN'] > 25 else 'Rotation'
            })
    
        return pd.DataFrame(scores)
            
    
    starScores = calculate_player_scores(star_players)
    rotationScores = calculate_player_scores(key_rotation_players)
    
    allScores = pd.concat([starScores, rotationScores])
    allScores = allScores.sort_values('SCORE', ascending=False)
    
    return allScores


calculate_player_metrics(star_players, key_rotation_players)

returning 0


Unnamed: 0,PLAYER_NAME,PLAYER_ID,SCORE,PRIMARY_CONTRIBUTION,SECONDARY_CONTRIBUTION,CLUTCH_CONTRIBUTION,RECENT_CONTRIBUTION,CATEGORY
3,Trae Young,1629027,3.435605,0.029037,3.072388,0.133121,0.201059,Star
1,Jalen Johnson,1630552,3.379366,0.084586,3.012688,0.081522,0.200571,Star
0,Dyson Daniels,1630700,2.917876,-0.143716,2.820912,0.097912,0.142768,Star
2,Onyeka Okongwu,1630168,2.337869,-0.025098,2.161987,0.095976,0.105005,Star
0,Clint Capela,203991,2.15632,-0.05929,1.94055,0.120384,0.154676,Rotation
6,Zaccharie Risacher,1642258,2.04788,-0.135054,2.018,0.04615,0.118784,Rotation
7,Daeqwon Plowden,1631342,2.015155,0.095621,1.509387,0.0,0.410146,Rotation
5,Vít Krejčí,1630249,1.773502,-0.121801,1.834338,0.058212,0.002754,Rotation
2,Keaton Wallace,1630811,1.656173,-0.09555,1.66325,0.037015,0.051459,Rotation
3,Larry Nance Jr.,1626204,1.535203,-0.225674,1.6733,0.065852,0.021724,Rotation


In [51]:
star_players = dfs['all_star_estimates']
curr = star_players.iloc[0]

score = 0
score += float(curr['PIE']) * .1
print(score)
curr

0.012400000000000001


PLAYER_NAME     Trae Young
PIE                  0.124
E_NET_RATING          -0.1
E_USG_PCT            0.288
TS_PCT               0.561
E_TOV_PCT           12.924
Name: 0, dtype: object

In [None]:
#lets start with individual player metrics. 
from sqlalchemy import create_engine, text
import pandas as pd

#create our engine for creating sql entries - this wont work right now 
from dotenv import load_dotenv
import os

load_dotenv()

DATABASE_URL = os.getenv("DATABASE_URL")

engine = create_engine(DATABASE_URL)

team_name = 'Atlanta Hawks'

#retreive players we want stats for 
#take those players stats from the advanced table
with engine.connect() as conn:
    with conn.begin():
        queries = {
            'hawks_roster_estimates': """
            	SELECT 
                	t."TEAM_NAME",
                	p.*,
                	c."CLUTCH_SCORE_PCT",
                	c."CLUTCH_USAGE_RATE",
                	c."CLUTCH_NET_RATING",
                	m."GP" as TOTAL_GAMES,
                	m."MIN" as TOTAL_MINUTES,
                	m."PTS" as AVG_POINTS,
                	m."AST" as AVG_ASSISTS
                FROM "all_teams_misc_stats" t
                JOIN "all_players_estimated_stats" p
                	ON t."TEAM_ID" = p."TEAM_ID"
                LEFT JOIN "player_clutch_stats" c
                	ON p."PLAYER_ID" = c."PLAYER_ID"
                LEFT JOIN "all_players_misc_stats" m
                	ON p."PLAYER_ID" = m."PLAYER_ID"
                WHERE t."TEAM_NAME" = :team_name

            """,
            'hawks_roster_advanced': """
            	SELECT 
                	t."TEAM_NAME",
                	p.*,
                	c."CLUTCH_SCORE_PCT",
                	c."CLUTCH_USAGE_RATE",
                	c."CLUTCH_NET_RATING",
                	m."GP" as TOTAL_GAMES,
                	m."MIN" as TOTAL_MINUTES
                FROM "all_teams_misc_stats" t
                JOIN "all_players_advanced_stats" p
                	ON t."TEAM_ID" = p."TEAM_ID"
                LEFT JOIN "player_clutch_stats" c
                	ON p."PLAYER_ID" = c."PLAYER_ID"
                LEFT JOIN "all_players_misc_stats" m
                	ON p."PLAYER_ID" = m."PLAYER_ID"
                WHERE t."TEAM_NAME" = :team_name
            """
        }
        #starters
        dfs = {name: pd.read_sql_query(text(query), engine, params={'team_name': team_name})
              for name, query in queries.items()}
        #players with over 25 minutes of game time
        dfs['all_star_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 25)]
        
        #rotation players 
        dfs['key_rotation_player'] = dfs['hawks_roster_advanced'][
            (dfs['hawks_roster_advanced']['MIN'] > 15) & 
            (dfs['hawks_roster_advanced']['MIN'] < 25)]

#get high impact stats - player impact rating, net rating, usg pct, plus/minus adjsuted for minutes
#get efficiency stats - true shooting, turnover rate 
#get availability - least important

#stats for all star players
dfs['all_star_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['all_star_player'][['PLAYER_ID','PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]#these are the columns we are adding from all_star_player
)[['PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']] #these are the columns we are keeping from the final dataframe

#stats for key rotation players
dfs['key_rotation_estimates'] = pd.merge(
    dfs['hawks_roster_estimates'],
    dfs['key_rotation_player'][['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]
)[['PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]

#apply weights to key points 
#star players 1x weight, key rotation .75 weight 
#bench .5 weight 

In [14]:
#now we want to create functions that calculate the final scores for each playerb based on their respective stats 

star_players = dfs['all_star_estimates']
key_rotation_players = dfs['key_rotation_estimates']

In [16]:
star_players

Unnamed: 0,PLAYER_NAME,PIE,E_NET_RATING,E_USG_PCT,TS_PCT,E_TOV_PCT,MIN,NET_RATING,GP,CLUTCH_SCORE_PCT,CLUTCH_USAGE_RATE,CLUTCH_NET_RATING
0,Dyson Daniels,0.091,-3.2,0.179,0.525,9.524,34.0,-3.4,51,0.5,0.338537,0.024138
1,Jalen Johnson,0.135,1.2,0.22,0.569,11.985,35.7,1.6,36,0.5,0.390909,0.082353
2,Onyeka Okongwu,0.123,-1.0,0.17,0.627,8.112,25.6,-1.0,49,0.5,0.309744,0.066667
3,Trae Young,0.124,-0.4,0.291,0.561,12.893,36.2,0.0,52,0.392857,1.005714,0.034483


In [18]:
key_rotation_players

Unnamed: 0,PLAYER_NAME,PIE,E_NET_RATING,E_USG_PCT,TS_PCT,E_TOV_PCT,MIN,NET_RATING,GP,CLUTCH_SCORE_PCT,CLUTCH_USAGE_RATE,CLUTCH_NET_RATING
0,Clint Capela,0.118,-2.5,0.163,0.57,9.297,22.3,-1.8,46,0.714286,0.388333,-0.006667
1,Garrison Mathews,0.067,-3.5,0.162,0.601,8.182,17.7,-3.9,43,0.0,0.184444,0.154545
2,Keaton Wallace,0.069,-2.6,0.16,0.54,13.28,17.3,-4.1,24,0.2,0.666667,-0.16
3,Larry Nance Jr.,0.106,-8.4,0.16,0.632,7.452,19.3,-9.0,24,0.722222,0.418605,-0.25
4,Mouhamed Gueye,0.1,0.2,0.195,0.442,2.987,15.1,1.7,9,1.0,0.522222,2.0
5,Vít Krejčí,0.077,-4.1,0.137,0.587,9.506,20.8,-3.5,39,0.333333,0.232381,0.091667
6,Zaccharie Risacher,0.064,-3.4,0.204,0.522,8.015,23.8,-3.3,49,0.125,0.536842,-0.125
7,Daeqwon Plowden,0.18,12.2,0.15,1.103,8.418,16.4,16.7,2,,,


In [None]:
from dotenv import load_dotenv
import os

load_dotenv()


def calculate_recent_scores(player_data):
    player_id = player_data['PLAYER_ID']
    name = player_data['PLAYER_NAME']
        
    #create our engine for creating sql entries - this wont work right now 
    DATABASE_URL = os.getenv("DATABASE_URL")

    engine = create_engine(DATABASE_URL)
        
    with engine.connect() as conn:
        with conn.begin():
            query10 = text("""
                with recent_games as (
                	select *,
                		ROW_NUMBER() OVER (
                			PARTITION BY "PLAYER_ID"
                			ORDER BY "GAME_DATE" DESC
                		) as game_number
                	from "all_player_game_stats"
                )
                select
                	"PLAYER_ID",
                    AVG("PTS") as "AVG_PTS",
                    AVG("PLUS_MINUS") as "AVG_PLUS_MINUS",
                    AVG("MIN") as "AVG_MIN",
                    AVG("FGA") as "AVG_FGA", 
                    AVG("FTA") as "AVG_FTA",
                    AVG("TOV") as "AVG_TOV",
                    AVG("OREB") as "AVG_OREB",
                    COUNT(*) as "GAMES_PLAYED"
                from recent_games
                where "PLAYER_ID" = :player_id
                and game_number <= 10
                GROUP BY "PLAYER_ID"
            """)
            query5 = text("""
                with recent_games as (
                	select *,
                		ROW_NUMBER() OVER (
                			PARTITION BY "PLAYER_ID"
                			ORDER BY "GAME_DATE" DESC
                		) as game_number
                	from "all_player_game_stats"
                )
                select
                	"PLAYER_ID",
                    AVG("PTS") as "AVG_PTS",
                    AVG("PLUS_MINUS") as "AVG_PLUS_MINUS",
                    AVG("MIN") as "AVG_MIN",
                    AVG("FGA") as "AVG_FGA", 
                    AVG("FTA") as "AVG_FTA",
                    AVG("TOV") as "AVG_TOV",
                    AVG("OREB") as "AVG_OREB",
                    COUNT(*) as "GAMES_PLAYED"
                from recent_games
                where "PLAYER_ID" = :player_id
                and game_number <= 5
                GROUP BY "PLAYER_ID"
            """)
            recent_games = pd.read_sql_query(query10, engine, params={'player_id': player_id})
            more_recent_games = pd.read_sql_query(query5, engine, params={'player_id': player_id})

    #now that we have the recent games for this player, calculate net rating
    possessions = 0.96 * recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] - recent_games['AVG_OREB'].values[0] + recent_games['AVG_TOV'].values[0]

    offensive_rating = (recent_games['AVG_PTS'].values[0] / possessions) 
    defensive_rating = (recent_games['AVG_PTS'].values[0] - recent_games['AVG_PLUS_MINUS'].values[0]) / possessions
    net_ratingTen = offensive_rating - defensive_rating

    #-----------------------------
    possessionsFive = 0.96 * more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] - more_recent_games['AVG_OREB'].values[0] + more_recent_games['AVG_TOV'].values[0]

    offensive_ratingFive = (more_recent_games['AVG_PTS'].values[0] / possessions) 
    defensive_ratingFive = (more_recent_games['AVG_PTS'].values[0] - more_recent_games['AVG_PLUS_MINUS'].values[0]) / possessions
    net_ratingFive= offensive_rating - defensive_rating

    #calculate true shooting pct
    true_shootingTen = recent_games['AVG_PTS'].values[0] /  (2 * (recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0]))
    #-----------------------------
    true_shootingFive = more_recent_games['AVG_PTS'].values[0] /  (2 * (more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0]))
    
    #calculate usage rate
    usage_rateTen = ((recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] + recent_games['AVG_TOV'].values[0]) / recent_games['AVG_MIN'].values[0])
    #-----------------------------
    usage_rateFive = ((more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] + more_recent_games['AVG_TOV'].values[0]) / more_recent_games['AVG_MIN'].values[0])

    #calculate turnover rate
    turnover_rateTen = (recent_games['AVG_TOV'].values[0] / (recent_games['AVG_FGA'].values[0] + 0.44 * recent_games['AVG_FTA'].values[0] + recent_games['AVG_TOV'].values[0]))
    #-----------------------------
    turnover_rateFive = (more_recent_games['AVG_TOV'].values[0] / (more_recent_games['AVG_FGA'].values[0] + 0.44 * more_recent_games['AVG_FTA'].values[0] + more_recent_games['AVG_TOV'].values[0]))

    #calculate minutes per game
    minutes_per_gameTen = recent_games['AVG_MIN'].values[0] / recent_games['GAMES_PLAYED'].values[0]
    #-----------------------------
    minutes_per_gameFive = more_recent_games['AVG_MIN'].values[0] / more_recent_games['GAMES_PLAYED'].values[0]

    # print(net_ratingTen)
    # print(true_shootingTen)
    # print(usage_rateTen)
    # print(turnover_rateTen)
    # print(minutes_per_gameTen)
    # print()
    recentFormTen = (0.4 * net_ratingTen) + (0.2 * true_shootingTen) + (0.15 * usage_rateTen) + (0.1 * turnover_rateTen) + (0.15 * minutes_per_gameTen)

    # print(net_ratingFive)
    # print(true_shootingFive)
    # print(usage_rateFive)
    # print(turnover_rateFive)
    # print(minutes_per_gameFive)
    # print()
    recentFormFive = (0.4 * net_ratingFive) + (0.2 * true_shootingFive) + (0.15 * usage_rateFive) + (0.1 * turnover_rateFive) + (0.15 * minutes_per_gameFive)

    finalScore = (0.6 * recentFormTen) + (0.4 * recentFormFive)
    # print(finalScore)
    # print()
    return finalScore

def calculate_player_metrics(star_players, key_rotation_players):
    primary_weights = {
        'PIE' : 0.175,
        'USG' : 0.125,
        'NetRating' : .1
    }
    
    secondary_weights = {
        'TrueShooting' : .125,
        'TurnoverRate' : .05,
        'Availability' : .1,
        'Mins' : .15 #this can be adjusted to chnage to minutes consistency 
    }
    clutch_weights = {
        'CLUTCH_SCORE_PCT' : .5,
        'CLUTCH_USAGE_RATE' : .25,
        'CLUTCH_NET_RATING' : .25
    }

    def calculate_primary_scores(player_data):
        score = 0
        score += float(player_data['PIE']) * primary_weights['PIE']
        score += float(player_data['E_USG_PCT']) * primary_weights['USG']
        
        #adjust the net rating based on how many games have been played
        netRating = player_data['NET_RATING'] * primary_weights['NetRating']
        GP = player_data['GP']
        a = 0.75
        #this is the adjusted net rating based on number of games played
        #note that when we make this for a full team performance, we need to get the number of games the team has played
        netRating = netRating * (GP/54) ** a
        score += netRating
        return score
    
    def calculate_secondary_scores(player_data):
        score = 0
        score += float(player_data['TS_PCT']) * secondary_weights['TrueShooting']
        score += float(player_data['E_TOV_PCT']) * secondary_weights['TurnoverRate']
        #player minutes should be split between trends in recent games and season averages
        score += player_data['MIN'] * secondary_weights['Mins'] #in the future we want to make thier average just part of it
        
        # score += availability * secondary_weights['AVAILABILITY']
        return score

    def calculate_clutch_scores(player_data):
        score = 0
        #get raw stats
        scorePct = float(player_data['CLUTCH_SCORE_PCT']) * clutch_weights['CLUTCH_SCORE_PCT']
        usgRate = float(player_data['CLUTCH_USAGE_RATE']) * clutch_weights['CLUTCH_USAGE_RATE']
        netRating = float(player_data['CLUTCH_NET_RATING']) * clutch_weights['CLUTCH_NET_RATING']
        #setup adjustment constants 
        GP = player_data['GP']
        a = 0.75
        #scale scores to avoid small sample outliers 
        score += scorePct * (GP/54) ** a
        score += usgRate * (GP/54) ** a
        score += netRating * (GP/54) ** a

        if math.isnan(score):
            return 0
        else:
            return score
    
    def calculate_player_scores(players_df, baseWeight = 0.5, clutchWeight = 0.3, recentWeight = 0.2):
        scores = []
        for index, player in players_df.iterrows():
            
            
            primaryScore = calculate_primary_scores(player)
            secondaryScore = calculate_secondary_scores(player)
            clutchScore = calculate_clutch_scores(player)
            recentScore = calculate_recent_scores(player)
    
            final_score = (primaryScore + secondaryScore) * baseWeight
            final_score += clutchScore * clutchWeight
            final_score += recentScore * recentWeight
    
            scores.append({
                'PLAYER_NAME': player['PLAYER_NAME'],
                'PLAYER_ID': player['PLAYER_ID'],
                'SCORE': final_score,
                'PRIMARY_CONTRIBUTION': primaryScore * baseWeight,
                'SECONDARY_CONTRIBUTION': secondaryScore * baseWeight,
                'CLUTCH_CONTRIBUTION': clutchScore * clutchWeight,
                'RECENT_CONTRIBUTION': recentScore * recentWeight,
                'CATEGORY': 'Star' if player['MIN'] > 25 else 'Rotation'
            })
    
        return pd.DataFrame(scores)
            
    
    starScores = calculate_player_scores(star_players)
    rotationScores = calculate_player_scores(key_rotation_players)
    
    allScores = pd.concat([starScores, rotationScores])
    allScores = allScores.sort_values('SCORE', ascending=False)
    
    return allScores


def get_team_player_scores(teamName):
    #create our engine for creating sql entries - this wont work right now 
    DATABASE_URL = os.getenv("DATABASE_URL")

    engine = create_engine(DATABASE_URL)
    
    team_name = teamName
    
    #retreive players we want stats for 
    #take those players stats from the advanced table
    with engine.connect() as conn:
        with conn.begin():
            queries = {
                'hawks_roster_estimates': """
                	SELECT 
                    	t."TEAM_NAME",
                    	p.*,
                    	c."CLUTCH_SCORE_PCT",
                    	c."CLUTCH_USAGE_RATE",
                    	c."CLUTCH_NET_RATING",
                    	m."GP" as TOTAL_GAMES,
                    	m."MIN" as TOTAL_MINUTES,
                    	m."PTS" as AVG_POINTS,
                    	m."AST" as AVG_ASSISTS
                    FROM "all_teams_misc_stats" t
                    JOIN "all_players_estimated_stats" p
                    	ON t."TEAM_ID" = p."TEAM_ID"
                    LEFT JOIN "player_clutch_stats" c
                    	ON p."PLAYER_ID" = c."PLAYER_ID"
                    LEFT JOIN "all_players_misc_stats" m
                    	ON p."PLAYER_ID" = m."PLAYER_ID"
                    WHERE t."TEAM_NAME" = :team_name
    
                """,
                'hawks_roster_advanced': """
                	SELECT 
                    	t."TEAM_NAME",
                    	p.*,
                    	c."CLUTCH_SCORE_PCT",
                    	c."CLUTCH_USAGE_RATE",
                    	c."CLUTCH_NET_RATING",
                    	m."GP" as TOTAL_GAMES,
                    	m."MIN" as TOTAL_MINUTES
                    FROM "all_teams_misc_stats" t
                    JOIN "all_players_advanced_stats" p
                    	ON t."TEAM_ID" = p."TEAM_ID"
                    LEFT JOIN "player_clutch_stats" c
                    	ON p."PLAYER_ID" = c."PLAYER_ID"
                    LEFT JOIN "all_players_misc_stats" m
                    	ON p."PLAYER_ID" = m."PLAYER_ID"
                    WHERE t."TEAM_NAME" = :team_name
                """
            }
            #starters
            dfs = {name: pd.read_sql_query(text(query), engine, params={'team_name': team_name})
                  for name, query in queries.items()}
            #players with over 25 minutes of game time
            dfs['all_star_player'] = dfs['hawks_roster_advanced'][
                (dfs['hawks_roster_advanced']['MIN'] > 25)]
            
            #rotation players 
            dfs['key_rotation_player'] = dfs['hawks_roster_advanced'][
                (dfs['hawks_roster_advanced']['MIN'] > 15) & 
                (dfs['hawks_roster_advanced']['MIN'] < 25)]
    
    #get high impact stats - player impact rating, net rating, usg pct, plus/minus adjsuted for minutes
    #get efficiency stats - true shooting, turnover rate 
    #get availability - least important
    
    #stats for all star players
    dfs['all_star_estimates'] = pd.merge(
        dfs['hawks_roster_estimates'],
        dfs['all_star_player'][['PLAYER_ID','PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]#these are the columns we are adding from all_star_player
    )[['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']] #these are the columns we are keeping from the final dataframe
    
    #stats for key rotation players
    dfs['key_rotation_estimates'] = pd.merge(
        dfs['hawks_roster_estimates'],
        dfs['key_rotation_player'][['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'TS_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]
    )[['PLAYER_ID', 'PLAYER_NAME', 'PIE', 'E_NET_RATING', 'E_USG_PCT', 'TS_PCT', 'E_TOV_PCT', 'MIN', 'NET_RATING', 'GP', 'CLUTCH_SCORE_PCT', 'CLUTCH_USAGE_RATE', 'CLUTCH_NET_RATING']]
    
    #apply weights to key points 
    #star players 1x weight, key rotation .75 weight 
    #bench .5 weight 
    
    star_players = dfs['all_star_estimates']
    key_rotation_players = dfs['key_rotation_estimates']

    player_scores = calculate_player_metrics(star_players, key_rotation_players)
    return player_scores

In [112]:
team_one = get_team_player_scores('Brooklyn Nets')
team_one

Unnamed: 0,PLAYER_NAME,PLAYER_ID,SCORE,PRIMARY_CONTRIBUTION,SECONDARY_CONTRIBUTION,CLUTCH_CONTRIBUTION,RECENT_CONTRIBUTION,CATEGORY
0,Cam Thomas,1630560,2.88704,-0.055401,2.71035,0.051463,0.180628,Star
1,Cameron Johnson,1629661,2.677725,-0.178581,2.635437,0.071081,0.149787,Star
2,D'Angelo Russell,1626156,2.485555,0.036938,2.196862,0.08013,0.171625,Star
3,Tosan Evbuomwan,1641787,2.398218,0.016576,2.176575,0.077652,0.127414,Rotation
3,Jalen Wilson,1630592,2.222215,-0.349712,2.320425,0.093857,0.157646,Star
6,Ziaire Williams,1630533,1.966898,-0.286404,2.043837,0.067176,0.142288,Rotation
4,Nic Claxton,1629651,1.925339,-0.551195,2.277087,0.060933,0.138514,Star
0,Day'Ron Sharpe,1630549,1.902082,0.118184,1.67265,0.0,0.111248,Rotation
4,Trendon Watford,1630570,1.869615,0.045577,1.63715,0.069265,0.117623,Rotation
1,Keon Johnson,1630553,1.662542,-0.508177,2.005925,0.125892,0.038902,Rotation


In [114]:
team_two = get_team_player_scores('Philadelphia 76ers')
team_two

Unnamed: 0,PLAYER_NAME,PLAYER_ID,SCORE,PRIMARY_CONTRIBUTION,SECONDARY_CONTRIBUTION,CLUTCH_CONTRIBUTION,RECENT_CONTRIBUTION,CATEGORY
5,Tyrese Maxey,1630178,3.31154,-0.081826,3.070562,0.12309,0.199713,Star
3,Kelly Oubre Jr.,1626162,2.908398,-0.165635,2.835387,0.088639,0.150007,Star
4,Paul George,202331,2.803402,-0.115692,2.7237,0.052731,0.142663,Star
2,Joel Embiid,203954,2.713255,-0.07671,2.5893,0.053929,0.146736,Star
0,Guerschon Yabusele,1627824,2.414286,-0.073749,2.301462,0.062256,0.124315,Star
1,Jared McCain,1642272,2.295724,-0.030584,2.198112,0.037537,0.090659,Star
2,Justin Edwards,1642348,2.020297,-0.115013,2.047075,0.04834,0.039896,Rotation
1,Eric Gordon,201569,1.690928,-0.196665,1.734487,0.060318,0.092787,Rotation
0,Andre Drummond,203083,1.569077,-0.37496,1.869725,-0.001649,0.075961,Rotation
3,Kyle Lowry,200768,1.531704,-0.160407,1.66025,0.01296,0.018901,Rotation
