In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)
from sqlalchemy import create_engine

In [2]:
engine = create_engine("postgresql://postgres:postgres@127.0.0.1:5432/armagedon")

In [3]:
data_games = pd.read_sql('SELECT * FROM games WHERE status = 3',engine)
data_stats = pd.read_sql('SELECT * FROM stats',engine)

In [4]:
data_games.shape, data_stats.shape

((11134, 7), (22354, 27))

In [5]:
data_stats = pd.merge(data_stats,data_games[['id','home_team','date','season']], how='left', left_on='game_id', right_on='id')
data_stats = data_stats.drop(columns=["id"])
data_stats = data_stats.sort_values(by='date')

In [6]:
data_stats.isnull().sum()

game_id                  0
team_id                  0
fastBreakPoints       4536
pointsInPaint         4536
biggestLead           4536
secondChancePoints    7154
pointsOffTurnovers    4536
longestRun            7154
points                   0
fgm                      0
fga                      0
fgp                      0
ftm                      0
fta                      0
ftp                      0
tpm                      0
tpa                      0
tpp                      0
offReb                   0
defReb                   0
totReb                   0
assists                  0
pFouls                   0
steals                   0
turnovers                0
blocks                   0
plusMinus                0
home_team                0
date                     0
season                   0
dtype: int64

In [7]:
data_stats = data_stats.drop(columns=["fastBreakPoints","pointsInPaint","biggestLead","secondChancePoints","pointsOffTurnovers","longestRun"])

In [8]:
data_stats.head()

Unnamed: 0,game_id,team_id,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus,home_team,date,season
19476,116,10,92,30,74,40.5,25,34,73.5,7,19,36.8,8,35,43,16,20,2,10,4,5.0,10,2015-10-28,2015
19481,118,15,99,32,86,37.2,26,31,83.9,9,23,39.1,8,32,40,23,30,14,13,3,-7.0,38,2015-10-28,2015
19480,118,38,106,36,80,45.0,27,39,69.2,7,18,38.9,9,41,50,19,24,8,20,2,7.0,38,2015-10-28,2015
19479,117,5,94,33,84,39.3,22,29,75.9,6,24,25.0,11,32,43,16,16,4,8,1,-10.0,20,2015-10-28,2015
19478,117,20,104,36,73,49.3,20,21,95.2,12,20,60.0,2,39,41,23,25,5,13,7,10.0,20,2015-10-28,2015


In [9]:
data_stats['home_team'] = data_stats['team_id']==data_stats['home_team']
data_stats['win'] = data_stats['plusMinus']> 0 
data_stats['win_home'] = data_stats['home_team'] & data_stats['win']
data_stats['win_away'] = (-data_stats['home_team']) & data_stats['win']

In [10]:
mask = (data_stats["team_id"]==1) & (data_stats["season"]==2022)
data_stats[mask].head(10)

Unnamed: 0,game_id,team_id,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus,home_team,date,season,win,win_home,win_away
81,11054,1,117,45,90,50.0,20,24,83.3,7,25,28.0,4,34,38,30,18,12,9,5,10.0,True,2022-10-19,2022,True,True,False
111,11069,1,108,40,89,44.9,15,16,93.8,13,31,41.9,9,37,46,26,22,10,17,2,10.0,True,2022-10-21,2022,True,True,False
147,11087,1,109,39,95,41.1,23,28,82.1,8,35,22.9,16,29,45,23,21,8,12,9,-17.0,True,2022-10-23,2022,False,False,False
11,11106,1,118,45,91,68.2,20,25,80.0,8,22,36.4,10,36,46,22,20,6,12,12,5.0,False,2022-10-26,2022,True,False,True
37,11119,1,136,55,97,81.6,14,18,77.8,12,29,41.4,10,35,45,31,26,5,7,8,24.0,False,2022-10-28,2022,True,False,True
67,11134,1,115,45,94,65.5,12,15,80.0,13,29,44.8,12,30,42,19,23,5,13,2,-8.0,False,2022-10-30,2022,False,False,False
197,11148,1,109,39,84,82.5,20,25,80.0,11,29,37.9,7,30,37,28,27,1,18,4,-30.0,False,2022-10-31,2022,False,False,False
219,11159,1,112,44,107,75.0,12,16,75.0,12,34,35.3,17,34,51,28,18,11,9,4,13.0,False,2022-11-02,2022,True,False,True
266,11183,1,124,47,103,45.6,21,25,84.0,9,36,25.0,12,38,50,28,23,12,13,8,3.0,True,2022-11-05,2022,True,True,False
296,11198,1,117,46,99,46.5,14,17,82.4,11,29,37.9,12,36,48,23,18,11,12,3,19.0,True,2022-11-08,2022,True,True,False


In [11]:
data_stats['nb_games'] = data_stats.groupby(['team_id', 'season'])["game_id"].cumcount() + 1

for col in data_stats.drop(columns=["game_id","team_id","date","season",'nb_games']).columns:
    data_stats[f'{col}_cumul'] = data_stats.groupby(['team_id', 'season'])[col].cumsum()
    data_stats[f'{col}_avg'] = data_stats[f'{col}_cumul'] /data_stats['nb_games']
data_stats['win_home_avg'] = data_stats['win_home_cumul'] /data_stats['home_team_cumul']
data_stats['win_away_avg'] = data_stats['win_away_cumul'] /(data_stats['nb_games']-data_stats['home_team_cumul'])



In [12]:
mask = (data_stats["team_id"]==1) & (data_stats["season"]==2022)

In [13]:

data_stats = data_stats.sort_values(by=['team_id','season','date']).reset_index(drop=False)
data_stats['last_10_games_wins'] = data_stats.groupby(['team_id','season']).rolling(window=10, min_periods=1, on="date")['win'].sum().reset_index(drop=False)['win']


In [14]:
mask = (data_stats["team_id"]==1) & (data_stats["season"]==2022)
data_stats[mask][['season','date','win','last_10_games_wins']]

Unnamed: 0,season,date,win,last_10_games_wins
589,2022,2022-10-19,True,1.0
590,2022,2022-10-21,True,2.0
591,2022,2022-10-23,False,2.0
592,2022,2022-10-26,True,3.0
593,2022,2022-10-28,True,4.0
...,...,...,...,...
673,2022,2023-04-18,False,5.0
674,2022,2023-04-21,True,5.0
675,2022,2023-04-23,False,5.0
676,2022,2023-04-25,True,5.0


In [15]:
def streak(row):
    mask = (data_stats['team_id'] == row['team_id']) & (data_stats['season'] == row['season']) & (data_stats['date'] < row['date']) 
    df_ = data_stats[mask][['date','win']]
    df_ = df_.sort_values(by='date',ascending=False)
    val = 1 if row['win'] else -1
    for _, row_temp in df_.iterrows():
        if row['win'] != row_temp['win']:
            return val
        val += 1 if row['win'] else -1
    return val

In [16]:
data_stats['serie'] = data_stats.apply(streak, axis=1)

In [17]:
mask = (data_stats["team_id"]==1) & (data_stats["season"]==2022)
data_stats[mask][['season','date','win','serie']]

Unnamed: 0,season,date,win,serie
589,2022,2022-10-19,True,1
590,2022,2022-10-21,True,2
591,2022,2022-10-23,False,-1
592,2022,2022-10-26,True,1
593,2022,2022-10-28,True,2
...,...,...,...,...
673,2022,2023-04-18,False,-2
674,2022,2023-04-21,True,1
675,2022,2023-04-23,False,-1
676,2022,2023-04-25,True,1


In [21]:
data_stats.drop(columns='index').to_csv('stats.csv', index=False)

In [19]:
data_stats

Unnamed: 0,index,game_id,team_id,points,fgm,fga,fgp,ftm,fta,ftp,tpm,tpa,tpp,offReb,defReb,totReb,assists,pFouls,steals,turnovers,blocks,plusMinus,home_team,date,season,win,win_home,win_away,nb_games,points_cumul,points_avg,fgm_cumul,fgm_avg,fga_cumul,fga_avg,fgp_cumul,fgp_avg,ftm_cumul,ftm_avg,fta_cumul,fta_avg,ftp_cumul,ftp_avg,tpm_cumul,tpm_avg,tpa_cumul,tpa_avg,tpp_cumul,tpp_avg,offReb_cumul,offReb_avg,defReb_cumul,defReb_avg,totReb_cumul,totReb_avg,assists_cumul,assists_avg,pFouls_cumul,pFouls_avg,steals_cumul,steals_avg,turnovers_cumul,turnovers_avg,blocks_cumul,blocks_avg,plusMinus_cumul,plusMinus_avg,home_team_cumul,home_team_avg,win_cumul,win_avg,win_home_cumul,win_home_avg,win_away_cumul,win_away_avg,last_10_games_wins,serie
0,19464,110,1,94,37,82,45.1,12,15,80.0,8,27,29.6,7,33,40,22,25,9,15,4,-12.0,True,2015-10-28,2015,False,False,False,1,94,94.000000,37,37.000000,82,82.000000,45.1,45.100000,12,12.000000,15,15.000000,80.0,80.000000,8,8.000000,27,27.000000,29.6,29.600000,7,7.000000,33,33.000000,40,40.000000,22,22.000000,25,25.000000,9,9.000000,15,15.000000,4,4.000000,-12.0,-12.000000,1,1.000000,0,0.000000,0,0.000000,0,,0.0,-1
1,19501,128,1,112,42,83,50.6,18,26,69.2,10,24,41.7,7,32,39,26,18,11,15,4,11.0,False,2015-10-30,2015,True,False,True,2,206,103.000000,79,39.500000,165,82.500000,95.7,47.850000,30,15.000000,41,20.500000,149.2,74.600000,18,9.000000,51,25.500000,71.3,35.650000,14,7.000000,65,32.500000,79,39.500000,48,24.000000,43,21.500000,20,10.000000,30,15.000000,8,4.000000,-1.0,-0.500000,1,0.500000,1,0.500000,0,0.000000,1,1.000000,1.0,1
2,19514,135,1,97,36,83,43.4,17,22,77.3,8,23,34.8,8,37,45,23,17,9,15,4,3.0,True,2015-10-31,2015,True,True,False,3,303,101.000000,115,38.333333,248,82.666667,139.1,46.366667,47,15.666667,63,21.000000,226.5,75.500000,26,8.666667,74,24.666667,106.1,35.366667,22,7.333333,102,34.000000,124,41.333333,71,23.666667,60,20.000000,29,9.666667,45,15.000000,12,4.000000,2.0,0.666667,2,0.666667,2,0.666667,1,0.500000,1,1.000000,2.0,2
3,19541,148,1,94,37,88,42.0,13,14,92.9,7,29,24.1,9,39,48,22,16,9,11,6,2.0,False,2015-11-01,2015,True,False,True,4,397,99.250000,152,38.000000,336,84.000000,181.1,45.275000,60,15.000000,77,19.250000,319.4,79.850000,33,8.250000,103,25.750000,130.2,32.550000,31,7.750000,141,35.250000,172,43.000000,93,23.250000,76,19.000000,38,9.500000,56,14.000000,18,4.500000,4.0,1.000000,2,0.500000,3,0.750000,1,0.500000,2,1.000000,3.0,3
4,19573,164,1,98,37,90,41.1,17,22,77.3,7,21,33.3,16,35,51,25,14,10,15,1,6.0,False,2015-11-04,2015,True,False,True,5,495,99.000000,189,37.800000,426,85.200000,222.2,44.440000,77,15.400000,99,19.800000,396.7,79.340000,40,8.000000,124,24.800000,163.5,32.700000,47,9.400000,176,35.200000,223,44.600000,118,23.600000,90,18.000000,48,9.600000,71,14.200000,19,3.800000,10.0,2.000000,2,0.400000,4,0.800000,1,0.500000,3,1.000000,4.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22349,22242,13368,41,105,40,90,44.4,13,18,72.2,12,40,30.0,4,36,40,29,22,6,9,2,-9.0,True,2024-02-25,2023,False,False,False,59,6735,114.152542,2540,43.050847,5436,92.135593,3841.0,65.101695,935,15.847458,1233,20.898305,4451.1,75.442373,720,12.203390,2062,34.949153,2059.8,34.911864,562,9.525424,1844,31.254237,2406,40.779661,1647,27.915254,1175,19.915254,473,8.016949,819,13.881356,303,5.135593,-622.0,-10.542373,27,0.457627,9,0.152542,3,0.111111,6,0.187500,0.0,-13
22350,22170,13368,41,105,40,90,44.4,13,18,72.2,12,40,30.0,4,36,40,29,22,6,9,2,-9.0,True,2024-02-25,2023,False,False,False,60,6840,114.000000,2580,43.000000,5526,92.100000,3885.4,64.756667,948,15.800000,1251,20.850000,4523.3,75.388333,732,12.200000,2102,35.033333,2089.8,34.830000,566,9.433333,1880,31.333333,2446,40.766667,1676,27.933333,1197,19.950000,479,7.983333,828,13.800000,305,5.083333,-631.0,-10.516667,28,0.466667,9,0.150000,3,0.107143,6,0.187500,0.0,-13
22351,22270,13382,41,112,46,93,49.5,8,10,80.0,12,35,34.3,11,30,41,34,12,5,21,5,-11.0,True,2024-02-28,2023,False,False,False,61,6952,113.967213,2626,43.049180,5619,92.114754,3934.9,64.506557,956,15.672131,1261,20.672131,4603.3,75.463934,744,12.196721,2137,35.032787,2124.1,34.821311,577,9.459016,1910,31.311475,2487,40.770492,1710,28.032787,1209,19.819672,484,7.934426,849,13.918033,310,5.081967,-642.0,-10.524590,29,0.475410,9,0.147541,3,0.103448,6,0.187500,0.0,-15
22352,22315,13404,41,131,52,102,83.3,10,12,83.3,17,41,41.5,10,35,45,35,19,6,15,6,-3.0,False,2024-03-01,2023,False,False,False,62,7083,114.241935,2678,43.193548,5721,92.274194,4018.2,64.809677,966,15.580645,1273,20.532258,4686.6,75.590323,761,12.274194,2178,35.129032,2165.6,34.929032,587,9.467742,1945,31.370968,2532,40.838710,1745,28.145161,1228,19.806452,490,7.903226,864,13.935484,316,5.096774,-645.0,-10.403226,29,0.467742,9,0.145161,3,0.103448,6,0.181818,0.0,-16
