# Calculate team statistics for games
In this notebook we use the cumulative statistics of each player on a team to calculate team-level statistics for every game. We can then use this data to train our models. The input will be the statistics of each of the two teams, and the target will be the index of the winning team.

In [5]:
import pandas as pd

# Load the two input tables used by the rest of the notebook.
# NOTE(review): the schemas are not visible here; later cells read game_id,
# game_date, ilkid, team_id_x and winning_team_abbr from the merged frame —
# confirm which file supplies each column.
games = pd.read_csv("player_with_game_id.csv")
# The file name's spelling must match the file on disk, so it is left as-is.
playerStats = pd.read_csv("cumilative_player_attributes.csv")
# Display how many distinct games the games table contains.
games['game_id'].nunique()

23785

In [6]:
# Derive the calendar year from the first four characters of game_date.
# A vectorised string slice replaces the row-wise apply: it yields the same
# integers, is much faster, and also works when `games` is empty.
# NOTE(review): this is the calendar year of the game; confirm that
# playerStats['year'] uses the same convention (rather than a season year).
games['year'] = games['game_date'].str[:4].astype(int)

# Inner join on player id + year: player rows with no matching statistics are
# dropped, so the number of distinct games below can be (and, per the recorded
# output, is) much smaller than in `games`.
gameWithPlayerStats = pd.merge(games, playerStats, on=["ilkid", "year"], how='inner')

# Handy cross-check when investigating the drop in game count:
# games[games['year'] > 1995]['game_id'].nunique()

gameWithPlayerStats['game_id'].nunique()

8777

In [7]:
def getTeamAttributes(game, teams, columns):
    """Collapse the player rows of one game into a single row of team features.

    Parameters
    ----------
    game : pandas.DataFrame
        Player rows belonging to one game. Must be non-empty and contain the
        columns ``game_id``, ``game_date``, ``team_id_x``, ``ilkid``,
        ``winning_team_abbr`` and every name listed in ``columns``.
    teams : list
        Exactly two ``team_id_x`` values. The first is reported under the
        ``t1_`` prefix, the second under ``t2_``.
    columns : list of str
        Columns of ``game`` that are averaged over each team's rows.

    Returns
    -------
    dict
        Keys, in insertion order: ``game_id``, ``game_date``, ``t1_<col>`` for
        each column, ``t2_<col>`` for each column, ``t1_team_ilkid``,
        ``t2_team_ilkid`` (lists of player ids, one entry per row) and
        ``winner`` (0 if the first team won, otherwise 1).

    Raises
    ------
    ValueError
        If ``teams`` does not contain exactly two entries.
    """
    teamIndex = ['t1', 't2']
    if len(teams) != len(teamIndex):
        raise ValueError(
            "getTeamAttributes expects exactly %d teams, got %d: %r"
            % (len(teamIndex), len(teams), teams)
        )

    # game_id, game_date and the winner are read from the first row only.
    firstRow = game.iloc[0]
    teamAttributes = {
        'game_id': firstRow['game_id'],
        'game_date': firstRow['game_date'],
    }

    # Select each team's rows once instead of re-filtering for every column.
    # NOTE(review): the same player id can appear on several rows (the ilkid
    # lists keep duplicates), so such players weigh more in the means —
    # confirm this is intended.
    teamRows = [game[game["team_id_x"] == team] for team in teams]

    for prefix, players in zip(teamIndex, teamRows):
        for col in columns:
            teamAttributes[prefix + "_" + col] = players[col].mean()

    # Rosters are added after all averages so the key order stays stable.
    for prefix, players in zip(teamIndex, teamRows):
        teamAttributes[prefix + "_team_ilkid"] = players["ilkid"].tolist()

    # NOTE(review): any value other than teams[0] (including a missing or
    # differently spelled abbreviation) is labelled 1 — verify that
    # winning_team_abbr always equals one of the two team ids.
    teamAttributes["winner"] = 0 if firstRow["winning_team_abbr"] == teams[0] else 1

    return teamAttributes

# Player attribute columns that are averaged per team for every game.
columns = ['avg_gp','avg_minutes','avg_pts','avg_oreb','avg_dreb','avg_reb','avg_asts','avg_stl','avg_blk','avg_turnover','avg_pf','avg_fga','avg_fgm','avg_fta','avg_ftm','avg_tpa','avg_tpm']

# Games whose merged rows do not contain exactly two distinct teams; they are
# skipped and their ids are kept here for inspection.
invalidGameIds = []
rows = []

# A single groupby pass replaces re-filtering the whole frame once per game.
# sort=False keeps games in order of first appearance, matching the order
# produced by iterating over .unique().
for game_id, gameDetails in gameWithPlayerStats.groupby("game_id", sort=False):
    teams = gameDetails["team_id_x"].unique().tolist()
    if len(teams) != 2:
        invalidGameIds.append(game_id)
        continue
    rows.append(getTeamAttributes(gameDetails, teams, columns))

# Build the frame once from the collected records; the column names come from
# the dict keys returned by getTeamAttributes.
aggregatedGame = pd.DataFrame(rows)
aggregatedGame

Unnamed: 0,game_id,game_date,t1_avg_gp,t1_avg_minutes,t1_avg_pts,t1_avg_oreb,t1_avg_dreb,t1_avg_reb,t1_avg_asts,t1_avg_stl,...,t2_avg_pf,t2_avg_fga,t2_avg_fgm,t2_avg_fta,t2_avg_ftm,t2_avg_tpa,t2_avg_tpm,t1_team_ilkid,t2_team_ilkid,winner
0,29600012,1996-11-01 00:00:00,58.044444,20.621097,8.956167,1.018667,2.458639,3.478097,2.257903,0.682903,...,2.057056,7.413583,3.327046,1.910250,1.420528,1.855657,0.656852,"[JONESED01, SCOTTBY01, FISHEDE01, KNIGHTR01, R...","[MANNIDA01, KLEINJO01, KLEINJO01, KLEINJO01, K...",0
1,29600027,1996-11-03 00:00:00,71.388889,24.334444,11.026667,1.338889,3.030000,4.369444,2.382778,0.820556,...,2.292778,7.477222,3.476111,2.456111,1.857778,1.098333,0.358889,"[JONESED01, SCOTTBY01, FISHEDE01, KNIGHTR01, B...","[MITCHSA01, GARNEKE01, CARRCH01, PARKSCH01, GU...",0
2,29600044,1996-11-06 00:00:00,71.437500,25.341250,11.802500,1.277500,3.076250,4.354375,2.611875,0.868125,...,2.225500,6.096278,2.776722,1.933667,1.437722,1.569778,0.616556,"[JONESED01, SCOTTBY01, FISHEDE01, BLOUNCO01, O...","[MASONAN01, BURRESC01, BURRESC01, BURRESC01, Z...",1
3,29600057,1996-11-08 00:00:00,71.437500,25.341250,11.802500,1.277500,3.076250,4.354375,2.611875,0.868125,...,2.600833,7.811111,3.590000,2.410000,1.696667,1.321667,0.505556,"[JONESED01, SCOTTBY01, FISHEDE01, BLOUNCO01, O...","[WHITEDO01, JONESPO01, WILLIWA01, CAMBYMA01, T...",1
4,29600072,1996-11-10 00:00:00,73.125000,25.505000,11.944375,1.308750,3.122500,4.431875,2.596875,0.871875,...,2.181786,7.403571,3.348214,2.612857,2.017857,1.857143,0.656786,"[JONESED01, SCOTTBY01, FISHEDE01, KNIGHTR01, O...","[LAETTCH01, BLAYLMO01, BARRYJO01, MUTOMDI01, C...",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8772,20400220,2004-12-01 00:00:00,51.617965,19.719773,7.393171,1.048268,2.414578,3.460931,1.742229,0.631104,...,1.425208,3.205625,1.317917,1.036563,0.721563,0.616875,0.185938,"[BRUNSRI01, MOOREMI01, MAGGECO01, SIMMOBO01, W...","[GILLED01, FOWLKTR01, JONESFR01, HAISLMA01, JO...",0
8773,20400254,2004-12-06 00:00:00,51.617965,19.719773,7.393171,1.048268,2.414578,3.460931,1.742229,0.631104,...,1.747262,6.069881,2.613095,1.705357,1.123452,0.791429,0.275595,"[BRUNSRI01, MOOREMI01, MAGGECO01, SIMMOBO01, W...","[HARTJA01, BREZEPR01, SLAYTA01, ELYME01, KAPON...",0
8774,20400415,2004-12-29 00:00:00,41.925084,15.828897,5.888948,0.877357,1.964116,2.839983,1.086549,0.405859,...,1.952024,6.485557,2.814627,2.094221,1.561055,0.848295,0.281326,"[BRUNSRI01, MOOREMI01, MAGGECO01, REBRAZE01, S...","[HARPRMA01, ARROYCA01, ARROYCA01, ARROYCA01, B...",0
8775,20400392,2004-12-27 00:00:00,62.980159,25.328968,9.839405,1.380278,2.845913,4.225119,2.114484,0.919921,...,1.656944,5.399352,2.330648,1.639352,1.077037,0.599630,0.220278,"[HUGHELA01, BROWNKW01, ARENAGI01, HAYWOBR01, J...","[HARTJA01, WALLAGE01, BREZEPR01, ELYME01, SAMP...",0


In [8]:
# Persist the per-game team features. With to_csv's defaults the DataFrame
# index is written as an unnamed first column, so readers of this file will see
# an extra leading column unless they pass index_col=0.
aggregatedGame.to_csv("avg_team_attr_by_game.csv")