# Calculate team statistics for games
In this notebook we use the cumulative statistics of each player on a team to calculate team-level statistics for every game. We can then use this data to train our models. The inputs will be the averaged statistics of each of the two teams, and the target will be the index of the winning team (0 for the first team, 1 for the second).

In [4]:
import pandas as pd

# One row per (player, game) appearance; joined to the player statistics in the next cell.
games = pd.read_csv(filepath_or_buffer="player_with_game_id.csv")

# Cumulative per-player attributes, keyed by player id ("ilkid") and "year".
playerStats = pd.read_csv(filepath_or_buffer="cumilative_player_attributes.csv")

In [5]:
# The first four characters of game_date are the year; slice them with the
# vectorized string accessor instead of a row-wise apply over the whole frame.
games['year'] = games['game_date'].str[:4].astype("int64")

# Inner join: player/game rows without a matching (ilkid, year) statistics row are dropped.
gameWithPlayerStats = pd.merge(games, playerStats, on=["ilkid", "year"], how='inner')

In [6]:
def getTeamAttributes(game, teams, columns):
    """Aggregate the player rows of a single game into one team-level record.

    Parameters
    ----------
    game : pandas.DataFrame
        Rows belonging to one game. Must contain the columns ``game_id``,
        ``game_date``, ``team_id_x``, ``ilkid``, ``winning_team_abbr`` and
        every name listed in ``columns``.
    teams : list
        The two ``team_id_x`` values taking part in the game; ``teams[0]``
        becomes "t1" and ``teams[1]`` becomes "t2".
    columns : list of str
        Statistic columns to average over each team's rows.

    Returns
    -------
    dict
        ``game_id`` and ``game_date`` (taken from the first row), one
        ``t1_<col>`` / ``t2_<col>`` mean per requested column, the player id
        lists ``t1_team_ilkid`` / ``t2_team_ilkid``, and ``winner``: 0 when the
        first row's ``winning_team_abbr`` equals ``teams[0]``, otherwise 1
        (this includes the case where it matches neither team).
    """
    teamIndex = ['t1', 't2']
    firstRow = game.iloc[0]
    teamAttributes = {
        'game_id': firstRow['game_id'],
        'game_date': firstRow['game_date'],
    }

    # Select each team's rows once, instead of re-filtering for every column.
    teamRows = [game[game["team_id_x"] == team] for team in teams]

    for i, playerRows in enumerate(teamRows):
        for col in columns:
            teamAttributes[teamIndex[i] + "_" + col] = playerRows[col].mean()
    teamAttributes["t1_team_ilkid"] = teamRows[0]["ilkid"].tolist()
    teamAttributes["t2_team_ilkid"] = teamRows[1]["ilkid"].tolist()

    teamAttributes["winner"] = 0 if firstRow["winning_team_abbr"] == teams[0] else 1

    return teamAttributes

# Per-player statistic columns that are averaged over each team's players.
columns = [
    'avg_gp', 'avg_minutes', 'avg_pts', 'avg_oreb', 'avg_dreb', 'avg_reb',
    'avg_asts', 'avg_stl', 'avg_blk', 'avg_turnover', 'avg_pf',
    'avg_fga', 'avg_fgm', 'avg_fta', 'avg_ftm', 'avg_tpa', 'avg_tpm',
]

# Games that do not have exactly two distinct teams are recorded here and skipped.
invalidGameIds = []
rows = []

# A single groupby pass hands us each game's rows directly, instead of
# re-scanning the whole frame with a boolean mask for every game id.
# sort=False keeps games in order of first appearance.
# NOTE: groupby skips rows whose game_id is missing.
for game_id, gameDetails in gameWithPlayerStats.groupby("game_id", sort=False):
    teams = gameDetails["team_id_x"].unique().tolist()
    if len(teams) != 2:
        invalidGameIds.append(game_id)
        continue
    rows.append(getTeamAttributes(gameDetails, teams, columns))

# Build the frame once from the collected records; its columns are the keys
# returned by getTeamAttributes (e.g. "t1_avg_pts", "t2_avg_pts", "winner").
aggregatedGame = pd.DataFrame(rows)

In [7]:
# Persist the per-game team averages; the row index is written as the first, unnamed column.
aggregatedGame.to_csv(path_or_buf="avg_team_attr_by_game.csv", index=True)