In [2]:
import pandas as pd

In [3]:
# Load the three cleaned tables (team seasons, player bios, player seasons)
# in one pass; the tuple order matches the names on the left-hand side.
teams, players, players_teams = map(
    pd.read_csv,
    (
        'data/clean/cleaned_teams.csv',
        'data/clean/cleaned_players.csv',
        'data/clean/cleaned_players_teams.csv',
    ),
)

In [None]:
def predict_team_year_stats(team_id, year, teams, players_teams, players):
    """Append a projected season row for one team to the teams table.

    The projection for ``year`` starts from the team's own row for
    ``year - 1`` and overwrites the player-derived columns using the roster
    listed for ``year``: mean height/weight come from ``players``, and each
    ``player_total_<stat>`` is the sum of what those players recorded in
    ``year - 1`` (rows from any team count; players with no rows that season
    add nothing).

    Args:
        team_id: Value matched against the ``tmID`` column.
        year: Season to project; ``year - 1`` supplies the inputs.
        teams: Team-season table with at least ``tmID`` and ``year``.
        players_teams: Player-season table with ``playerID``, ``tmID``,
            ``year`` and every stat column summed below.
        players: Player bio table with ``bioID``, ``height`` and ``weight``.

    Returns:
        ``teams`` itself, untouched, when none of the roster has rows for
        ``year - 1``; otherwise a new frame made of ``teams`` plus one
        appended row, with the index renumbered. No check is made for an
        existing ``(team_id, year)`` row, so the result can hold both.
    """
    regular_season = (
        'GP', 'GS', 'points', 'oRebounds', 'dRebounds', 'rebounds', 'assists',
        'steals', 'blocks', 'turnovers', 'PF', 'fgAttempted', 'fgMade',
        'ftAttempted', 'ftMade', 'threeAttempted', 'threeMade', 'dq',
    )
    post_season = (
        'PostGP', 'PostGS', 'PostMinutes', 'PostPoints', 'PostoRebounds',
        'PostdRebounds', 'PostRebounds', 'PostAssists', 'PostSteals',
        'PostBlocks', 'PostTurnovers', 'PostPF', 'PostfgAttempted',
        'PostfgMade', 'PostftAttempted', 'PostftMade', 'PostthreeAttempted',
        'PostthreeMade', 'PostDQ',
    )

    # Who is on the team in the target season.
    on_roster = (players_teams['tmID'] == team_id) & (players_teams['year'] == year)
    roster_ids = players_teams[on_roster]['playerID']

    # Previous-season stat rows and bio rows for that roster.
    played_last_season = (
        players_teams['playerID'].isin(roster_ids)
        & (players_teams['year'] == year - 1)
    )
    last_season = players_teams[played_last_season]
    roster_bio = players[players['bioID'].isin(roster_ids)]

    # Nothing to aggregate: hand the table back as-is.
    # TODO: maybe fall back to average rookie values here?
    if last_season.empty:
        print(f"No player data for team {team_id} in year {year - 1}. Using default values.")
        return teams

    # Seed the projection with the team's own previous-season row.
    came_before = (teams['tmID'] == team_id) & (teams['year'] == year - 1)
    projected = teams.loc[came_before].copy()
    if projected.empty:
        # New team: start from a bare row holding only the identifiers.
        print(f"No team data for team {team_id} in year {year - 1}. Using default values.")
        projected = pd.DataFrame([{'tmID': team_id, 'year': year - 1}])
    projected['year'] = year

    # Player-derived columns: roster averages, then per-stat totals.
    projected['player_average_height'] = roster_bio['height'].mean()
    projected['player_average_weight'] = roster_bio['weight'].mean()
    for stat in regular_season + post_season:
        projected[f'player_total_{stat}'] = last_season[stat].sum()

    # Rebuild the projection as a one-row frame from its squeezed form,
    # then append it to a copy of the table with a fresh 0..n-1 index.
    new_row = pd.DataFrame([projected.squeeze()])
    return pd.concat([teams, new_row], ignore_index=True)

# Demo: project Seattle ('SEA') for year 7, then print every SEA / year-7 row
# of the result (the appended projection plus any row already in the table).
teams_updated = predict_team_year_stats('SEA', 7, teams, players_teams, players)

print(teams_updated[(teams_updated['year'] == 7) & (teams_updated['tmID'] == 'SEA')])



     year tmID franchID confID  rank playoff firstRound semis finals  o_fgm  \
125     7  SEA      SEA     WE     4       Y          L   NaN    NaN    961   
142     7  SEA      SEA     WE     2       Y          L   NaN    NaN    906   

     ...  player_total_PostBlocks  player_total_PostTurnovers  \
125  ...                     16.0                        32.0   
142  ...                      5.0                        30.0   

     player_total_PostPF  player_total_PostfgAttempted  \
125                 49.0                         180.0   
142                 55.0                         151.0   

     player_total_PostfgMade  player_total_PostftAttempted  \
125                     68.0                          78.0   
142                     59.0                          45.0   

     player_total_PostftMade  player_total_PostthreeAttempted  \
125                     63.0                             64.0   
142                     42.0                             46.0   

     pla