In [None]:
import pandas as pd
import numpy as np

# Load the raw inputs: historical match results and per-player status rows.
# NOTE(review): the file names suggest the results span 1995-2020 while the
# player status covers 2022-2023 -- confirm these seasons are meant to be joined.
games = pd.read_csv('../data/la-liga-results-1995-2020.csv')
players = pd.read_csv('../data/player-status-2022-2023.csv')

# calculate team form
def calculate_team_form(data, n_games=5):
    """
    Add rolling form columns for the home and away team of every match.

    Each result is scored Win = 1, Draw = 0.5, Loss = 0. A team's form is the
    mean score of its most recent n_games matches, counting home and away
    fixtures alike, up to and including the match on the current row.

    Rows are walked once, top to bottom, and both teams of a match are
    updated together, so every team's window fills in row order. Rows are
    assumed to already be in chronological order (this is not checked here).

    NOTE(review): the window contains the current match's own result, so the
    columns are post-match values; feeding them straight into a model that
    predicts the same match would leak the outcome.

    Parameters:
        data: DataFrame with 'HomeTeam', 'AwayTeam', 'FTHG' and 'FTAG' columns.
        n_games: window size (default 5). Values below 1 keep every window
            empty, which yields a form of 0 everywhere.

    Returns:
        The same DataFrame object, modified in place with float
        'home_team_form' and 'away_team_form' columns.
    """
    from collections import defaultdict, deque

    # deque(maxlen=...) rejects negatives, so clamp to an always-empty window.
    window_size = max(int(n_games), 0)
    recent_scores = defaultdict(lambda: deque(maxlen=window_size))

    def record(team, score):
        # Push the newest score (the deque evicts the oldest) and return the mean.
        window = recent_scores[team]
        window.append(score)
        return sum(window) / len(window) if window else 0

    home_form = []
    away_form = []
    fixtures = zip(data['HomeTeam'], data['AwayTeam'], data['FTHG'], data['FTAG'])
    for home_team, away_team, home_goals, away_goals in fixtures:
        if home_goals > away_goals:
            home_score, away_score = 1, 0
        elif away_goals > home_goals:
            home_score, away_score = 0, 1
        elif home_goals == away_goals:
            home_score = away_score = 0.5
        else:
            # Scores that do not compare (e.g. NaN) count as 0 for both sides.
            home_score = away_score = 0
        home_form.append(record(home_team, home_score))
        away_form.append(record(away_team, away_score))

    # Plain arrays are assigned positionally, independent of the frame's index.
    data['home_team_form'] = np.array(home_form, dtype=float)
    data['away_team_form'] = np.array(away_form, dtype=float)

    return data

# Add the rolling 'home_team_form' / 'away_team_form' columns to `games`,
# using the default five-game window.
games = calculate_team_form(games)

# integrate player data
def add_player_statistics(game_data, player_data):
    """
    Attach per-team player aggregates to every match row.

    For both the home and the away side, the team name is looked up in two
    per-team aggregates built from player_data (grouped by 'Team-name'):
    the mean 'Rating' and the number of players flagged as injured. Team
    names absent from player_data come back as NaN from the lookup.

    Both frames are modified in place: player_data gains 'Key_Player'
    (Rating above 7.0) and 'Injured' (1/0) columns, and game_data gains the
    four '*_team_rating' / '*_team_injuries' columns. game_data is returned.
    """
    # Flag high-rated players; the flag is stored on player_data only.
    player_data['Key_Player'] = player_data['Rating'].gt(7.0)

    rating_by_team = player_data.groupby('Team-name')['Rating'].mean()
    rating_targets = (
        ('HomeTeam', 'home_team_rating'),
        ('AwayTeam', 'away_team_rating'),
    )
    for team_column, target_column in rating_targets:
        game_data[target_column] = game_data[team_column].map(rating_by_team)

    # NOTE(review): this treats 'YC' as an injury-status column holding the
    # text 'injured' -- confirm against the player data schema.
    player_data['Injured'] = player_data['YC'].apply(
        lambda status: int(status == 'injured')
    )
    injured_by_team = player_data.groupby('Team-name')['Injured'].sum()
    injury_targets = (
        ('HomeTeam', 'home_team_injuries'),
        ('AwayTeam', 'away_team_injuries'),
    )
    for team_column, target_column in injury_targets:
        game_data[target_column] = game_data[team_column].map(injured_by_team)

    return game_data

# Attach per-team average rating and injury count columns to `games`.
# Teams with no matching 'Team-name' in `players` receive NaN from the lookup;
# `players` itself also gains the 'Key_Player' and 'Injured' columns.
games = add_player_statistics(games, players)

# Write the enriched results into the same data directory as the inputs,
# leaving the DataFrame index out of the CSV.
games.to_csv('../data/la-liga-results-1995-2020-processed.csv', index=False)