In [None]:
import pandas as pd
import os
import re
import warnings
from statsbombpy import sb
import numpy as np

warnings.filterwarnings("ignore", category=UserWarning)

# StatsBomb open-data identifiers of the season to export.
# competition_id: La Liga - 11, Bundesliga - 9, Premier League - 2, Serie A - 12, Ligue 1 - 7
COMPETITION_ID = 9
SEASON_ID = 27

# Root folder of the export; one sub-folder per team is created below it.
OUTPUT_ROOT = "Bundesliga_2015-2016"

# Nominal match length used for the minutes-played estimate.
MATCH_LENGTH_MINUTES = 90

# Minimum carry length (in the units of the event `location` coordinates)
# for a carry to be counted as a "long carry".
MIN_LONG_CARRY_LENGTH = 10

# Flattened attribute columns that are read below.  The original code already
# guarded some of them with `in columns` checks, i.e. they are not guaranteed
# to be present for every match; missing ones are added as all-NaN columns so
# every filter simply matches nothing instead of raising KeyError.
OPTIONAL_EVENT_COLUMNS = (
    'substitution_replacement_id',
    'shot_outcome',
    'shot_statsbomb_xg',
    'pass_outcome',
    'pass_goal_assist',
    'pass_shot_assist',
    'pass_assisted_shot_id',
    'dribble_outcome',
    'carry_end_location',
    'foul_won_defensive',
    'duel_type',
    'duel_outcome',
)


def _with_columns(events, columns):
    """Return *events* with every name in *columns* present (NaN if it was missing)."""
    missing = [column for column in columns if column not in events.columns]
    if not missing:
        return events
    return events.assign(**{column: np.nan for column in missing})


def _starting_xi_ids(events, team):
    """Return the set of player ids listed in *team*'s first "Starting XI" event.

    An empty set is returned when the team has no such event or when its
    `tactics` value does not carry a `lineup` list.
    """
    rows = events[(events['type'] == "Starting XI") & (events['team'] == team)]
    if rows.empty:
        return set()
    tactics = rows['tactics'].iloc[0]
    lineup = tactics.get('lineup') if isinstance(tactics, dict) else None
    if not isinstance(lineup, list):
        return set()
    return {entry['player']['id'] for entry in lineup}


def _minutes_played(player_id, is_starting, substitutions):
    """Estimate the minutes *player_id* spent on the pitch.

    Starters begin at minute 0, everybody else at MATCH_LENGTH_MINUTES (so an
    unused substitute ends up with 0 minutes).  The first substitution event
    that brings the player on / takes the player off overrides the start /
    end minute.  Only substitution events are considered here.
    """
    start = 0 if is_starting else MATCH_LENGTH_MINUTES
    end = MATCH_LENGTH_MINUTES

    came_on = substitutions[substitutions['substitution_replacement_id'] == player_id]
    went_off = substitutions[substitutions['player_id'] == player_id]
    if not came_on.empty:
        start = came_on['minute'].iloc[0]
    if not went_off.empty:
        end = went_off['minute'].iloc[0]
    return int(end - start)


def _count_long_carries(carry_events, min_length=MIN_LONG_CARRY_LENGTH):
    """Count carries whose start-to-end distance is at least *min_length*.

    A plain loop is used on purpose: `DataFrame.apply(..., axis=1)` on an
    empty frame returns a DataFrame, which cannot be assigned to a single
    column, so players without any carry would break the export.
    """
    return sum(
        1
        for start, end in zip(carry_events['location'], carry_events['carry_end_location'])
        if np.linalg.norm(np.array(end) - np.array(start)) >= min_length
    )


def _player_match_stats(player_id, player_name, minutes_played, events):
    """Build the statistics row of one player for one match.

    *events* is the full event table of the match (with the optional columns
    present).  The key order of the returned dict defines the CSV column order.
    """
    events_player = events[events['player_id'] == player_id]

    def of_type(event_type):
        # All events of the given type performed by this player.
        return events_player[events_player['type'] == event_type]

    player_stats = {
        'player_id': player_id,
        'player_name': player_name,
        'minutes_played': minutes_played,
    }

    # 1-4. Shots, goals, xG, shots on target.
    shots = of_type("Shot")
    player_stats['shot_count'] = shots.shape[0]
    player_stats['goal_count'] = shots[shots['shot_outcome'] == "Goal"].shape[0]
    player_stats['xG'] = float(shots['shot_statsbomb_xg'].sum())
    player_stats['on_target'] = shots[
        shots['shot_outcome'].isin(["Goal", "Saved", "Saved To Post"])
    ].shape[0]

    # 5-6. Pass attempts and successful passes (attempts minus failed outcomes).
    passes = of_type("Pass")
    pass_count = passes.shape[0]
    failed_passes = passes[
        passes['pass_outcome'].isin(["Incomplete", "Out", "Pass Offside"])
    ].shape[0]
    player_stats['pass_count'] = pass_count
    player_stats['successfull_pass_count'] = pass_count - failed_passes

    # 7-8. Assists and key passes (goal assists plus shot assists).
    assists = passes[passes['pass_goal_assist'] == True].shape[0]  # noqa: E712
    shot_assists = passes[passes['pass_shot_assist'] == True].shape[0]  # noqa: E712
    player_stats['assists'] = assists
    player_stats['key_passes'] = assists + shot_assists

    # 9. xA: summed xG of the shots this player's passes are linked to.
    assisted_shot_ids = passes.loc[
        passes['pass_assisted_shot_id'].notna(), 'pass_assisted_shot_id'
    ].tolist()
    assisted_shots = events[events['id'].isin(assisted_shot_ids)]
    player_stats['xA'] = float(assisted_shots['shot_statsbomb_xg'].sum())

    # 10-11. Dribbles and successful dribbles.
    dribbles = of_type("Dribble")
    player_stats['dribble_count'] = dribbles.shape[0]
    player_stats['successfull_dribbles'] = dribbles[
        dribbles['dribble_outcome'] == "Complete"
    ].shape[0]

    # 12. Long carries.
    player_stats['long_carries'] = _count_long_carries(of_type("Carry"))

    # 13. Fouls committed.
    player_stats['foul_committed_count'] = of_type("Foul Committed").shape[0]

    # 14-15. Fouls suffered, and those flagged as won while defending.
    fouls_won = of_type("Foul Won")
    player_stats['fouls_suffered_count'] = fouls_won.shape[0]
    player_stats['foul_won_defensive'] = fouls_won[
        fouls_won['foul_won_defensive'] == True  # noqa: E712
    ].shape[0]

    # 16-17. Duels and duels won.
    duels = of_type("Duel")
    player_stats['duel_count'] = duels.shape[0]
    player_stats['won_duels'] = duels[
        duels['duel_outcome'].isin(["Won", "Success", "Success In Play", "Success Out"])
    ].shape[0]

    # 18-22. Simple per-type event counts.
    player_stats['ball_recovery_count'] = of_type("Ball Recovery").shape[0]
    player_stats['dispossessed_count'] = of_type("Dispossessed").shape[0]
    player_stats['clearance_count'] = of_type("Clearance").shape[0]
    player_stats['interception_count'] = of_type("Interception").shape[0]
    player_stats['block_count'] = of_type("Block").shape[0]

    # 23-24. Tackles (duels of type "Tackle") and tackles won.
    tackles = duels[duels['duel_type'] == "Tackle"]
    player_stats['tackle_count'] = tackles.shape[0]
    player_stats['successful_tackles'] = tackles[tackles['duel_outcome'] == "Won"].shape[0]

    # 25. Pressures.
    player_stats['pressure_count'] = of_type("Pressure").shape[0]

    return player_stats


matches = sb.matches(competition_id=COMPETITION_ID, season_id=SEASON_ID)

# Use both columns so a team is never missed, and sort for a reproducible order.
teams = sorted(set(matches['home_team']) | set(matches['away_team']))

for team in teams:

    team_matches = matches[
        (matches['home_team'] == team) | (matches['away_team'] == team)
    ].sort_values(by='match_date')
    team_match_ids = team_matches['match_id'].tolist()

    # Output folder: team name without punctuation, spaces replaced by "_".
    safe_team = re.sub(r'[^\w\s]', '', team).replace(" ", "_")
    output_dir = f"{OUTPUT_ROOT}/{safe_team}"
    os.makedirs(output_dir, exist_ok=True)

    # List for storing the aggregated season statistics
    season_stats = []

    fixtures = zip(
        team_match_ids,
        team_matches['match_date'].tolist(),
        team_matches['home_team'].tolist(),
        team_matches['away_team'].tolist(),
    )
    for match_id, match_date, home_team, away_team in fixtures:
        events = _with_columns(sb.events(match_id=match_id), OPTIONAL_EVENT_COLUMNS)
        print(match_id, match_date, home_team, away_team, ":")

        lineup = sb.lineups(match_id=match_id)
        squad = lineup[team]
        player_list = list(zip(squad.player_id.to_list(), squad.player_name.to_list()))

        # Per-match invariants, computed once instead of once per player.
        starting_ids = _starting_xi_ids(events, team)
        substitution_events = events[events['type'] == "Substitution"]

        # List for storing the statistics of a given match
        match_stats = []

        for player_id, player_name in player_list:
            minutes_played = _minutes_played(
                player_id, player_id in starting_ids, substitution_events
            )
            # Players who never entered the pitch get no row.
            if minutes_played <= 0:
                continue
            match_stats.append(
                _player_match_stats(player_id, player_name, minutes_played, events)
            )

        # We write the match data to a CSV file
        df_match_stats = pd.DataFrame(match_stats)
        match_date_str = match_date.replace("-", "")
        match_filename = f"{output_dir}/match_{match_id}_{match_date_str}.csv"
        df_match_stats.to_csv(match_filename, index=False)

        # We add the match data to the season data
        season_stats.extend(match_stats)

    # Nothing to aggregate (grouping an empty frame would raise KeyError).
    if not season_stats:
        continue

    # We create a DataFrame from the season data
    df_season_stats = pd.DataFrame(season_stats)

    # We aggregate the statistics for each player
    season_aggregate_stats = df_season_stats.groupby(
        ['player_id', 'player_name'], as_index=False
    ).sum(numeric_only=True)

    # We write the aggregated statistics to a CSV file
    season_filename = f"{output_dir}/{safe_team}_season_stats.csv"
    season_aggregate_stats.to_csv(season_filename, index=False)
