In [None]:
# Use this cell for a single match

import pandas as pd
import ast

# Load the dataset.
# low_memory=False lets pandas infer each column's dtype from the whole file instead of
# chunk by chunk, which avoids mixed-type columns (and the DtypeWarning that reports them).
file_path = "Italy-Serie-A/2024-2025.csv"
df = pd.read_csv(file_path, low_memory=False)

# Extract the season from the file name, e.g. ".../2024-2025.csv" -> "2024-2025"
season = file_path.split("/")[-1].replace(".csv", "")

# Filter for the specific match
match_id = 1834868
match_df = df[df['matchId'] == match_id]

# The PreMatch FormationSet row of each side carries the list of involved players.
formation_rows = match_df[(match_df['period'] == 'PreMatch') &
                          (match_df['type'] == 'FormationSet')]
home_formation_rows = formation_rows[formation_rows['h_a'] == 'h']
away_formation_rows = formation_rows[formation_rows['h_a'] == 'a']
if home_formation_rows.empty or away_formation_rows.empty:
    # Fail with a readable message instead of a bare IndexError from .iloc[0]
    raise ValueError(
        f"Match {match_id} in {file_path} has no PreMatch FormationSet row for both teams"
    )
home_formation = home_formation_rows.iloc[0]
away_formation = away_formation_rows.iloc[0]

# Parse qualifiers (stored as the text of a Python list of dicts) to get involved players
home_qualifiers = ast.literal_eval(home_formation['qualifiers'])
away_qualifiers = ast.literal_eval(away_formation['qualifiers'])

# Get involved players and take only the first 11 for starters
home_involved_players = next(q['value'] for q in home_qualifiers if q['type'] == 'InvolvedPlayers')
away_involved_players = next(q['value'] for q in away_qualifiers if q['type'] == 'InvolvedPlayers')

home_starters = list(map(int, home_involved_players.split(',')[:11]))
away_starters = list(map(int, away_involved_players.split(',')[:11]))

# Total match duration. Substitution and red-card times below are read from
# 'expandedMinute', so the duration is taken from the same column (the second-half 'End'
# event, as the multi-match cells of this notebook do). 'maxMinute' is only the fallback
# when that event is missing.
end_events = match_df[(match_df['period'] == 'SecondHalf') & (match_df['type'] == 'End')]
if end_events.empty:
    total_match_minutes = home_formation['maxMinute']
else:
    total_match_minutes = end_events.iloc[0]['expandedMinute']

# Filter substitution events
subs_off = match_df[match_df['type'] == 'SubstitutionOff']
subs_on = match_df[match_df['type'] == 'SubstitutionOn']

# Filter red card events
red_card_events = match_df[match_df['redCard'] == True]

# Player names keyed by playerId, keeping the FIRST occurrence of each id.
# Rows without a playerId are dropped: NaN never compares equal to itself, so each such
# row would otherwise add its own useless key to the dictionary.
named_rows = df.dropna(subset=['playerId']).drop_duplicates(subset='playerId', keep='first')
player_name_dict = dict(zip(named_rows['playerId'], named_rows['playerName']))

# Minute of the first substitution-off / red-card event per player in this match
first_sub_off = subs_off.drop_duplicates(subset='playerId', keep='first')
sub_off_minute = dict(zip(first_sub_off['playerId'], first_sub_off['expandedMinute']))
first_red_card = red_card_events.drop_duplicates(subset='playerId', keep='first')
red_card_minute = dict(zip(first_red_card['playerId'], first_red_card['expandedMinute']))

# Initialize results list and the set of players already recorded
results = []
seen_player_ids = set()

# Process home and away starters with the same rule: a starter's spell ends at the
# earliest of the final whistle, their substitution and their red card. Taking the
# minimum keeps a red card shown after the player already left from inflating the total.
for formation, starters in ((home_formation, home_starters), (away_formation, away_starters)):
    for player_id in starters:
        minutes_played = min(
            total_match_minutes,
            sub_off_minute.get(player_id, total_match_minutes),
            red_card_minute.get(player_id, total_match_minutes),
        )
        results.append({
            'season': season,
            'matchId': formation['matchId'],
            'playerId': player_id,
            'playerName': player_name_dict.get(player_id, 'Unknown'),
            'teamId': formation['teamId'],
            'minutesPlayed': minutes_played
        })
        seen_player_ids.add(player_id)

# Process substitute players
for _, sub_on_event in subs_on.iterrows():
    if pd.isna(sub_on_event['playerId']):
        continue  # cannot attribute minutes to an unknown player

    # The column is read as float (it contains NaN); store ids as int like the starters
    player_id = int(sub_on_event['playerId'])

    # Add player only if not already added (prevent duplicates)
    if player_id in seen_player_ids:
        continue

    exit_minute = min(
        total_match_minutes,
        sub_off_minute.get(player_id, total_match_minutes),
        red_card_minute.get(player_id, total_match_minutes),
    )
    results.append({
        'season': season,
        'matchId': sub_on_event['matchId'],
        'playerId': player_id,
        'playerName': player_name_dict.get(player_id, 'Unknown'),
        'teamId': sub_on_event['teamId'],
        'minutesPlayed': exit_minute - sub_on_event['expandedMinute']
    })
    seen_player_ids.add(player_id)

# Convert results to a DataFrame
result_df = pd.DataFrame(results)

# Display the resulting table
print(result_df)

  df = pd.read_csv(file_path)


       season  matchId  playerId             playerName  teamId  minutesPlayed
0   2024-2025  1834868  447570.0            Zion Suzuki   24341             95
1   2024-2025  1834868  447492.0         Woyo Coulibaly   24341             95
2   2024-2025  1834868  343512.0        Emanuele Valeri   24341             95
3   2024-2025  1834868  367177.0             Simon Sohm   24341             78
4   2024-2025  1834868  396424.0          Botond Balogh   24341             95
5   2024-2025  1834868  447486.0     Alessandro Circati   24341             95
6   2024-2025  1834868  329769.0             Dennis Man   24341             67
7   2024-2025  1834868  362683.0         Nahuel Estévez   24341             95
8   2024-2025  1834868  447489.0        Ange-Yoan Bonny   24341             67
9   2024-2025  1834868  363978.0         Adrián Bernabé   24341             82
10  2024-2025  1834868  400090.0       Valentin Mihaila   24341             67
11  2024-2025  1834868  106277.0     Pietro Terracci

In [None]:
# This cell computes the played time for each player on each match in the entire dataset (spread out over multiple csv files)

import pandas as pd
import ast
import os

def compute_match_minutes(match_df, match_id, season, player_name_dict):
    """Compute the minutes played by every player who appeared in one match.

    Parameters
    ----------
    match_df : pandas.DataFrame
        Event rows of a single match. The columns read are 'period', 'type', 'h_a',
        'qualifiers', 'teamId', 'playerId', 'expandedMinute' and 'redCard'.
    match_id
        Identifier written to the 'matchId' field of every returned record.
    season : str
        Season label written to the 'season' field of every returned record.
    player_name_dict : dict
        Maps playerId to playerName; ids that are missing are reported as 'Unknown'.

    Returns
    -------
    list of dict
        One record per starter and per substitute who came on, with the keys 'season',
        'matchId', 'playerId', 'playerName', 'teamId' and 'minutesPlayed'. The list is
        empty when the match lacks a second-half 'End' event, a PreMatch 'FormationSet'
        row for either side, or an 'InvolvedPlayers' qualifier.
    """
    # Full duration of the match, taken from the 'End' event of the second half
    end_event = match_df[(match_df['period'] == 'SecondHalf') & (match_df['type'] == 'End')]
    if end_event.empty:
        return []
    total_match_minutes = end_event.iloc[0]['expandedMinute']

    # The PreMatch FormationSet row of each side carries the list of involved players
    formation_rows = match_df[(match_df['period'] == 'PreMatch') &
                              (match_df['type'] == 'FormationSet')]
    home_formation = formation_rows[formation_rows['h_a'] == 'h']
    away_formation = formation_rows[formation_rows['h_a'] == 'a']
    if home_formation.empty or away_formation.empty:
        return []

    lineups = []  # (teamId, [starter ids]) for home, then away
    for formation in (home_formation.iloc[0], away_formation.iloc[0]):
        # 'qualifiers' holds the text of a Python list of dicts
        qualifiers = ast.literal_eval(formation['qualifiers'])
        involved_players = next(
            (q['value'] for q in qualifiers if q['type'] == 'InvolvedPlayers'), None
        )
        if involved_players is None:
            return []
        # Only the first 11 listed players are the starters
        starters = list(map(int, involved_players.split(',')[:11]))
        lineups.append((formation['teamId'], starters))

    def first_minute_by_player(events):
        # Minute of each player's first event in `events`
        first_events = events.drop_duplicates(subset='playerId', keep='first')
        return dict(zip(first_events['playerId'], first_events['expandedMinute']))

    sub_off_minute = first_minute_by_player(match_df[match_df['type'] == 'SubstitutionOff'])
    red_card_minute = first_minute_by_player(match_df[match_df['redCard'] == True])

    def exit_minute(player_id):
        # A spell ends at the earliest of the final whistle, the substitution and the red
        # card; the minimum keeps a red card shown after the player already left (or
        # after the final whistle) from inflating the total.
        return min(
            total_match_minutes,
            sub_off_minute.get(player_id, total_match_minutes),
            red_card_minute.get(player_id, total_match_minutes),
        )

    match_results = []
    seen_player_ids = set()

    # Starters are on the pitch from minute 0
    for team_id, starters in lineups:
        for player_id in starters:
            match_results.append({
                'season': season,
                'matchId': match_id,
                'playerId': player_id,
                'playerName': player_name_dict.get(player_id, 'Unknown'),
                'teamId': team_id,
                'minutesPlayed': exit_minute(player_id)
            })
            seen_player_ids.add(player_id)

    # Substitutes are on the pitch from the minute of their SubstitutionOn event
    for _, sub_on_event in match_df[match_df['type'] == 'SubstitutionOn'].iterrows():
        if pd.isna(sub_on_event['playerId']):
            continue  # cannot attribute minutes to an unknown player

        # The column is read as float (it contains NaN); store ids as int like the starters
        player_id = int(sub_on_event['playerId'])

        # Add player only if not already added (prevent duplicates)
        if player_id in seen_player_ids:
            continue

        match_results.append({
            'season': season,
            'matchId': match_id,
            'playerId': player_id,
            'playerName': player_name_dict.get(player_id, 'Unknown'),
            'teamId': sub_on_event['teamId'],
            'minutesPlayed': exit_minute(player_id) - sub_on_event['expandedMinute']
        })
        seen_player_ids.add(player_id)

    return match_results


# Path to the main directory containing subfolders with CSV files
main_dir = os.getcwd()

# Output file to save results
output_file = 'minutes-played.csv'

# Column order of the output file; fixed so that an empty result can never produce a
# header-less or column-less file.
OUTPUT_COLUMNS = ['season', 'matchId', 'playerId', 'playerName', 'teamId', 'minutesPlayed']

# Check if output file exists.
# NOTE: an existing file is appended to, so running this cell twice duplicates its rows;
# delete the file first for a clean rebuild.
file_exists = os.path.isfile(output_file)

# Iterate through each subfolder in the main directory (sorted for a reproducible order)
for subfolder in sorted(os.listdir(main_dir)):
    subfolder_path = os.path.join(main_dir, subfolder)

    # Only real data folders: skip plain files and hidden folders such as .git
    if subfolder.startswith('.') or not os.path.isdir(subfolder_path):
        continue
    print(f"Processing folder: {subfolder}")

    # Iterate through all CSV files in the subfolder
    for file in sorted(os.listdir(subfolder_path)):
        if not file.endswith('.csv'):
            continue
        file_path = os.path.join(subfolder_path, file)
        print(f"Processing file: {file}")

        # Extract the season from the file name
        season = file.replace(".csv", "")

        # low_memory=False lets pandas infer each column's dtype from the whole file
        # instead of chunk by chunk, avoiding mixed-type columns and their DtypeWarning.
        df = pd.read_csv(file_path, low_memory=False)

        # Player names keyed by playerId, keeping the FIRST occurrence of each id.
        # Rows without a playerId are dropped: NaN never compares equal to itself, so
        # each such row would otherwise add its own useless key to the dictionary.
        named_rows = df.dropna(subset=['playerId']).drop_duplicates(subset='playerId', keep='first')
        player_name_dict = dict(zip(named_rows['playerId'], named_rows['playerName']))

        # One pass over the file, grouped by match, instead of re-filtering the whole
        # frame once per match. sort=False keeps the matches in order of first appearance.
        all_results = []
        for match_id, match_df in df.groupby('matchId', sort=False):
            all_results.extend(compute_match_minutes(match_df, match_id, season, player_name_dict))

        # Convert results to a DataFrame
        result_df = pd.DataFrame(all_results, columns=OUTPUT_COLUMNS)

        # Append to the output file, writing the header only when the file is created
        if not result_df.empty:
            result_df.to_csv(output_file, index=False, header=not file_exists, mode='a')
            file_exists = True  # Now the file exists

        # Release memory for the current DataFrame
        del df, result_df
        print(f"Finished processing file: {file}")

# Inform user that all files were processed
print("All files processed successfully.")

Processing folder: .git
Processing folder: Belgium-Jupiler-Pro-League
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Brazil-Brasileirão
Processing file: 2020.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020.csv
Processing file: 2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021.csv
Processing file: 2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022.csv
Processing file: 2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023.csv
Processing file: 2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024.csv
Processing folder: England-Championship
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-League-One
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-League-Two
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: England-Premier-League
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: France-Ligue-1
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Germany-2-Bundesliga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Germany-Bundesliga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Italy-Serie-A
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Netherlands-Eredivisie
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Portugal-Liga-Portugal
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Scotland-Premiership
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Spain-LaLiga
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: Turkey-SuperLig
Processing file: 2020-2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020-2021.csv
Processing file: 2021-2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021-2022.csv
Processing file: 2022-2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022-2023.csv
Processing file: 2023-2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023-2024.csv
Processing file: 2024-2025.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024-2025.csv
Processing folder: USA-Major-League-Soccer
Processing file: 2020.csv


  df = pd.read_csv(file_path)


Finished processing file: 2020.csv
Processing file: 2021.csv


  df = pd.read_csv(file_path)


Finished processing file: 2021.csv
Processing file: 2022.csv


  df = pd.read_csv(file_path)


Finished processing file: 2022.csv
Processing file: 2023.csv


  df = pd.read_csv(file_path)


Finished processing file: 2023.csv
Processing file: 2024.csv


  df = pd.read_csv(file_path)


Finished processing file: 2024.csv
All files processed successfully.


In [None]:
# This cell computes played time for each match in a single csv file

# Load the dataset.
# low_memory=False lets pandas infer each column's dtype from the whole file instead of
# chunk by chunk, which avoids mixed-type columns (and the DtypeWarning that reports them).
file_path = "Italy-Serie-A/2024-2025.csv"
df = pd.read_csv(file_path, low_memory=False)

# Extract the season from the file name, e.g. ".../2024-2025.csv" -> "2024-2025"
season = file_path.split("/")[-1].replace(".csv", "")

# Player names keyed by playerId, keeping the FIRST occurrence of each id.
# Rows without a playerId are dropped: NaN never compares equal to itself, so each such
# row would otherwise add its own useless key to the dictionary.
named_rows = df.dropna(subset=['playerId']).drop_duplicates(subset='playerId', keep='first')
player_name_dict = dict(zip(named_rows['playerId'], named_rows['playerName']))

# Initialize an empty list for all match results
all_results = []

# One pass over the file, grouped by match, instead of re-filtering the whole frame once
# per match. sort=False keeps the matches in order of first appearance.
for match_id, match_df in df.groupby('matchId', sort=False):
    # Full duration of the match, taken from the 'End' event of the second half
    end_event = match_df[(match_df['period'] == 'SecondHalf') &
                         (match_df['type'] == 'End')]
    if end_event.empty:
        continue  # Skip this match if the "End" event is missing
    total_match_minutes = end_event.iloc[0]['expandedMinute']

    # Substitution and red card events for the current match
    subs_off = match_df[match_df['type'] == 'SubstitutionOff']
    subs_on = match_df[match_df['type'] == 'SubstitutionOn']
    red_card_events = match_df[match_df['redCard'] == True]

    # The PreMatch FormationSet row of each side carries the list of involved players
    formation_rows = match_df[(match_df['period'] == 'PreMatch') &
                              (match_df['type'] == 'FormationSet')]
    home_formation_rows = formation_rows[formation_rows['h_a'] == 'h']
    away_formation_rows = formation_rows[formation_rows['h_a'] == 'a']
    if home_formation_rows.empty or away_formation_rows.empty:
        continue  # Skip if no formation info
    home_formation = home_formation_rows.iloc[0]
    away_formation = away_formation_rows.iloc[0]

    # Parse qualifiers (stored as the text of a Python list of dicts)
    home_qualifiers = ast.literal_eval(home_formation['qualifiers'])
    away_qualifiers = ast.literal_eval(away_formation['qualifiers'])

    # Get involved players and take only the first 11 for starters
    home_involved_players = next(
        (q['value'] for q in home_qualifiers if q['type'] == 'InvolvedPlayers'), None)
    away_involved_players = next(
        (q['value'] for q in away_qualifiers if q['type'] == 'InvolvedPlayers'), None)
    if home_involved_players is None or away_involved_players is None:
        continue  # Skip if the line-up is not recorded

    home_starters = list(map(int, home_involved_players.split(',')[:11]))
    away_starters = list(map(int, away_involved_players.split(',')[:11]))

    # Minute of the first substitution-off / red-card event per player in this match
    first_sub_off = subs_off.drop_duplicates(subset='playerId', keep='first')
    sub_off_minute = dict(zip(first_sub_off['playerId'], first_sub_off['expandedMinute']))
    first_red_card = red_card_events.drop_duplicates(subset='playerId', keep='first')
    red_card_minute = dict(zip(first_red_card['playerId'], first_red_card['expandedMinute']))

    # Results of the current match and the set of players already recorded
    match_results = []
    seen_player_ids = set()

    # Home and away starters follow the same rule: a spell ends at the earliest of the
    # final whistle, the substitution and the red card. Taking the minimum keeps a red
    # card shown after the player already left from inflating the total.
    for formation, starters in ((home_formation, home_starters), (away_formation, away_starters)):
        for player_id in starters:
            minutes_played = min(
                total_match_minutes,
                sub_off_minute.get(player_id, total_match_minutes),
                red_card_minute.get(player_id, total_match_minutes),
            )
            match_results.append({
                'season': season,
                'matchId': match_id,
                'playerId': player_id,
                'playerName': player_name_dict.get(player_id, 'Unknown'),
                'teamId': formation['teamId'],
                'minutesPlayed': minutes_played
            })
            seen_player_ids.add(player_id)

    # Process substitute players (subs_on already contains only this match's events)
    for _, sub_on_event in subs_on.iterrows():
        if pd.isna(sub_on_event['playerId']):
            continue  # cannot attribute minutes to an unknown player

        # The column is read as float (it contains NaN); store ids as int like the starters
        player_id = int(sub_on_event['playerId'])

        # Add player only if not already added (prevent duplicates)
        if player_id in seen_player_ids:
            continue

        exit_minute = min(
            total_match_minutes,
            sub_off_minute.get(player_id, total_match_minutes),
            red_card_minute.get(player_id, total_match_minutes),
        )
        match_results.append({
            'season': season,
            'matchId': sub_on_event['matchId'],
            'playerId': player_id,
            'playerName': player_name_dict.get(player_id, 'Unknown'),
            'teamId': sub_on_event['teamId'],
            'minutesPlayed': exit_minute - sub_on_event['expandedMinute']
        })
        seen_player_ids.add(player_id)

    # Add the match results to the overall list
    all_results.extend(match_results)

# Convert results to a DataFrame
result_df = pd.DataFrame(all_results)

# Display the resulting table
print(result_df)
