In [36]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mplsoccer import Pitch, Sbopen, VerticalPitch
from matplotlib.text import Text

In [37]:
# Get Euros 2024 games
parser = Sbopen()
df_games = parser.match(competition_id=55, season_id=282)
df_games.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 52 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   match_id                         51 non-null     int64         
 1   match_date                       51 non-null     datetime64[ns]
 2   kick_off                         51 non-null     datetime64[ns]
 3   home_score                       51 non-null     int64         
 4   away_score                       51 non-null     int64         
 5   match_status                     51 non-null     object        
 6   match_status_360                 51 non-null     object        
 7   last_updated                     51 non-null     datetime64[ns]
 8   last_updated_360                 51 non-null     datetime64[ns]
 9   match_week                       51 non-null     int64         
 10  competition_id                   51 non-null     int64         


In [38]:
match_id = 3942819
df = parser.event(match_id)[0]

# Get dribbles
dribbles_mask = (df['type_name'] == 'Dribble')
df_dribbles = df.loc[dribbles_mask]

# Get players who made dribbles and init playing time df
df_playing_time = df_dribbles[['match_id', 'player_name', 'team_name']].drop_duplicates()
df_playing_time['playing_time'] = 0

# Get first and second half extra time
df_end = df[df['type_name'] == 'Half End']
end_first_half = df_end.iloc[0]['minute'] * 60 + df_end.iloc[0]['second']
first_half_extra_time = end_first_half - (45 * 60)
end_second_half = df_end.iloc[1]['minute'] * 60 + df_end.iloc[1]['second']
second_half_extra_time = end_second_half - (90 * 60)

df_playing_time.head(20)

Unnamed: 0,match_id,player_name,team_name,playing_time
89,3942819,Kyle Walker,England,0
169,3942819,Phil Foden,England,0
352,3942819,Jude Bellingham,England,0
1342,3942819,Cody Mathès Gakpo,Netherlands,0
1653,3942819,Denzel Dumfries,Netherlands,0
2051,3942819,Xavi Simons,Netherlands,0
2129,3942819,Nathan Aké,Netherlands,0
2279,3942819,Harry Kane,England,0
2538,3942819,Wout Weghorst,Netherlands,0
3417,3942819,Declan Rice,England,0


In [39]:
# Get all substitutions
subs_mask = (df['type_name'] == 'Substitution')
df_subs = df.loc[subs_mask, ['type_name', 'period', 'minute', 'second', 'player_name', 'team_name', 'sub_type_name', 'substitution_replacement_name']]
df_subs.head(20)

Unnamed: 0,type_name,period,minute,second,player_name,team_name,sub_type_name,substitution_replacement_name
1100,Substitution,1,34,24,Memphis Depay,Netherlands,,Joey Veerman
1699,Substitution,2,45,0,Kieran Trippier,England,,Luke Shaw
1700,Substitution,2,45,0,Donyell Malen,Netherlands,,Wout Weghorst
3014,Substitution,2,79,28,Phil Foden,England,,Cole Palmer
3015,Substitution,2,79,41,Harry Kane,England,,Ollie Watkins
3392,Substitution,2,92,13,Kobbie Mainoo,England,,Conor Gallagher
3393,Substitution,2,92,17,Bukayo Saka,England,,Ezri Konsa Ngoyo
3394,Substitution,2,92,40,Xavi Simons,Netherlands,,Brian Brobbey
3395,Substitution,2,92,42,Denzel Dumfries,Netherlands,,Joshua Zirkzee


In [40]:
# Update playing time for players and subs
for i, player in df_playing_time.iterrows():
    player_name = player['player_name']
    
    # Case 1: Player played the full game (not in any substitution list)
    if player_name not in pd.concat([df_subs['player_name'], df_subs['substitution_replacement_name']]).values:
        df_playing_time.loc[
            (df_playing_time['match_id'] == match_id) & 
            (df_playing_time['player_name'] == player['player_name']), 
            'playing_time'
        ] = (90 * 60) + first_half_extra_time + second_half_extra_time
        
    # Case 2: Player was both substituted in and out
    elif (player_name in df_subs['player_name'].values) and (player_name in df_subs['substitution_replacement_name'].values):
        print(f"{player_name} was both substituted in and out")

        if df_subs[df_subs['player_name'] == player_name]['period'].iloc[0] == 1:
            playing_time = end_second_half - (df_subs[df_subs['player_name'] == player_name]['minute'].iloc[0] * 60 + df_subs[df_subs['player_name'] == player_name]['second'].iloc[0])
        

        # time_in = df_subs[df_subs['player_name'] == player_name]['minute'].iloc[0] * 60 + df_subs[df_subs['player_name'] == player_name]['second'].iloc[0]
        # time_out = df_subs[df_subs['substitution_replacement_name'] == player_name]['minute'].iloc[0] * 60 + df_subs[df_subs['substitution_replacement_name'] == player_name]['second'].iloc[0]
        # playing_time = time_out - time_in
        # df_playing_time.loc[
        #     (df_playing_time['match_id'] == match_id) & 
        #     (df_playing_time['player_name'] == player['player_name']), 
        #     'playing_time'
        # ] = playing_time
        
    # Case 3: Player was only substituted out
    elif player_name in df_subs['player_name'].values:
        print(f"{player_name} was substituted out")
        
    # Case 4: Player was only substituted in
    elif player_name in df_subs['substitution_replacement_name'].values:
        print(f"{player_name} was substituted in")

    # Calculate minutes played for players coming off

    # if player in df_subs['player_name'].unique():
    #     df_sub = df_subs[df_subs['player_name'] == player]
    #     print(df_sub['player_name'])
    #     if df_sub['period'].iloc[0] == 1:
    #         playing_time = df_sub['minute'].iloc[0] * 60 + df_sub['second'].iloc[0]
    #     else:
    #         playing_time = end_first_half + (df_sub['minute'].iloc[0] * 60 + df_sub['second'].iloc[0])
    #     print(playing_time)
    #     df_playing_time = pd.concat([df_playing_time, pd.DataFrame([{
    #             'player_name': player, 
    #             'team_name': df_sub['team_name'].iloc[0], 
    #             'playing_time': playing_time
    #         }])],
    #         ignore_index=True
    #     )

df_playing_time.head(20)

Phil Foden was substituted out
Denzel Dumfries was substituted out
Xavi Simons was substituted out
Harry Kane was substituted out
Wout Weghorst was substituted in


Unnamed: 0,match_id,player_name,team_name,playing_time
89,3942819,Kyle Walker,England,3080
169,3942819,Phil Foden,England,0
352,3942819,Jude Bellingham,England,3080
1342,3942819,Cody Mathès Gakpo,Netherlands,3080
1653,3942819,Denzel Dumfries,Netherlands,0
2051,3942819,Xavi Simons,Netherlands,0
2129,3942819,Nathan Aké,Netherlands,3080
2279,3942819,Harry Kane,England,0
2538,3942819,Wout Weghorst,Netherlands,0
3417,3942819,Declan Rice,England,3080


In [None]:
# Create empty dribbles dataframe
driblles_columns = ['match_id', 'period', 'timestamp', 'minute', 'second', 'possession', 'duration', 'match_id', 'type_name', 'outcome_name', 'team_name', 'player_name', 'x', 'y']
df_dribbles = pd.DataFrame(columns=driblles_columns)

# Create empty players dataframe to collect player names and minutes played
players_columns = ['player_name', 'team_name', 'minutes_played']
df_players = pd.DataFrame(columns=players_columns)

# Loop through matches
for match_id in df_games['match_id']:
    # Filter for dribbles
    df = parser.event(match_id)[0]
    dribble_events = df[df['type_name'] == 'Dribble']

    # Filter for substitutions

    df_dribbles = pd.concat([df_dribbles, dribble_events[driblles_columns]], ignore_index=True)

df_dribbles.info()