# &emsp; Exploring [StatsBomb](https://github.com/statsbomb/statsbombpy)

<br>
<br>


In [1]:
# ====================================== USER INTERACTION ============================================== #

# --- 'os' is imported here (and not in the library cell below) because this cell runs first:
import os

# --- Set your directory to the main folder (for exporting data).
# --- If the environment variable SOCCER_CAUSALITY_DIR is set, it takes precedence over the default path,
# --- so the notebook can be run on another machine without editing this cell:
directory = os.environ.get('SOCCER_CAUSALITY_DIR', '/Users/maximilian/Dropbox/Max/52_SoccerCausality')

# --- Export the Data? (if True, the collected data is written as CSV below 'directory')
do__export = True

# ====================================== USER INTERACTION ============================================== #

<br>

## &emsp; 0. Auxiliaries

In [3]:
# --- The usual libraries:
import pandas as pd
import numpy as np
from tqdm import tqdm

# --- Statsbomb:
from statsbombpy import sb


# --- Plotting
import seaborn as sns
#sns.set_style(style='darkgrid') # --- darkgrid; ticks; whitegrid
import matplotlib.pyplot as plt


import warnings
warnings.filterwarnings('ignore')

<br>

## &emsp; 1. Competitions

In [4]:
# =================================== 1.1 Get Competitions =================================== #

# --- Table of the available competitions/seasons. The cells below read its columns
# --- 'competition_name', 'season_name', 'competition_id' and 'season_id'.
comps = sb.competitions()


In [5]:
# =================================== 1.2 Available Competitions =================================== #

# --- Distinct competition names, in order of first appearance in 'comps':
comps['competition_name'].unique()

array(['1. Bundesliga', 'African Cup of Nations', 'Champions League',
       'Copa America', 'Copa del Rey', "FA Women's Super League",
       'FIFA U20 World Cup', 'FIFA World Cup', 'Indian Super league',
       'La Liga', 'Liga Profesional', 'Ligue 1', 'Major League Soccer',
       'North American League', 'NWSL', 'Premier League', 'Serie A',
       'UEFA Euro', 'UEFA Europa League', "UEFA Women's Euro",
       "Women's World Cup"], dtype=object)

In [6]:
# =================================== 1.3 Available Seasons for selected Competitions =================================== #

# --- Print, for every competition, the season names listed for it in 'comps':
competition_names = comps['competition_name']
for league in competition_names.unique():
    league_seasons = comps.loc[competition_names == league, 'season_name'].values
    print(f"\nAvailable Seasons for '{league}': {league_seasons}")





Available Seasons for '1. Bundesliga': ['2023/2024' '2015/2016']

Available Seasons for 'African Cup of Nations': ['2023']

Available Seasons for 'Champions League': ['2018/2019' '2017/2018' '2016/2017' '2015/2016' '2014/2015' '2013/2014'
 '2012/2013' '2011/2012' '2010/2011' '2009/2010' '2008/2009' '2006/2007'
 '2004/2005' '2003/2004' '1999/2000' '1972/1973' '1971/1972' '1970/1971']

Available Seasons for 'Copa America': ['2024']

Available Seasons for 'Copa del Rey': ['1983/1984' '1982/1983' '1977/1978']

Available Seasons for 'FA Women's Super League': ['2020/2021' '2019/2020' '2018/2019']

Available Seasons for 'FIFA U20 World Cup': ['1979']

Available Seasons for 'FIFA World Cup': ['2022' '2018' '1990' '1986' '1974' '1970' '1962' '1958']

Available Seasons for 'Indian Super league': ['2021/2022']

Available Seasons for 'La Liga': ['2020/2021' '2019/2020' '2018/2019' '2017/2018' '2016/2017' '2015/2016'
 '2014/2015' '2013/2014' '2012/2013' '2011/2012' '2010/2011' '2009/2010'
 '2008/

<br>

## 2. &emsp; Matches

In [7]:
# =================================== 2.1 Get Matches =================================== #

# -------------------------- USER INTERACTION -------------------------- #

# --- For which league(s) do you want to get the Data? [a single name, or a list/array of names]
my_league = pd.unique(comps['competition_name'])

# --- Want to get a particular Season? [None; 'YYYY'; 'YYYY/YYYY']
my_season = None

# -------------------------- USER INTERACTION -------------------------- #

# --- A single league name is wrapped into a list: iterating over a bare string would loop over its characters.
leagues = [my_league] if isinstance(my_league, str) else my_league

# --- The per-season tables are collected in a list and concatenated ONCE at the end
# --- (growing a DataFrame with pd.concat inside the loop copies all previous rows in every iteration).
matches_bySeason = []
for ll in tqdm(leagues):

    is_league = comps['competition_name'] == ll

    # --- 2.1.1 Get the competition ID:
    comps_id = comps.loc[is_league,'competition_id'].unique()[0]

    # --- 2.1.2 Get the season ID(s): only 'my_season' if one was requested, otherwise all seasons of the league.
    # ---       (The list is empty if the requested season is not available for this league.)
    if my_season:
        season_id = comps.loc[is_league & (comps['season_name'] == my_season),'season_id'].unique().tolist()
    else:
        season_id = comps.loc[is_league,'season_id'].unique().tolist()

    # --- 2.1.3 Download the data:
    for ss in season_id:
        matches_bySeason.append(sb.matches(competition_id=comps_id, season_id=ss))

# --- pd.concat raises on an empty list, so fall back to an empty DataFrame if nothing was downloaded:
matches = pd.concat(matches_bySeason, axis=0).reset_index(drop=True) if matches_bySeason else pd.DataFrame()


100%|███████████████████████████████████████████| 21/21 [00:05<00:00,  3.62it/s]


In [8]:
# =================================== 2.2 Preprocessing =================================== #

# --- 2.2.1 Sort on 'match_date' & 'kick_off':
matches = matches.sort_values(['match_date','kick_off']).reset_index(drop=True)

# --- 2.2.2 Number of matches per competition (all downloaded seasons pooled), largest first.
# ---       The table is grouped by 'competition', so the headline says "Competition" (it used to say "Season").
print('\nNumber of Matches by Competition:\n')

matches.groupby('competition')['match_id'].count().to_frame('No. Matches').sort_values('No. Matches',ascending=False)



Number of Matches by Season:



Unnamed: 0_level_0,No. Matches
competition,Unnamed: 1_level_1
Spain - La Liga,868
France - Ligue 1,435
England - Premier League,418
Italy - Serie A,381
Germany - 1. Bundesliga,340
England - FA Women's Super League,326
International - FIFA World Cup,147
International - Women's World Cup,116
India - Indian Super league,115
Europe - UEFA Euro,102


<br>

## 3. &emsp; Per-Match - Per-Substitution Statistics

For Position Categories, see the [StatsBomb Open Data Events manual on GitHub](https://github.com/statsbomb/statsbombpy/blob/master/doc/Open%20Data%20Events%20v4.0.0.pdf).

In [9]:
# --- Per-team, per-window statistics. Every entry becomes four columns further below:
# --- '<feature>__{off|def}__{pre|post}'. The comments name the StatsBomb event column each one is built from.
features = [
    # --- Defending / duels:
    'clearances_total',        # number of events with a recorded 'clearance_body_part'
    'dribble_total',           # number of events with a recorded 'dribble_outcome'
    'dribble_success',         # ... of which 'dribble_outcome' is 'Complete'
    'duel_total',              # number of events with a recorded 'duel_outcome'
    'duel_success',            # ... of which 'duel_outcome' is 'Won', 'Success', 'Success in Play' or 'Success Out'
    'fouls_committed',         # number of events with a recorded 'foul_committed_type'
    'interceptions_total',     # number of events with a recorded 'interception_outcome'
    'interceptions_success',   # ... of which the outcome is 'Won', 'Success', 'Success in Play' or 'Success Out'

    # --- Passing:
    'pass_shot_assist',        # number of events with a recorded 'pass_shot_assist'
    'pass_goal_assist',        # number of events with a recorded 'pass_goal_assist'
    # Summed 'pass_length' of successful passes ('pass_outcome' is missing): in total, and by length bin.
    # The bins are bounded in meters -- (0,5], (5,10], (10,20], (20,40], (40,inf) -- while the data comes
    # in yards (1 yard = 0.9144 meters).
    *[f'pass_success_length__{length_bin}' for length_bin in ('total','0_5','5_10','10_20','20_40','40')],
    'passes_total',            # number of events with a recorded 'pass_length'
    'passes_success',          # ... of which 'pass_outcome' is missing

    # --- Shooting:
    'shots_total',             # number of events with a recorded 'shot_outcome'
    'shots_target',            # ... of which 'shot_outcome' is neither 'Off T' nor 'Wayward'
    'goals_scored',            # ... of which 'shot_outcome' is 'Goal'

    # --- Substitutions:
    'substitution_tactical',   # number of events whose 'substitution_outcome' is 'Tactical'
]

# --- Identifier columns of every row ('off' / 'def' are assigned from the score before minute 60):
colIDs = [
    'match_id',
    'competition',
    'season',
    'team_off',
    'team_off__id',
    'team_def',
    'team_def__id',
    'team_off__score60',                      # goals scored before minute 60
    'team_def__score60',                      # goals scored before minute 60
    'substitution_replacement__off',          # 'substitution_replacement' of the substitution event
    'substitution_replacementNumber__off',    # number of tactical substitutions made in that minute
    'substitution_minute__off',               # 'minute' of the substitution
    'substitution_minuteDelta__off',          # minutes until min(next substitution, end of match)
]

In [105]:
# ========================================= Collect Data: by Match ========================================= #
#
# For every match that is NOT tied before minute 60, the trailing team is labelled 'off' and the leading
# team 'def'. One row is produced per minute (after minute 59) in which 'off' made a tactical substitution;
# if 'off' made none, a single row anchored at minute 59 is produced. Each row holds the statistics of both
# teams in the window before ('pre') and after ('post') that minute:
#     'pre'  : minute <  mt
#     'post' : mt < minute < mt_f1     (mt_f1 = next substitution minute of 'off', or last minute of the match)
# Events recorded in minute 'mt' itself -- and in the boundary minute 'mt_f1' -- belong to neither window.
#
# NOTE(review): if ALL post-59 tactical substitutions of 'off' fall into the last recorded minute, no row is
#               produced for that match and it is not recorded in any of the diagnostic lists below.


# --- Outcome labels that count as a won duel / a successful interception:
outcomes_SUCCESS = ['Won','Success','Success in Play','Success Out']

# --- Shot outcomes that are NOT counted as "on target":
outcomes_OFFTARGET = ['Off T','Wayward']

# --- Pass-length bins in METERS: (feature suffix, exclusive lower bound, inclusive upper bound)
bins_passLength = [('0_5',-np.inf,5), ('5_10',5,10), ('10_20',10,20), ('20_40',20,40), ('40',40,np.inf)]


def collect_windowFeatures(events, conv_YtoM):
    """Aggregate the events of ONE team within ONE time window.

    Parameters
    ----------
    events : pd.DataFrame
        Event rows of a match (as returned by `sb.events`), already restricted to one team and one
        range of minutes.
    conv_YtoM : float
        Meters per yard. Used to express the pass-length bins (defined in meters) in yards, and to
        convert the summed pass lengths from yards to meters.

    Returns
    -------
    dict
        {feature name: value}. A feature is left out when its source column is missing from `events`,
        so that the caller's NaN placeholder stays in place.
    """
    stats = {}

    # --- Features counting the events with a recorded (non-missing) value in a column:
    for feature, column in [('clearances_total','clearance_body_part'),
                            ('dribble_total','dribble_outcome'),
                            ('duel_total','duel_outcome'),
                            ('fouls_committed','foul_committed_type'),
                            ('interceptions_total','interception_outcome'),
                            ('pass_shot_assist','pass_shot_assist'),
                            ('pass_goal_assist','pass_goal_assist'),
                            ('shots_total','shot_outcome')]:
        if column in events.columns:
            stats[feature] = int(events[column].notna().sum())

    # --- Features counting the events whose outcome carries one of the given labels:
    for feature, column, labels in [('dribble_success','dribble_outcome',['Complete']),
                                    ('duel_success','duel_outcome',outcomes_SUCCESS),
                                    ('interceptions_success','interception_outcome',outcomes_SUCCESS),
                                    ('goals_scored','shot_outcome',['Goal']),
                                    ('substitution_tactical','substitution_outcome',['Tactical'])]:
        if column in events.columns:
            stats[feature] = int(events[column].isin(labels).sum())

    # --- 'shots_target': recorded shots whose outcome is not one of the off-target labels
    if 'shot_outcome' in events.columns:
        shots = events['shot_outcome'].dropna()
        stats['shots_target'] = int((~shots.isin(outcomes_OFFTARGET)).sum())

    # --- Passing: a pass is successful if it has a 'pass_length' but NO 'pass_outcome'
    if ('pass_length' in events.columns) and ('pass_outcome' in events.columns):
        length_success = events.loc[events['pass_outcome'].isna(),'pass_length'].dropna()

        # --- Lengths come in yards, and 1 yard = 0.9144 meters: yards * conv_YtoM = meters.
        # --- (The sums used to be DIVIDED by the factor, which does not yield meters.)
        stats['pass_success_length__total'] = length_success.sum() * conv_YtoM

        # --- The bin bounds are given in meters, hence converted to yards (meters / conv_YtoM) for the comparison:
        for suffix, lower, upper in bins_passLength:
            in_bin = (length_success > lower/conv_YtoM) & (length_success <= upper/conv_YtoM)
            stats[f'pass_success_length__{suffix}'] = length_success[in_bin].sum() * conv_YtoM

        stats['passes_total'] = int(events['pass_length'].notna().sum())
        stats['passes_success'] = int(length_success.shape[0])

    return stats



p_matchID = matches['match_id'].tolist()


# --- Conversion-Factor: yards-to-meters
conv_YtoM = 0.9144


# --- All output columns; the text-valued ones are created as 'object' columns, so that writing strings into
# --- the NaN-initialised row does not rely on pandas silently upcasting a float column:
cols_MATCHES = colIDs + [f'{f}__{team}__{period}' for f in features for team in ['off','def'] for period in ['pre','post']]
cols_TEXT = ['competition','season','team_off','team_def','substitution_replacement__off']


# --- Instantiate the Dataframe & the diagnostic lists:
# ---   tied__min60  : matches skipped because the score was tied before minute 60
# ---   multipleSubs : matches with more than one substitution minute of 'off'
# ---   noSubs       : matches whose events have no 'substitution_outcome' column at all
df_MATCHES = pd.DataFrame(columns=cols_MATCHES)
tied__min60, multipleSubs, noSubs = [],[],[]

# --- The rows are collected in a list and concatenated once after the loop:
rows_MATCHES = []


# --- Run over all matches:
for m in tqdm(p_matchID):


    # --- Get 'events' for match 'm':
    m_events = sb.events(match_id=m)
    m_minuteMax = m_events['minute'].max()


    # ------------------------------- Which Team is 'off', which is 'def'? ------------------------------- #
    m_teams = m_events['team'].unique()

    # --- --- Goals scored before minute 60 (no 'shot_outcome' column: no goals, i.e. treated as a tie):
    if 'shot_outcome' in m_events.columns:
        is_goal60 = (m_events['minute'] < 60) & (m_events['shot_outcome'] == 'Goal')
    else:
        is_goal60 = pd.Series(False, index=m_events.index)
    m_teams1__goals = int((is_goal60 & (m_events['team'] == m_teams[0])).sum())
    m_teams2__goals = int((is_goal60 & (m_events['team'] == m_teams[1])).sum())

    # --- --- Is it a tie? Otherwise the trailing team is 'off', the leading team is 'def':
    if m_teams1__goals == m_teams2__goals:
        tied__min60.append(m)
        continue
    elif m_teams1__goals > m_teams2__goals:
        dict_OffDef = {'off':m_teams[1],'def':m_teams[0],
                       'score60__off':m_teams2__goals,'score60__def':m_teams1__goals}
    else:
        dict_OffDef = {'off':m_teams[0],'def':m_teams[1],
                       'score60__off':m_teams1__goals,'score60__def':m_teams2__goals}


    # --- 'competition' & 'season' of match 'm':
    m_competition = matches.loc[matches['match_id'] == m,'competition'].values[0]
    m_season = matches.loc[matches['match_id'] == m,'season'].values[0]


    # ------------------------------- When did 'off' make Substitutions? ------------------------------- #
    has_subColumn = 'substitution_outcome' in m_events.columns
    if has_subColumn:
        is_subOff = (m_events['substitution_outcome'] == 'Tactical') & (m_events['team'] == dict_OffDef['off'])
        minutesSub = m_events.loc[is_subOff & (m_events['minute'] > 59),'minute'].tolist()

        # --- Did 'off' make any tactical substitution after minute 59?
        off_subbed = len(minutesSub) > 0

        if off_subbed:
            # --- Drop substitutions made in the last recorded minute (there is no 'post' window for them):
            minutesSub = [minute for minute in minutesSub if minute < m_minuteMax]
        else:
            # --- Take minute 59, if no Substitutions were made
            minutesSub = [59]

    else:
        # --- No substitution recorded for either team:
        noSubs.append(m)
        off_subbed = False
        minutesSub = [59]


    # --- Sorted, distinct substitution minutes:
    minutesSub = np.unique(minutesSub)

    if len(minutesSub) > 1:
        multipleSubs.append(m)


    # ------------------------------- Run over all Minutes that a Substitution was made! ------------------------------- #
    for mtN, mt in enumerate(minutesSub):

        # --- Minute of next Substitution -- or End of Game:
        mt_f1 = minutesSub[mtN+1] if mtN + 1 < len(minutesSub) else m_minuteMax

        # --- Match-State Identifier:
        m_mt__ID = f'{m}__{mt}'

        # --- Instantiate the (one-row) Dataframe for the Current State of the Game:
        df_m = pd.DataFrame(np.nan, index = [m_mt__ID], columns = cols_MATCHES).astype({c: object for c in cols_TEXT})

        df_m.loc[m_mt__ID,'match_id'] = int(m)
        df_m.loc[m_mt__ID,'competition'] = m_competition
        df_m.loc[m_mt__ID,'season'] = m_season


        # ------------------------------- Run separately over 'def' & 'off' ------------------------------- #
        for team in ['def','off']:

            # --- Extract data for 'team':
            df_team = m_events.loc[m_events['team'] == dict_OffDef[team],:]

            # --- Score before Minute 60, 'team_id' (a single scalar, not the whole array) & 'team_name':
            df_m.loc[m_mt__ID,f'team_{team}__score60'] = dict_OffDef[f'score60__{team}']
            df_m.loc[m_mt__ID,f'team_{team}__id'] = df_team['team_id'].unique()[0]
            df_m.loc[m_mt__ID,f'team_{team}'] = dict_OffDef[team]

            # --- Windows 'pre' (< 'mt') and 'post' (> 'mt' and < 'mt_f1'):
            windows = {'pre': df_team.loc[df_team['minute'] < mt,:],
                       'post': df_team.loc[(df_team['minute'] > mt) & (df_team['minute'] < mt_f1),:]}

            for period, df_window in windows.items():
                stats = collect_windowFeatures(df_window, conv_YtoM)

                # --- Without a 'substitution_outcome' column there were no tactical substitutions:
                if not has_subColumn:
                    stats['substitution_tactical'] = 0

                for f, value in stats.items():
                    df_m.loc[m_mt__ID,f'{f}__{team}__{period}'] = value


        # ------------------------------- Substitution details of 'off' ------------------------------- #
        if off_subbed:
            df_m.loc[m_mt__ID,'substitution_minute__off'] = int(mt)
            df_m.loc[m_mt__ID,'substitution_minuteDelta__off'] = int(mt_f1 - mt)

            # --- Players brought on by 'off' in minute 'mt' (at least one, since 'mt' is a substitution minute):
            replacements = m_events.loc[is_subOff & (m_events['minute'] == mt),'substitution_replacement']
            subsN = replacements.shape[0]

            # --- Several substitutions in the same minute: one (otherwise identical) row per replacement
            if subsN > 1:
                df_m = pd.concat([df_m] * subsN, axis=0)

            # --- Rows are addressed by position, as duplicated rows share the same index label:
            col_number = df_m.columns.get_loc('substitution_replacementNumber__off')
            col_player = df_m.columns.get_loc('substitution_replacement__off')
            for n, replacement in enumerate(replacements):
                df_m.iloc[n,col_number] = subsN
                df_m.iloc[n,col_player] = replacement

        else:
            # --- No substitution by 'off': the row is anchored at minute 59
            df_m.loc[m_mt__ID,'substitution_minute__off'] = int(59)
            df_m.loc[m_mt__ID,'substitution_minuteDelta__off'] = int(m_minuteMax - 59)


        # --- Collect the Stats for Match 'm':
        rows_MATCHES.append(df_m)


# --- Stack all rows (the empty 'df_MATCHES' comes first and fixes the column order):
df_MATCHES = pd.concat([df_MATCHES] + rows_MATCHES, axis=0)


# --- Export
if do__export:
    df_MATCHES = df_MATCHES.reset_index().rename(columns={'index':'match_status'})
    df_MATCHES.to_csv(f'{directory}/10_data/data_StatsBomb__byMatch_bySub.csv',index=False)

100%|███████████████████████████████████████| 3464/3464 [21:26<00:00,  2.69it/s]
