In [4]:
import pandas as pd
import numpy as np
import pyarrow.feather as feather
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:

# Display the current settings
print("Current maximum number of rows: ", pd.get_option('display.max_rows'))
print("Current maximum number of columns: ", pd.get_option('display.max_columns'))

# Set new maximum number of rows and columns
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Display the updated settings
print("Updated maximum number of rows: ", pd.get_option('display.max_rows'))
print("Updated maximum number of columns: ", pd.get_option('display.max_columns'))

Current maximum number of rows:  500
Current maximum number of columns:  500
Updated maximum number of rows:  500
Updated maximum number of columns:  500


# 2024

In [7]:
# Load all datasets
defense_data = pd.read_csv('FBRef Data/2024/Defense_2024.csv')
goal_creating_actions_data = pd.read_csv('FBRef Data/2024/Goal_Creating_Actions_2024.csv')
miscellaneous_data = pd.read_csv('FBRef Data/2024/Miscellaneous_2024.csv')
passing_data = pd.read_csv('FBRef Data/2024/Passing_2024.csv')
passing_type_data = pd.read_csv('FBRef Data/2024/Passing_Type_2024.csv')
players_standard_data = pd.read_csv('FBRef Data/2024/Players_Standard_2024.csv')
possession_data = pd.read_csv('FBRef Data/2024/Possession_2024.csv')
shooting_data = pd.read_csv('FBRef Data/2024/Shooting_2024.csv')

# Fill missing values with 0 for all datasets
datasets = [defense_data, goal_creating_actions_data, miscellaneous_data, passing_data,
            passing_type_data, players_standard_data, possession_data, shooting_data]

datasets = [df.fillna(0,inplace=True) for df in datasets]


In [8]:
defense_data=defense_data[defense_data['90s']>=10]
defense_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Tackles,Tackles_Won,Tackles_Def_3rd,Tackles_Mid_3rd,Tackles_Att_3rd,Dribblers_Tackled,Dribblers_Challenged,Dribblers_Tackle_W%,Dribblers_Tackle_Lost,Blocks,Shots_Blocked,Passes_Blocked,Interceptions,Tackles+Interceptions,Clearances,Errors_Shots,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,29.0,19,20.0,7.0,2.0,20.0,34.0,58.8,14.0,9.0,5.0,4.0,8,37.0,27.0,0.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,32.0,18,13.0,13.0,6.0,16.0,32.0,50.0,16.0,26.0,1.0,25.0,2,34.0,4.0,0.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,64.0,35,36.0,23.0,5.0,26.0,45.0,57.8,19.0,51.0,32.0,19.0,39,103.0,109.0,2.0,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,21.0,14,8.0,10.0,3.0,8.0,18.0,44.4,10.0,12.0,1.0,11.0,12,33.0,18.0,0.0,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,85.0,52,43.0,34.0,8.0,38.0,96.0,39.6,58.0,29.0,6.0,23.0,61,146.0,61.0,3.0,Matches


In [9]:
defense_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
       'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
       'Dribblers_Tackle_W%', 'Dribblers_Tackle_Lost', 'Blocks',
       'Shots_Blocked', 'Passes_Blocked', 'Interceptions',
       'Tackles+Interceptions', 'Clearances', 'Errors_Shots', 'Matches'],
      dtype='object')

In [10]:
import pandas as pd

# Function to process defense_data
def process_defense_data(df):
    columns_to_process = ['Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
                          'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
                          'Dribblers_Tackle_Lost', 'Blocks', 'Shots_Blocked', 'Passes_Blocked_Def',
                          'Interceptions', 'Tackles+Interceptions', 'Clearances', 'Errors_Shots']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Def'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])

    return df



# Apply the processing function to defense_data
processed_defense_data = process_defense_data(defense_data)


processed_defense_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,58.8,2.12,1.39,1.46,0.51,0.15,1.46,2.48,1.02,0.66,0.36,0.29,0.58,2.7,1.97,0.0
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,50.0,2.27,1.28,0.92,0.92,0.43,1.13,2.27,1.13,1.84,0.07,1.77,0.14,2.41,0.28,0.0
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,57.8,2.07,1.13,1.17,0.74,0.16,0.84,1.46,0.61,1.65,1.04,0.61,1.26,3.33,3.53,0.06
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,44.4,1.24,0.83,0.47,0.59,0.18,0.47,1.07,0.59,0.71,0.06,0.65,0.71,1.95,1.07,0.0
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,39.6,2.67,1.64,1.35,1.07,0.25,1.19,3.02,1.82,0.91,0.19,0.72,1.92,4.59,1.92,0.09


In [11]:
goal_creating_actions_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Shot_Creating_Action', 'Shot_Creating_Action_per90',
       'Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Goal_Creating_Action',
       'Goal_Creating_Action_90', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal',
       'Matches'],
      dtype='object')

In [12]:
goal_creating_actions_data=goal_creating_actions_data[goal_creating_actions_data['90s']>=10]
goal_creating_actions_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shot_Creating_Action,Shot_Creating_Action_per90,Pass_Live_Shot,Pass_Dead_Shot,Take_Ons_Shot,Shot-Shot,Fouls_drawn_Shot,Defensive_Shot,Goal_Creating_Action,Goal_Creating_Action_90,Pass_Live_Goal,Pass_Dead_Goal,Take_Ons_Goal,Shot_Goal,Fouls_Drawn_Goal,Defensive_Goal,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,23.0,1.68,16.0,4.0,0.0,0.0,3.0,0.0,2.0,0.15,2.0,0.0,0.0,0.0,0.0,0.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,53.0,3.76,41.0,1.0,8.0,3.0,0.0,0.0,8.0,0.57,6.0,0.0,2.0,0.0,0.0,0.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,24.0,0.78,19.0,1.0,0.0,3.0,1.0,0.0,1.0,0.03,0.0,0.0,0.0,1.0,0.0,0.0,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,27.0,1.6,27.0,0.0,0.0,0.0,0.0,0.0,3.0,0.18,3.0,0.0,0.0,0.0,0.0,0.0,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,81.0,2.55,67.0,3.0,2.0,1.0,2.0,6.0,5.0,0.16,3.0,0.0,1.0,0.0,0.0,1.0,Matches


In [13]:
import pandas as pd

# Function to process defense_data
def process_goal_data(df):
    columns_to_process = ['Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Shot_Creating_Action','Goal_Creating_Action','Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_goal_creating_actions_data = process_goal_data(goal_creating_actions_data)


processed_goal_creating_actions_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,1.68,0.15,1.17,0.29,0.0,0.0,0.22,0.0,0.15,0.0,0.0,0.0,0.0,0.0
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,3.76,0.57,2.91,0.07,0.57,0.21,0.0,0.0,0.43,0.0,0.14,0.0,0.0,0.0
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,0.78,0.03,0.61,0.03,0.0,0.1,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.0
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,1.6,0.18,1.6,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,2.55,0.16,2.11,0.09,0.06,0.03,0.06,0.19,0.09,0.0,0.03,0.0,0.0,0.03


In [14]:
miscellaneous_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Competition_Name',
       'Age', 'Born', 'Matches_Played', 'Yellow_Cards', 'Red_Cards',
       'Second_Yellow_Card', 'Fouls_Committed', 'Fouls_Drawn', 'Offsides',
       'Crosses', 'Interceptions', 'Tackles_Won', 'Penalty_Kicks_Won',
       'Penalty_Kicks_Conceded', 'Own_Goals', 'Ball_Recoveries', 'Aerials_Won',
       'Aerials_Lost', 'Percentage_of_Aerials_Won', 'Matches'],
      dtype='object')

In [15]:
passing_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_Total_Cmp%',
       'Passes_TotDist', 'Passes_PrgDist', 'Passes_Short_Cmp',
       'Passes_Short_Att', 'Passes_Short_Cmp%', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Medium_Cmp%', 'Passes_Long_Cmp',
       'Passes_Long_Att', 'Passes_Long_Cmp%', 'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes', 'Matches'],
      dtype='object')

In [16]:
passing_data=passing_data[passing_data['90s']>=10]
passing_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Total_Cmp,Passes_Total_Att,Passes_Total_Cmp%,Passes_TotDist,Passes_PrgDist,Passes_Short_Cmp,Passes_Short_Att,Passes_Short_Cmp%,Passes_Medium_Cmp,Passes_Medium_Att,Passes_Medium_Cmp%,Passes_Long_Cmp,Passes_Long_Att,Passes_Long_Cmp%,Assists,xAG,xA,A-xAG,Key_Passes,Passes_1/3,Passes_Penalty_Area,Crosses_Penalty_Area,Progressive_Passes,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,450.0,581.0,77.5,7402.0,2789.0,220.0,248.0,88.7,188.0,235.0,80.0,34.0,63.0,54.0,1,0.8,0.9,0.2,7.0,25.0,13.0,2.0,43.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,365.0,472.0,77.3,4890.0,1506.0,206.0,240.0,85.8,105.0,130.0,80.8,19.0,32.0,59.4,2,1.9,2.0,0.1,22.0,30.0,14.0,3.0,56.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,1552.0,1836.0,84.5,29618.0,9672.0,487.0,548.0,88.9,893.0,976.0,91.5,141.0,252.0,56.0,0,0.3,0.6,-0.3,8.0,129.0,3.0,0.0,137.0,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,796.0,895.0,88.9,12470.0,3008.0,393.0,433.0,90.8,330.0,360.0,91.7,41.0,54.0,75.9,0,0.5,1.1,-0.5,6.0,87.0,5.0,2.0,78.0,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,1551.0,1836.0,84.5,27382.0,8613.0,629.0,707.0,89.0,711.0,802.0,88.7,150.0,233.0,64.4,1,2.2,1.9,-1.2,30.0,188.0,23.0,3.0,194.0,Matches


In [17]:
import pandas as pd

# Function to process defense_data
def process_pass_data(df):
    columns_to_process = ['Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_TotDist',
        'Passes_PrgDist', 'Passes_Short_Cmp','Passes_Short_Att', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Long_Cmp','Passes_Long_Att',  'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_data = process_pass_data(passing_data)


processed_passing_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,77.5,88.7,80.0,54.0,32.85,42.41,540.29,203.58,16.06,18.1,13.72,17.15,2.48,4.6,0.07,0.06,0.07,0.01,0.51,1.82,0.95,0.15,3.14
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,77.3,85.8,80.8,59.4,25.89,33.48,346.81,106.81,14.61,17.02,7.45,9.22,1.35,2.27,0.14,0.13,0.14,0.01,1.56,2.13,0.99,0.21,3.97
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.9,31.59,4.56,8.16,0.0,0.01,0.02,-0.01,0.26,4.17,0.1,0.0,4.43
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,88.9,90.8,91.7,75.9,47.1,52.96,737.87,177.99,23.25,25.62,19.53,21.3,2.43,3.2,0.0,0.03,0.07,-0.03,0.36,5.15,0.3,0.12,4.62
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.1


In [18]:
passing_type_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Off',
       'Passes_Blocked', 'Matches'],
      dtype='object')

In [19]:
passing_type_data=passing_type_data[passing_type_data['90s']>=10]
passing_type_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Attempted,Live_Ball_Passes,Dead_Ball_Passes,Free_Kick_Passes,Through_Balls,Switches,Crosses,Throw_Ins_Taken,Corner_Kicks,In_Corner_Kicks,Out_Corner_Kicks,Str_Corner_Kicks,Passes_Cmp,Passes_Off,Passes_Blocked,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,581.0,453.0,127.0,11.0,2.0,3.0,13,116.0,0.0,0.0,0.0,0.0,450.0,1.0,23.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,472.0,439.0,29.0,3.0,5.0,1.0,22,12.0,6.0,2.0,3.0,0.0,365.0,4.0,21.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,1836.0,1650.0,178.0,75.0,2.0,13.0,3,29.0,0.0,0.0,0.0,0.0,1552.0,8.0,20.0,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,895.0,875.0,18.0,17.0,0.0,1.0,3,1.0,0.0,0.0,0.0,0.0,796.0,2.0,17.0,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,1836.0,1743.0,88.0,68.0,6.0,13.0,34,14.0,0.0,0.0,0.0,0.0,1551.0,5.0,25.0,Matches


In [20]:
import pandas as pd

# Function to process defense_data
def process_passing_data(df):
    columns_to_process = ['Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Offside',
       'Passes_Blocked_Off']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Off','Passes_Off':'Passes_Offside'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_type_data = process_passing_data(passing_type_data)


processed_passing_type_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,42.41,33.07,9.27,0.8,0.15,0.22,0.95,8.47,0.0,0.0,0.0,0.0,32.85,0.07,1.68
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,33.48,31.13,2.06,0.21,0.35,0.07,1.56,0.85,0.43,0.14,0.21,0.0,25.89,0.28,1.49
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,59.42,53.4,5.76,2.43,0.06,0.42,0.1,0.94,0.0,0.0,0.0,0.0,50.23,0.26,0.65
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,52.96,51.78,1.07,1.01,0.0,0.06,0.18,0.06,0.0,0.0,0.0,0.0,47.1,0.12,1.01
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,57.74,54.81,2.77,2.14,0.19,0.41,1.07,0.44,0.0,0.0,0.0,0.0,48.77,0.16,0.79


In [21]:
players_standard_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       'MP', 'Starts', 'Min', '90s', 'Goals', 'Assists', 'G+A', 'G-PK', 'PK',
       'PK_Attempted', 'Yellow', 'Red', 'xG', 'npxG', 'xAG', 'npxG+xAG',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received', 'Goals_per90',
       'Assits_per90', 'G+A_per90', 'G-PK_per90', 'G+A-PK_per90', 'xG_per90',
       'xAG_per90', 'xG+xAG_per90', 'npxG_per90', 'npxG+xAG_per90', 'Matches'],
      dtype='object')

In [22]:
players_standard_data=players_standard_data[players_standard_data['90s']>=10]
players_standard_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,MP,Starts,Min,90s,Goals,Assists,G+A,G-PK,PK,PK_Attempted,Yellow,Red,xG,npxG,xAG,npxG+xAG,Prg_Carries,Prg_Passes,Prg_Passes_Received,Goals_per90,Assits_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,20,13,1237,13.7,0,1,1,0,0,0,1,0,0.0,0.0,0.8,0.9,22.0,43.0,26.0,0.0,0.07,0.07,0.0,0.07,0.0,0.06,0.06,0.0,0.06,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,30,14,1267,14.1,2,2,4,2,0,0,3,1,2.0,2.0,1.9,3.8,37.0,56.0,91.0,0.14,0.14,0.28,0.14,0.28,0.14,0.13,0.27,0.14,0.27,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,31,31,2781,30.9,4,0,4,3,1,1,5,0,3.4,2.6,0.3,2.9,36.0,137.0,9.0,0.13,0.0,0.13,0.1,0.1,0.11,0.01,0.12,0.09,0.09,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,27,17,1519,16.9,0,0,0,0,0,0,2,0,0.8,0.8,0.5,1.3,9.0,78.0,20.0,0.0,0.0,0.0,0.0,0.0,0.05,0.03,0.08,0.05,0.08,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,33,32,2860,31.8,2,1,3,2,0,0,4,0,1.1,1.1,2.2,3.3,38.0,194.0,51.0,0.06,0.03,0.09,0.06,0.09,0.04,0.07,0.1,0.04,0.1,Matches


In [23]:
# Function to process defense_data
def process_standard_data(df):
    columns_to_process = [ 'Yellow', 'Red',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received',]

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Goals', 'G+A', 'G-PK',  'xG', 'npxG', 'xAG', 'npxG+xAG','Assits_per90','Assists','Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_players_standard_data = process_standard_data(players_standard_data)


processed_players_standard_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,MP,Starts,Min,90s,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,20,13,1237,13.7,0,0,0.0,0.07,0.0,0.07,0.0,0.06,0.06,0.0,0.06,0.07,0.0,1.61,3.14,1.9
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,30,14,1267,14.1,0,0,0.14,0.28,0.14,0.28,0.14,0.13,0.27,0.14,0.27,0.21,0.07,2.62,3.97,6.45
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,31,31,2781,30.9,1,1,0.13,0.13,0.1,0.1,0.11,0.01,0.12,0.09,0.09,0.16,0.0,1.17,4.43,0.29
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,27,17,1519,16.9,0,0,0.0,0.0,0.0,0.0,0.05,0.03,0.08,0.05,0.08,0.12,0.0,0.53,4.62,1.18
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,33,32,2860,31.8,0,0,0.06,0.09,0.06,0.09,0.04,0.07,0.1,0.04,0.1,0.13,0.0,1.19,6.1,1.6


In [24]:
possession_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Take_Ons_Succ%', 'Tackled_Take_Ons', 'Tackled_Take_Ons%', 'Carries',
       'Total_Distance', 'Progressive_Distance_Carried', 'Progressive_Carries',
       '1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols', 'Dispossessed',
       'Passes_Received', 'Progressive_Passes_Received', 'Matches'],
      dtype='object')

In [25]:
possession_data=possession_data[possession_data['90s']>=10]
possession_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Touches,Touches_Def_Pen,Touches_Def_3rd,Touches_Mid_3rd,Touches_Att_3rd,Touches_Att_Pen,Tocuhes_Live_Balls,Take_Ons_Attempted,Take_Ons_Succ,Take_Ons_Succ%,Tackled_Take_Ons,Tackled_Take_Ons%,Carries,Total_Distance,Progressive_Distance_Carried,Progressive_Carries,1/3_Carries,Carries_Penalty_Area,Miscontrols,Dispossessed,Passes_Received,Progressive_Passes_Received,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,711.0,43.0,252.0,303.0,165.0,11.0,711.0,34.0,14.0,41.2,12.0,35.3,364.0,2174.0,1121.0,22.0,12.0,7.0,13.0,8.0,371.0,26.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,675.0,11.0,108.0,301.0,293.0,47.0,675.0,77.0,34.0,44.2,41.0,53.2,406.0,2721.0,1387.0,37.0,29.0,9.0,41.0,38.0,457.0,91.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,2185.0,293.0,976.0,1119.0,114.0,35.0,2184.0,15.0,8.0,53.3,7.0,46.7,1506.0,8663.0,4921.0,36.0,19.0,0.0,23.0,4.0,1403.0,9.0,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,1022.0,21.0,168.0,647.0,218.0,7.0,1022.0,19.0,7.0,36.8,11.0,57.9,823.0,4361.0,1683.0,9.0,33.0,0.0,23.0,11.0,780.0,20.0,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,2164.0,105.0,553.0,1288.0,349.0,10.0,2164.0,65.0,44.0,67.7,18.0,27.7,1649.0,8707.0,3892.0,38.0,49.0,3.0,32.0,30.0,1502.0,51.0,Matches


In [26]:
import pandas as pd

# Function to process defense_data
def process_possession_data(df):
    columns_to_process = [ 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Tackled_Take_Ons', 'Carries','Total_Distance', 'Progressive_Distance_Carried',
        'Progressive_Carries','1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols',
        'Dispossessed','Passes_Received', 'Progressive_Passes_Received']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_possession_data= process_possession_data(possession_data)


processed_possession_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,41.2,35.3,51.9,3.14,18.39,22.12,12.04,0.8,51.9,2.48,1.02,0.88,26.57,158.69,81.82,1.61,0.88,0.51,0.95,0.58,27.08,1.9
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,44.2,53.2,47.87,0.78,7.66,21.35,20.78,3.33,47.87,5.46,2.41,2.91,28.79,192.98,98.37,2.62,2.06,0.64,2.91,2.7,32.41,6.45
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,53.3,46.7,70.71,9.48,31.59,36.21,3.69,1.13,70.68,0.49,0.26,0.23,48.74,280.36,159.26,1.17,0.61,0.0,0.74,0.13,45.4,0.29
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,36.8,57.9,60.47,1.24,9.94,38.28,12.9,0.41,60.47,1.12,0.41,0.65,48.7,258.05,99.59,0.53,1.95,0.0,1.36,0.65,46.15,1.18
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,67.7,27.7,68.05,3.3,17.39,40.5,10.97,0.31,68.05,2.04,1.38,0.57,51.86,273.81,122.39,1.19,1.54,0.09,1.01,0.94,47.23,1.6


In [27]:
shooting_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Position', 'Squad', 'Competition',
       'Age', 'Born', '90s', 'Goals', 'Shots_total', 'Shots_on_target',
       'Shots_on_target_%', 'Shots_total_per90', 'Shots_on_target_per90',
       'Goals_per_shot', 'Goals_per_shot_on_target', 'Average_shot_distance',
       'Shots_free_kicks', 'Pens_Scored', 'Pens_Attempted', 'XG', 'Npxg',
       'Npxg_per_shot', 'Xg_net', 'Npxg_net', 'Matches'],
      dtype='object')

In [28]:
shooting_data=shooting_data[shooting_data['90s']>=10]
shooting_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Position,Squad,Competition,Age,Born,90s,Goals,Shots_total,Shots_on_target,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Matches
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,0,2,0,0.0,0.15,0.0,0.0,0.0,23.9,0.0,0,0,0.0,0.0,0.02,0.0,0.0,Matches
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,2,18,7,38.9,1.28,0.5,0.11,0.29,18.4,0.0,0,0,2.0,2.0,0.11,0.0,0.0,Matches
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,4,21,7,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4,Matches
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,0,5,0,0.0,0.3,0.0,0.0,0.0,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8,Matches
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,2,26,11,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9,Matches


In [29]:
import pandas as pd


# Drop specified columns
processed_shooting_data = shooting_data.drop(columns=['Goals', 'Shots_total', 'Shots_on_target', 'Matches'])

# Fill missing values with 0
processed_shooting_data.fillna(0, inplace=True)

# Rename columns for consistency
processed_shooting_data.rename(columns={'Position': 'Pos', 'Competition': 'Comp'}, inplace=True)

# Display the processed shooting_data
processed_shooting_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,0.0,0.15,0.0,0.0,0.0,23.9,0.0,0,0,0.0,0.0,0.02,0.0,0.0
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,38.9,1.28,0.5,0.11,0.29,18.4,0.0,0,0,2.0,2.0,0.11,0.0,0.0
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,0.0,0.3,0.0,0.0,0.0,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9


In [30]:
processed_passing_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,77.5,88.7,80.0,54.0,32.85,42.41,540.29,203.58,16.06,18.1,13.72,17.15,2.48,4.6,0.07,0.06,0.07,0.01,0.51,1.82,0.95,0.15,3.14
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,77.3,85.8,80.8,59.4,25.89,33.48,346.81,106.81,14.61,17.02,7.45,9.22,1.35,2.27,0.14,0.13,0.14,0.01,1.56,2.13,0.99,0.21,3.97
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.9,31.59,4.56,8.16,0.0,0.01,0.02,-0.01,0.26,4.17,0.1,0.0,4.43
5,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,88.9,90.8,91.7,75.9,47.1,52.96,737.87,177.99,23.25,25.62,19.53,21.3,2.43,3.2,0.0,0.03,0.07,-0.03,0.36,5.15,0.3,0.12,4.62
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.1


In [31]:
# Drop duplicate columns from the processed datasets
datasets_to_drop_duplicates = [processed_goal_creating_actions_data, processed_passing_data, processed_passing_type_data,
                               processed_players_standard_data, processed_possession_data, processed_shooting_data]

for df in datasets_to_drop_duplicates:
    df.drop(columns=['Unnamed: 0', 'Squad'], errors='ignore', inplace=True)

# Combine the preprocessed datasets
combined_data_2024 = pd.merge(processed_defense_data, processed_goal_creating_actions_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2024 = pd.merge(combined_data_2024, processed_passing_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2024 = pd.merge(combined_data_2024, processed_passing_type_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2024 = pd.merge(combined_data_2024, processed_players_standard_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2024 = pd.merge(combined_data_2024, processed_possession_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2024 = pd.merge(combined_data_2024, processed_shooting_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2024 = combined_data_2024.drop_duplicates(subset=['Player', 'Nation', 'Pos','Squad' ,'Age', 'Comp', 'Born', '90s'])

# Fill any remaining missing values with 0
combined_data_2024.fillna(0, inplace=True)
# Save the combined data for 2023 to a new CSV file

combined_data_2024.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,58.8,2.12,1.39,1.46,0.51,0.15,1.46,2.48,1.02,0.66,0.36,0.29,0.58,2.7,1.97,0.0,1.68,0.15,1.17,0.29,0.0,0.0,0.22,0.0,0.15,0.0,0.0,0.0,0.0,0.0,77.5,88.7,80.0,54.0,32.85,42.41,540.29,203.58,16.06,18.1,13.72,17.15,2.48,4.6,0.07,0.06,0.07,0.01,0.51,1.82,0.95,0.15,3.14,42.41,33.07,9.27,0.8,0.15,0.22,0.95,8.47,0.0,0.0,0.0,0.0,32.85,0.07,1.68,20,13,1237,0,0,0.0,0.07,0.0,0.07,0.0,0.06,0.06,0.0,0.06,0.07,0.0,1.61,3.14,1.9,41.2,35.3,51.9,3.14,18.39,22.12,12.04,0.8,51.9,2.48,1.02,0.88,26.57,158.69,81.82,1.61,0.88,0.51,0.95,0.58,27.08,1.9,0.0,0.15,0.0,0.0,0.0,23.9,0.0,0,0,0.0,0.0,0.02,0.0,0.0
1,1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,50.0,2.27,1.28,0.92,0.92,0.43,1.13,2.27,1.13,1.84,0.07,1.77,0.14,2.41,0.28,0.0,3.76,0.57,2.91,0.07,0.57,0.21,0.0,0.0,0.43,0.0,0.14,0.0,0.0,0.0,77.3,85.8,80.8,59.4,25.89,33.48,346.81,106.81,14.61,17.02,7.45,9.22,1.35,2.27,0.14,0.13,0.14,0.01,1.56,2.13,0.99,0.21,3.97,33.48,31.13,2.06,0.21,0.35,0.07,1.56,0.85,0.43,0.14,0.21,0.0,25.89,0.28,1.49,30,14,1267,0,0,0.14,0.28,0.14,0.28,0.14,0.13,0.27,0.14,0.27,0.21,0.07,2.62,3.97,6.45,44.2,53.2,47.87,0.78,7.66,21.35,20.78,3.33,47.87,5.46,2.41,2.91,28.79,192.98,98.37,2.62,2.06,0.64,2.91,2.7,32.41,6.45,38.9,1.28,0.5,0.11,0.29,18.4,0.0,0,0,2.0,2.0,0.11,0.0,0.0
2,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,57.8,2.07,1.13,1.17,0.74,0.16,0.84,1.46,0.61,1.65,1.04,0.61,1.26,3.33,3.53,0.06,0.78,0.03,0.61,0.03,0.0,0.1,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.0,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.9,31.59,4.56,8.16,0.0,0.01,0.02,-0.01,0.26,4.17,0.1,0.0,4.43,59.42,53.4,5.76,2.43,0.06,0.42,0.1,0.94,0.0,0.0,0.0,0.0,50.23,0.26,0.65,31,31,2781,1,1,0.13,0.13,0.1,0.1,0.11,0.01,0.12,0.09,0.09,0.16,0.0,1.17,4.43,0.29,53.3,46.7,70.71,9.48,31.59,36.21,3.69,1.13,70.68,0.49,0.26,0.23,48.74,280.36,159.26,1.17,0.61,0.0,0.74,0.13,45.4,0.29,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4
3,5,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,44.4,1.24,0.83,0.47,0.59,0.18,0.47,1.07,0.59,0.71,0.06,0.65,0.71,1.95,1.07,0.0,1.6,0.18,1.6,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,88.9,90.8,91.7,75.9,47.1,52.96,737.87,177.99,23.25,25.62,19.53,21.3,2.43,3.2,0.0,0.03,0.07,-0.03,0.36,5.15,0.3,0.12,4.62,52.96,51.78,1.07,1.01,0.0,0.06,0.18,0.06,0.0,0.0,0.0,0.0,47.1,0.12,1.01,27,17,1519,0,0,0.0,0.0,0.0,0.0,0.05,0.03,0.08,0.05,0.08,0.12,0.0,0.53,4.62,1.18,36.8,57.9,60.47,1.24,9.94,38.28,12.9,0.41,60.47,1.12,0.41,0.65,48.7,258.05,99.59,0.53,1.95,0.0,1.36,0.65,46.15,1.18,0.0,0.3,0.0,0.0,0.0,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8
4,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,39.6,2.67,1.64,1.35,1.07,0.25,1.19,3.02,1.82,0.91,0.19,0.72,1.92,4.59,1.92,0.09,2.55,0.16,2.11,0.09,0.06,0.03,0.06,0.19,0.09,0.0,0.03,0.0,0.0,0.03,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.1,57.74,54.81,2.77,2.14,0.19,0.41,1.07,0.44,0.0,0.0,0.0,0.0,48.77,0.16,0.79,33,32,2860,0,0,0.06,0.09,0.06,0.09,0.04,0.07,0.1,0.04,0.1,0.13,0.0,1.19,6.1,1.6,67.7,27.7,68.05,3.3,17.39,40.5,10.97,0.31,68.05,2.04,1.38,0.57,51.86,273.81,122.39,1.19,1.54,0.09,1.01,0.94,47.23,1.6,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9


In [32]:
combined_data_2024.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
combined_data_2024.to_csv('FBRef Data/2024/Combined_Preprocessed_Data_2024.csv', index=False)
combined_data_2024.head()

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,Max Aarons,eng ENG,DF,Bournemouth,eng Premier League,23.0,2000.0,13.7,58.8,2.12,1.39,1.46,0.51,0.15,1.46,2.48,1.02,0.66,0.36,0.29,0.58,2.7,1.97,0.0,1.68,0.15,1.17,0.29,0.0,0.0,0.22,0.0,0.15,0.0,0.0,0.0,0.0,0.0,77.5,88.7,80.0,54.0,32.85,42.41,540.29,203.58,16.06,18.1,13.72,17.15,2.48,4.6,0.07,0.06,0.07,0.01,0.51,1.82,0.95,0.15,3.14,42.41,33.07,9.27,0.8,0.15,0.22,0.95,8.47,0.0,0.0,0.0,0.0,32.85,0.07,1.68,20,13,1237,0,0,0.0,0.07,0.0,0.07,0.0,0.06,0.06,0.0,0.06,0.07,0.0,1.61,3.14,1.9,41.2,35.3,51.9,3.14,18.39,22.12,12.04,0.8,51.9,2.48,1.02,0.88,26.57,158.69,81.82,1.61,0.88,0.51,0.95,0.58,27.08,1.9,0.0,0.15,0.0,0.0,0.0,23.9,0.0,0,0,0.0,0.0,0.02,0.0,0.0
1,Brenden Aaronson,us USA,"MF,FW",Union Berlin,de Bundesliga,22.0,2000.0,14.1,50.0,2.27,1.28,0.92,0.92,0.43,1.13,2.27,1.13,1.84,0.07,1.77,0.14,2.41,0.28,0.0,3.76,0.57,2.91,0.07,0.57,0.21,0.0,0.0,0.43,0.0,0.14,0.0,0.0,0.0,77.3,85.8,80.8,59.4,25.89,33.48,346.81,106.81,14.61,17.02,7.45,9.22,1.35,2.27,0.14,0.13,0.14,0.01,1.56,2.13,0.99,0.21,3.97,33.48,31.13,2.06,0.21,0.35,0.07,1.56,0.85,0.43,0.14,0.21,0.0,25.89,0.28,1.49,30,14,1267,0,0,0.14,0.28,0.14,0.28,0.14,0.13,0.27,0.14,0.27,0.21,0.07,2.62,3.97,6.45,44.2,53.2,47.87,0.78,7.66,21.35,20.78,3.33,47.87,5.46,2.41,2.91,28.79,192.98,98.37,2.62,2.06,0.64,2.91,2.7,32.41,6.45,38.9,1.28,0.5,0.11,0.29,18.4,0.0,0,0,2.0,2.0,0.11,0.0,0.0
2,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,57.8,2.07,1.13,1.17,0.74,0.16,0.84,1.46,0.61,1.65,1.04,0.61,1.26,3.33,3.53,0.06,0.78,0.03,0.61,0.03,0.0,0.1,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.0,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.9,31.59,4.56,8.16,0.0,0.01,0.02,-0.01,0.26,4.17,0.1,0.0,4.43,59.42,53.4,5.76,2.43,0.06,0.42,0.1,0.94,0.0,0.0,0.0,0.0,50.23,0.26,0.65,31,31,2781,1,1,0.13,0.13,0.1,0.1,0.11,0.01,0.12,0.09,0.09,0.16,0.0,1.17,4.43,0.29,53.3,46.7,70.71,9.48,31.59,36.21,3.69,1.13,70.68,0.49,0.26,0.23,48.74,280.36,159.26,1.17,0.61,0.0,0.74,0.13,45.4,0.29,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4
3,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,44.4,1.24,0.83,0.47,0.59,0.18,0.47,1.07,0.59,0.71,0.06,0.65,0.71,1.95,1.07,0.0,1.6,0.18,1.6,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,88.9,90.8,91.7,75.9,47.1,52.96,737.87,177.99,23.25,25.62,19.53,21.3,2.43,3.2,0.0,0.03,0.07,-0.03,0.36,5.15,0.3,0.12,4.62,52.96,51.78,1.07,1.01,0.0,0.06,0.18,0.06,0.0,0.0,0.0,0.0,47.1,0.12,1.01,27,17,1519,0,0,0.0,0.0,0.0,0.0,0.05,0.03,0.08,0.05,0.08,0.12,0.0,0.53,4.62,1.18,36.8,57.9,60.47,1.24,9.94,38.28,12.9,0.41,60.47,1.12,0.41,0.65,48.7,258.05,99.59,0.53,1.95,0.0,1.36,0.65,46.15,1.18,0.0,0.3,0.0,0.0,0.0,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8
4,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,39.6,2.67,1.64,1.35,1.07,0.25,1.19,3.02,1.82,0.91,0.19,0.72,1.92,4.59,1.92,0.09,2.55,0.16,2.11,0.09,0.06,0.03,0.06,0.19,0.09,0.0,0.03,0.0,0.0,0.03,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.1,57.74,54.81,2.77,2.14,0.19,0.41,1.07,0.44,0.0,0.0,0.0,0.0,48.77,0.16,0.79,33,32,2860,0,0,0.06,0.09,0.06,0.09,0.04,0.07,0.1,0.04,0.1,0.13,0.0,1.19,6.1,1.6,67.7,27.7,68.05,3.3,17.39,40.5,10.97,0.31,68.05,2.04,1.38,0.57,51.86,273.81,122.39,1.19,1.54,0.09,1.01,0.94,47.23,1.6,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9


In [33]:
combined_data_2024.shape

(1611, 131)

In [34]:
columns=combined_data_2024.columns.tolist()
columns

['Player',
 'Nation',
 'Pos',
 'Squad',
 'Comp',
 'Age',
 'Born',
 '90s',
 'Dribblers_Tackle_W%',
 'Tackles_per90',
 'Tackles_Won_per90',
 'Tackles_Def_3rd_per90',
 'Tackles_Mid_3rd_per90',
 'Tackles_Att_3rd_per90',
 'Dribblers_Tackled_per90',
 'Dribblers_Challenged_per90',
 'Dribblers_Tackle_Lost_per90',
 'Blocks_per90',
 'Shots_Blocked_per90',
 'Passes_Blocked_Def_per90',
 'Interceptions_per90',
 'Tackles+Interceptions_per90',
 'Clearances_per90',
 'Errors_Shots_per90',
 'Shot_Creating_Action_per90',
 'Goal_Creating_Action_90',
 'Pass_Live_Shot_per90',
 'Pass_Dead_Shot_per90',
 'Take_Ons_Shot_per90',
 'Shot-Shot_per90',
 'Fouls_drawn_Shot_per90',
 'Defensive_Shot_per90',
 'Pass_Live_Goal_per90',
 'Pass_Dead_Goal_per90',
 'Take_Ons_Goal_per90',
 'Shot_Goal_per90',
 'Fouls_Drawn_Goal_per90',
 'Defensive_Goal_per90',
 'Passes_Total_Cmp%',
 'Passes_Short_Cmp%',
 'Passes_Medium_Cmp%',
 'Passes_Long_Cmp%',
 'Passes_Total_Cmp_per90',
 'Passes_Total_Att_per90',
 'Passes_TotDist_per90',
 'Pas

# 2023


In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set display options for pandas
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Load all datasets for 2023
defense_data = pd.read_csv('FBRef Data/2023/Defense_2023.csv')
goal_creating_actions_data = pd.read_csv('FBRef Data/2023/Goal_Creating_Actions_2023.csv')
miscellaneous_data = pd.read_csv('FBRef Data/2023/Miscellaneous_2023.csv')
passing_data = pd.read_csv('FBRef Data/2023/Passing_2023.csv')
passing_type_data = pd.read_csv('FBRef Data/2023/Passing_Type_2023.csv')
players_standard_data = pd.read_csv('FBRef Data/2023/Players_Standard_2023.csv')
possession_data = pd.read_csv('FBRef Data/2023/Possession_2023.csv')
shooting_data = pd.read_csv('FBRef Data/2023/Shooting_2023.csv')

# Fill missing values with 0 for all datasets
datasets = [defense_data, goal_creating_actions_data, miscellaneous_data, passing_data,
            passing_type_data, players_standard_data, possession_data, shooting_data]

for df in datasets:
    df.fillna(0, inplace=True)

# Filter out players with less than 10 90s
defense_data = defense_data[defense_data['90s'] >= 10]
goal_creating_actions_data = goal_creating_actions_data[goal_creating_actions_data['90s'] >= 10]
passing_data = passing_data[passing_data['90s'] >= 10]
passing_type_data = passing_type_data[passing_type_data['90s'] >= 10]
players_standard_data = players_standard_data[players_standard_data['90s'] >= 10]
possession_data = possession_data[possession_data['90s'] >= 10]
shooting_data = shooting_data[shooting_data['90s'] >= 10]
defense_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
       'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
       'Dribblers_Tackle_W%', 'Dribblers_Tackle_Lost', 'Blocks',
       'Shots_Blocked', 'Passes_Blocked', 'Interceptions',
       'Tackles+Interceptions', 'Clearances', 'Errors_Shots', 'Matches'],
      dtype='object')

In [36]:
import pandas as pd

# Function to process defense_data
def process_defense_data(df):
    columns_to_process = ['Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
                          'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
                          'Dribblers_Tackle_Lost', 'Blocks', 'Shots_Blocked', 'Passes_Blocked_Def',
                          'Interceptions', 'Tackles+Interceptions', 'Clearances', 'Errors_Shots']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Def'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])

    return df



# Apply the processing function to defense_data
processed_defense_data = process_defense_data(defense_data)


processed_defense_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,32.6,1.7,0.68,0.61,0.83,0.27,0.57,1.74,1.17,1.63,0.11,1.52,0.19,1.89,0.23,0.04
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,75.0,2.22,1.35,1.27,0.89,0.05,1.05,1.41,0.35,1.76,0.73,1.03,1.73,3.95,3.14,0.03
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,46.3,2.7,1.6,0.93,1.35,0.42,1.31,2.83,1.52,1.27,0.04,1.22,1.18,3.88,0.63,0.0
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,55.8,1.43,0.81,0.56,0.71,0.16,0.9,1.61,0.71,1.18,0.28,0.9,1.12,2.55,0.81,0.0
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,42.4,3.54,2.0,2.27,1.15,0.12,1.5,3.54,2.04,1.81,0.31,1.5,1.31,4.85,1.5,0.0


In [37]:
goal_creating_actions_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Shot_Creating_Action', 'Shot_Creating_Action_per90',
       'Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Goal_Creating_Action',
       'Goal_Creating_Action_90', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal',
       'Matches'],
      dtype='object')

In [38]:
import pandas as pd

# Function to process defense_data
def process_goal_data(df):
    columns_to_process = ['Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Shot_Creating_Action','Goal_Creating_Action','Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_goal_creating_actions_data = process_goal_data(goal_creating_actions_data)


processed_goal_creating_actions_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,3.6,0.27,2.16,0.64,0.27,0.19,0.3,0.04,0.11,0.04,0.0,0.04,0.04,0.04
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,1.16,0.19,0.92,0.0,0.05,0.14,0.0,0.05,0.11,0.0,0.05,0.03,0.0,0.0
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,2.91,0.13,2.49,0.17,0.13,0.04,0.0,0.08,0.13,0.0,0.0,0.0,0.0,0.0
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,1.55,0.03,1.34,0.0,0.06,0.06,0.06,0.03,0.03,0.0,0.0,0.0,0.0,0.0
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,1.27,0.04,1.04,0.08,0.08,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0


In [39]:
passing_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_Total_Cmp%',
       'Passes_TotDist', 'Passes_PrgDist', 'Passes_Short_Cmp',
       'Passes_Short_Att', 'Passes_Short_Cmp%', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Medium_Cmp%', 'Passes_Long_Cmp',
       'Passes_Long_Att', 'Passes_Long_Cmp%', 'Assits', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes', 'Matches'],
      dtype='object')

In [40]:
import pandas as pd

# Function to process defense_data
def process_pass_data(df):
    columns_to_process = ['Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_TotDist',
        'Passes_PrgDist', 'Passes_Short_Cmp','Passes_Short_Att', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Long_Cmp','Passes_Long_Att',  'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches','Assits'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_data = process_pass_data(passing_data)


processed_passing_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,74.3,81.8,76.9,38.5,22.42,30.19,287.01,82.65,13.11,16.02,5.68,7.39,0.95,2.46,0.16,0.1,-0.05,1.74,1.78,0.61,0.15,3.26
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,82.7,85.8,90.1,55.6,45.38,54.89,891.0,362.35,13.24,15.43,27.19,30.19,4.19,7.54,0.03,0.02,0.03,0.35,4.19,0.14,0.0,5.81
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,83.0,86.8,85.2,72.2,44.01,53.04,771.01,196.16,19.62,22.62,17.76,20.84,5.15,7.13,0.12,0.11,-0.04,1.52,5.23,1.1,0.34,6.37
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,91.8,93.0,94.8,78.6,56.49,61.52,890.65,218.01,27.86,29.94,23.88,25.19,2.86,3.63,0.05,0.07,-0.02,0.53,5.65,0.53,0.09,5.12
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,88.2,92.7,91.1,69.4,50.31,57.04,857.19,291.5,22.81,24.62,21.62,23.73,4.27,6.15,0.05,0.04,-0.01,0.54,5.54,0.42,0.0,5.5


In [41]:
passing_type_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Off',
       'Passes_Blocked', 'Matches'],
      dtype='object')

In [42]:
import pandas as pd

# Function to process defense_data
def process_passing_data(df):
    columns_to_process = ['Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Offside',
       'Passes_Blocked_Off']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Off','Passes_Off':'Passes_Offside'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_type_data = process_passing_data(passing_type_data)


processed_passing_type_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,30.19,26.93,3.22,0.98,0.3,0.08,2.73,0.19,1.44,0.64,0.53,0.04,22.42,0.04,1.02
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,54.89,50.86,3.84,3.14,0.0,0.81,0.14,0.27,0.0,0.0,0.0,0.0,45.38,0.19,0.54
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,53.04,50.34,2.53,0.84,0.34,0.25,1.48,0.68,0.42,0.13,0.04,0.0,44.01,0.17,0.93
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,61.52,59.84,1.4,1.34,0.16,0.16,0.31,0.06,0.0,0.0,0.0,0.0,56.49,0.28,0.47
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,57.04,55.04,1.88,1.81,0.38,0.31,0.23,0.04,0.04,0.0,0.0,0.0,50.31,0.12,0.54


In [43]:
players_standard_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       'MP', 'Starts', 'Min', '90s', 'Goals', 'Assists', 'G+A', 'G-PK', 'PK',
       'PKatt', 'Yellow', 'Red', 'xG', 'npxG', 'xAG', 'npxG+xAG',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received', 'Goals_per90',
       'Assits_per90', 'G+A_per90', 'G-PK_per90', 'G+A-PK_per90', 'xG_per90',
       'xAG_per90', 'xG+xAG_per90', 'npxG_per90', 'npxG+xAG_per90', 'Matches'],
      dtype='object')

In [44]:
# Function to process defense_data
def process_standard_data(df):
    columns_to_process = [ 'Yellow', 'Red','Assists',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received',]

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Goals', 'G+A', 'G-PK',  'xG', 'npxG', 'xAG', 'npxG+xAG','Assits_per90','Matches'])
    df = df.rename(columns={'PKatt': 'PK_Attempted'})
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_players_standard_data = process_standard_data(players_standard_data)


processed_players_standard_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,MP,Starts,Min,90s,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Assists_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,36,28,2372,26.4,0,0,0.04,0.15,0.04,0.15,0.15,0.16,0.31,0.15,0.31,0.08,0.0,0.11,1.63,3.26,5.72
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37,37,3330,37.0,0,0,0.03,0.08,0.03,0.08,0.06,0.03,0.09,0.06,0.09,0.16,0.0,0.05,1.08,5.81,0.27
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,30,24,2137,23.7,0,0,0.08,0.17,0.08,0.17,0.06,0.12,0.18,0.06,0.18,0.13,0.0,0.08,2.45,6.37,4.68
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,33,33,2894,32.2,0,0,0.03,0.06,0.03,0.06,0.03,0.05,0.08,0.03,0.08,0.19,0.03,0.03,1.02,5.12,1.37
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,29,28,2342,26.0,0,0,0.0,0.04,0.0,0.04,0.02,0.05,0.07,0.02,0.07,0.27,0.04,0.04,0.5,5.5,0.54


In [45]:
possession_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Take_Ons_Succ%', 'Tackled_Take_Ons', 'Tackled_Take_Ons%', 'Carries',
       'Total_Distance', 'Progressive_Distance_Carried', 'Progressive_Carries',
       '1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols', 'Dispossessed',
       'Passes_Received', 'Progressive_Passes_Received', 'Matches'],
      dtype='object')

In [46]:
import pandas as pd

# Function to process defense_data
def process_possession_data(df):
    columns_to_process = [ 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Tackled_Take_Ons', 'Carries','Total_Distance', 'Progressive_Distance_Carried',
        'Progressive_Carries','1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols',
        'Dispossessed','Passes_Received', 'Progressive_Passes_Received']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_possession_data= process_possession_data(possession_data)


processed_possession_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,34.0,48.0,43.3,0.45,4.39,18.75,20.98,2.73,43.3,3.79,1.29,1.82,25.98,138.11,58.03,1.63,1.29,0.49,2.69,3.11,29.05,5.72
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,56.8,37.8,66.46,6.51,29.41,34.46,3.32,1.05,66.46,1.0,0.57,0.38,46.57,293.81,165.49,1.08,0.57,0.08,0.73,0.62,40.51,0.27
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,56.6,36.8,63.97,1.1,10.0,38.57,17.26,0.97,63.97,3.21,1.81,1.18,49.96,289.49,138.99,2.45,2.53,0.3,1.52,0.84,44.98,4.68
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,59.2,26.5,69.07,2.05,12.55,46.65,10.56,0.5,69.07,1.52,0.9,0.4,57.42,299.53,107.36,1.02,1.74,0.09,1.21,0.99,52.95,1.37
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,56.1,34.1,68.42,2.92,22.35,41.12,5.81,0.15,68.42,1.58,0.88,0.54,53.23,233.42,99.88,0.5,1.12,0.0,1.12,0.73,48.35,0.54


In [47]:
shooting_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Position', 'Squad', 'Competition',
       'Age', 'Born', '90s', 'Goals', 'Shots_total', 'Shots_on_target',
       'Shots_on_target_%', 'Shots_total_per90', 'Shots_on_target_per90',
       'Goals_per_shot', 'Goals_per_shot_on_target', 'Average_shot_distance',
       'Shots_free_kicks', 'Pens_Scored', 'Pens_Attempted', 'XG', 'Npxg',
       'Npxg_per_shot', 'Xg_net', 'Npxg_net', 'Matches'],
      dtype='object')

In [48]:
import pandas as pd


# Drop specified columns
processed_shooting_data = shooting_data.drop(columns=['Goals', 'Shots_total', 'Shots_on_target', 'Matches'])

# Fill missing values with 0
processed_shooting_data.fillna(0, inplace=True)

# Rename columns for consistency
processed_shooting_data.rename(columns={'Position': 'Pos', 'Competition': 'Comp'}, inplace=True)

# Display the processed shooting_data
processed_shooting_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,22.0,1.56,0.34,0.02,0.11,18.4,2.0,0,0,3.9,3.9,0.09,-2.9,-2.9
4,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,9.4,0.86,0.08,0.03,0.33,15.0,0.0,0,0,2.4,2.4,0.07,-1.4,-1.4
5,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,29.2,1.01,0.29,0.08,0.29,22.4,2.0,0,0,1.4,1.4,0.06,0.6,0.6
6,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,30.8,0.4,0.12,0.08,0.25,19.1,0.0,0,0,0.9,0.9,0.07,0.1,0.1
7,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,15.4,0.5,0.08,0.0,0.0,24.0,0.0,0,0,0.5,0.5,0.04,-0.5,-0.5


In [49]:
# Drop duplicate columns from the processed datasets
datasets_to_drop_duplicates = [processed_goal_creating_actions_data, processed_passing_data, processed_passing_type_data,
                               processed_players_standard_data, processed_possession_data, processed_shooting_data]

for df in datasets_to_drop_duplicates:
    df.drop(columns=['Unnamed: 0', 'Squad'], errors='ignore', inplace=True)

# Combine the preprocessed datasets
combined_data_2023 = pd.merge(processed_defense_data, processed_goal_creating_actions_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2023 = pd.merge(combined_data_2023, processed_passing_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2023 = pd.merge(combined_data_2023, processed_passing_type_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2023 = pd.merge(combined_data_2023, processed_players_standard_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2023 = pd.merge(combined_data_2023, processed_possession_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2023 = pd.merge(combined_data_2023, processed_shooting_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')

combined_data_2023 = combined_data_2023.drop_duplicates(subset=['Player', 'Nation', 'Pos','Squad' ,'Age', 'Comp', 'Born', '90s'])
# Fill any remaining missing values with 0
combined_data_2023.fillna(0, inplace=True)
# Save the combined data for 2023 to a new CSV file
combined_data_2023.to_csv('FBRef Data/2023/Combined_Preprocessed_Data_2023.csv', index=False)
combined_data_2023.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Assists_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,32.6,1.7,0.68,0.61,0.83,0.27,0.57,1.74,1.17,1.63,0.11,1.52,0.19,1.89,0.23,0.04,3.6,0.27,2.16,0.64,0.27,0.19,0.3,0.04,0.11,0.04,0.0,0.04,0.04,0.04,74.3,81.8,76.9,38.5,22.42,30.19,287.01,82.65,13.11,16.02,5.68,7.39,0.95,2.46,0.16,0.1,-0.05,1.74,1.78,0.61,0.15,3.26,30.19,26.93,3.22,0.98,0.3,0.08,2.73,0.19,1.44,0.64,0.53,0.04,22.42,0.04,1.02,36,28,2372,0,0,0.04,0.15,0.04,0.15,0.15,0.16,0.31,0.15,0.31,0.08,0.0,0.11,1.63,3.26,5.72,34.0,48.0,43.3,0.45,4.39,18.75,20.98,2.73,43.3,3.79,1.29,1.82,25.98,138.11,58.03,1.63,1.29,0.49,2.69,3.11,29.05,5.72,22.0,1.56,0.34,0.02,0.11,18.4,2.0,0,0,3.9,3.9,0.09,-2.9,-2.9
1,4,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,75.0,2.22,1.35,1.27,0.89,0.05,1.05,1.41,0.35,1.76,0.73,1.03,1.73,3.95,3.14,0.03,1.16,0.19,0.92,0.0,0.05,0.14,0.0,0.05,0.11,0.0,0.05,0.03,0.0,0.0,82.7,85.8,90.1,55.6,45.38,54.89,891.0,362.35,13.24,15.43,27.19,30.19,4.19,7.54,0.03,0.02,0.03,0.35,4.19,0.14,0.0,5.81,54.89,50.86,3.84,3.14,0.0,0.81,0.14,0.27,0.0,0.0,0.0,0.0,45.38,0.19,0.54,37,37,3330,0,0,0.03,0.08,0.03,0.08,0.06,0.03,0.09,0.06,0.09,0.16,0.0,0.05,1.08,5.81,0.27,56.8,37.8,66.46,6.51,29.41,34.46,3.32,1.05,66.46,1.0,0.57,0.38,46.57,293.81,165.49,1.08,0.57,0.08,0.73,0.62,40.51,0.27,9.4,0.86,0.08,0.03,0.33,15.0,0.0,0,0,2.4,2.4,0.07,-1.4,-1.4
2,5,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,46.3,2.7,1.6,0.93,1.35,0.42,1.31,2.83,1.52,1.27,0.04,1.22,1.18,3.88,0.63,0.0,2.91,0.13,2.49,0.17,0.13,0.04,0.0,0.08,0.13,0.0,0.0,0.0,0.0,0.0,83.0,86.8,85.2,72.2,44.01,53.04,771.01,196.16,19.62,22.62,17.76,20.84,5.15,7.13,0.12,0.11,-0.04,1.52,5.23,1.1,0.34,6.37,53.04,50.34,2.53,0.84,0.34,0.25,1.48,0.68,0.42,0.13,0.04,0.0,44.01,0.17,0.93,30,24,2137,0,0,0.08,0.17,0.08,0.17,0.06,0.12,0.18,0.06,0.18,0.13,0.0,0.08,2.45,6.37,4.68,56.6,36.8,63.97,1.1,10.0,38.57,17.26,0.97,63.97,3.21,1.81,1.18,49.96,289.49,138.99,2.45,2.53,0.3,1.52,0.84,44.98,4.68,29.2,1.01,0.29,0.08,0.29,22.4,2.0,0,0,1.4,1.4,0.06,0.6,0.6
3,6,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,55.8,1.43,0.81,0.56,0.71,0.16,0.9,1.61,0.71,1.18,0.28,0.9,1.12,2.55,0.81,0.0,1.55,0.03,1.34,0.0,0.06,0.06,0.06,0.03,0.03,0.0,0.0,0.0,0.0,0.0,91.8,93.0,94.8,78.6,56.49,61.52,890.65,218.01,27.86,29.94,23.88,25.19,2.86,3.63,0.05,0.07,-0.02,0.53,5.65,0.53,0.09,5.12,61.52,59.84,1.4,1.34,0.16,0.16,0.31,0.06,0.0,0.0,0.0,0.0,56.49,0.28,0.47,33,33,2894,0,0,0.03,0.06,0.03,0.06,0.03,0.05,0.08,0.03,0.08,0.19,0.03,0.03,1.02,5.12,1.37,59.2,26.5,69.07,2.05,12.55,46.65,10.56,0.5,69.07,1.52,0.9,0.4,57.42,299.53,107.36,1.02,1.74,0.09,1.21,0.99,52.95,1.37,30.8,0.4,0.12,0.08,0.25,19.1,0.0,0,0,0.9,0.9,0.07,0.1,0.1
4,7,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,42.4,3.54,2.0,2.27,1.15,0.12,1.5,3.54,2.04,1.81,0.31,1.5,1.31,4.85,1.5,0.0,1.27,0.04,1.04,0.08,0.08,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,88.2,92.7,91.1,69.4,50.31,57.04,857.19,291.5,22.81,24.62,21.62,23.73,4.27,6.15,0.05,0.04,-0.01,0.54,5.54,0.42,0.0,5.5,57.04,55.04,1.88,1.81,0.38,0.31,0.23,0.04,0.04,0.0,0.0,0.0,50.31,0.12,0.54,29,28,2342,0,0,0.0,0.04,0.0,0.04,0.02,0.05,0.07,0.02,0.07,0.27,0.04,0.04,0.5,5.5,0.54,56.1,34.1,68.42,2.92,22.35,41.12,5.81,0.15,68.42,1.58,0.88,0.54,53.23,233.42,99.88,0.5,1.12,0.0,1.12,0.73,48.35,0.54,15.4,0.5,0.08,0.0,0.0,24.0,0.0,0,0,0.5,0.5,0.04,-0.5,-0.5


In [50]:
combined_data_2023.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
combined_data_2023.to_csv('FBRef Data/2023/Combined_Preprocessed_Data_2023.csv', index=False)
combined_data_2023.head()

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Assists_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,Brenden Aaronson,us USA,"MF,FW",Leeds United,eng Premier League,21,2000,26.4,32.6,1.7,0.68,0.61,0.83,0.27,0.57,1.74,1.17,1.63,0.11,1.52,0.19,1.89,0.23,0.04,3.6,0.27,2.16,0.64,0.27,0.19,0.3,0.04,0.11,0.04,0.0,0.04,0.04,0.04,74.3,81.8,76.9,38.5,22.42,30.19,287.01,82.65,13.11,16.02,5.68,7.39,0.95,2.46,0.16,0.1,-0.05,1.74,1.78,0.61,0.15,3.26,30.19,26.93,3.22,0.98,0.3,0.08,2.73,0.19,1.44,0.64,0.53,0.04,22.42,0.04,1.02,36,28,2372,0,0,0.04,0.15,0.04,0.15,0.15,0.16,0.31,0.15,0.31,0.08,0.0,0.11,1.63,3.26,5.72,34.0,48.0,43.3,0.45,4.39,18.75,20.98,2.73,43.3,3.79,1.29,1.82,25.98,138.11,58.03,1.63,1.29,0.49,2.69,3.11,29.05,5.72,22.0,1.56,0.34,0.02,0.11,18.4,2.0,0,0,3.9,3.9,0.09,-2.9,-2.9
1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,75.0,2.22,1.35,1.27,0.89,0.05,1.05,1.41,0.35,1.76,0.73,1.03,1.73,3.95,3.14,0.03,1.16,0.19,0.92,0.0,0.05,0.14,0.0,0.05,0.11,0.0,0.05,0.03,0.0,0.0,82.7,85.8,90.1,55.6,45.38,54.89,891.0,362.35,13.24,15.43,27.19,30.19,4.19,7.54,0.03,0.02,0.03,0.35,4.19,0.14,0.0,5.81,54.89,50.86,3.84,3.14,0.0,0.81,0.14,0.27,0.0,0.0,0.0,0.0,45.38,0.19,0.54,37,37,3330,0,0,0.03,0.08,0.03,0.08,0.06,0.03,0.09,0.06,0.09,0.16,0.0,0.05,1.08,5.81,0.27,56.8,37.8,66.46,6.51,29.41,34.46,3.32,1.05,66.46,1.0,0.57,0.38,46.57,293.81,165.49,1.08,0.57,0.08,0.73,0.62,40.51,0.27,9.4,0.86,0.08,0.03,0.33,15.0,0.0,0,0,2.4,2.4,0.07,-1.4,-1.4
2,Himad Abdelli,dz ALG,"MF,FW",Angers,fr Ligue 1,22,1999,23.7,46.3,2.7,1.6,0.93,1.35,0.42,1.31,2.83,1.52,1.27,0.04,1.22,1.18,3.88,0.63,0.0,2.91,0.13,2.49,0.17,0.13,0.04,0.0,0.08,0.13,0.0,0.0,0.0,0.0,0.0,83.0,86.8,85.2,72.2,44.01,53.04,771.01,196.16,19.62,22.62,17.76,20.84,5.15,7.13,0.12,0.11,-0.04,1.52,5.23,1.1,0.34,6.37,53.04,50.34,2.53,0.84,0.34,0.25,1.48,0.68,0.42,0.13,0.04,0.0,44.01,0.17,0.93,30,24,2137,0,0,0.08,0.17,0.08,0.17,0.06,0.12,0.18,0.06,0.18,0.13,0.0,0.08,2.45,6.37,4.68,56.6,36.8,63.97,1.1,10.0,38.57,17.26,0.97,63.97,3.21,1.81,1.18,49.96,289.49,138.99,2.45,2.53,0.3,1.52,0.84,44.98,4.68,29.2,1.01,0.29,0.08,0.29,22.4,2.0,0,0,1.4,1.4,0.06,0.6,0.6
3,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,55.8,1.43,0.81,0.56,0.71,0.16,0.9,1.61,0.71,1.18,0.28,0.9,1.12,2.55,0.81,0.0,1.55,0.03,1.34,0.0,0.06,0.06,0.06,0.03,0.03,0.0,0.0,0.0,0.0,0.0,91.8,93.0,94.8,78.6,56.49,61.52,890.65,218.01,27.86,29.94,23.88,25.19,2.86,3.63,0.05,0.07,-0.02,0.53,5.65,0.53,0.09,5.12,61.52,59.84,1.4,1.34,0.16,0.16,0.31,0.06,0.0,0.0,0.0,0.0,56.49,0.28,0.47,33,33,2894,0,0,0.03,0.06,0.03,0.06,0.03,0.05,0.08,0.03,0.08,0.19,0.03,0.03,1.02,5.12,1.37,59.2,26.5,69.07,2.05,12.55,46.65,10.56,0.5,69.07,1.52,0.9,0.4,57.42,299.53,107.36,1.02,1.74,0.09,1.21,0.99,52.95,1.37,30.8,0.4,0.12,0.08,0.25,19.1,0.0,0,0,0.9,0.9,0.07,0.1,0.1
4,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,42.4,3.54,2.0,2.27,1.15,0.12,1.5,3.54,2.04,1.81,0.31,1.5,1.31,4.85,1.5,0.0,1.27,0.04,1.04,0.08,0.08,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,88.2,92.7,91.1,69.4,50.31,57.04,857.19,291.5,22.81,24.62,21.62,23.73,4.27,6.15,0.05,0.04,-0.01,0.54,5.54,0.42,0.0,5.5,57.04,55.04,1.88,1.81,0.38,0.31,0.23,0.04,0.04,0.0,0.0,0.0,50.31,0.12,0.54,29,28,2342,0,0,0.0,0.04,0.0,0.04,0.02,0.05,0.07,0.02,0.07,0.27,0.04,0.04,0.5,5.5,0.54,56.1,34.1,68.42,2.92,22.35,41.12,5.81,0.15,68.42,1.58,0.88,0.54,53.23,233.42,99.88,0.5,1.12,0.0,1.12,0.73,48.35,0.54,15.4,0.5,0.08,0.0,0.0,24.0,0.0,0,0,0.5,0.5,0.04,-0.5,-0.5


In [51]:
combined_data_2023.shape

(1631, 131)

# 2022

In [52]:
# Load all datasets for 2022
defense_data = pd.read_csv('FBRef Data/2022/Defense_2022.csv')
goal_creating_actions_data = pd.read_csv('FBRef Data/2022/Goal_Creating_Actions_2022.csv')
miscellaneous_data = pd.read_csv('FBRef Data/2022/Miscellaneous_2022.csv')
passing_data = pd.read_csv('FBRef Data/2022/Passing_2022.csv')
passing_type_data = pd.read_csv('FBRef Data/2022/Passing_Type_2022.csv')
players_standard_data = pd.read_csv('FBRef Data/2022/Players_Standard_2022.csv')
possession_data = pd.read_csv('FBRef Data/2022/Possession_2022.csv')
shooting_data = pd.read_csv('FBRef Data/2022/Shooting_2022.csv')

# Fill missing values with 0 for all datasets
datasets = [defense_data, goal_creating_actions_data, miscellaneous_data, passing_data,
            passing_type_data, players_standard_data, possession_data, shooting_data]

for df in datasets:
    df.fillna(0, inplace=True)

# Filter out players with less than 10 90s
defense_data = defense_data[defense_data['90s'] >= 10]
goal_creating_actions_data = goal_creating_actions_data[goal_creating_actions_data['90s'] >= 10]
passing_data = passing_data[passing_data['90s'] >= 10]
passing_type_data = passing_type_data[passing_type_data['90s'] >= 10]
players_standard_data = players_standard_data[players_standard_data['90s'] >= 10]
possession_data = possession_data[possession_data['90s'] >= 10]
shooting_data = shooting_data[shooting_data['90s'] >= 10]

defense_data.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
       'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
       'Dribblers_Tackle_W%', 'Dribblers_Tackle_Lost', 'Blocks',
       'Shots_Blocked', 'Passes_Blocked', 'Interceptions',
       'Tackles+Interceptions', 'Clearances', 'Errors_Shots', 'Matches'],
      dtype='object')

In [53]:
import pandas as pd

# Function to process defense_data
def process_defense_data(df):
    columns_to_process = ['Tackles', 'Tackles_Won', 'Tackles_Def_3rd', 'Tackles_Mid_3rd',
                          'Tackles_Att_3rd', 'Dribblers_Tackled', 'Dribblers_Challenged',
                          'Dribblers_Tackle_Lost', 'Blocks', 'Shots_Blocked', 'Passes_Blocked_Def',
                          'Interceptions', 'Tackles+Interceptions', 'Clearances', 'Errors_Shots']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Def'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])

    return df



# Apply the processing function to defense_data
processed_defense_data = process_defense_data(defense_data)


processed_defense_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,67.3,2.0,1.38,1.53,0.47,0.0,1.16,1.72,0.56,1.22,0.59,0.62,0.88,2.88,3.0,0.03
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,50.0,1.45,0.82,1.03,0.42,0.0,0.48,0.97,0.48,1.51,0.76,0.76,2.05,3.5,3.14,0.0
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,35.1,1.57,0.84,0.91,0.58,0.07,0.73,2.08,1.35,0.66,0.04,0.62,1.53,3.1,0.55,0.0
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,42.8,3.35,1.83,1.34,1.62,0.4,2.16,5.06,2.9,1.77,0.03,1.74,1.68,5.03,0.4,0.0
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,40.9,0.79,0.38,0.2,0.41,0.17,0.26,0.64,0.38,0.5,0.09,0.41,0.15,0.93,0.79,0.0


In [54]:
import pandas as pd

# Function to process defense_data
def process_goal_data(df):
    columns_to_process = ['Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Shot_Creating_Action','Goal_Creating_Action','Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_goal_creating_actions_data = process_goal_data(goal_creating_actions_data)


processed_goal_creating_actions_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,1.44,0.09,1.06,0.09,0.06,0.09,0.09,0.03,0.09,0.0,0.0,0.0,0.0,0.0
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,0.66,0.03,0.51,0.0,0.06,0.03,0.0,0.06,0.03,0.0,0.0,0.0,0.0,0.0
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,1.83,0.11,1.5,0.0,0.0,0.15,0.15,0.04,0.11,0.0,0.0,0.0,0.0,0.0
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,2.53,0.27,2.04,0.06,0.09,0.03,0.21,0.09,0.24,0.0,0.0,0.0,0.03,0.0
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,2.74,0.41,1.81,0.03,0.17,0.5,0.2,0.03,0.17,0.0,0.06,0.09,0.09,0.0


In [55]:
import pandas as pd

# Function to process defense_data
def process_pass_data(df):
    columns_to_process = ['Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_TotDist',
        'Passes_PrgDist', 'Passes_Short_Cmp','Passes_Short_Att', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Long_Cmp','Passes_Long_Att',  'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches','Assits'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_data = process_pass_data(passing_data)


processed_passing_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,75.4,90.5,75.0,35.6,34.59,45.91,518.38,191.66,20.16,22.28,11.97,15.97,1.78,5.0,0.05,0.05,0.01,0.62,1.56,1.16,0.28,3.66
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,81.4,88.5,90.9,54.5,38.79,47.67,770.88,289.61,12.54,14.17,21.6,23.78,4.38,8.04,0.02,0.02,-0.02,0.27,2.87,0.21,0.0,3.6
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,90.2,92.9,93.7,83.7,56.02,62.08,931.31,199.12,27.3,29.38,23.91,25.51,3.94,4.71,0.03,0.03,-0.03,0.62,3.18,0.47,0.04,3.94
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,80.8,89.2,86.3,64.4,40.91,50.61,724.6,208.45,18.48,20.7,16.31,18.9,4.63,7.2,0.11,0.08,-0.05,1.07,4.48,0.7,0.27,5.21
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,69.7,73.1,74.4,71.8,14.14,20.29,198.83,47.29,8.54,11.69,3.91,5.25,0.82,1.14,0.14,0.07,-0.02,1.11,1.02,0.79,0.09,2.04


In [56]:
import pandas as pd

# Function to process defense_data
def process_passing_data(df):
    columns_to_process = ['Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Offside',
       'Passes_Blocked_Off']
    df = df.rename(columns={'Passes_Blocked': 'Passes_Blocked_Off','Passes_Off':'Passes_Offside'})
    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_passing_type_data = process_passing_data(passing_type_data)


processed_passing_type_data.head()


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,45.91,35.31,10.38,0.53,0.16,0.12,1.56,9.84,0.0,0.0,0.0,0.0,34.59,0.22,1.25
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,47.67,44.44,3.05,2.69,0.0,0.97,0.12,0.27,0.0,0.0,0.0,0.0,38.79,0.18,0.79
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,62.08,61.24,0.77,0.62,0.04,0.11,0.51,0.15,0.0,0.0,0.0,0.0,56.02,0.07,1.06
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,50.61,49.3,1.07,0.76,0.21,0.64,1.13,0.24,0.06,0.0,0.03,0.0,40.91,0.24,1.49
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,20.29,18.28,1.95,0.17,0.15,0.03,0.47,0.06,0.0,0.0,0.0,0.0,14.14,0.06,0.58


In [57]:
players_standard_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,MP,Starts,Min,90s,Goals,Assists,G+A,G-PK,PK,PKatt,Yellow,Red,xG,npxG,xAG,npxG+xAG,Prg_Carries,Prg_Passes,Prg_Passes_Received,Goals_per90,Assits_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Matches
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,34,32,2881,32.0,0,2,2,0,0,0,8,0,0.8,0.8,1.7,2.5,78,117,85,0.0,0.06,0.06,0.0,0.06,0.03,0.05,0.08,0.03,0.08,Matches
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,34,34,2983,33.1,2,0,2,2,0,0,5,1,1.2,1.2,0.8,2.0,23,119,14,0.06,0.0,0.06,0.06,0.06,0.04,0.02,0.06,0.04,0.06,Matches
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,31,29,2462,27.4,1,0,1,1,0,0,12,3,0.7,0.7,0.9,1.6,24,108,47,0.04,0.0,0.04,0.04,0.04,0.03,0.03,0.06,0.03,0.06,Matches
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,34,34,2956,32.8,0,2,2,0,0,0,9,0,1.5,1.5,3.7,5.1,30,171,73,0.0,0.06,0.06,0.0,0.06,0.04,0.11,0.16,0.04,0.16,Matches
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,37,36,3084,34.3,17,4,21,14,3,3,9,0,20.7,18.4,4.8,23.2,41,70,199,0.5,0.12,0.61,0.41,0.53,0.6,0.14,0.74,0.54,0.68,Matches


In [58]:
# Function to process defense_data
def process_standard_data(df,df2):
    columns_to_process = [ 'Yellow', 'Red',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received',]

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Goals', 'G+A', 'G-PK',  'xG', 'npxG', 'xAG', 'npxG+xAG','Assists','Matches'])
    df = df.rename(columns={'PKatt': 'PK_Attempted','Assits_per90':'Assists_per90'})
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_players_standard_data = process_standard_data(players_standard_data,processed_passing_type_data)


processed_players_standard_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,MP,Starts,Min,90s,PK,PK_Attempted,Goals_per90,Assists_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,34,32,2881,32.0,0,0,0.0,0.06,0.06,0.0,0.06,0.03,0.05,0.08,0.03,0.08,0.25,0.0,2.44,3.66,2.66
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,34,34,2983,33.1,0,0,0.06,0.0,0.06,0.06,0.06,0.04,0.02,0.06,0.04,0.06,0.15,0.03,0.69,3.6,0.42
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,31,29,2462,27.4,0,0,0.04,0.0,0.04,0.04,0.04,0.03,0.03,0.06,0.03,0.06,0.44,0.11,0.88,3.94,1.72
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,34,34,2956,32.8,0,0,0.0,0.06,0.06,0.0,0.06,0.04,0.11,0.16,0.04,0.16,0.27,0.0,0.91,5.21,2.23
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,37,36,3084,34.3,3,3,0.5,0.12,0.61,0.41,0.53,0.6,0.14,0.74,0.54,0.68,0.26,0.0,1.2,2.04,5.8


In [59]:
import pandas as pd

# Function to process defense_data
def process_possession_data(df):
    columns_to_process = [ 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Tackled_Take_Ons', 'Carries','Total_Distance', 'Progressive_Distance_Carried',
        'Progressive_Carries','1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols',
        'Dispossessed','Passes_Received', 'Progressive_Passes_Received']

    # Check for 90s column and divide relevant columns by 90s
    if '90s' in df.columns:
        for col in columns_to_process:
            if col in df.columns:
                df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)
                df = df.drop(columns=[col])

    # Drop Matches column
    df = df.drop(columns=['Matches'])
    df.fillna(0,inplace=True)
    return df



# Apply the processing function to defense_data
processed_possession_data= process_possession_data(possession_data)


processed_possession_data.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[col + '_per90'] = np.round(df[col].astype(float) / df['90s'].astype(float),2)


Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,50.0,50.0,57.53,5.0,22.53,21.97,13.75,0.72,57.53,2.12,1.06,1.06,29.78,201.69,105.53,2.44,1.47,0.38,1.19,0.72,32.19,2.66
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,77.8,22.2,57.98,6.4,30.73,25.47,2.54,0.73,57.98,0.54,0.42,0.12,36.07,194.89,107.49,0.69,0.57,0.0,0.85,0.39,36.31,0.42
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,63.4,36.6,71.09,1.42,20.33,41.82,9.53,0.58,71.09,1.5,0.95,0.55,50.58,239.78,98.07,0.88,0.84,0.11,1.53,1.09,54.05,1.72
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,63.9,36.1,61.95,0.73,12.32,39.57,10.88,0.4,61.95,1.86,1.19,0.67,39.97,171.62,71.74,0.91,1.19,0.09,1.59,1.22,42.99,2.23
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,49.4,50.6,31.02,0.93,2.13,14.81,14.43,5.04,30.93,2.24,1.11,1.14,14.4,75.57,32.92,1.2,0.9,0.64,2.51,1.2,21.46,5.8


In [60]:
import pandas as pd


# Drop specified columns
processed_shooting_data = shooting_data.drop(columns=['Goals', 'Shots_total', 'Shots_on_target', 'Matches'])

# Fill missing values with 0
processed_shooting_data.fillna(0, inplace=True)

# Rename columns for consistency
processed_shooting_data.rename(columns={'Position': 'Pos', 'Competition': 'Comp'}, inplace=True)

# Display the processed shooting_data
processed_shooting_data.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,15.4,0.41,0.06,0.0,0.0,19.9,0,0,0,0.8,0.8,0.06,-0.8,-0.8
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,33.3,0.54,0.18,0.11,0.33,18.3,0,0,0,1.2,1.2,0.07,0.8,0.8
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,27.8,0.66,0.18,0.06,0.2,21.5,0,0,0,0.7,0.7,0.04,0.3,0.3
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,24.1,0.88,0.21,0.0,0.0,24.2,0,0,0,1.5,1.5,0.05,-1.5,-1.5
7,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,38.0,2.68,1.02,0.15,0.4,12.9,3,3,3,20.7,18.4,0.2,-3.7,-4.4


In [61]:
# Drop duplicate columns from the processed datasets
datasets_to_drop_duplicates = [processed_goal_creating_actions_data, processed_passing_data, processed_passing_type_data,
                               processed_players_standard_data, processed_possession_data, processed_shooting_data]

for df in datasets_to_drop_duplicates:
    df.drop(columns=['Unnamed: 0', 'Squad'], errors='ignore', inplace=True)

# Combine the preprocessed datasets
combined_data_2022 = pd.merge(processed_defense_data, processed_goal_creating_actions_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = pd.merge(combined_data_2022, processed_passing_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = pd.merge(combined_data_2022, processed_passing_type_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = pd.merge(combined_data_2022, processed_players_standard_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = pd.merge(combined_data_2022, processed_possession_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = pd.merge(combined_data_2022, processed_shooting_data, on=['Player', 'Nation', 'Pos', 'Age', 'Comp', 'Born', '90s'], how='outer')
combined_data_2022 = combined_data_2022.drop_duplicates(subset=['Player', 'Nation', 'Pos','Squad' ,'Age', 'Comp', 'Born', '90s'])
# Fill any remaining missing values with 0
combined_data_2022.fillna(0, inplace=True)
# Save the combined data for 2022 to a new CSV file
combined_data_2022.to_csv('FBRef Data/2022/Combined_Preprocessed_Data_2022.csv', index=False)
combined_data_2022.head()

Unnamed: 0.1,Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,Assists_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,67.3,2.0,1.38,1.53,0.47,0.0,1.16,1.72,0.56,1.22,0.59,0.62,0.88,2.88,3.0,0.03,1.44,0.09,1.06,0.09,0.06,0.09,0.09,0.03,0.09,0.0,0.0,0.0,0.0,0.0,75.4,90.5,75.0,35.6,34.59,45.91,518.38,191.66,20.16,22.28,11.97,15.97,1.78,5.0,0.05,0.05,0.01,0.62,1.56,1.16,0.28,3.66,45.91,35.31,10.38,0.53,0.16,0.12,1.56,9.84,0.0,0.0,0.0,0.0,34.59,0.22,1.25,34,32,2881,0,0,0.0,0.06,0.06,0.0,0.06,0.03,0.05,0.08,0.03,0.08,0.25,0.0,2.44,3.66,2.66,50.0,50.0,57.53,5.0,22.53,21.97,13.75,0.72,57.53,2.12,1.06,1.06,29.78,201.69,105.53,2.44,1.47,0.38,1.19,0.72,32.19,2.66,15.4,0.41,0.06,0.0,0.0,19.9,0,0,0,0.8,0.8,0.06,-0.8,-0.8
1,1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,50.0,1.45,0.82,1.03,0.42,0.0,0.48,0.97,0.48,1.51,0.76,0.76,2.05,3.5,3.14,0.0,0.66,0.03,0.51,0.0,0.06,0.03,0.0,0.06,0.03,0.0,0.0,0.0,0.0,0.0,81.4,88.5,90.9,54.5,38.79,47.67,770.88,289.61,12.54,14.17,21.6,23.78,4.38,8.04,0.02,0.02,-0.02,0.27,2.87,0.21,0.0,3.6,47.67,44.44,3.05,2.69,0.0,0.97,0.12,0.27,0.0,0.0,0.0,0.0,38.79,0.18,0.79,34,34,2983,0,0,0.06,0.0,0.06,0.06,0.06,0.04,0.02,0.06,0.04,0.06,0.15,0.03,0.69,3.6,0.42,77.8,22.2,57.98,6.4,30.73,25.47,2.54,0.73,57.98,0.54,0.42,0.12,36.07,194.89,107.49,0.69,0.57,0.0,0.85,0.39,36.31,0.42,33.3,0.54,0.18,0.11,0.33,18.3,0,0,0,1.2,1.2,0.07,0.8,0.8
2,2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,35.1,1.57,0.84,0.91,0.58,0.07,0.73,2.08,1.35,0.66,0.04,0.62,1.53,3.1,0.55,0.0,1.83,0.11,1.5,0.0,0.0,0.15,0.15,0.04,0.11,0.0,0.0,0.0,0.0,0.0,90.2,92.9,93.7,83.7,56.02,62.08,931.31,199.12,27.3,29.38,23.91,25.51,3.94,4.71,0.03,0.03,-0.03,0.62,3.18,0.47,0.04,3.94,62.08,61.24,0.77,0.62,0.04,0.11,0.51,0.15,0.0,0.0,0.0,0.0,56.02,0.07,1.06,31,29,2462,0,0,0.04,0.0,0.04,0.04,0.04,0.03,0.03,0.06,0.03,0.06,0.44,0.11,0.88,3.94,1.72,63.4,36.6,71.09,1.42,20.33,41.82,9.53,0.58,71.09,1.5,0.95,0.55,50.58,239.78,98.07,0.88,0.84,0.11,1.53,1.09,54.05,1.72,27.8,0.66,0.18,0.06,0.2,21.5,0,0,0,0.7,0.7,0.04,0.3,0.3
3,3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,42.8,3.35,1.83,1.34,1.62,0.4,2.16,5.06,2.9,1.77,0.03,1.74,1.68,5.03,0.4,0.0,2.53,0.27,2.04,0.06,0.09,0.03,0.21,0.09,0.24,0.0,0.0,0.0,0.03,0.0,80.8,89.2,86.3,64.4,40.91,50.61,724.6,208.45,18.48,20.7,16.31,18.9,4.63,7.2,0.11,0.08,-0.05,1.07,4.48,0.7,0.27,5.21,50.61,49.3,1.07,0.76,0.21,0.64,1.13,0.24,0.06,0.0,0.03,0.0,40.91,0.24,1.49,34,34,2956,0,0,0.0,0.06,0.06,0.0,0.06,0.04,0.11,0.16,0.04,0.16,0.27,0.0,0.91,5.21,2.23,63.9,36.1,61.95,0.73,12.32,39.57,10.88,0.4,61.95,1.86,1.19,0.67,39.97,171.62,71.74,0.91,1.19,0.09,1.59,1.22,42.99,2.23,24.1,0.88,0.21,0.0,0.0,24.2,0,0,0,1.5,1.5,0.05,-1.5,-1.5
4,7,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,40.9,0.79,0.38,0.2,0.41,0.17,0.26,0.64,0.38,0.5,0.09,0.41,0.15,0.93,0.79,0.0,2.74,0.41,1.81,0.03,0.17,0.5,0.2,0.03,0.17,0.0,0.06,0.09,0.09,0.0,69.7,73.1,74.4,71.8,14.14,20.29,198.83,47.29,8.54,11.69,3.91,5.25,0.82,1.14,0.14,0.07,-0.02,1.11,1.02,0.79,0.09,2.04,20.29,18.28,1.95,0.17,0.15,0.03,0.47,0.06,0.0,0.0,0.0,0.0,14.14,0.06,0.58,37,36,3084,3,3,0.5,0.12,0.61,0.41,0.53,0.6,0.14,0.74,0.54,0.68,0.26,0.0,1.2,2.04,5.8,49.4,50.6,31.02,0.93,2.13,14.81,14.43,5.04,30.93,2.24,1.11,1.14,14.4,75.57,32.92,1.2,0.9,0.64,2.51,1.2,21.46,5.8,38.0,2.68,1.02,0.15,0.4,12.9,3,3,3,20.7,18.4,0.2,-3.7,-4.4


In [62]:
combined_data_2022.drop(columns=['Unnamed: 0'], errors='ignore', inplace=True)
combined_data_2022.to_csv('FBRef Data/2022/Combined_Preprocessed_Data_2022.csv', index=False)
combined_data_2022.head()

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,MP,Starts,Min,PK,PK_Attempted,Goals_per90,Assists_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
0,Max Aarons,eng ENG,DF,Norwich City,eng Premier League,21,2000,32.0,67.3,2.0,1.38,1.53,0.47,0.0,1.16,1.72,0.56,1.22,0.59,0.62,0.88,2.88,3.0,0.03,1.44,0.09,1.06,0.09,0.06,0.09,0.09,0.03,0.09,0.0,0.0,0.0,0.0,0.0,75.4,90.5,75.0,35.6,34.59,45.91,518.38,191.66,20.16,22.28,11.97,15.97,1.78,5.0,0.05,0.05,0.01,0.62,1.56,1.16,0.28,3.66,45.91,35.31,10.38,0.53,0.16,0.12,1.56,9.84,0.0,0.0,0.0,0.0,34.59,0.22,1.25,34,32,2881,0,0,0.0,0.06,0.06,0.0,0.06,0.03,0.05,0.08,0.03,0.08,0.25,0.0,2.44,3.66,2.66,50.0,50.0,57.53,5.0,22.53,21.97,13.75,0.72,57.53,2.12,1.06,1.06,29.78,201.69,105.53,2.44,1.47,0.38,1.19,0.72,32.19,2.66,15.4,0.41,0.06,0.0,0.0,19.9,0,0,0,0.8,0.8,0.06,-0.8,-0.8
1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,50.0,1.45,0.82,1.03,0.42,0.0,0.48,0.97,0.48,1.51,0.76,0.76,2.05,3.5,3.14,0.0,0.66,0.03,0.51,0.0,0.06,0.03,0.0,0.06,0.03,0.0,0.0,0.0,0.0,0.0,81.4,88.5,90.9,54.5,38.79,47.67,770.88,289.61,12.54,14.17,21.6,23.78,4.38,8.04,0.02,0.02,-0.02,0.27,2.87,0.21,0.0,3.6,47.67,44.44,3.05,2.69,0.0,0.97,0.12,0.27,0.0,0.0,0.0,0.0,38.79,0.18,0.79,34,34,2983,0,0,0.06,0.0,0.06,0.06,0.06,0.04,0.02,0.06,0.04,0.06,0.15,0.03,0.69,3.6,0.42,77.8,22.2,57.98,6.4,30.73,25.47,2.54,0.73,57.98,0.54,0.42,0.12,36.07,194.89,107.49,0.69,0.57,0.0,0.85,0.39,36.31,0.42,33.3,0.54,0.18,0.11,0.33,18.3,0,0,0,1.2,1.2,0.07,0.8,0.8
2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,35.1,1.57,0.84,0.91,0.58,0.07,0.73,2.08,1.35,0.66,0.04,0.62,1.53,3.1,0.55,0.0,1.83,0.11,1.5,0.0,0.0,0.15,0.15,0.04,0.11,0.0,0.0,0.0,0.0,0.0,90.2,92.9,93.7,83.7,56.02,62.08,931.31,199.12,27.3,29.38,23.91,25.51,3.94,4.71,0.03,0.03,-0.03,0.62,3.18,0.47,0.04,3.94,62.08,61.24,0.77,0.62,0.04,0.11,0.51,0.15,0.0,0.0,0.0,0.0,56.02,0.07,1.06,31,29,2462,0,0,0.04,0.0,0.04,0.04,0.04,0.03,0.03,0.06,0.03,0.06,0.44,0.11,0.88,3.94,1.72,63.4,36.6,71.09,1.42,20.33,41.82,9.53,0.58,71.09,1.5,0.95,0.55,50.58,239.78,98.07,0.88,0.84,0.11,1.53,1.09,54.05,1.72,27.8,0.66,0.18,0.06,0.2,21.5,0,0,0,0.7,0.7,0.04,0.3,0.3
3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,42.8,3.35,1.83,1.34,1.62,0.4,2.16,5.06,2.9,1.77,0.03,1.74,1.68,5.03,0.4,0.0,2.53,0.27,2.04,0.06,0.09,0.03,0.21,0.09,0.24,0.0,0.0,0.0,0.03,0.0,80.8,89.2,86.3,64.4,40.91,50.61,724.6,208.45,18.48,20.7,16.31,18.9,4.63,7.2,0.11,0.08,-0.05,1.07,4.48,0.7,0.27,5.21,50.61,49.3,1.07,0.76,0.21,0.64,1.13,0.24,0.06,0.0,0.03,0.0,40.91,0.24,1.49,34,34,2956,0,0,0.0,0.06,0.06,0.0,0.06,0.04,0.11,0.16,0.04,0.16,0.27,0.0,0.91,5.21,2.23,63.9,36.1,61.95,0.73,12.32,39.57,10.88,0.4,61.95,1.86,1.19,0.67,39.97,171.62,71.74,0.91,1.19,0.09,1.59,1.22,42.99,2.23,24.1,0.88,0.21,0.0,0.0,24.2,0,0,0,1.5,1.5,0.05,-1.5,-1.5
4,Tammy Abraham,eng ENG,FW,Roma,it Serie A,23,1997,34.3,40.9,0.79,0.38,0.2,0.41,0.17,0.26,0.64,0.38,0.5,0.09,0.41,0.15,0.93,0.79,0.0,2.74,0.41,1.81,0.03,0.17,0.5,0.2,0.03,0.17,0.0,0.06,0.09,0.09,0.0,69.7,73.1,74.4,71.8,14.14,20.29,198.83,47.29,8.54,11.69,3.91,5.25,0.82,1.14,0.14,0.07,-0.02,1.11,1.02,0.79,0.09,2.04,20.29,18.28,1.95,0.17,0.15,0.03,0.47,0.06,0.0,0.0,0.0,0.0,14.14,0.06,0.58,37,36,3084,3,3,0.5,0.12,0.61,0.41,0.53,0.6,0.14,0.74,0.54,0.68,0.26,0.0,1.2,2.04,5.8,49.4,50.6,31.02,0.93,2.13,14.81,14.43,5.04,30.93,2.24,1.11,1.14,14.4,75.57,32.92,1.2,0.9,0.64,2.51,1.2,21.46,5.8,38.0,2.68,1.02,0.15,0.4,12.9,3,3,3,20.7,18.4,0.2,-3.7,-4.4


In [63]:
combined_data_2022.shape

(1641, 131)

In [64]:
columns=combined_data_2022.columns.tolist()
columns

['Player',
 'Nation',
 'Pos',
 'Squad',
 'Comp',
 'Age',
 'Born',
 '90s',
 'Dribblers_Tackle_W%',
 'Tackles_per90',
 'Tackles_Won_per90',
 'Tackles_Def_3rd_per90',
 'Tackles_Mid_3rd_per90',
 'Tackles_Att_3rd_per90',
 'Dribblers_Tackled_per90',
 'Dribblers_Challenged_per90',
 'Dribblers_Tackle_Lost_per90',
 'Blocks_per90',
 'Shots_Blocked_per90',
 'Passes_Blocked_Def_per90',
 'Interceptions_per90',
 'Tackles+Interceptions_per90',
 'Clearances_per90',
 'Errors_Shots_per90',
 'Shot_Creating_Action_per90',
 'Goal_Creating_Action_90',
 'Pass_Live_Shot_per90',
 'Pass_Dead_Shot_per90',
 'Take_Ons_Shot_per90',
 'Shot-Shot_per90',
 'Fouls_drawn_Shot_per90',
 'Defensive_Shot_per90',
 'Pass_Live_Goal_per90',
 'Pass_Dead_Goal_per90',
 'Take_Ons_Goal_per90',
 'Shot_Goal_per90',
 'Fouls_Drawn_Goal_per90',
 'Defensive_Goal_per90',
 'Passes_Total_Cmp%',
 'Passes_Short_Cmp%',
 'Passes_Medium_Cmp%',
 'Passes_Long_Cmp%',
 'Passes_Total_Cmp_per90',
 'Passes_Total_Att_per90',
 'Passes_TotDist_per90',
 'Pas

# Check if same structure

In [65]:
import pandas as pd



# Get the list of columns for each dataset
columns_2024 = set(combined_data_2024.columns)
columns_2023 = set(combined_data_2023.columns)
columns_2022 = set(combined_data_2022.columns)

# Find columns that are in 2024 but not in 2023 or 2022
diff_2024_2023 = columns_2024 - columns_2023
diff_2024_2022 = columns_2024 - columns_2022

# Find columns that are in 2023 but not in 2024 or 2022
diff_2023_2024 = columns_2023 - columns_2024
diff_2023_2022 = columns_2023 - columns_2022

# Find columns that are in 2022 but not in 2024 or 2023
diff_2022_2024 = columns_2022 - columns_2024
diff_2022_2023 = columns_2022 - columns_2023

# Create a DataFrame with padded columns to the same length
max_len = max(len(diff_2024_2023), len(diff_2024_2022), len(diff_2023_2024), len(diff_2023_2022), len(diff_2022_2024), len(diff_2022_2023))

diff_df = pd.DataFrame({
    '2024_not_in_2023': list(diff_2024_2023) + [None] * (max_len - len(diff_2024_2023)),
    '2024_not_in_2022': list(diff_2024_2022) + [None] * (max_len - len(diff_2024_2022)),
    '2023_not_in_2024': list(diff_2023_2024) + [None] * (max_len - len(diff_2023_2024)),
    '2023_not_in_2022': list(diff_2023_2022) + [None] * (max_len - len(diff_2023_2022)),
    '2022_not_in_2024': list(diff_2022_2024) + [None] * (max_len - len(diff_2022_2024)),
    '2022_not_in_2023': list(diff_2022_2023) + [None] * (max_len - len(diff_2022_2023))
})


# Display the differences
diff_df


Unnamed: 0,2024_not_in_2023,2024_not_in_2022,2023_not_in_2024,2023_not_in_2022,2022_not_in_2024,2022_not_in_2023


# Common Players

In [66]:
# Identifying players that are present in all three combined_datasets

'''# Convert player names to lowercase for consistent comparison
combined_data_2022['Player'] = combined_data_2022['Player'].str.lower()
combined_data_2023['Player'] = combined_data_2023['Player'].str.lower()
combined_data_2024['Player'] = combined_data_2024['Player'].str.lower()'''

# Find common players in all three combined_datasets
common_players = set(combined_data_2022['Player']).intersection(combined_data_2023['Player']).intersection(combined_data_2024['Player'])

# Filter the combined_datasets to include only these common players
filtered_combined_data_2022 = combined_data_2022[combined_data_2022['Player'].isin(common_players)]
filtered_combined_data_2023 = combined_data_2023[combined_data_2023['Player'].isin(common_players)]
filtered_combined_data_2024 = combined_data_2024[combined_data_2024['Player'].isin(common_players)]

# Display the number of common players found and the first few rows of each filtered combined_dataset
num_common_players = len(common_players)
filtered_combined_data_2022_head = filtered_combined_data_2022.head()
filtered_combined_data_2023_head = filtered_combined_data_2023.head()
filtered_combined_data_2024_head = filtered_combined_data_2024.head()

num_common_players


745

In [67]:
# Define the desired order of columns
new_column_order = [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s',
    'MP', 'Starts', 'Min'
] + [col for col in filtered_combined_data_2024.columns if col not in [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s', 'MP', 'Starts', 'Min'
]]

# Reorder the DataFrame columns
filtered_combined_data_2024 = filtered_combined_data_2024[new_column_order]

# Remove commas from the 'Min' column and convert to integer
filtered_combined_data_2024['Min'] = filtered_combined_data_2024['Min'].str.replace(',', '').astype(int)



# Display the first few rows of the updated DataFrame
filtered_combined_data_2024.head()


Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
2,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,31,31,2781,57.8,2.07,1.13,1.17,0.74,0.16,0.84,1.46,0.61,1.65,1.04,0.61,1.26,3.33,3.53,0.06,0.78,0.03,0.61,0.03,0.0,0.1,0.03,0.0,0.0,0.0,0.0,0.03,0.0,0.0,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.9,31.59,4.56,8.16,0.0,0.01,0.02,-0.01,0.26,4.17,0.1,0.0,4.43,59.42,53.4,5.76,2.43,0.06,0.42,0.1,0.94,0.0,0.0,0.0,0.0,50.23,0.26,0.65,1,1,0.13,0.13,0.1,0.1,0.11,0.01,0.12,0.09,0.09,0.16,0.0,1.17,4.43,0.29,53.3,46.7,70.71,9.48,31.59,36.21,3.69,1.13,70.68,0.49,0.26,0.23,48.74,280.36,159.26,1.17,0.61,0.0,0.74,0.13,45.4,0.29,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4
3,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,27,17,1519,44.4,1.24,0.83,0.47,0.59,0.18,0.47,1.07,0.59,0.71,0.06,0.65,0.71,1.95,1.07,0.0,1.6,0.18,1.6,0.0,0.0,0.0,0.0,0.0,0.18,0.0,0.0,0.0,0.0,0.0,88.9,90.8,91.7,75.9,47.1,52.96,737.87,177.99,23.25,25.62,19.53,21.3,2.43,3.2,0.0,0.03,0.07,-0.03,0.36,5.15,0.3,0.12,4.62,52.96,51.78,1.07,1.01,0.0,0.06,0.18,0.06,0.0,0.0,0.0,0.0,47.1,0.12,1.01,0,0,0.0,0.0,0.0,0.0,0.05,0.03,0.08,0.05,0.08,0.12,0.0,0.53,4.62,1.18,36.8,57.9,60.47,1.24,9.94,38.28,12.9,0.41,60.47,1.12,0.41,0.65,48.7,258.05,99.59,0.53,1.95,0.0,1.36,0.65,46.15,1.18,0.0,0.3,0.0,0.0,0.0,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8
4,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,33,32,2860,39.6,2.67,1.64,1.35,1.07,0.25,1.19,3.02,1.82,0.91,0.19,0.72,1.92,4.59,1.92,0.09,2.55,0.16,2.11,0.09,0.06,0.03,0.06,0.19,0.09,0.0,0.03,0.0,0.0,0.03,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.1,57.74,54.81,2.77,2.14,0.19,0.41,1.07,0.44,0.0,0.0,0.0,0.0,48.77,0.16,0.79,0,0,0.06,0.09,0.06,0.09,0.04,0.07,0.1,0.04,0.1,0.13,0.0,1.19,6.1,1.6,67.7,27.7,68.05,3.3,17.39,40.5,10.97,0.31,68.05,2.04,1.38,0.57,51.86,273.81,122.39,1.19,1.54,0.09,1.01,0.94,47.23,1.6,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9
8,Francesco Acerbi,it ITA,DF,Inter,it Serie A,35.0,1988.0,26.5,29,26,2388,78.6,0.83,0.49,0.6,0.23,0.0,0.42,0.53,0.11,0.75,0.49,0.26,1.21,2.04,2.91,0.04,0.83,0.04,0.72,0.0,0.0,0.11,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,90.7,92.5,94.3,73.7,60.94,67.21,1106.64,346.19,22.68,24.53,32.3,34.26,4.98,6.75,0.04,0.07,0.05,-0.03,0.26,2.19,0.19,0.08,3.28,67.21,64.87,2.26,1.62,0.0,0.42,0.34,0.3,0.0,0.0,0.0,0.0,60.94,0.08,0.19,0,0,0.11,0.15,0.11,0.15,0.06,0.07,0.13,0.06,0.13,0.04,0.0,0.79,3.28,0.91,66.7,33.3,74.6,8.26,35.51,35.7,3.7,0.94,74.6,0.11,0.08,0.04,45.7,218.08,116.26,0.79,0.42,0.0,0.15,0.04,54.57,0.91,40.0,0.57,0.23,0.2,0.5,12.0,0.0,0,0,1.6,1.6,0.11,1.4,1.4
9,Marcos Acuña,ar ARG,DF,Sevilla,es La Liga,31.0,1991.0,14.4,21,18,1292,64.3,2.29,1.46,1.39,0.83,0.07,0.62,0.97,0.35,0.83,0.14,0.69,0.28,2.57,1.74,0.0,3.42,0.28,1.94,1.04,0.21,0.0,0.14,0.07,0.07,0.21,0.0,0.0,0.0,0.0,74.1,87.5,79.9,51.4,43.61,58.82,842.08,342.22,18.06,20.62,18.19,22.78,6.53,12.71,0.14,0.1,0.15,0.03,1.6,3.61,1.32,0.83,4.1,58.82,46.67,12.01,2.22,0.07,0.76,5.69,8.47,1.32,0.49,0.83,0.0,43.61,0.14,1.11,0,0,0.07,0.21,0.07,0.21,0.02,0.11,0.13,0.02,0.13,0.49,0.0,1.6,4.1,2.92,47.6,38.1,68.33,2.29,18.75,33.06,17.57,0.69,68.33,1.46,0.69,0.56,32.85,157.36,85.0,1.6,1.39,0.28,1.39,1.11,37.92,2.92,44.4,0.63,0.28,0.11,0.25,27.9,0.0,0,0,0.3,0.3,0.03,0.7,0.7


In [68]:
# Define the desired order of columns
new_column_order = [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s',
    'MP', 'Starts', 'Min'
] + [col for col in filtered_combined_data_2024.columns if col not in [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s', 'MP', 'Starts', 'Min'
]]

# Reorder the DataFrame columns
filtered_combined_data_2023 = filtered_combined_data_2023[new_column_order]

# Remove commas from the 'Min' column and convert to integer
filtered_combined_data_2023['Min'] = filtered_combined_data_2023['Min'].str.replace(',', '').astype(int)



# Display the first few rows of the updated DataFrame
filtered_combined_data_2023.head()


Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,34,1987,37.0,37,37,3330,75.0,2.22,1.35,1.27,0.89,0.05,1.05,1.41,0.35,1.76,0.73,1.03,1.73,3.95,3.14,0.03,1.16,0.19,0.92,0.0,0.05,0.14,0.0,0.05,0.11,0.0,0.05,0.03,0.0,0.0,82.7,85.8,90.1,55.6,45.38,54.89,891.0,362.35,13.24,15.43,27.19,30.19,4.19,7.54,0.05,0.03,0.02,0.03,0.35,4.19,0.14,0.0,5.81,54.89,50.86,3.84,3.14,0.0,0.81,0.14,0.27,0.0,0.0,0.0,0.0,45.38,0.19,0.54,0,0,0.03,0.08,0.03,0.08,0.06,0.03,0.09,0.06,0.09,0.16,0.0,1.08,5.81,0.27,56.8,37.8,66.46,6.51,29.41,34.46,3.32,1.05,66.46,1.0,0.57,0.38,46.57,293.81,165.49,1.08,0.57,0.08,0.73,0.62,40.51,0.27,9.4,0.86,0.08,0.03,0.33,15.0,0.0,0,0,2.4,2.4,0.07,-1.4,-1.4
3,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,22,2000,32.2,33,33,2894,55.8,1.43,0.81,0.56,0.71,0.16,0.9,1.61,0.71,1.18,0.28,0.9,1.12,2.55,0.81,0.0,1.55,0.03,1.34,0.0,0.06,0.06,0.06,0.03,0.03,0.0,0.0,0.0,0.0,0.0,91.8,93.0,94.8,78.6,56.49,61.52,890.65,218.01,27.86,29.94,23.88,25.19,2.86,3.63,0.03,0.05,0.07,-0.02,0.53,5.65,0.53,0.09,5.12,61.52,59.84,1.4,1.34,0.16,0.16,0.31,0.06,0.0,0.0,0.0,0.0,56.49,0.28,0.47,0,0,0.03,0.06,0.03,0.06,0.03,0.05,0.08,0.03,0.08,0.19,0.03,1.02,5.12,1.37,59.2,26.5,69.07,2.05,12.55,46.65,10.56,0.5,69.07,1.52,0.9,0.4,57.42,299.53,107.36,1.02,1.74,0.09,1.21,0.99,52.95,1.37,30.8,0.4,0.12,0.08,0.25,19.1,0.0,0,0,0.9,0.9,0.07,0.1,0.1
4,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,29,1993,26.0,29,28,2342,42.4,3.54,2.0,2.27,1.15,0.12,1.5,3.54,2.04,1.81,0.31,1.5,1.31,4.85,1.5,0.0,1.27,0.04,1.04,0.08,0.08,0.04,0.0,0.04,0.04,0.0,0.0,0.0,0.0,0.0,88.2,92.7,91.1,69.4,50.31,57.04,857.19,291.5,22.81,24.62,21.62,23.73,4.27,6.15,0.04,0.05,0.04,-0.01,0.54,5.54,0.42,0.0,5.5,57.04,55.04,1.88,1.81,0.38,0.31,0.23,0.04,0.04,0.0,0.0,0.0,50.31,0.12,0.54,0,0,0.0,0.04,0.0,0.04,0.02,0.05,0.07,0.02,0.07,0.27,0.04,0.5,5.5,0.54,56.1,34.1,68.42,2.92,22.35,41.12,5.81,0.15,68.42,1.58,0.88,0.54,53.23,233.42,99.88,0.5,1.12,0.0,1.12,0.73,48.35,0.54,15.4,0.5,0.08,0.0,0.0,24.0,0.0,0,0,0.5,0.5,0.04,-0.5,-0.5
7,Francesco Acerbi,it ITA,DF,Inter,it Serie A,34,1988,26.9,31,25,2425,74.1,1.23,0.71,0.82,0.26,0.15,0.74,1.0,0.26,0.93,0.67,0.26,1.52,2.75,3.16,0.07,1.37,0.04,1.26,0.04,0.0,0.07,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.0,88.5,91.8,91.9,73.8,54.09,61.12,1004.98,300.74,20.26,22.08,27.4,29.81,5.54,7.51,0.07,0.04,0.04,0.03,0.59,3.2,0.52,0.11,3.72,61.12,57.47,3.46,1.52,0.04,0.37,0.82,0.78,0.0,0.0,0.0,0.0,54.09,0.19,0.15,0,0,0.0,0.07,0.0,0.07,0.04,0.04,0.08,0.04,0.08,0.15,0.0,1.45,3.72,1.19,58.3,33.3,70.26,8.4,31.9,32.71,6.21,1.0,70.26,0.45,0.26,0.15,38.7,189.89,106.58,1.45,1.23,0.07,0.48,0.26,46.54,1.19,11.1,0.67,0.07,0.0,0.0,15.9,0.0,0,0,1.0,1.0,0.06,-1.0,-1.0
8,Marcos Acuña,ar ARG,DF,Sevilla,es La Liga,30,1991,21.2,30,21,1912,71.4,2.69,1.7,1.6,0.8,0.28,1.42,1.98,0.57,0.75,0.24,0.52,1.08,3.77,1.7,0.0,3.04,0.19,2.12,0.66,0.14,0.14,0.0,0.0,0.19,0.0,0.0,0.0,0.0,0.0,74.2,89.4,77.4,49.2,45.75,61.65,849.91,330.05,21.04,23.54,17.92,23.16,6.04,12.26,0.09,0.2,0.27,-0.11,1.79,3.35,1.79,0.9,4.81,61.65,49.43,12.12,2.31,0.14,0.66,6.93,8.21,1.6,0.33,1.04,0.0,45.75,0.09,1.18,0,0,0.14,0.24,0.14,0.24,0.05,0.2,0.26,0.05,0.26,0.47,0.14,2.31,4.81,4.43,48.1,42.6,71.84,3.25,19.15,31.6,22.03,1.23,71.84,2.55,1.23,1.08,38.82,191.98,102.74,2.31,2.03,0.24,1.37,0.94,42.59,4.43,23.8,0.99,0.24,0.14,0.6,25.6,1.0,0,0,1.2,1.2,0.06,1.8,1.8


In [69]:
# Define the desired order of columns
new_column_order = [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s',
    'MP', 'Starts', 'Min'
] + [col for col in filtered_combined_data_2024.columns if col not in [
    'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born', '90s', 'MP', 'Starts', 'Min'
]]

# Reorder the DataFrame columns
filtered_combined_data_2022 = filtered_combined_data_2022[new_column_order]

# Remove commas from the 'Min' column and convert to integer
filtered_combined_data_2022['Min'] = filtered_combined_data_2022['Min'].str.replace(',', '').astype(int)



# Display the first few rows of the updated DataFrame
filtered_combined_data_2022.head()


Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net
1,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,33,1987,33.1,34,34,2983,50.0,1.45,0.82,1.03,0.42,0.0,0.48,0.97,0.48,1.51,0.76,0.76,2.05,3.5,3.14,0.0,0.66,0.03,0.51,0.0,0.06,0.03,0.0,0.06,0.03,0.0,0.0,0.0,0.0,0.0,81.4,88.5,90.9,54.5,38.79,47.67,770.88,289.61,12.54,14.17,21.6,23.78,4.38,8.04,0.0,0.02,0.02,-0.02,0.27,2.87,0.21,0.0,3.6,47.67,44.44,3.05,2.69,0.0,0.97,0.12,0.27,0.0,0.0,0.0,0.0,38.79,0.18,0.79,0,0,0.06,0.06,0.06,0.06,0.04,0.02,0.06,0.04,0.06,0.15,0.03,0.69,3.6,0.42,77.8,22.2,57.98,6.4,30.73,25.47,2.54,0.73,57.98,0.54,0.42,0.12,36.07,194.89,107.49,0.69,0.57,0.0,0.85,0.39,36.31,0.42,33.3,0.54,0.18,0.11,0.33,18.3,0,0,0,1.2,1.2,0.07,0.8,0.8
2,Salis Abdul Samed,gh GHA,MF,Clermont Foot,fr Ligue 1,21,2000,27.4,31,29,2462,35.1,1.57,0.84,0.91,0.58,0.07,0.73,2.08,1.35,0.66,0.04,0.62,1.53,3.1,0.55,0.0,1.83,0.11,1.5,0.0,0.0,0.15,0.15,0.04,0.11,0.0,0.0,0.0,0.0,0.0,90.2,92.9,93.7,83.7,56.02,62.08,931.31,199.12,27.3,29.38,23.91,25.51,3.94,4.71,0.0,0.03,0.03,-0.03,0.62,3.18,0.47,0.04,3.94,62.08,61.24,0.77,0.62,0.04,0.11,0.51,0.15,0.0,0.0,0.0,0.0,56.02,0.07,1.06,0,0,0.04,0.04,0.04,0.04,0.03,0.03,0.06,0.03,0.06,0.44,0.11,0.88,3.94,1.72,63.4,36.6,71.09,1.42,20.33,41.82,9.53,0.58,71.09,1.5,0.95,0.55,50.58,239.78,98.07,0.88,0.84,0.11,1.53,1.09,54.05,1.72,27.8,0.66,0.18,0.06,0.2,21.5,0,0,0,0.7,0.7,0.04,0.3,0.3
3,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,28,1993,32.8,34,34,2956,42.8,3.35,1.83,1.34,1.62,0.4,2.16,5.06,2.9,1.77,0.03,1.74,1.68,5.03,0.4,0.0,2.53,0.27,2.04,0.06,0.09,0.03,0.21,0.09,0.24,0.0,0.0,0.0,0.03,0.0,80.8,89.2,86.3,64.4,40.91,50.61,724.6,208.45,18.48,20.7,16.31,18.9,4.63,7.2,0.06,0.11,0.08,-0.05,1.07,4.48,0.7,0.27,5.21,50.61,49.3,1.07,0.76,0.21,0.64,1.13,0.24,0.06,0.0,0.03,0.0,40.91,0.24,1.49,0,0,0.0,0.06,0.0,0.06,0.04,0.11,0.16,0.04,0.16,0.27,0.0,0.91,5.21,2.23,63.9,36.1,61.95,0.73,12.32,39.57,10.88,0.4,61.95,1.86,1.19,0.67,39.97,171.62,71.74,0.91,1.19,0.09,1.59,1.22,42.99,2.23,24.1,0.88,0.21,0.0,0.0,24.2,0,0,0,1.5,1.5,0.05,-1.5,-1.5
5,Francesco Acerbi,it ITA,DF,Lazio,it Serie A,33,1988,28.2,30,29,2536,63.2,0.89,0.43,0.53,0.32,0.04,0.43,0.67,0.25,1.31,0.99,0.32,1.21,2.09,3.26,0.0,0.61,0.07,0.46,0.04,0.0,0.04,0.07,0.0,0.04,0.0,0.0,0.0,0.04,0.0,89.9,92.7,94.9,71.3,65.14,72.45,1240.99,435.21,24.04,25.92,32.59,34.33,7.48,10.5,0.0,0.02,0.03,-0.02,0.18,4.22,0.21,0.0,4.22,72.45,69.33,3.09,2.73,0.04,1.21,0.11,0.04,0.0,0.0,0.0,0.0,65.14,0.04,0.21,0,0,0.14,0.14,0.14,0.14,0.09,0.02,0.1,0.09,0.1,0.07,0.04,1.1,4.22,0.35,66.7,33.3,81.38,9.5,39.08,40.53,2.09,0.78,81.38,0.21,0.14,0.07,48.37,244.43,152.52,1.1,0.67,0.0,0.46,0.14,58.19,0.35,46.7,0.53,0.25,0.27,0.57,10.8,0,0,0,2.4,2.4,0.16,1.6,1.6
6,Marcos Acuña,ar ARG,DF,Sevilla,es La Liga,29,1991,25.1,31,26,2260,65.2,1.99,1.16,1.08,0.56,0.36,1.2,1.83,0.64,0.88,0.2,0.68,0.56,2.55,1.0,0.04,2.99,0.12,1.91,0.76,0.08,0.12,0.12,0.0,0.04,0.08,0.0,0.0,0.0,0.0,78.6,91.7,83.4,55.9,61.47,78.21,1168.45,406.61,26.37,28.76,25.5,30.56,8.73,15.62,0.12,0.12,0.16,0.0,1.71,4.58,1.71,1.08,5.22,78.21,64.7,13.27,2.51,0.12,1.2,7.09,9.16,1.59,0.0,1.24,0.0,61.47,0.24,1.2,0,0,0.04,0.16,0.04,0.16,0.05,0.11,0.16,0.05,0.16,0.4,0.0,2.91,5.22,6.1,66.7,33.3,87.01,2.15,25.38,38.21,24.62,0.76,87.01,2.63,1.75,0.88,50.04,249.12,151.35,2.91,2.31,0.08,1.51,1.08,58.45,6.1,31.3,0.64,0.2,0.06,0.2,21.7,1,0,0,1.2,1.2,0.08,-0.2,-0.2


In [70]:
filtered_combined_data_2024=filtered_combined_data_2024[filtered_combined_data_2024['Pos']!='GK']

filtered_combined_data_2023=filtered_combined_data_2023[filtered_combined_data_2023['Pos']!='GK']

filtered_combined_data_2022=filtered_combined_data_2022[filtered_combined_data_2022['Pos']!='GK']

len(filtered_combined_data_2024),len(filtered_combined_data_2023),len(filtered_combined_data_2022)

(700, 705, 701)

In [71]:
# Separate main and secondary positions
def split_position(pos):
    positions = pos.split(',')
    if len(positions) == 1:
        return positions[0], 'None'
    else:
        return positions[0], positions[1]

filtered_combined_data_2024[['Main_Pos', 'Secondary_Pos']] = filtered_combined_data_2024['Pos'].apply(lambda x: pd.Series(split_position(x)))
filtered_combined_data_2023[['Main_Pos', 'Secondary_Pos']] = filtered_combined_data_2023['Pos'].apply(lambda x: pd.Series(split_position(x)))
filtered_combined_data_2022[['Main_Pos', 'Secondary_Pos']] = filtered_combined_data_2022['Pos'].apply(lambda x: pd.Series(split_position(x)))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_combined_data_2024[['Main_Pos', 'Secondary_Pos']] = filtered_combined_data_2024['Pos'].apply(lambda x: pd.Series(split_position(x)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_combined_data_2024[['Main_Pos', 'Secondary_Pos']] = filtered_combined_data_2024['Pos'].apply(lambda x: pd.Series(split_position(x)))


In [72]:
filtered_combined_data_2024

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Main_Pos,Secondary_Pos
2,Yunis Abdelhamid,ma MAR,DF,Reims,fr Ligue 1,35.0,1987.0,30.9,31,31,2781,57.8,2.07,1.13,1.17,0.74,0.16,0.84,1.46,0.61,1.65,1.04,0.61,1.26,3.33,3.53,0.06,0.78,0.03,0.61,0.03,0.00,0.10,0.03,0.00,0.00,0.00,0.00,0.03,0.00,0.00,84.5,88.9,91.5,56.0,50.23,59.42,958.51,313.01,15.76,17.73,28.90,31.59,4.56,8.16,0.00,0.01,0.02,-0.01,0.26,4.17,0.10,0.00,4.43,59.42,53.40,5.76,2.43,0.06,0.42,0.10,0.94,0.00,0.00,0.00,0.0,50.23,0.26,0.65,1,1,0.13,0.13,0.10,0.10,0.11,0.01,0.12,0.09,0.09,0.16,0.00,1.17,4.43,0.29,53.3,46.7,70.71,9.48,31.59,36.21,3.69,1.13,70.68,0.49,0.26,0.23,48.74,280.36,159.26,1.17,0.61,0.00,0.74,0.13,45.40,0.29,33.3,0.68,0.23,0.14,0.43,15.0,0.0,1,1,3.4,2.6,0.13,0.6,0.4,DF,
3,Salis Abdul Samed,gh GHA,MF,Lens,fr Ligue 1,23.0,2000.0,16.9,27,17,1519,44.4,1.24,0.83,0.47,0.59,0.18,0.47,1.07,0.59,0.71,0.06,0.65,0.71,1.95,1.07,0.00,1.60,0.18,1.60,0.00,0.00,0.00,0.00,0.00,0.18,0.00,0.00,0.00,0.00,0.00,88.9,90.8,91.7,75.9,47.10,52.96,737.87,177.99,23.25,25.62,19.53,21.30,2.43,3.20,0.00,0.03,0.07,-0.03,0.36,5.15,0.30,0.12,4.62,52.96,51.78,1.07,1.01,0.00,0.06,0.18,0.06,0.00,0.00,0.00,0.0,47.10,0.12,1.01,0,0,0.00,0.00,0.00,0.00,0.05,0.03,0.08,0.05,0.08,0.12,0.00,0.53,4.62,1.18,36.8,57.9,60.47,1.24,9.94,38.28,12.90,0.41,60.47,1.12,0.41,0.65,48.70,258.05,99.59,0.53,1.95,0.00,1.36,0.65,46.15,1.18,0.0,0.30,0.00,0.00,0.00,21.6,0.0,0,0,0.8,0.8,0.17,-0.8,-0.8,MF,
4,Laurent Abergel,fr FRA,MF,Lorient,fr Ligue 1,30.0,1993.0,31.8,33,32,2860,39.6,2.67,1.64,1.35,1.07,0.25,1.19,3.02,1.82,0.91,0.19,0.72,1.92,4.59,1.92,0.09,2.55,0.16,2.11,0.09,0.06,0.03,0.06,0.19,0.09,0.00,0.03,0.00,0.00,0.03,84.5,89.0,88.7,64.4,48.77,57.74,861.07,270.85,19.78,22.23,22.36,25.22,4.72,7.33,0.03,0.07,0.06,-0.04,0.94,5.91,0.72,0.09,6.10,57.74,54.81,2.77,2.14,0.19,0.41,1.07,0.44,0.00,0.00,0.00,0.0,48.77,0.16,0.79,0,0,0.06,0.09,0.06,0.09,0.04,0.07,0.10,0.04,0.10,0.13,0.00,1.19,6.10,1.60,67.7,27.7,68.05,3.30,17.39,40.50,10.97,0.31,68.05,2.04,1.38,0.57,51.86,273.81,122.39,1.19,1.54,0.09,1.01,0.94,47.23,1.60,42.3,0.82,0.35,0.08,0.18,26.5,0.0,0,0,1.1,1.1,0.04,0.9,0.9,MF,
8,Francesco Acerbi,it ITA,DF,Inter,it Serie A,35.0,1988.0,26.5,29,26,2388,78.6,0.83,0.49,0.60,0.23,0.00,0.42,0.53,0.11,0.75,0.49,0.26,1.21,2.04,2.91,0.04,0.83,0.04,0.72,0.00,0.00,0.11,0.00,0.00,0.04,0.00,0.00,0.00,0.00,0.00,90.7,92.5,94.3,73.7,60.94,67.21,1106.64,346.19,22.68,24.53,32.30,34.26,4.98,6.75,0.04,0.07,0.05,-0.03,0.26,2.19,0.19,0.08,3.28,67.21,64.87,2.26,1.62,0.00,0.42,0.34,0.30,0.00,0.00,0.00,0.0,60.94,0.08,0.19,0,0,0.11,0.15,0.11,0.15,0.06,0.07,0.13,0.06,0.13,0.04,0.00,0.79,3.28,0.91,66.7,33.3,74.60,8.26,35.51,35.70,3.70,0.94,74.60,0.11,0.08,0.04,45.70,218.08,116.26,0.79,0.42,0.00,0.15,0.04,54.57,0.91,40.0,0.57,0.23,0.20,0.50,12.0,0.0,0,0,1.6,1.6,0.11,1.4,1.4,DF,
9,Marcos Acuña,ar ARG,DF,Sevilla,es La Liga,31.0,1991.0,14.4,21,18,1292,64.3,2.29,1.46,1.39,0.83,0.07,0.62,0.97,0.35,0.83,0.14,0.69,0.28,2.57,1.74,0.00,3.42,0.28,1.94,1.04,0.21,0.00,0.14,0.07,0.07,0.21,0.00,0.00,0.00,0.00,74.1,87.5,79.9,51.4,43.61,58.82,842.08,342.22,18.06,20.62,18.19,22.78,6.53,12.71,0.14,0.10,0.15,0.03,1.60,3.61,1.32,0.83,4.10,58.82,46.67,12.01,2.22,0.07,0.76,5.69,8.47,1.32,0.49,0.83,0.0,43.61,0.14,1.11,0,0,0.07,0.21,0.07,0.21,0.02,0.11,0.13,0.02,0.13,0.49,0.00,1.60,4.10,2.92,47.6,38.1,68.33,2.29,18.75,33.06,17.57,0.69,68.33,1.46,0.69,0.56,32.85,157.36,85.00,1.60,1.39,0.28,1.39,1.11,37.92,2.92,44.4,0.63,0.28,0.11,0.25,27.9,0.0,0,0,0.3,0.3,0.03,0.7,0.7,DF,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1731,Igor Zubeldia,es ESP,DF,Real Sociedad,es La Liga,26.0,1997.0,28.0,30,29,2518,73.1,1.14,0.89,0.71,0.36,0.07,0.68,0.93,0.25,1.00,0.68,0.32,1.14,2.29,3.46,0.11,0.64,0.11,0.57,0.00,0.00,0.04,0.00,0.04,0.11,0.00,0.00,0.00,0.00,0.00,84.5,86.0,92.3,62.5,52.71,62.36,1083.96,336.54,13.82,16.07,32.61,35.32,5.96,9.54,0.07,0.04,0.03,0.03,0.36,3.54,0.14,0.00,4.61,62.36,60.57,1.50,1.46,0.11,0.29,0.11,0.00,0.00,0.00,0.00,0.0,52.71,0.29,0.25,0,0,0.00,0.07,0.00,0.07,0.03,0.04,0.07,0.03,0.07,0.39,0.04,1.00,4.61,0.00,50.0,50.0,70.82,5.79,32.43,36.93,1.79,0.64,70.82,0.29,0.14,0.14,43.89,269.18,168.75,1.00,0.07,0.00,0.50,0.11,47.00,0.00,0.0,0.43,0.00,0.00,0.00,13.4,0.0,0,0,0.8,0.8,0.06,-0.8,-0.8,DF,
1732,Martín Zubimendi,es ESP,MF,Real Sociedad,es La Liga,24.0,1999.0,29.5,31,29,2654,54.8,1.66,0.92,0.61,0.98,0.07,0.78,1.42,0.64,1.12,0.31,0.81,1.25,2.92,1.83,0.00,1.87,0.14,1.59,0.00,0.10,0.14,0.00,0.03,0.14,0.00,0.00,0.00,0.00,0.00,85.7,89.7,89.7,68.3,44.78,52.27,751.36,212.51,20.58,22.95,20.58,22.95,2.92,4.27,0.03,0.05,0.07,-0.02,0.54,4.61,0.51,0.03,5.15,52.27,49.93,2.10,2.00,0.14,0.14,0.24,0.07,0.00,0.00,0.00,0.0,44.78,0.24,0.44,0,0,0.14,0.17,0.14,0.17,0.09,0.05,0.14,0.09,0.14,0.17,0.00,1.15,5.15,0.92,66.7,27.8,60.78,3.08,13.46,38.78,9.05,1.08,60.78,0.61,0.41,0.17,31.93,179.83,90.54,1.15,1.39,0.17,0.88,0.54,40.17,0.92,50.0,0.75,0.37,0.18,0.36,14.8,0.0,0,0,2.7,2.7,0.12,1.3,1.3,MF,
1733,Martin Ødegaard,no NOR,MF,Arsenal,eng Premier League,24.0,1998.0,34.3,35,35,3091,29.3,1.43,0.50,0.52,0.61,0.29,0.50,1.69,1.20,0.67,0.03,0.64,0.44,1.87,0.15,0.00,6.41,0.67,5.19,0.50,0.23,0.29,0.17,0.03,0.52,0.06,0.00,0.06,0.03,0.00,84.3,91.4,85.3,65.0,49.33,58.48,751.60,230.26,26.44,28.92,18.51,21.69,2.65,4.08,0.29,0.28,0.33,0.01,2.97,4.66,3.79,0.38,10.03,58.48,55.01,3.00,1.60,1.14,0.09,2.24,0.38,0.76,0.61,0.03,0.0,49.33,0.47,1.11,2,2,0.23,0.52,0.17,0.47,0.22,0.28,0.50,0.17,0.45,0.06,0.00,2.65,10.03,5.89,45.7,46.9,67.81,0.70,7.11,27.00,34.20,4.81,67.76,2.36,1.08,1.11,45.57,238.78,102.45,2.65,2.42,1.08,1.55,1.69,53.27,5.89,28.0,2.18,0.61,0.08,0.29,20.2,4.0,2,2,7.4,5.8,0.08,0.6,0.2,MF,
1734,Milan Đurić,ba BIH,FW,Hellas Verona,it Serie A,33.0,1990.0,13.4,20,13,1204,0.0,0.07,0.00,0.00,0.00,0.07,0.00,0.22,0.22,0.00,0.00,0.00,0.07,0.15,0.97,0.00,2.47,0.30,1.87,0.00,0.00,0.37,0.15,0.07,0.07,0.00,0.00,0.00,0.15,0.07,54.2,61.0,46.4,41.7,13.51,24.93,157.91,29.25,9.70,15.90,2.39,5.15,0.37,0.90,0.07,0.09,0.04,-0.01,1.19,1.34,0.22,0.00,1.12,24.93,24.63,0.30,0.00,0.00,0.22,0.07,0.00,0.00,0.00,0.00,0.0,13.51,0.00,0.22,1,3,0.37,0.45,0.30,0.37,0.34,0.09,0.43,0.16,0.25,0.15,0.00,0.22,1.12,2.46,66.7,33.3,31.42,1.04,3.06,16.94,11.42,3.43,31.19,0.22,0.15,0.07,13.66,39.48,11.27,0.22,0.45,0.07,1.64,0.60,26.27,2.46,59.1,1.64,0.97,0.18,0.31,12.5,0.0,1,3,4.5,2.2,0.10,0.5,1.8,FW,


In [73]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Function to calculate weighted average
def weighted_avg(group, value_col, weight_col):
    d = group[value_col]
    w = group[weight_col]
    return (d * w).sum() / w.sum()

# General function to process season data
def process_season(data):
    # Remove goalkeepers by filtering out rows where Pos == 'GK'
    data = data[data['Pos'] != 'GK']

    # Identify normalized and percentage features
    normalized_features = [col for col in data.columns if '_per90' in col]
    percentage_features = [col for col in data.columns if '%' in col]

    # Columns to sum
    sum_columns = [
        'Goals_per_shot', 'Goals_per_shot_on_target', 'Average_shot_distance',
        'Shots_free_kicks', 'Pens_Scored', 'Pens_Attempted', 'XG', 'Npxg',
        'Npxg_per_shot', 'Xg_net', 'Npxg_net'
    ]

    # Identify players with multiple clubs in the season data
    players_multiple_clubs = data[data.duplicated(subset=['Player', 'Age'], keep=False)]
    unique_players_multiple_clubs = players_multiple_clubs['Player'].unique()

    # Create a list to store combined stats
    combined_stats_list = []

    for player, group in data.groupby('Player'):
        combined_stats = {}
        if player in unique_players_multiple_clubs:
            # Apply weighted average for players with multiple clubs
            combined_stats = {col: weighted_avg(group, col, '90s') for col in normalized_features + percentage_features}
            combined_stats['Player'] = player
            #combined_stats['Total_90s'] = group['90s'].sum()

            # Handle specific columns
            combined_stats['Nation'] = group['Nation'].iloc[0]
            combined_stats['Pos'] = group['Pos'].iloc[0]
            combined_stats['Age'] = group['Age'].iloc[0]
            combined_stats['Born'] = group['Born'].iloc[0]
            combined_stats['MP'] = group['MP'].sum()
            combined_stats['Starts'] = group['Starts'].sum()
            combined_stats['Min'] = group['Min'].sum()
            combined_stats['90s'] = group['Min'].sum()
            combined_stats['Squad'] = '+'.join(group['Squad'].unique())
            combined_stats['Comp'] = '+'.join(group['Comp'].unique()) if len(group['Comp'].unique()) > 1 else group['Comp'].iloc[0]
            combined_stats['Main_Pos'] = group['Main_Pos'].iloc[0]
            combined_stats['Secondary_Pos'] = group['Secondary_Pos'].iloc[0]
            #combined_stats['Unnamed: 0']=group['Unnamed: 0'].iloc[0]
            # Sum specific columns
            for col in sum_columns:
                combined_stats[col] = group[col].sum()
        else:
            # Retain original stats for players with a single club
            combined_stats = group.iloc[0].to_dict()
            combined_stats['Total_90s'] = group['90s'].sum()

        combined_stats_list.append(combined_stats)

    # Convert list of dictionaries to DataFrame
    combined_stats_df = pd.DataFrame(combined_stats_list)

    return combined_stats_df



# Process each season's data
combined_stats_2022 = process_season(filtered_combined_data_2022)
combined_stats_2023 = process_season(filtered_combined_data_2023)
combined_stats_2024 = process_season(filtered_combined_data_2024)


combined_stats_2024


Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Main_Pos,Secondary_Pos,Total_90s
0,Aaron Wan-Bissaka,eng ENG,DF,Manchester Utd,eng Premier League,25.0,1997.0,19.8,22,20,1780,73.1,2.12,1.26,0.91,1.01,0.20,0.96,1.31,0.35,1.52,0.76,0.76,2.07,4.19,3.43,0.05,1.31,0.15,1.21,0.05,0.00,0.00,0.05,0.00,0.15,0.00,0.00,0.00,0.00,0.0,83.5,89.5,85.3,50.0,41.57,49.80,546.67,178.74,27.07,30.25,12.27,14.39,0.96,1.92,0.10,0.08,0.07,0.03,0.61,2.58,0.81,0.15,3.89,49.80,42.37,7.32,0.35,0.00,0.05,0.71,6.97,0.00,0.00,0.00,0.00,41.57,0.10,1.36,0.0,0.0,0.00,0.10,0.00,0.10,0.01,0.08,0.08,0.01,0.08,0.20,0.00,1.52,3.89,2.73,53.1,28.1,61.87,4.29,21.41,27.17,14.04,1.21,61.87,1.62,0.86,0.45,30.00,146.92,72.17,1.52,1.36,0.20,1.06,0.45,34.90,2.73,33.3,0.15,0.05,0.00,0.00,20.2,0.0,0,0,0.1,0.1,0.04,-0.1,-0.1,DF,,19.8
1,Aarón Martín,es ESP,DF,Genoa,it Serie A,26.0,1997.0,15.3,22,17,1376,40.9,1.37,0.65,0.46,0.65,0.26,0.59,1.44,0.85,0.78,0.26,0.52,0.59,1.96,0.98,0.00,2.16,0.13,1.31,0.78,0.07,0.00,0.00,0.00,0.13,0.00,0.00,0.00,0.00,0.0,69.6,90.3,74.4,42.2,28.17,40.46,506.41,178.24,12.75,14.12,11.05,14.84,3.53,8.37,0.07,0.11,0.11,-0.05,1.70,1.70,1.05,0.72,2.29,40.46,32.35,7.78,1.05,0.00,0.13,6.93,4.64,2.09,0.46,1.37,0.00,28.17,0.33,1.44,0.0,0.0,0.00,0.07,0.00,0.07,0.01,0.11,0.12,0.01,0.12,0.26,0.07,1.83,2.29,4.71,50.0,50.0,45.95,1.70,10.85,18.82,16.67,0.65,45.95,0.78,0.39,0.39,23.99,120.20,63.53,1.83,1.37,0.20,0.78,0.26,28.24,4.71,0.0,0.33,0.00,0.00,0.00,24.8,2.0,0,0,0.2,0.2,0.03,-0.2,-0.2,DF,,15.3
2,Abdoulaye Doucouré,ml MLI,"FW,MF",Everton,eng Premier League,30.0,1993.0,29.2,32,32,2629,39.1,1.47,0.92,0.68,0.48,0.31,0.62,1.58,0.96,0.79,0.10,0.68,0.51,1.99,0.65,0.00,2.29,0.17,2.02,0.00,0.00,0.21,0.03,0.03,0.10,0.00,0.00,0.07,0.00,0.0,76.5,84.1,83.0,47.5,23.39,30.58,335.86,79.76,13.05,15.51,8.18,9.86,0.96,2.02,0.03,0.10,0.05,-0.07,1.16,1.47,0.62,0.03,3.32,30.58,30.00,0.55,0.00,0.03,0.03,0.89,0.00,0.00,0.00,0.00,0.00,23.39,0.03,0.99,0.0,0.0,0.24,0.27,0.24,0.27,0.30,0.10,0.40,0.30,0.40,0.24,0.00,1.88,3.32,4.79,52.1,43.8,40.75,1.23,6.44,17.91,16.95,3.42,40.75,1.64,0.86,0.72,21.75,112.74,57.95,1.88,1.27,0.58,2.09,1.44,27.40,4.79,44.7,1.61,0.72,0.15,0.33,12.2,0.0,0,0,8.8,8.8,0.20,-1.8,-1.8,FW,MF,29.2
3,Achraf Hakimi,ma MAR,DF,Paris S-G,fr Ligue 1,24.0,1998.0,21.5,25,20,1932,50.0,1.35,0.88,0.65,0.42,0.28,0.79,1.58,0.79,1.30,0.19,1.12,0.56,1.91,1.30,0.09,4.33,0.65,3.67,0.05,0.09,0.23,0.23,0.05,0.60,0.00,0.00,0.05,0.00,0.0,86.5,92.0,88.1,61.6,79.63,92.05,1179.35,375.53,44.09,47.91,28.51,32.37,3.58,5.81,0.23,0.24,0.20,-0.01,2.00,7.12,2.09,0.60,10.14,92.05,82.47,9.44,1.35,0.19,0.28,3.67,7.44,0.65,0.05,0.33,0.00,79.63,0.14,1.95,0.0,0.0,0.19,0.42,0.19,0.42,0.20,0.24,0.44,0.20,0.44,0.14,0.00,3.91,10.14,10.70,47.4,52.6,103.12,3.16,19.77,45.40,38.84,3.12,103.12,2.65,1.26,1.40,81.95,428.60,229.86,3.91,3.63,1.02,1.53,1.53,78.09,10.70,28.6,1.96,0.56,0.10,0.33,19.8,9.0,0,0,4.2,4.2,0.10,-0.2,-0.2,DF,,21.5
4,Adam Marušić,me MNE,DF,Lazio,it Serie A,30.0,1992.0,34.4,37,37,3100,55.1,1.31,0.90,0.70,0.55,0.06,0.78,1.42,0.64,0.78,0.20,0.58,0.84,2.15,1.83,0.06,0.99,0.06,0.84,0.03,0.06,0.03,0.03,0.00,0.06,0.00,0.00,0.00,0.00,0.0,83.0,90.2,82.7,56.0,45.35,54.65,701.74,266.19,24.51,27.18,16.92,20.47,2.59,4.62,0.00,0.02,0.03,-0.02,0.29,3.20,0.70,0.32,3.43,54.65,45.78,8.84,0.58,0.00,0.12,1.28,8.26,0.00,0.00,0.00,0.00,45.35,0.03,0.78,0.0,0.0,0.03,0.03,0.03,0.03,0.01,0.02,0.04,0.01,0.04,0.09,0.03,1.02,3.43,3.14,43.8,43.8,61.13,2.97,20.20,29.88,11.37,0.64,61.13,0.93,0.41,0.41,28.46,127.27,68.84,1.02,0.90,0.09,0.70,0.15,38.78,3.14,37.5,0.23,0.09,0.13,0.33,19.9,0.0,0,0,0.4,0.4,0.05,0.6,0.6,DF,,34.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689,Óscar Rodríguez Arnaiz,es ESP,"MF,FW",Getafe,es La Liga,25.0,1998.0,10.7,24,9,967,60.0,2.24,1.21,0.19,1.40,0.65,0.84,1.40,0.56,1.12,0.09,1.03,0.47,2.71,0.47,0.00,4.56,0.37,3.46,0.47,0.37,0.19,0.09,0.00,0.28,0.00,0.09,0.00,0.00,0.0,75.3,82.9,79.1,68.7,31.12,41.31,531.03,124.49,16.26,19.63,9.53,12.06,4.30,6.26,0.09,0.20,0.13,-0.10,1.96,3.64,1.03,0.09,4.86,41.31,39.07,2.15,0.56,0.28,0.75,1.59,0.28,1.03,0.28,0.37,0.00,31.12,0.09,1.40,0.0,0.0,0.19,0.28,0.19,0.28,0.14,0.20,0.34,0.14,0.34,0.84,0.00,1.21,4.86,4.49,45.2,51.6,55.61,0.65,5.23,26.82,24.21,2.80,55.61,2.90,1.31,1.50,29.91,169.91,70.75,1.21,2.24,0.37,3.27,1.12,38.79,4.49,26.5,3.16,0.84,0.06,0.22,24.9,4.0,0,0,1.5,1.5,0.04,0.5,0.5,MF,FW,10.7
690,Óscar Trejo,ar ARG,"FW,MF",Rayo Vallecano,es La Liga,35.0,1988.0,15.3,31,19,1380,61.9,1.76,0.98,0.72,0.59,0.46,0.85,1.37,0.52,1.37,0.07,1.31,0.33,2.09,0.59,0.00,4.77,0.33,3.27,0.26,0.33,0.20,0.65,0.07,0.13,0.00,0.00,0.13,0.07,0.0,82.6,89.9,84.6,53.6,42.22,51.11,561.83,160.59,26.73,29.74,11.11,13.14,1.96,3.66,0.00,0.08,0.14,-0.08,1.50,4.84,1.63,0.00,8.10,51.11,48.24,2.55,0.59,0.20,0.00,1.50,0.46,0.72,0.13,0.33,0.00,42.22,0.33,1.05,0.0,0.0,0.00,0.00,0.00,0.00,0.06,0.08,0.14,0.06,0.14,0.26,0.07,1.83,8.10,5.29,38.8,50.6,66.41,0.98,4.97,35.69,26.86,3.33,66.41,5.56,2.16,2.81,35.62,172.75,68.89,1.83,1.57,0.65,2.94,3.92,50.85,5.29,16.7,1.17,0.20,0.00,0.00,19.6,0.0,0,0,0.9,0.9,0.05,-0.9,-0.9,FW,MF,15.3
691,Óscar Valentín,es ESP,MF,Rayo Vallecano,es La Liga,28.0,1994.0,29.9,34,32,2692,45.5,3.58,2.21,1.30,2.07,0.20,1.54,3.38,1.84,2.07,0.33,1.74,1.17,4.75,1.74,0.03,1.40,0.00,1.10,0.03,0.07,0.07,0.07,0.07,0.00,0.00,0.00,0.00,0.00,0.0,81.2,87.4,84.5,59.8,33.61,41.37,559.60,189.97,16.25,18.60,14.18,16.79,2.54,4.25,0.00,0.02,0.02,-0.02,0.37,4.05,0.20,0.03,4.41,41.37,40.23,1.00,0.97,0.17,0.30,0.17,0.03,0.00,0.00,0.00,0.00,33.61,0.13,0.64,0.0,0.0,0.03,0.03,0.03,0.03,0.06,0.02,0.07,0.06,0.07,0.23,0.03,0.80,4.41,0.37,53.8,30.8,52.94,2.84,13.41,34.52,5.42,0.60,52.94,0.43,0.23,0.13,23.71,130.07,64.78,0.80,0.80,0.03,1.10,0.33,29.00,0.37,46.7,0.50,0.23,0.07,0.14,12.7,0.0,0,0,1.7,1.7,0.11,-0.7,-0.7,MF,,29.9
692,Óscar de Marcos,es ESP,DF,Athletic Club,es La Liga,34.0,1989.0,25.1,28,26,2258,59.2,2.23,1.55,1.04,0.96,0.24,1.67,2.83,1.16,1.12,0.24,0.88,0.60,2.83,1.75,0.00,1.83,0.28,1.59,0.20,0.00,0.00,0.04,0.00,0.28,0.00,0.00,0.00,0.00,0.0,79.4,89.5,80.4,56.3,49.52,62.39,811.91,316.14,25.26,28.21,19.00,23.63,4.10,7.29,0.20,0.12,0.14,0.08,0.84,5.22,1.47,1.04,5.70,62.39,50.28,11.87,1.20,0.04,0.12,4.82,10.64,0.04,0.04,0.00,0.00,49.52,0.24,1.31,0.0,0.0,0.04,0.24,0.04,0.24,0.03,0.12,0.15,0.03,0.15,0.20,0.00,2.03,5.70,4.98,27.3,72.7,70.28,2.39,14.94,34.58,21.08,1.35,70.28,0.88,0.24,0.64,34.70,141.35,80.16,2.03,1.59,0.24,0.60,0.24,40.92,4.98,40.0,0.20,0.08,0.20,0.50,15.7,0.0,0,0,0.6,0.6,0.13,0.4,0.4,DF,,25.1


In [74]:
combined_stats_2024[combined_stats_2024['Player']=='Kevin Vogt']

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Main_Pos,Secondary_Pos,Total_90s
353,Kevin Vogt,de GER,DF,Hoffenheim+Union Berlin,de Bundesliga,31.0,1991.0,2638.0,32,28,2638,55.75,0.815,0.645,0.445,0.375,0.0,0.475,0.85,0.375,1.295,0.95,0.34,0.545,1.36,4.285,0.07,1.57,,1.56,0.0,0.0,0.0,0.0,0.0,0.27,0.0,0.0,0.0,0.0,0.0,86.6,94.0,89.8,72.6,55.31,63.88,1165.78,512.11,16.12,17.14,28.1,31.29,9.93,13.67,0.0,0.01,0.03,-0.01,0.14,5.37,0.75,0.0,5.71,63.88,61.02,2.79,1.97,0.0,0.41,0.2,0.68,0.0,0.0,0.0,0.0,55.31,0.07,0.48,,,0.0,0.0,0.0,0.0,0.05,0.01,0.05,0.05,0.05,0.14,0.0,1.09,5.71,0.82,83.3,16.7,71.5,6.05,32.24,36.67,2.93,0.07,71.5,0.41,0.34,0.07,42.79,224.08,129.86,1.09,0.48,0.0,0.27,0.07,48.84,0.82,50.0,0.14,0.07,0.0,0.0,37.6,0.0,0,0,1.4,1.4,0.68,-1.4,-1.4,DF,,


In [75]:
combined_stats_2023[combined_stats_2023['Player']=='João Cancelo']

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Main_Pos,Secondary_Pos,Total_90s
331,João Cancelo,pt POR,DF,Manchester City+Bayern Munich,eng Premier League+de Bundesliga,28,1994,2286.0,32,27,2286,58.762205,2.044409,1.378583,0.984488,0.744646,0.315276,1.063307,1.84874,0.785433,0.825118,0.118504,0.706614,1.34,3.384409,1.216457,0.039134,2.914331,,2.401024,0.0,0.117953,0.235354,0.118504,0.039134,0.548425,0.0,0.078819,0.0,0.0,0.0,83.974016,93.188189,83.562205,61.666929,68.859213,81.933858,1109.411102,292.084094,38.53937,41.375354,24.290079,29.054173,5.117953,8.267559,0.197874,0.126142,0.171732,0.071732,0.986693,5.31252,2.248976,0.590315,6.573701,81.933858,73.737402,8.068504,1.260079,0.197323,1.025276,3.147953,6.808425,0.0,0.0,0.0,0.0,68.859213,0.117953,1.262835,,,0.117953,0.311417,0.117953,0.311417,0.06,0.126142,0.181732,0.06,0.181732,0.236457,0.039134,4.879843,6.573701,8.464882,51.461417,38.714173,93.58685,2.009685,17.679921,46.929843,29.801102,3.345276,93.58685,4.961417,2.559213,1.928661,58.10937,340.628661,191.810079,4.879843,2.520157,1.577008,1.499291,0.90622,67.755669,8.464882,24.522835,1.142677,0.276142,0.2,0.92,41.2,0.0,0,0,1.5,1.5,0.11,1.5,1.5,DF,,


In [76]:
combined_stats_2022[combined_stats_2022['Player']=='Dejan Kulusevski']

Unnamed: 0,Player,Nation,Pos,Squad,Comp,Age,Born,90s,MP,Starts,Min,Dribblers_Tackle_W%,Tackles_per90,Tackles_Won_per90,Tackles_Def_3rd_per90,Tackles_Mid_3rd_per90,Tackles_Att_3rd_per90,Dribblers_Tackled_per90,Dribblers_Challenged_per90,Dribblers_Tackle_Lost_per90,Blocks_per90,Shots_Blocked_per90,Passes_Blocked_Def_per90,Interceptions_per90,Tackles+Interceptions_per90,Clearances_per90,Errors_Shots_per90,Shot_Creating_Action_per90,Goal_Creating_Action_90,Pass_Live_Shot_per90,Pass_Dead_Shot_per90,Take_Ons_Shot_per90,Shot-Shot_per90,Fouls_drawn_Shot_per90,Defensive_Shot_per90,Pass_Live_Goal_per90,Pass_Dead_Goal_per90,Take_Ons_Goal_per90,Shot_Goal_per90,Fouls_Drawn_Goal_per90,Defensive_Goal_per90,Passes_Total_Cmp%,Passes_Short_Cmp%,Passes_Medium_Cmp%,Passes_Long_Cmp%,Passes_Total_Cmp_per90,Passes_Total_Att_per90,Passes_TotDist_per90,Passes_PrgDist_per90,Passes_Short_Cmp_per90,Passes_Short_Att_per90,Passes_Medium_Cmp_per90,Passes_Medium_Att_per90,Passes_Long_Cmp_per90,Passes_Long_Att_per90,Assists_per90,xAG_per90_x,xA_per90,A-xAG_per90,Key_Passes_per90,Passes_1/3_per90,Passes_Penalty_Area_per90,Crosses_Penalty_Area_per90,Progressive_Passes_per90,Passes_Attempted_per90,Live_Ball_Passes_per90,Dead_Ball_Passes_per90,Free_Kick_Passes_per90,Through_Balls_per90,Switches_per90,Crosses_per90,Throw_Ins_Taken_per90,Corner_Kicks_per90,In_Corner_Kicks_per90,Out_Corner_Kicks_per90,Str_Corner_Kicks_per90,Passes_Cmp_per90,Passes_Offside_per90,Passes_Blocked_Off_per90,PK,PK_Attempted,Goals_per90,G+A_per90,G-PK_per90,G+A-PK_per90,xG_per90,xAG_per90_y,xG+xAG_per90,npxG_per90,npxG+xAG_per90,Yellow_per90,Red_per90,Prg_Carries_per90,Prg_Passes_per90,Prg_Passes_Received_per90,Take_Ons_Succ%,Tackled_Take_Ons%,Touches_per90,Touches_Def_Pen_per90,Touches_Def_3rd_per90,Touches_Mid_3rd_per90,Touches_Att_3rd_per90,Touches_Att_Pen_per90,Tocuhes_Live_Balls_per90,Take_Ons_Attempted_per90,Take_Ons_Succ_per90,Tackled_Take_Ons_per90,Carries_per90,Total_Distance_per90,Progressive_Distance_Carried_per90,Progressive_Carries_per90,1/3_Carries_per90,Carries_Penalty_Area_per90,Miscontrols_per90,Dispossessed_per90,Passes_Received_per90,Progressive_Passes_Received_per90,Shots_on_target_%,Shots_total_per90,Shots_on_target_per90,Goals_per_shot,Goals_per_shot_on_target,Average_shot_distance,Shots_free_kicks,Pens_Scored,Pens_Attempted,XG,Npxg,Npxg_per_shot,Xg_net,Npxg_net,Main_Pos,Secondary_Pos,Total_90s
148,Dejan Kulusevski,se SWE,"MF,FW",Tottenham,eng Premier League,21,2000,14.0,18,14,1261,21.7,1.36,0.93,0.5,0.71,0.14,0.36,1.64,1.29,1.0,0.0,1.0,0.93,2.29,0.21,0.0,3.64,0.93,2.79,0.0,0.36,0.43,0.07,0.0,0.79,0.0,0.07,0.07,0.0,0.0,83.1,90.2,84.3,70.8,28.86,34.71,387.93,81.71,18.5,20.5,7.64,9.07,1.21,1.71,0.57,0.24,0.19,0.33,1.79,1.07,1.71,0.36,2.5,34.71,32.5,2.14,0.21,0.5,0.0,1.79,0.29,0.07,0.0,0.0,0.0,28.86,0.07,1.21,0.0,0.0,0.36,0.93,0.36,0.93,0.2,0.25,0.44,0.2,0.44,0.21,0.0,3.64,2.5,7.14,56.8,43.2,45.93,0.29,6.93,18.43,21.21,4.79,45.93,3.14,1.79,1.36,28.5,185.29,90.29,3.64,1.93,1.71,2.71,1.29,32.07,7.14,34.6,1.86,0.64,0.19,0.56,16.9,0,0,0,2.7,2.7,0.11,2.3,2.3,MF,FW,14.0


In [77]:
combined_stats_2024.to_csv('FBRef Data/2024/Filtered_Combined_Data_2024.csv')
combined_stats_2022.to_csv('FBRef Data/2022/Filtered_Combined_Data_2022.csv')
combined_stats_2023.to_csv('FBRef Data/2023/Filtered_Combined_Data_2023.csv')


In [78]:
for col in combined_stats_2024.columns:
  print(col)

Player
Nation
Pos
Squad
Comp
Age
Born
90s
MP
Starts
Min
Dribblers_Tackle_W%
Tackles_per90
Tackles_Won_per90
Tackles_Def_3rd_per90
Tackles_Mid_3rd_per90
Tackles_Att_3rd_per90
Dribblers_Tackled_per90
Dribblers_Challenged_per90
Dribblers_Tackle_Lost_per90
Blocks_per90
Shots_Blocked_per90
Passes_Blocked_Def_per90
Interceptions_per90
Tackles+Interceptions_per90
Clearances_per90
Errors_Shots_per90
Shot_Creating_Action_per90
Goal_Creating_Action_90
Pass_Live_Shot_per90
Pass_Dead_Shot_per90
Take_Ons_Shot_per90
Shot-Shot_per90
Fouls_drawn_Shot_per90
Defensive_Shot_per90
Pass_Live_Goal_per90
Pass_Dead_Goal_per90
Take_Ons_Goal_per90
Shot_Goal_per90
Fouls_Drawn_Goal_per90
Defensive_Goal_per90
Passes_Total_Cmp%
Passes_Short_Cmp%
Passes_Medium_Cmp%
Passes_Long_Cmp%
Passes_Total_Cmp_per90
Passes_Total_Att_per90
Passes_TotDist_per90
Passes_PrgDist_per90
Passes_Short_Cmp_per90
Passes_Short_Att_per90
Passes_Medium_Cmp_per90
Passes_Medium_Att_per90
Passes_Long_Cmp_per90
Passes_Long_Att_per90
Assists_per9