In [1]:
import pandas as pd
import numpy as np
from PIL import Image
from mplsoccer import PyPizza, add_image, FontManager
import urllib.request

In [2]:

# Display the current settings
print("Current maximum number of rows: ", pd.get_option('display.max_rows'))
print("Current maximum number of columns: ", pd.get_option('display.max_columns'))

# Set new maximum number of rows and columns
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)

# Display the updated settings
print("Updated maximum number of rows: ", pd.get_option('display.max_rows'))
print("Updated maximum number of columns: ", pd.get_option('display.max_columns'))


Current maximum number of rows:  60
Current maximum number of columns:  20
Updated maximum number of rows:  500
Updated maximum number of columns:  500


In [3]:
import os

folder_path = "../../Data Collection"  # Replace with the actual folder path

files = os.listdir(folder_path+'/2020/')

for file in files:
    print(file)


Miscellaneous_2020.csv
Players_Standard_2020.csv
Passing_2020.csv
Defense_2020.csv
Passing_Type_2020.csv
Shooting_2020.csv
Possession_2020.csv
Goal_Creating_Actions_2020.csv


In [4]:
def filter_player(dataframe, player_name='Fabinho'):
    return dataframe[dataframe['Player'] == player_name]

pas_2020 = filter_player(pd.read_csv(folder_path+'/2020/Passing_2020.csv').fillna(0))
pas_2022 = filter_player(pd.read_csv(folder_path+'/2022/Passing_2022.csv').fillna(0))
pas_2019 = filter_player(pd.read_csv(folder_path+'/2019/Passing_2019.csv').fillna(0))
gca_2020 = filter_player(pd.read_csv(folder_path+'/2020/Goal_Creating_Actions_2020.csv').fillna(0))
gca_2022 = filter_player(pd.read_csv(folder_path+'/2022/Goal_Creating_Actions_2022.csv').fillna(0))
gca_2019 = filter_player(pd.read_csv(folder_path+'/2019/Goal_Creating_Actions_2019.csv').fillna(0))
poss_2020 = filter_player(pd.read_csv(folder_path+'/2020/Possession_2020.csv').fillna(0))
poss_2022 = filter_player(pd.read_csv(folder_path+'/2022/Possession_2022.csv').fillna(0))
poss_2019 = filter_player(pd.read_csv(folder_path+'/2019/Possession_2019.csv').fillna(0))
sh_2020 = filter_player(pd.read_csv(folder_path+'/2020/Shooting_2020.csv').fillna(0))
sh_2022 = filter_player(pd.read_csv(folder_path+'/2022/Shooting_2022.csv').fillna(0))
sh_2019 = filter_player(pd.read_csv(folder_path+'/2019/Shooting_2019.csv').fillna(0))
past_2020 = filter_player(pd.read_csv(folder_path+'/2020/Passing_Type_2020.csv').fillna(0))
past_2022 = filter_player(pd.read_csv(folder_path+'/2022/Passing_Type_2022.csv').fillna(0))
past_2019 = filter_player(pd.read_csv(folder_path+'/2019/Passing_Type_2019.csv').fillna(0))
misc_2020 = filter_player(pd.read_csv(folder_path+'/2020/Miscellaneous_2020.csv').fillna(0))
misc_2022 = filter_player(pd.read_csv(folder_path+'/2022/Miscellaneous_2022.csv').fillna(0))
misc_2019 = filter_player(pd.read_csv(folder_path+'/2019/Miscellaneous_2019.csv').fillna(0))
st_2020 = filter_player(pd.read_csv(folder_path+'/2020/Players_Standard_2020.csv').fillna(0))
st_2022 = filter_player(pd.read_csv(folder_path+'/2022/Players_Standard_2022.csv').fillna(0))
st_2019 = filter_player(pd.read_csv(folder_path+'/2019/Players_Standard_2019.csv').fillna(0))


In [5]:
# Create new columns by dividing existing columns by '90s'
columns_to_divide_by_90s = ['Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received']

for column in columns_to_divide_by_90s:
    st_2020[column + '_per_90'] = round(st_2020[column] / st_2020['90s'],2)
    st_2022[column + '_per_90'] = round(st_2022[column] / st_2022['90s'],2)
    st_2019[column + '_per_90'] = round(st_2019[column] / st_2019['90s'],2)

# The DataFrame 'st_2020' now has new columns with values divided by '90s'
st_2020.columns


Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       'MP', 'Starts', 'Min', '90s', 'Goals', 'Assists', 'G+A', 'G-PK', 'PK',
       'PKatt', 'Yellow', 'Red', 'xG', 'npxG', 'xAG', 'npxG+xAG',
       'Prg_Carries', 'Prg_Passes', 'Prg_Passes_Received', 'Goals_per90',
       'Assits_per90', 'G+A_per90', 'G-PK_per90', 'G+A-PK_per90', 'xG_per90',
       'xAG_per90', 'xG+xAG_per90', 'npxG_per90', 'npxG+xAG_per90', 'Matches',
       'Prg_Carries_per_90', 'Prg_Passes_per_90',
       'Prg_Passes_Received_per_90'],
      dtype='object')

In [6]:
# Create new columns by dividing existing columns by '90s'
columns_to_divide_by_90s = ['Touches', 'Touches_Def_Pen', 'Touches_Def_3rd', 'Touches_Mid_3rd', 'Touches_Att_3rd',
                            'Touches_Att_Pen', 'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
                            'Tackled_Take_Ons', 'Carries', 'Total_Distance', 'Progressive_Distance_Carried',
                            'Progressive_Carries', '1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols', 'Dispossessed',
                            'Passes_Received', 'Progressive_Passes_Received']

for column in columns_to_divide_by_90s:
    poss_2020[column + '_per_90'] = round(poss_2020[column] / poss_2020['90s'],2)
    poss_2022[column + '_per_90'] = round(poss_2022[column] / poss_2022['90s'],2)
    poss_2019[column + '_per_90'] = round(poss_2019[column] / poss_2019['90s'],2)

# The DataFrame 'poss_2020' now has new columns with values divided by '90s'
poss_2020.columns


Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Touches', 'Touches_Def_Pen', 'Touches_Def_3rd',
       'Touches_Mid_3rd', 'Touches_Att_3rd', 'Touches_Att_Pen',
       'Tocuhes_Live_Balls', 'Take_Ons_Attempted', 'Take_Ons_Succ',
       'Take_Ons_Succ%', 'Tackled_Take_Ons', 'Tackled_Take_Ons%', 'Carries',
       'Total_Distance', 'Progressive_Distance_Carried', 'Progressive_Carries',
       '1/3_Carries', 'Carries_Penalty_Area', 'Miscontrols', 'Dispossessed',
       'Passes_Received', 'Progressive_Passes_Received', 'Matches',
       'Touches_per_90', 'Touches_Def_Pen_per_90', 'Touches_Def_3rd_per_90',
       'Touches_Mid_3rd_per_90', 'Touches_Att_3rd_per_90',
       'Touches_Att_Pen_per_90', 'Tocuhes_Live_Balls_per_90',
       'Take_Ons_Attempted_per_90', 'Take_Ons_Succ_per_90',
       'Tackled_Take_Ons_per_90', 'Carries_per_90', 'Total_Distance_per_90',
       'Progressive_Distance_Carried_per_90', 'Progressive_Carries_per_90',
       '1/3_

In [7]:
# Create new columns by dividing existing columns by '90s'
columns_to_divide_by_90s = ['Pass_Live_Shot', 'Pass_Dead_Shot',
       'Take_Ons_Shot', 'Shot-Shot', 'Fouls_drawn_Shot', 'Defensive_Shot',
        'Pass_Live_Goal','Pass_Dead_Goal', 'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal',
       'Defensive_Goal']

for column in columns_to_divide_by_90s:
    gca_2020[column + '_per_90'] = round(gca_2020[column] / gca_2020['90s'],2)
    gca_2022[column + '_per_90'] = round(gca_2022[column] / gca_2022['90s'],2)
    gca_2019[column + '_per_90'] = round(gca_2019[column] / gca_2019['90s'],2)

# The DataFrame 'gca_2020' now has new columns with values divided by '90s'
gca_2020.columns


Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Shot_Creating_Action', 'Shot_Creating_Action_per90',
       'Pass_Live_Shot', 'Pass_Dead_Shot', 'Take_Ons_Shot', 'Shot-Shot',
       'Fouls_drawn_Shot', 'Defensive_Shot', 'Goal_Creating_Action',
       'Goal_Creating_Action_90', 'Pass_Live_Goal', 'Pass_Dead_Goal',
       'Take_Ons_Goal', 'Shot_Goal', 'Fouls_Drawn_Goal', 'Defensive_Goal',
       'Matches', 'Pass_Live_Shot_per_90', 'Pass_Dead_Shot_per_90',
       'Take_Ons_Shot_per_90', 'Shot-Shot_per_90', 'Fouls_drawn_Shot_per_90',
       'Defensive_Shot_per_90', 'Pass_Live_Goal_per_90',
       'Pass_Dead_Goal_per_90', 'Take_Ons_Goal_per_90', 'Shot_Goal_per_90',
       'Fouls_Drawn_Goal_per_90', 'Defensive_Goal_per_90'],
      dtype='object')

In [8]:
pas_2022.columns

Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_Total_Cmp%',
       'Passes_TotDist', 'Passes_PrgDist', 'Passes_Short_Cmp',
       'Passes_Short_Att', 'Passes_Short_Cmp%', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Medium_Cmp%', 'Passes_Long_Cmp',
       'Passes_Long_Att', 'Passes_Long_Cmp%', 'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes', 'Matches'],
      dtype='object')

In [9]:
# Create new columns by dividing existing columns by '90s'
columns_to_divide_by_90s = ['Assists','xAG', 'xA', 'A-xAG', 'Key_Passes', 'Passes_1/3', 
                            'Passes_Penalty_Area','Crosses_Penalty_Area', 'Progressive_Passes']

for column in columns_to_divide_by_90s:
    pas_2020[column + '_per_90'] = round(pas_2020[column] / pas_2020['90s'],2)
    pas_2022[column + '_per_90'] = round(pas_2022[column] / pas_2022['90s'],2)
    pas_2019[column + '_per_90'] = round(pas_2019[column] / pas_2019['90s'],2)

# The DataFrame 'pas_2020' now has new columns with values divided by '90s'
pas_2020.columns


Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Total_Cmp', 'Passes_Total_Att', 'Passes_Total_Cmp%',
       'Passes_TotDist', 'Passes_PrgDist', 'Passes_Short_Cmp',
       'Passes_Short_Att', 'Passes_Short_Cmp%', 'Passes_Medium_Cmp',
       'Passes_Medium_Att', 'Passes_Medium_Cmp%', 'Passes_Long_Cmp',
       'Passes_Long_Att', 'Passes_Long_Cmp%', 'Assists', 'xAG', 'xA', 'A-xAG',
       'Key_Passes', 'Passes_1/3', 'Passes_Penalty_Area',
       'Crosses_Penalty_Area', 'Progressive_Passes', 'Matches',
       'Assists_per_90', 'xAG_per_90', 'xA_per_90', 'A-xAG_per_90',
       'Key_Passes_per_90', 'Passes_1/3_per_90', 'Passes_Penalty_Area_per_90',
       'Crosses_Penalty_Area_per_90', 'Progressive_Passes_per_90'],
      dtype='object')

In [10]:
# Create new columns by dividing existing columns by '90s'
columns_to_divide_by_90s = ['Passes_Attempted', 'Live_Ball_Passes',
       'Dead_Ball_Passes', 'Free_Kick_Passes', 'Through_Balls', 'Switches',
       'Crosses', 'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Off',
       'Passes_Blocked']

for column in columns_to_divide_by_90s:
    past_2020[column + '_per_90'] = round(past_2020[column] / past_2020['90s'],2)
    past_2022[column + '_per_90'] = round(past_2022[column] / past_2022['90s'],2)
    past_2019[column + '_per_90'] = round(past_2019[column] / past_2019['90s'],2)

# The DataFrame 'past_2020' now has new columns with values divided by '90s'
past_2020.columns


Index(['Unnamed: 0', 'Player', 'Nation', 'Pos', 'Squad', 'Comp', 'Age', 'Born',
       '90s', 'Passes_Attempted', 'Live_Ball_Passes', 'Dead_Ball_Passes',
       'Free_Kick_Passes', 'Through_Balls', 'Switches', 'Crosses',
       'Throw_Ins_Taken', 'Corner_Kicks', 'In_Corner_Kicks',
       'Out_Corner_Kicks', 'Str_Corner_Kicks', 'Passes_Cmp', 'Passes_Off',
       'Passes_Blocked', 'Matches', 'Passes_Attempted_per_90',
       'Live_Ball_Passes_per_90', 'Dead_Ball_Passes_per_90',
       'Free_Kick_Passes_per_90', 'Through_Balls_per_90', 'Switches_per_90',
       'Crosses_per_90', 'Throw_Ins_Taken_per_90', 'Corner_Kicks_per_90',
       'In_Corner_Kicks_per_90', 'Out_Corner_Kicks_per_90',
       'Str_Corner_Kicks_per_90', 'Passes_Cmp_per_90', 'Passes_Off_per_90',
       'Passes_Blocked_per_90'],
      dtype='object')

In [11]:
misc_2020['90s']=st_2020['90s']
misc_2022['90s']=st_2022['90s']
misc_2019['90s']=st_2019['90s']