# Player and Club Statistics Data

In [2]:
# Import required libraries
import pandas as pd
import time

# Database connector 
import mysql.connector
from mysql.connector import Error

In [3]:
# Connect to the database
connection = mysql.connector.connect(
    host = 'localhost',
    port = 3306,
    user = 'root',
    password = '#',
    database = 'arsenaldb'
)

# Function to execute a SQL query 
def execute_query(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Successful")
    except Error as err:
        print(f"Error: '{err}'")

# Read and Run Query
def read_query(connection, query):
    cursor = connection.cursor()
    result = None
    try:
        cursor.execute(query)
        result = cursor.fetchall() # Reads data from the database without making any changes to it.
        return result
    except Error as err:
        print(f'Error: "{err}"')

# Use DB
execute_query(connection, """USE ArsenalDB""")

Successful


In [4]:
# Assign the seasons accordingly
season_mapping = {
    2023:'2023-2024',
    2022:'2022-2023',
    2021:'2021-2022',
    2020:'2020-2021',
    2019:'2019-2020'
}

# Function to choose the appropriate season URL
def foo_url(season):
    if season == 2023:
        return 'https://fbref.com/en/squads/18bb7c10/Arsenal-Stats'
    
    return f'https://fbref.com/en/squads/18bb7c10/{season_mapping[season]}/Arsenal-Stats'


In [5]:
# HTML Table tags
standard_stats = {"id":"stats_standard_9"}
goalkeeping = {"id":"stats_keeper_9"}
shooting = {"id":"stats_shooting_9"}
passing = {"id":"stats_passing_9"}
defensive = {"id":"stats_defense_9"}
possession = {"id":"stats_possession_9"}
miscell_stats = {"id":"stats_misc_9"}

# Function that takes the season and desired stats we need to scrape then returns a table

def foo_table(season, attribute):
        
        # Add a 3 second delay to prevent HTTP request error
        time.sleep(3) 

        data = pd.read_html(foo_url(season), attrs = attribute)[0]

        # New Columns
        new_columns = []
        for col in data.columns:
            if col[0] == 'Per 90 Minutes':
                new_col_name = f"{col[1]}_per90"
            else:
                new_col_name = col[1]
            new_columns.append(new_col_name)

        # Assign the new column names to the DataFrame
        data.columns = new_columns 

        # Clean Data Accordingly
        clean_data = None

        # Goalkeeping
        if attribute == goalkeeping:
            clean_data = data.iloc[:,[0,4,6,8,9,10,11,12,16,18,20]] # Filter Columns 
            clean_data.rename(columns = {'Player':'PlayerName','Save%': 'SavePercentage','SoTA': 'ShotsOnTargetAgaint',
                                         'MP': 'MatchesPlayed' , 'Min': 'MinutesPlayed', 'GA':'GoalAgainst',
                                         'CS': 'CleanSheet', 'PKatt':'PenaltyFaced', 'PKsv':'PenaltySaved'}, inplace=True) # Rename Column Names

        # Passing  
        elif attribute == passing:
             clean_data = data.iloc[:,[0,5,6,7,23,24,25]]
             clean_data.rename(columns = {'Player':'PlayerName','Cmp' : 'CompletedPass',
                                          'Att' : 'AttemptedPass',
                                          'Cmp%':'PassCompletionPercent',
                                          'KP' : 'KeyPass',
                                          '1/3': 'KeyPass_Final3rd',
                                          'PPA' : 'PassesPenaltyArea'}, inplace=True)

        # Shooting Stats  
        elif attribute == shooting:
             clean_data = data.loc[:,['Player','Gls','Sh', 'SoT', 'SoT%',
                                   'Sh/90', 'SoT/90', 'G/Sh', 'G/SoT', 'Dist', 'xG']]
             clean_data.rename(columns = {'Player':'PlayerName','Gls':'Goals', 'Sh':'Shots', 'SoT':'ShotsonTarget', 
                                          'SoT%': 'ShotsonTargetPercent', 'Sh/90':'Shots_Per90', 
                                          'SoT/90':'ShotsonTarget_Per90', 'G/Sh':'GoalsPerShot', 
                                          'G/SoT':'GoalsPerShotsonTarget', 'Dist':'ShotDistance'}, inplace= True)
             
        # Standard Stats    
        elif attribute == standard_stats:
             clean_data = data.loc[:,['Player', 'MP', 'Starts', 'Min','Gls',
                                   'Ast', 'G+A', 'G-PK', 'PK','CrdY', 'CrdR', 'xG', 'xAG','PrgC', 'PrgP', 'PrgR',
                                   'Gls_per90', 'Ast_per90', 'G+A_per90','xG_per90', 'xAG_per90']]
             clean_data.rename(columns = {'Player':'PlayerName','MP':'MatchesPlayed','Min':'MinutesPlayed',
                                          'Gls':'Goals', 'Ast':'Assists', 'G+A':'GoalContribution', 'G-PK':'NonPenaltyGoals',
                                        'PK':'PenaltyScored','CrdY':'YellowCard', 'CrdR':'RedCard','PrgC':'ProgressiveCarries',
                                        'PrgP':'ProgressivePasses', 'PrgR':'ProgressivePassesRecevied', 'Gls_per90' : 'Goals_Per90',
                                        'Ast_per90':'Assists_Per90', 'G+A_per90':'GoalContribution_Per90','xG_per90':'xG_Per90',
                                        'xAG_per90':'xAG_Per90'}, inplace=True)
             
        # Defensive     
        elif attribute == defensive:
             clean_data = data.iloc[:,[0,5,6,14,17,19]]
             clean_data.rename(columns = {'Player':'PlayerName','Int' : 'Interceptions', 'Tkl':'Tackles', 'TklW': 'TacklesWon',
                                          'Clr':'Clearances'}, inplace=True)


        # Miscellinious Stas     
        elif attribute == miscell_stats:
             clean_data = data.loc[:,['Player', 'Fls','Fld', 'Off', 'Recov', 'Won','Lost']]
             clean_data.rename(columns = {'Player':'PlayerName','Fls':'FoulsCommitted','Fld':'FoulsDrawn', 'Off':'Offside', 
                                          'Recov':'BallRecoveries', 'Won':'AerialDuelsWon','Lost':'AerialDuelsLost'}, inplace=True)

        # Possession
        elif attribute == possession:
             clean_data = data.loc[:,['Player','Touches', 'Def Pen', 'Att', 'Succ','Succ%', 'Carries','TotDist']]
             clean_data.rename(columns={'Player':'PlayerName','Def Pen':'TouchesDefense', 'Att': 'AttemptedTakeons',
                                        'Succ':'SuccessfulTakeons', 'Succ%':'SuccessfulTakeonsPercent',
                                        'TotDist':'TotalCarryDistance'}, inplace=True)

        # Update names 
        # Update names in wages table to match players table
        # Sead Kolašinac
        clean_data.loc[clean_data['PlayerName'] == "Sead Kolašinac", 'PlayerName'] = "Sead Kolasinac"

        # Fabio Viera
        clean_data.loc[clean_data['PlayerName'] == "Fabio Vieira", 'PlayerName'] = "Fábio Vieira"

        # Timber
        clean_data.loc[clean_data['PlayerName'] == "Jurriën Timber", 'PlayerName'] = "Jurrien Timber"

        # Hein
        clean_data.loc[clean_data['PlayerName'] == "Karl Jakob Hein", 'PlayerName'] = "Karl Hein"

        # Add respective season
        clean_data['SeasonName'] = season_mapping[season]

        # Fill blank cells with 0
        clean_data = clean_data.fillna(0)
        

        return clean_data

Extract Season Stats

In [None]:
# Passing Data
df_pass23 = foo_table(2023, passing)
df_pass22 = foo_table(2022, passing)
df_pass21 = foo_table(2021, passing)
df_pass20 = foo_table(2020, passing)
df_pass19 = foo_table(2019, passing)

# Shooting Data
df_shoot23 = foo_table(2023, shooting)
df_shoot22 = foo_table(2022, shooting)
df_shoot21 = foo_table(2021, shooting)
df_shoot20 = foo_table(2020, shooting)
df_shoot19 = foo_table(2019, shooting)

# Goalkeeping
df_gk23 = foo_table(2023, goalkeeping)
df_gk22 = foo_table(2022, goalkeeping)
df_gk21 = foo_table(2021, goalkeeping)
df_gk20 = foo_table(2020, goalkeeping)
df_gk19 = foo_table(2019, goalkeeping)

# Standard Stats
df_ss23 = foo_table(2023, standard_stats)
df_ss22 = foo_table(2022, standard_stats)
df_ss21 = foo_table(2021, standard_stats)
df_ss20 = foo_table(2020, standard_stats)
df_ss19 = foo_table(2019, standard_stats)

# Defensive
df_defense23 = foo_table(2023, defensive)
df_defense22 = foo_table(2022, defensive)
df_defense21 = foo_table(2021, defensive)
df_defense20 = foo_table(2020, defensive)
df_defense19 = foo_table(2019, defensive)

# Possession
df_poss23 = foo_table(2023, possession)
df_poss22 = foo_table(2022, possession)
df_poss21 = foo_table(2021, possession)
df_poss20 = foo_table(2020, possession)
df_poss19 = foo_table(2019, possession)

# Miscell
df_miscel23 = foo_table(2023, miscell_stats)
df_miscel22 = foo_table(2022, miscell_stats)
df_miscel21 = foo_table(2021, miscell_stats)
df_miscel20 = foo_table(2020, miscell_stats)
df_miscel19 = foo_table(2019, miscell_stats)


In [6]:
# Function to join all tables by stats
def prepare(data):

    # Join individual tables together
    df_join = pd.concat(data, ignore_index=True)

    # Drop both Squad Total and Opponent Total, irrelevant to analysis
    df_joined = df_join.drop(df_join[(df_join['PlayerName'] == 'Opponent Total') | (df_join['PlayerName'] == 'Squad Total')].index).reset_index(drop=True)

    # Create a total to store the squad total for each stat
    df_total = df_join[df_join['PlayerName'] == 'Squad Total'].reset_index(drop=True)
    df_total = df_total.iloc[:,1:]


    return df_joined, df_total

In [215]:
# Store historical dataframes into a list
miscell_stats = [df_miscel19, df_miscel20, df_miscel21, df_miscel22, df_miscel23]
shooting = [df_shoot19, df_shoot20, df_shoot21, df_shoot22, df_shoot23]
passing = [df_pass19, df_pass20, df_pass21, df_pass22, df_pass23]
possession = [df_poss19, df_poss20, df_poss21, df_poss22, df_poss23]
defensive = [df_defense19, df_defense20, df_defense21, df_defense22, df_defense23]
standard_stats = [df_ss19, df_ss20, df_ss21, df_ss22, df_ss23]
goalkeeping = [df_gk19, df_gk20, df_gk21, df_gk22, df_gk23]

# Join the tables
df_player_pass, df_squad_pass = prepare(passing)
df_player_shooting, df_squad_shooting = prepare(shooting)
df_player_possession, df_squad_possession = prepare(possession)
df_player_miscell_stats, df_squad_miscell_stats = prepare(miscell_stats)
df_player_defensive,df_squad_defensive = prepare(defensive)
df_player_goalkeeping, df_squad_goalkeeping = prepare(goalkeeping)
df_player_standard_stats, df_squad_standard_stats = prepare(standard_stats)

Pull Season and Player ID Information

In [7]:
# Season
season_table = """SELECT *FROM season"""
season_result = read_query(connection, season_table)
season_columns = ['SeasonID','SeasonName','League']

# Convert to pandas dataframe
df_season = pd.DataFrame(season_result, columns=season_columns)
df_season

Unnamed: 0,SeasonID,SeasonName,League
0,1,2019-2020,English Premier League
1,2,2020-2021,English Premier League
2,3,2021-2022,English Premier League
3,4,2022-2023,English Premier League
4,5,2023-2024,English Premier League


In [8]:
# Players 
players_table = """SELECT * FROM players"""
players_result = read_query(connection, players_table)
players_column = ['PlayerID','PlayerName','NationCode','Nation']

# Convert to a dataframe
df_players = pd.DataFrame(players_result, columns=players_column)
df_players.head()

Unnamed: 0,PlayerID,PlayerName,NationCode,Nation
0,1,Bernd Leno,de,Germany
1,2,Matt Macey,eng,England
2,3,Sokratis Papastathopoulos,gr,Greece
3,4,Shkodran Mustafi,de,Germany
4,5,Rob Holding,eng,England


Join the tables

In [9]:
# Define a function that merges the tables, convert to a dictinary and insert to database

def join_player(stats_table, season_table, players_table):

    # Merge the tables
    df_table = pd.merge(stats_table, season_table, on = 'SeasonName', how = 'left')
    df_full = pd.merge(df_table, players_table, on = 'PlayerName', how = 'left')
    
    return df_full

In [10]:
# Merge squad stats to add seasonid
def join_squad(squad_stat_tables, season_table):

    df_table = pd.merge(squad_stat_tables, season_table, on = 'SeasonName', how = 'left')

    return df_table

### Standard Stats

In [170]:
# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_standard_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_standard_stats""")

# Create Table
p_standard_stats_table = """
CREATE TABLE p_standard_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
	MatchesPlayed INT NOT NULL, 
	Starts INT NOT NULL, 
	MinutesPlayed INT NOT NULL,
	Goals INT NOT NULL,
	Assists INT NOT NULL,
	GoalContribution INT NOT NULL, 
	NonPenaltyGoals INT NOT NULL,
	PenaltyScored INT NOT NULL, 
	YellowCard INT NOT NULL, 
	RedCard INT NOT NULL, 
	xG DECIMAL(3,1) NOT NULL, 
	xAG DECIMAL(3,1) NOT NULL, 
	ProgressiveCarries INT NOT NULL, 
	ProgressivePasses INT NOT NULL, 
	ProgressivePassesRecevied INT NOT NULL
)
"""

c_standard_stats_table = """
CREATE TABLE c_standard_stats(
	SeasonID INT NOT NULL,
	MatchesPlayed INT NOT NULL, 
	Goals INT NOT NULL,
    xG DECIMAL(3,1) NOT NULL,
	Assists INT NOT NULL,
    xAG DECIMAL(3,1) NOT NULL,
	GoalContribution INT NOT NULL, 
	NonPenaltyGoals INT NOT NULL,
	PenaltyScored INT NOT NULL, 
	YellowCard INT NOT NULL, 
	RedCard INT NOT NULL, 
	ProgressiveCarries INT NOT NULL, 
	ProgressivePasses INT NOT NULL, 
	ProgressivePassesRecevied INT NOT NULL,
    Goals_Per90 INT NOT NULL,
    Assists_Per90 INT NOT NULL, 
    GoalContribution_Per90 INT NOT NULL,
    xG_Per90 INT NOT NULL,
    xAG_Per90 INT NOT NULL
)
"""

execute_query(connection, p_standard_stats_table)
execute_query(connection, c_standard_stats_table)

Successful
Successful
Successful
Successful


In [None]:
# Player Standard Stats
p_standard_stats = join_player(df_player_standard_stats, df_season, df_players)
# Filter for needed data
p_standard_stats = p_standard_stats.loc[:,['SeasonID','PlayerID','PlayerName','MatchesPlayed','Starts','MinutesPlayed',
                                           'Goals','Assists','GoalContribution','NonPenaltyGoals','PenaltyScored','YellowCard',
                                           'RedCard','xG','xAG','ProgressiveCarries','ProgressivePasses', 'ProgressivePassesRecevied']]

p_standard_stats = p_standard_stats.dropna(subset = ['PlayerID'])

# Convert to a dictionaryt and insert table into database
dict_pss = p_standard_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pss)):
    if i in dict_pss:
        columns = ", ".join(dict_pss[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pss[i].values())

        sql_query = f'INSERT INTO p_standard_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

In [172]:
# Club Standard Stats
c_standard_stats = join_squad(df_squad_standard_stats, df_season)
c_standard_stats = c_standard_stats.loc[:,['SeasonID','MatchesPlayed','Goals','xG','xAG','Assists','GoalContribution','NonPenaltyGoals','PenaltyScored','YellowCard',
                                           'RedCard','ProgressiveCarries','ProgressivePasses', 'ProgressivePassesRecevied',
                                           'Goals_Per90','Assists_Per90', 'GoalContribution_Per90','xG_Per90','xAG_Per90']]

# Convert to a dictionaryt and insert table into database
dict_css = c_standard_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_css)):
    if i in dict_css:
        columns = ", ".join(dict_css[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_css[i].values())

        sql_query = f'INSERT INTO c_standard_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")


Unnamed: 0,SeasonID,MatchesPlayed,Goals,xG,xAG,Assists,GoalContribution,NonPenaltyGoals,PenaltyScored,YellowCard,RedCard,ProgressiveCarries,ProgressivePasses,ProgressivePassesRecevied,Goals_Per90,Assists_Per90,GoalContribution_Per90,xG_Per90,xAG_Per90
0,1,38,56.0,47.0,31.4,35.0,91.0,53.0,3.0,88.0,5.0,815.0,1641.0,1630.0,1.47,0.92,2.39,1.24,0.83
1,2,38,53.0,51.8,35.1,38.0,91.0,47.0,6.0,49.0,5.0,758.0,1760.0,1748.0,1.39,1.0,2.39,1.36,0.92
2,3,38,60.0,60.5,40.2,41.0,101.0,55.0,5.0,67.0,4.0,734.0,1655.0,1642.0,1.58,1.08,2.66,1.59,1.06
3,4,38,84.0,71.9,53.8,64.0,148.0,81.0,3.0,51.0,0.0,824.0,2049.0,2024.0,2.21,1.68,3.89,1.89,1.41
4,5,27,65.0,55.3,38.3,45.0,110.0,57.0,8.0,41.0,2.0,609.0,1571.0,1557.0,2.41,1.67,4.07,2.05,1.42


### Shooting Stats

In [183]:

# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_shooting_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_shooting_stats""")

# Create Table
p_shooting_stats_table = """
CREATE TABLE p_shooting_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
    Goals INT NOT NULL,
    Shots INT NOT NULL, 
    ShotsonTarget INT NOT NULL, 
    ShotsonTargetPercent DECIMAL(4,1) NOT NULL, 
    GoalsPerShot DECIMAL(3,1) NOT NULL, 
    GoalsPerShotsonTarget DECIMAL(3,1) NOT NULL, 
    ShotDistance DECIMAL(3,1) NOT NULL, 
    xG DECIMAL(3,1) NOT NULL
)
"""

c_shooting_stats_table = """
CREATE TABLE c_shooting_stats(
	SeasonID INT NOT NULL,
    Goals INT NOT NULL,
    xG DECIMAL(3,1) NOT NULL,
    Shots INT NOT NULL, 
    ShotsonTarget INT NOT NULL, 
    ShotsonTargetPercent DECIMAL(4,1) NOT NULL, 
    GoalsPerShot DECIMAL(3,1) NOT NULL, 
    GoalsPerShotsonTarget DECIMAL(3,1) NOT NULL, 
    ShotDistance DECIMAL(3,1) NOT NULL
)
"""

execute_query(connection, p_shooting_stats_table)
execute_query(connection, c_shooting_stats_table)

Successful
Successful
Successful
Successful


In [None]:
# Player Shooting Stats
p_shooting_stats = join_player(df_player_shooting, df_season, df_players)
# Filter for needed data
p_shooting_stats = p_shooting_stats.loc[:,['SeasonID','PlayerID','PlayerName','Goals','Shots','ShotsonTarget',
                                           'ShotsonTargetPercent','GoalsPerShot','GoalsPerShotsonTarget','ShotDistance','xG']]

# Convert to a dictionaryt and insert table into database
dict_pss = p_shooting_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pss)):
    if i in dict_pss:
        columns = ", ".join(dict_pss[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pss[i].values())

        sql_query = f'INSERT INTO p_shooting_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")


In [None]:
# Club Shooting Stats
c_shooting_stats = join_squad(df_squad_shooting, df_season)
c_shooting_stats = c_shooting_stats.loc[:,['SeasonID','Goals','xG','Shots','ShotsonTarget','ShotsonTargetPercent',
                                           'GoalsPerShot','GoalsPerShotsonTarget','ShotDistance']]

# Convert to a dictionaryt and insert table into database
dict_css = c_shooting_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_css)):
    if i in dict_css:
        columns = ", ".join(dict_css[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_css[i].values())

        sql_query = f'INSERT INTO c_shooting_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

### Passing Stats

In [198]:

# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_passing_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_passing_stats""")

# Create Table
p_passing_stats_table = """
CREATE TABLE p_passing_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80), 
	CompletedPass INT NOT NULL, 
	AttemptedPass INT NOT NULL, 
	PassCompletionPercent DECIMAL(3,1) NOT NULL, 
	KeyPass INT NOT NULL,
	KeyPass_Final3rd INT NOT NULL, 
	PassesPenaltyArea INT NOT NULL
)
"""

c_passing_stats_table = """
CREATE TABLE c_passing_stats(
	SeasonID INT NOT NULL,
	CompletedPass INT NOT NULL, 
	AttemptedPass INT NOT NULL, 
	PassCompletionPercent DECIMAL(3,1) NOT NULL, 
	KeyPass INT NOT NULL,
	KeyPass_Final3rd INT NOT NULL, 
	PassesPenaltyArea INT NOT NULL
)
"""

execute_query(connection, p_passing_stats_table)
execute_query(connection, c_passing_stats_table)

Successful
Successful
Successful
Successful


In [None]:
# Player Passing Stats
p_passing_stats = join_player(df_player_pass, df_season, df_players)
# Filter for needed data
p_passing_stats = p_passing_stats.loc[:,['SeasonID','PlayerID','PlayerName','CompletedPass','AttemptedPass','PassCompletionPercent',
                                           'KeyPass','KeyPass_Final3rd','PassesPenaltyArea']]

# Convert to a dictionary and insert table into database
dict_pss = p_passing_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pss)):
    if i in dict_pss:
        columns = ", ".join(dict_pss[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pss[i].values())

        sql_query = f'INSERT INTO p_passing_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")



In [200]:
# Club Passing Stats
c_passing_stats = join_squad(df_squad_pass, df_season)
c_passing_stats = c_passing_stats.loc[:,['SeasonID','CompletedPass','AttemptedPass','PassCompletionPercent',
                                         'KeyPass','KeyPass_Final3rd', 'PassesPenaltyArea']]

# Convert to a dictionary and insert table into database
dict_cps = c_passing_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_cps)):
    if i in dict_cps:
        columns = ", ".join(dict_cps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_cps[i].values())

        sql_query = f'INSERT INTO c_passing_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")


Successful
Successful
Successful
Successful
Successful


### Possession Stats

In [201]:

# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_possession_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_possession_stats""")

# Create Table
p_possession_stats_table = """
CREATE TABLE p_possession_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
    Touches INT NOT NULL, 
	TouchesDefense INT NOT NULL, 
	AttemptedTakeons INT NOT NULL, 
	SuccessfulTakeons INT NOT NULL, 
	SuccessfulTakeonsPercent DECIMAL(4,1) NOT NULL, 
	Carries INT NOT NULL, 
	TotalCarryDistance INT NOT NULL
)
"""

c_possession_stats_table = """
CREATE TABLE c_possession_stats(
	SeasonID INT NOT NULL,
    Touches INT NOT NULL, 
	TouchesDefense INT NOT NULL, 
	AttemptedTakeons INT NOT NULL, 
	SuccessfulTakeons INT NOT NULL, 
	SuccessfulTakeonsPercent DECIMAL(4,1) NOT NULL, 
	Carries INT NOT NULL, 
	TotalCarryDistance INT NOT NULL
)
"""

execute_query(connection, p_possession_stats_table)
execute_query(connection, c_possession_stats_table)

Successful
Successful
Successful
Successful


In [None]:
# Player Possession Stats
p_possession_stats = join_player(df_player_possession, df_season, df_players)
# Filter for needed data
p_possession_stats = p_possession_stats.loc[:,['SeasonID','PlayerID','PlayerName','Touches','TouchesDefense','AttemptedTakeons',
                                           'SuccessfulTakeons','SuccessfulTakeonsPercent','Carries','TotalCarryDistance']]

# Convert to a dictionary and insert table into database
dict_pps = p_possession_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pps)):
    if i in dict_pps:
        columns = ", ".join(dict_pps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pps[i].values())

        sql_query = f'INSERT INTO p_possession_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

In [203]:
# Club Possession Stats
c_possession_stats = join_squad(df_squad_possession, df_season)
# Filter for needed data
c_possession_stats = c_possession_stats.loc[:,['SeasonID','Touches','TouchesDefense','AttemptedTakeons',
                                           'SuccessfulTakeons','SuccessfulTakeonsPercent','Carries','TotalCarryDistance']]

# Convert to a dictionary and insert table into database
dict_cps = c_possession_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_cps)):
    if i in dict_cps:
        columns = ", ".join(dict_cps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_cps[i].values())

        sql_query = f'INSERT INTO c_possession_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful


### Miscell Stats

In [207]:

# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_miscell_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_miscell_stats""")

# Create Table
p_miscell_stats_table = """
CREATE TABLE p_miscell_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
	FoulsCommitted INT NOT NULL, 
	FoulsDrawn INT NOT NULL,
	Offside INT NOT NULL, 
	BallRecoveries INT NOT NULL, 
	AerialDuelsWon INT NOT NULL, 
	AerialDuelsLost INT NOT NULL
)
"""

c_miscell_stats_table = """
CREATE TABLE c_miscell_stats(
	SeasonID INT NOT NULL,
	FoulsCommitted INT NOT NULL, 
	FoulsDrawn INT NOT NULL,
	Offside INT NOT NULL, 
	BallRecoveries INT NOT NULL, 
	AerialDuelsWon INT NOT NULL, 
	AerialDuelsLost INT NOT NULL
)
"""

execute_query(connection, p_miscell_stats_table)
execute_query(connection, c_miscell_stats_table)

Successful
Successful
Successful
Successful


In [None]:
# Player Miscell Stats
p_miscell_stats = join_player(df_player_miscell_stats, df_season, df_players)
# Filter for needed data
p_miscell_stats = p_miscell_stats.loc[:,['SeasonID','PlayerID','PlayerName','FoulsCommitted','FoulsDrawn','Offside',
                                           'BallRecoveries','AerialDuelsWon','AerialDuelsLost']]

# Convert to a dictionary and insert table into database
dict_pps = p_miscell_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pps)):
    if i in dict_pps:
        columns = ", ".join(dict_pps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pps[i].values())

        sql_query = f'INSERT INTO p_miscell_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

In [210]:
# Club Miscell Stats
c_miscell_stats = join_squad(df_squad_miscell_stats, df_season)
# Filter for needed data
c_miscell_stats = c_miscell_stats.loc[:,['SeasonID','FoulsCommitted','FoulsDrawn','Offside',
                                           'BallRecoveries','AerialDuelsWon','AerialDuelsLost']]

# Convert to a dictionary and insert table into database
dict_cps = c_miscell_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_cps)):
    if i in dict_cps:
        columns = ", ".join(dict_cps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_cps[i].values())

        sql_query = f'INSERT INTO c_miscell_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful


### Goalkeeping Stats

In [221]:
# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_goalkeeping_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_goalkeeping_stats""")

# Create Table
p_goalkeeping_stats_table = """
CREATE TABLE p_goalkeeping_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
    MatchesPlayed INT NOT NULL, 
	MinutesPlayed INT NOT NULL, 
	GoalAgainst INT NOT NULL ,
	ShotsOnTargetAgaint INT NOT NULL, 
	Saves INT NOT NULL,
	SavePercentage DECIMAL(4,1) NOT NULL, 
	CleanSheet INT NOT NULL,
	PenaltyFaced INT NOT NULL,
	PenaltySaved INT NOT NULL
)
"""

c_goalkeeping_stats_table = """
CREATE TABLE c_goalkeeping_stats(
	SeasonID INT NOT NULL,
	GoalAgainst INT NOT NULL ,
	ShotsOnTargetAgaint INT NOT NULL, 
	Saves INT NOT NULL,
	SavePercentage DECIMAL(4,1) NOT NULL, 
	CleanSheet INT NOT NULL,
	PenaltyFaced INT NOT NULL,
	PenaltySaved INT NOT NULL
)
"""

execute_query(connection, p_goalkeeping_stats_table)
execute_query(connection, c_goalkeeping_stats_table)

Successful
Successful
Successful
Successful


In [222]:
# Player Goalkeeping Stats
p_goalkeeping_stats = join_player(df_player_goalkeeping, df_season, df_players)
# Filter for needed data
p_goalkeeping_stats = p_goalkeeping_stats.loc[:,['SeasonID','PlayerID','PlayerName','MatchesPlayed','MinutesPlayed','GoalAgainst',
                                           'ShotsOnTargetAgaint','Saves','SavePercentage','CleanSheet','PenaltyFaced','PenaltySaved']]

# Convert to a dictionary and insert table into database
dict_pps = p_goalkeeping_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pps)):
    if i in dict_pps:
        columns = ", ".join(dict_pps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pps[i].values())

        sql_query = f'INSERT INTO p_goalkeeping_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful


In [223]:
# Club Goalkeeping Stats
c_goalkeeping_stats = join_squad(df_squad_goalkeeping, df_season)
# Filter for needed data
c_goalkeeping_stats = c_goalkeeping_stats.loc[:,['SeasonID','GoalAgainst','ShotsOnTargetAgaint','Saves',
                                           'SavePercentage','CleanSheet','PenaltyFaced','PenaltySaved']]

# Convert to a dictionary and insert table into database
dict_cps = c_goalkeeping_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_cps)):
    if i in dict_cps:
        columns = ", ".join(dict_cps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_cps[i].values())

        sql_query = f'INSERT INTO c_goalkeeping_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful


### Defensive Stats

In [224]:
# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_defensive_stats""")
execute_query(connection, """DROP TABLE IF EXISTS c_defensive_stats""")

# Create Table
p_defensive_stats_table = """
CREATE TABLE p_defensive_stats(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
    Tackles INT NOT NULL, 
	TacklesWon INT NOT NULL, 
	Blocks INT NOT NULL, 
	Interceptions INT NOT NULL, 
	Clearances INT NOT NULL
)
"""

c_defensive_stats_table = """
CREATE TABLE c_defensive_stats(
	SeasonID INT NOT NULL,
    Tackles INT NOT NULL, 
	TacklesWon INT NOT NULL, 
	Blocks INT NOT NULL, 
	Interceptions INT NOT NULL, 
	Clearances INT NOT NULL
)
"""

execute_query(connection, p_defensive_stats_table)
execute_query(connection, c_defensive_stats_table)

Successful
Successful
Successful
Successful


In [225]:
# Player Defensive Stats
p_defensive_stats = join_player(df_player_defensive, df_season, df_players)
# Filter for needed data
p_defensive_stats = p_defensive_stats.loc[:,['SeasonID','PlayerID','PlayerName','Tackles','TacklesWon','Blocks',
                                           'Interceptions','Clearances']]

# Convert to a dictionary and insert table into database
dict_pps = p_defensive_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_pps)):
    if i in dict_pps:
        columns = ", ".join(dict_pps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_pps[i].values())

        sql_query = f'INSERT INTO p_defensive_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful

In [226]:
# Club Defensive Stats
c_defensive_stats = join_squad(df_squad_defensive, df_season)
# Filter for needed data
c_defensive_stats = c_defensive_stats.loc[:,['SeasonID','Tackles','TacklesWon','Blocks',
                                           'Interceptions','Clearances']]

# Convert to a dictionary and insert table into database
dict_cps = c_defensive_stats.to_dict(orient='index')

sql_command = []

for i in range(len(dict_cps)):
    if i in dict_cps:
        columns = ", ".join(dict_cps[i].keys())
        values = ", ".join(f'"{value}"' for value in dict_cps[i].values())

        sql_query = f'INSERT INTO c_defensive_stats ({columns}) VALUES ({values})'
        sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful


### Per90 Stats

In [None]:
# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS p_per_90""")
execute_query(connection, """DROP TABLE IF EXISTS c_per_90""")

# Create Table
p_per90_stats_table = """
CREATE TABLE p_per_90(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80),
    Goals_Per90 DECIMAL(4,1) NOT NULL, 
    Assists_Per90 DECIMAL(4,1) NOT NULL, 
    GoalContribution_Per90 DECIMAL(4,1) NOT NULL, 
    xG_Per90 DECIMAL(4,1) NOT NULL, 
    xAG_Per90 DECIMAL(4,1) NOT NULL, 
    Shots_Per90 DECIMAL(4,1) NOT NULL, 
    ShotsonTarget_Per90 DECIMAL(4,1) NOT NULL
)
"""

c_per90_stats_table = """
CREATE TABLE c_per_90(
	SeasonID INT NOT NULL,
    Goals_Per90 DECIMAL(4,1) NOT NULL, 
    Assists_Per90 DECIMAL(4,1) NOT NULL, 
    GoalContribution_Per90 DECIMAL(4,1) NOT NULL, 
    xG_Per90 DECIMAL(4,1) NOT NULL, 
    xAG_Per90 DECIMAL(4,1) NOT NULL, 
    Shots_Per90 DECIMAL(4,1) NOT NULL, 
    ShotsonTarget_Per90 DECIMAL(4,1) NOT NULL
)
"""

execute_query(connection, p_per90_stats_table)
execute_query(connection, c_per90_stats_table)

In [None]:
# Player per90 Stats
p_per90_stats = join_player(df_player_defensive, df_season, df_players)
# Filter for needed data
p_defensive_stats = p_defensive_stats.loc[:,['SeasonID','PlayerID','PlayerName','Tackles','TacklesWon','Blocks',
                                           'Interceptions','Clearances']]

p_defensive_stats.head()

### Updating with New Data

In [None]:
# Scrape new data
df_pass23 = foo_table(2023, passing)
df_shoot23 = foo_table(2023, shooting)
df_gk23 = foo_table(2023, goalkeeping)
df_ss23 = foo_table(2023, standard_stats)
df_defense23 = foo_table(2023, defensive)
df_poss23 = foo_table(2023, possession)
df_miscel23 = foo_table(2023, miscell_stats)

# Function to join all tables by stats
def prepare(data):

    # Join individual tables together
    df_join = pd.concat(data, ignore_index=True)

    # Drop both Squad Total and Opponent Total, irrelevant to analysis
    df_joined = df_join.drop(df_join[(df_join['PlayerName'] == 'Opponent Total') | (df_join['PlayerName'] == 'Squad Total')].index).reset_index(drop=True)

    # Create a total to store the squad total for each stat
    df_total = df_join[df_join['PlayerName'] == 'Squad Total'].reset_index(drop=True)
    df_total = df_total.iloc[:,1:]

    return df_joined, df_total

# Use ArsenalDB
update_table = """UPDATE 
SET
WHERE"""

def db_new(season, attribute):

    # Scrape data
    df = foo_table(season, attribute)

    # Clean data
    df_join = df.drop(df_join[(df_join['PlayerName'] == 'Opponent Total') | (df_join['PlayerName'] == 'Squad Total')].index).reset_index(drop=True)

    # Clean Data Accordingly
    clean_data = None

    # Goalkeeping
    if attribute == goalkeeping:
        clean_data = df_join.iloc[:,[0,4,6,8,9,10,11,12,16,18,20]] # Filter Columns 
        clean_data.rename(columns = {'Player':'PlayerName','Save%': 'SavePercentage','SoTA': 'ShotsOnTargetAgaint',
                                        'MP': 'MatchesPlayed' , 'Min': 'MinutesPlayed', 'GA':'GoalAgainst',
                                        'CS': 'CleanSheet', 'PKatt':'PenaltyFaced', 'PKsv':'PenaltySaved'}, inplace=True) # Rename Column Names



    return stat_dict

In [None]:
# Standard

# Player Standard Stats
p_standard_stats = join_player(df_player_standard_stats, df_season, df_players)
# Filter for needed data
p_standard_stats = p_standard_stats.loc[:,['SeasonID','PlayerID','PlayerName','MatchesPlayed','Starts','MinutesPlayed',
                                           'Goals','Assists','GoalContribution','NonPenaltyGoals','PenaltyScored','YellowCard',
                                           'RedCard','xG','xAG','ProgressiveCarries','ProgressivePasses', 'ProgressivePassesRecevied']]

p_standard_stats = p_standard_stats.dropna(subset = ['PlayerID'])

# Club Standard Stats
c_standard_stats = join_squad(df_squad_standard_stats, df_season)
c_standard_stats = c_standard_stats.loc[:,['SeasonID','MatchesPlayed','Goals','xG','xAG','Assists','GoalContribution','NonPenaltyGoals','PenaltyScored','YellowCard',
                                           'RedCard','ProgressiveCarries','ProgressivePasses', 'ProgressivePassesRecevied',
                                           'Goals_Per90','Assists_Per90', 'GoalContribution_Per90','xG_Per90','xAG_Per90']]


# Shooting

# Player Shooting Stats
p_shooting_stats = join_player(df_player_shooting, df_season, df_players)
# Filter for needed data
p_shooting_stats = p_shooting_stats.loc[:,['SeasonID','PlayerID','PlayerName','Goals','Shots','ShotsonTarget',
                                           'ShotsonTargetPercent','GoalsPerShot','GoalsPerShotsonTarget','ShotDistance','xG']]

# Club Shooting Stats
c_shooting_stats = join_squad(df_squad_shooting, df_season)
c_shooting_stats = c_shooting_stats.loc[:,['SeasonID','Goals','xG','Shots','ShotsonTarget','ShotsonTargetPercent',
                                           'GoalsPerShot','GoalsPerShotsonTarget','ShotDistance']]


# Passing 
# Player Passing Stats
p_passing_stats = join_player(df_player_pass, df_season, df_players)
# Filter for needed data
p_passing_stats = p_passing_stats.loc[:,['SeasonID','PlayerID','PlayerName','CompletedPass','AttemptedPass','PassCompletionPercent',
                                           'KeyPass','KeyPass_Final3rd','PassesPenaltyArea']]

# Club Passing Stats
c_passing_stats = join_squad(df_squad_pass, df_season)
c_passing_stats = c_passing_stats.loc[:,['SeasonID','CompletedPass','AttemptedPass','PassCompletionPercent',
                                         'KeyPass','KeyPass_Final3rd', 'PassesPenaltyArea']]

# Convert to a dictionary and insert table into database
dict_cps = c_passing_stats.to_dict(orient='index')


# Possession
# Player Possession Stats
p_possession_stats = join_player(df_player_possession, df_season, df_players)
# Filter for needed data
p_possession_stats = p_possession_stats.loc[:,['SeasonID','PlayerID','PlayerName','Touches','TouchesDefense','AttemptedTakeons',
                                           'SuccessfulTakeons','SuccessfulTakeonsPercent','Carries','TotalCarryDistance']]


# Club Possession Stats
c_possession_stats = join_squad(df_squad_possession, df_season)
# Filter for needed data
c_possession_stats = c_possession_stats.loc[:,['SeasonID','Touches','TouchesDefense','AttemptedTakeons',
                                           'SuccessfulTakeons','SuccessfulTakeonsPercent','Carries','TotalCarryDistance']]


# Miscell
# Player Miscell Stats
p_miscell_stats = join_player(df_player_miscell_stats, df_season, df_players)
# Filter for needed data
p_miscell_stats = p_miscell_stats.loc[:,['SeasonID','PlayerID','PlayerName','FoulsCommitted','FoulsDrawn','Offside',
                                           'BallRecoveries','AerialDuelsWon','AerialDuelsLost']]

# Club Miscell Stats
c_miscell_stats = join_squad(df_squad_miscell_stats, df_season)
# Filter for needed data
c_miscell_stats = c_miscell_stats.loc[:,['SeasonID','FoulsCommitted','FoulsDrawn','Offside',
                                           'BallRecoveries','AerialDuelsWon','AerialDuelsLost']]

#Goalkeeping
# Player Goalkeeping Stats
p_goalkeeping_stats = join_player(df_player_goalkeeping, df_season, df_players)
# Filter for needed data
p_goalkeeping_stats = p_goalkeeping_stats.loc[:,['SeasonID','PlayerID','PlayerName','MatchesPlayed','MinutesPlayed','GoalAgainst',
                                           'ShotsOnTargetAgaint','Saves','SavePercentage','CleanSheet','PenaltyFaced','PenaltySaved']]

# Club Goalkeeping Stats
c_goalkeeping_stats = join_squad(df_squad_goalkeeping, df_season)
# Filter for needed data
c_goalkeeping_stats = c_goalkeeping_stats.loc[:,['SeasonID','GoalAgainst','ShotsOnTargetAgaint','Saves',
                                           'SavePercentage','CleanSheet','PenaltyFaced','PenaltySaved']]

# Defence
# Player Defensive Stats
p_defensive_stats = join_player(df_player_defensive, df_season, df_players)
# Filter for needed data
p_defensive_stats = p_defensive_stats.loc[:,['SeasonID','PlayerID','PlayerName','Tackles','TacklesWon','Blocks',
                                           'Interceptions','Clearances']]

# Club Defensive Stats
c_defensive_stats = join_squad(df_squad_defensive, df_season)
# Filter for needed data
c_defensive_stats = c_defensive_stats.loc[:,['SeasonID','Tackles','TacklesWon','Blocks',
                                           'Interceptions','Clearances']]

In [None]:
df_pass23 = foo_table(2023, passing)
df_shoot23 = foo_table(2023, shooting)
df_gk23 = foo_table(2023, goalkeeping)
df_ss23 = foo_table(2023, standard_stats)
df_defense23 = foo_table(2023, defensive)
df_poss23 = foo_table(2023, possession)
df_miscel23 = foo_table(2023, miscell_stats)

# Function to join all tables by stats
def prepare(data):

    # Join individual tables together
    df_join = pd.concat(data, ignore_index=True)

    # Drop both Squad Total and Opponent Total, irrelevant to analysis
    df_joined = df_join.drop(df_join[(df_join['PlayerName'] == 'Opponent Total') | (df_join['PlayerName'] == 'Squad Total')].index).reset_index(drop=True)

    # Create a total to store the squad total for each stat
    df_total = df_join[df_join['PlayerName'] == 'Squad Total'].reset_index(drop=True)
    df_total = df_total.iloc[:,1:]


    return df_joined, df_total

# Join the tables
df_player_pass, df_squad_pass = prepare(passing)
df_player_shooting, df_squad_shooting = prepare(shooting)
df_player_possession, df_squad_possession = prepare(possession)
df_player_miscell_stats, df_squad_miscell_stats = prepare(miscell_stats)
df_player_defensive,df_squad_defensive = prepare(defensive)
df_player_goalkeeping, df_squad_goalkeeping = prepare(goalkeeping)
df_player_standard_stats, df_squad_standard_stats = prepare(standard_stats)