# Player's Average Ratings

In [1]:
# Import required libraries
import pandas as pd
import requests
import time

# Database connector 
import mysql.connector
from mysql.connector import Error

In [2]:
# Connect to the database
connection = mysql.connector.connect(
    host = 'localhost',
    port = 3306,
    user = 'root',
    password = '#',
    database = 'arsenaldb'
)

# Function to execute a SQL query 
def execute_query(connection, query):
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        connection.commit()
        print("Successful")
    except Error as err:
        print(f"Error: '{err}'")

# Read and Run Query
def read_query(connection, query):
    cursor = connection.cursor()
    result = None
    try:
        cursor.execute(query)
        result = cursor.fetchall() # Reads data from the database without making any changes to it.
        return result
    except Error as err:
        print(f'Error: "{err}"')

# Use DB
execute_query(connection, """USE ArsenalDB""")

Successful


Create Season Ratings Table

In [57]:
# Drop Table
execute_query(connection, """DROP TABLE IF EXISTS seasonrating""")

# Create Table
ratings_table = """
CREATE TABLE seasonrating(
    SeasonID INT NOT NULL,
    PlayerID INT NOT NULL,
    PlayerName VARCHAR(80) NOT NULL,
    Ratings DECIMAL(4,2) NOT NULL
)
"""
execute_query(connection, ratings_table)

Successful
Successful


In [3]:
# Assign the seasons accordingly
season_mapping = {
    '2023-2024': 20720,
    '2022-2023' : 17664,
    '2021-2022' : 16390,
    '2020-2021' : 15382,
    '2019-2020' : 14022
}

# Function takes the season, performs data cleaning and returns ratings
def ratings(season):

    # Scrape data using API
    headers = {
    'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
    'Referer': 'https://www.fotmob.com/leagues/47/stats/season/20720/players/rating/team/9825',
    'x-fm-req': 'eyJib2R5Ijp7ImNvZGUiOjE3MDYxMzExOTE5Mjl9LCJzaWduYXR1cmUiOiJCNjg0Mjg0ODFEQTY5NjQ3MEVCNTFFQjkyM0RFNjcyQSJ9',
    'sec-ch-ua-mobile': '?0',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'sec-ch-ua-platform': '"Windows"',
    }

    params = {
        'id': '47',
        'season': season_mapping[season],
        'type': 'players',
        'stat': 'rating',
        'teamId': '9825',
    }

    response = requests.get('https://www.fotmob.com/api/leagueseasondeepstats', params=params, headers=headers)

    # Check the response status code 
    if response.status_code == 200:
        playerRatings = response.json()

        statsData = playerRatings['statsData']

        # Store player names
        names = [player['name'] for player in statsData]

        # Store ratings in a list
        ratings = [statsData[i]['statValue']['value'] for i in range(len(statsData))] 

        # Convert the dictionary into a dataframe
        df_ratings = pd.DataFrame.from_dict(statsData)
        
        df_ratings['PlayerName'] = names
        df_ratings['Ratings'] = ratings
        
        # Filter for needed columns and insert a new colum to assgin the appropriate season
        df_ratings = df_ratings.loc[:,['PlayerName','Ratings']]
        df_ratings['SeasonName'] = season

        # Update naming in ratings table
        df_ratings.loc[df_ratings['PlayerName'] == "Daniel Ceballos", 'PlayerName'] = "Dani Ceballos"

        df_ratings.loc[df_ratings['PlayerName'] == "Matteo Guendouzi", 'PlayerName'] = "Mattéo Guendouzi"

        df_ratings.loc[df_ratings['PlayerName'] == "Joseph Willock", 'PlayerName'] = "Joe Willock"

        df_ratings.loc[df_ratings['PlayerName'] == "Hector Bellerin", 'PlayerName'] = "Héctor Bellerín"

        df_ratings.loc[df_ratings['PlayerName'] == "Gabriel", 'PlayerName'] = "Gabriel Dos Santos"

        df_ratings.loc[df_ratings['PlayerName'] == "Edward Nketiah", 'PlayerName'] = "Eddie Nketiah"

        return df_ratings
    
    else:
        f'Wrong response: {response.status_code}'
    

## Extract Data

In [4]:
# Save old season table
# Run for each year   
df_ratings_19 = ratings('2019-2020')
df_ratings_20 = ratings('2020-2021')
df_ratings_21 = ratings('2021-2022')
df_ratings_22 = ratings('2022-2023')
df_ratings_23 = ratings('2023-2024')

# Join all the old data together into a table,
previous_season = [df_ratings_19,df_ratings_20,df_ratings_21,df_ratings_22, df_ratings_23]
df_ratings_table = pd.concat(previous_season, ignore_index=True)

# Export table to .csv
#df_ratings_1922_table.to_csv("/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_ratings19-22_raw.csv", index=False, encoding = 'utf-8-sig')

In [60]:
# Sample result
df_ratings_table.describe

<bound method NDFrame.describe of                    PlayerName  Ratings SeasonName
0   Pierre-Emerick Aubameyang     7.03  2019-2020
1                Nicolas Pépé     6.92  2019-2020
2             Daniel Ceballos     6.79  2019-2020
3                Granit Xhaka     6.77  2019-2020
4         Alexandre Lacazette     6.72  2019-2020
..                        ...      ...        ...
67                Kai Havertz     7.08  2023-2024
68                 David Raya     6.90  2023-2024
69           Leandro Trossard     6.88  2023-2024
70          Takehiro Tomiyasu     6.81  2023-2024
71             Edward Nketiah     6.71  2023-2024

[72 rows x 3 columns]>

Pull Season and Player ID Information

In [4]:

# Then join with the new data in the function
def db_join(ratings_data, season_table, players_table):

    df_table = pd.merge(ratings_data, season_table, on = 'SeasonName', how = 'left')
    df_full = pd.merge(df_table, players_table, on = 'PlayerName', how = 'left')

    # Filter
    df_full = df_full.loc[:,['SeasonID','PlayerID','PlayerName','Ratings']]

    # Convert to dictionary
    dict_ratings = df_full.to_dict(orient='index')

    return dict_ratings


In [5]:
# Season
season_table = """SELECT *FROM season"""
season_result = read_query(connection, season_table)
season_columns = ['SeasonID','SeasonName','League']

# Convert to pandas dataframe
df_season = pd.DataFrame(season_result, columns=season_columns)
df_season

Unnamed: 0,SeasonID,SeasonName,League
0,1,2019-2020,English Premier League
1,2,2020-2021,English Premier League
2,3,2021-2022,English Premier League
3,4,2022-2023,English Premier League
4,5,2023-2024,English Premier League


In [6]:
# Players 
players_table = """SELECT * FROM players"""
players_result = read_query(connection, players_table)
players_column = ['PlayerID','PlayerName','NationCode','Nation']

# Convert to a dataframe
df_players = pd.DataFrame(players_result, columns=players_column)
df_players.head()

Unnamed: 0,PlayerID,PlayerName,NationCode,Nation
0,1,Bernd Leno,de,Germany
1,2,Matt Macey,eng,England
2,3,Sokratis Papastathopoulos,gr,Greece
3,4,Shkodran Mustafi,de,Germany
4,5,Rob Holding,eng,England


Join all tables

In [None]:
# Convert to a dictionary
dict_ratings = db_join(df_ratings_table, df_season, df_players)
sql_command = []

for i in range(len(dict_ratings)):
    columns = ", ".join(dict_ratings[i].keys())
    values = ", ".join(f'"{value}"' for value in dict_ratings[i].values())

    sql_query = f'INSERT INTO seasonrating ({columns}) VALUES ({values})'
    sql_command.append(sql_query)

# Execute the query to insert data into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Convert to a dictionary and insert table into database

### Fetch and Update Table with New data

In [8]:
# Scrape new data
new_data = ratings('2023-2024')
new_joined = db_join(new_data, df_season, df_players)

sql_command = []

for record in new_joined.values():
    update_parts = []
    for column, value in record.items():
        # For string values, add quotes around the value
        if isinstance(value, str):
            formatted_value = f"'{value}'"
        else:
            formatted_value = str(value)
        update_parts.append(f"{column} = {formatted_value}")
    update_set = ", ".join(update_parts)
    sql_query = f"UPDATE seasonrating SET {update_set} WHERE SeasonID = 5 AND PlayerID = {record['PlayerID']}"
    sql_command.append(sql_query)

# Execute the query to insert into the table
for query in sql_command:
    try:
        execute_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful
Successful


In [9]:
# Run Query
run_table = """SELECT * 
FROM seasonrating;"""
results = read_query(connection, run_table)

# View as dataframe
# Convert to pandas dataframe
league_columns = ['SeasonID','PlayerID','PlayerName','Ratings']
df_table = pd.DataFrame(results, columns = league_columns)
df_table[df_table['SeasonID']==5]

Unnamed: 0,SeasonID,PlayerID,PlayerName,Ratings
58,5,35,Bukayo Saka,8.03
59,5,91,Declan Rice,7.7
60,5,46,Martin Ødegaard,7.62
61,5,78,Oleksandr Zinchenko,7.29
62,5,34,Gabriel Martinelli,7.31
63,5,49,Gabriel Dos Santos,7.28
64,5,43,William Saliba,7.21
65,5,56,Ben White,7.22
66,5,74,Gabriel Jesus,7.09
67,5,89,Kai Havertz,7.11


In [10]:
# Close connection
connection.close()