# Player's Wages

In [1]:
# Import required libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [2]:
# Map each season's starting year to the 'YYYY-YYYY' label used in the FBref URLs.
# Built newest-first so the key order matches 2023, 2022, ..., 2019.
season_mapping = {
    start_year: f'{start_year}-{start_year + 1}'
    for start_year in range(2023, 2018, -1)
}

In [3]:
# Define a function that takes a season, scrapes the data and stores in 'df_wages' variable accordingly

def wages(season):
    """Scrape Arsenal's wage table for one season from FBref.

    Parameters
    ----------
    season : str
        Season label inserted into the page URL, e.g. '2022-2023'.

    Returns
    -------
    pandas.DataFrame
        The first five columns of the page's table with id ``wages``, plus a
        ``season_name`` column holding ``season``. Rows whose 'Weekly Wages'
        value is missing are removed.
    """
    page_url = f'https://fbref.com/en/squads/18bb7c10/{season}/wages/Arsenal-Wage-Details'

    # read_html returns a list of tables; take the first one matching the id filter
    matching_tables = pd.read_html(page_url, attrs = {'id':"wages"})
    leading_columns = matching_tables[0].iloc[:, 0:5]

    # Keep only rows that actually carry a weekly wage (drops the blank rows)
    has_weekly_wage = leading_columns['Weekly Wages'].notna()
    return leading_columns.loc[has_weekly_wage].assign(season_name=season)


In [44]:
# Scrape the four historical seasons (oldest first), one frame per season
df_wages_19, df_wages_20, df_wages_21, df_wages_22 = (
    wages(season_mapping[start_year]) for start_year in (2019, 2020, 2021, 2022)
)

# Stack the seasons into a single table with a fresh 0..n-1 index
dfs_to_concat = [df_wages_19, df_wages_20, df_wages_21, df_wages_22]
df_wages_1922_list = pd.concat(dfs_to_concat, ignore_index=True)

# Write the historical table to CSV (no index column, utf-8-sig encoding)
df_wages_1922_list.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_wages19-22_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

Run the cell below to update the database.

In [4]:
# Scrape the current season's wages (re-scraped on every run)
df_wages_23 = wages(season_mapping[2023])

# Load the previously exported historical seasons from CSV
previous_table = pd.read_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_wages19-22_raw.csv"
)

# Append the current season below the historical rows
joined_season = pd.concat(
    [previous_table, df_wages_23],
    ignore_index=True,
)

# Write the combined (old + new) table to the "Updated tables" folder
joined_season.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Updated tables/temp_updatedwages_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

In [6]:
# Spot-check five randomly chosen rows of the combined wages table
joined_season.sample(5)

Unnamed: 0,Player,Nation,Pos,Age,Weekly Wages,season_name
45,Mohamed Elneny,eg EGY,MF,28.0,"£ 50,000 (€ 59,628, $60,761)",2020-2021
16,Calum Chambers,eng ENG,DF,24.0,"£ 50,000 (€ 59,628, $60,761)",2019-2020
73,Eddie Nketiah,eng ENG,FW,22.0,"£ 45,000 (€ 53,665, $54,685)",2021-2022
89,Kieran Tierney,sct SCO,DF,25.0,"£ 110,000 (€ 127,861, $139,811)",2022-2023
102,Marquinhos,br BRA,MF,19.0,"£ 30,000 (€ 34,871, $38,130)",2022-2023


# Player's Nationality

In [7]:
# Function takes the season, performs data cleaning and returns nationality
def nationality(season):
    """Scrape the distinct nationality lines from Arsenal's FBref roster page.

    Opens the roster page for ``season`` in a Chrome WebDriver session, reads
    the text of every element with class ``roster-player-info``, and keeps the
    fourth line of each block (the "National Team: ..." line in the sample
    output of this notebook).

    Parameters
    ----------
    season : str
        Season label inserted into the page URL, e.g. '2022-2023'.

    Returns
    -------
    pandas.DataFrame
        A single ``Nationality`` column with duplicates and missing values
        removed and a clean 0..n-1 index. If no usable blocks are found the
        frame is empty (it still has the ``Nationality`` column).
    """
    page_url = f"https://fbref.com/en/squads/18bb7c10/{season}/roster/Arsenal-Roster-Details"

    # Web scrape using Selenium
    driver = webdriver.Chrome()
    try:
        driver.get(page_url)
        info_blocks = driver.find_elements(By.CLASS_NAME, 'roster-player-info')
        # Read the text while the session is still open; elements are unusable after quit()
        block_lines = [block.text.split('\n') for block in info_blocks]
    finally:
        # Always shut the browser down, otherwise every call leaves a Chrome process behind
        driver.quit()

    # Fourth line of each block; blocks with fewer lines contribute a missing value
    nation_lines = [lines[3] if len(lines) > 3 else None for lines in block_lines]

    df_nations = (
        pd.DataFrame({'Nationality': nation_lines})
        .dropna()
        .drop_duplicates()
        # Reset last so removed rows do not leave gaps in the index
        .reset_index(drop=True)
    )

    return df_nations

### Historical Data
Stores and saves data from past seasons. These values do not change, so this section does not need to be run frequently.

In [47]:
# Scrape the four historical seasons (oldest first), one frame per season
df_nationality_19, df_nationality_20, df_nationality_21, df_nationality_22 = (
    nationality(season_mapping[start_year]) for start_year in (2019, 2020, 2021, 2022)
)

# Stack the historical seasons into one table with a fresh 0..n-1 index
previous_season = [df_nationality_19, df_nationality_20, df_nationality_21, df_nationality_22]
df_nationality_1922_table = pd.concat(previous_season, ignore_index=True)

# Write the historical table to CSV (no index column, utf-8-sig encoding)
df_nationality_1922_table.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_nationality19-22_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

### Current Season Data
Run the code below to add new data for the current season to the existing data above, then export the updated data.

In [8]:
# Scrape the current season's nationalities
df_nationality_23 = nationality(season_mapping[2023])

# Load the previously exported historical seasons from CSV
previous_table = pd.read_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_nationality19-22_raw.csv"
)

# Append the current season below the historical rows
joined_season = pd.concat(
    [previous_table, df_nationality_23],
    ignore_index=True,
)

# Write the combined (old + new) table to the "Updated tables" folder
joined_season.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Updated tables/temp_updatednationality_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

In [9]:
# Spot-check five randomly chosen rows of the combined nationality table
joined_season.sample(5)

Unnamed: 0,Nationality
70,National Team: Estonia ee
57,National Team: Ghana gh
6,National Team: Bosnia and Herzegovina ba
17,National Team: Côte d'Ivoire ci
38,National Team: Ghana gh
