# League Table

In [51]:
# Import required libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

In [52]:
# Map each season's starting year to its "start-end" label
season_mapping = {
    start_year: f"{start_year}-{start_year + 1}"
    for start_year in (2023, 2022, 2021, 2020, 2019)
}

In [53]:
# Normalise the whitespace-split rows so that they all have the same length
def check(data, n_columns=12):
    """Return the rows of ``data`` adjusted to ``n_columns`` fields each.

    Each row is a list of whitespace-split tokens. A multi-word team name
    is spread over several tokens, which makes the row longer than
    ``n_columns``; the surplus tokens following the first field are joined
    back into a single team-name field.

    Parameters
    ----------
    data : list of list
        Tokenised rows (first field, team-name tokens, remaining fields).
    n_columns : int, optional
        Number of fields every returned row must have (default 12).

    Returns
    -------
    list of list
        The adjusted rows. Rows with fewer than ``n_columns`` tokens are
        dropped.
    """
    cleaned_list = []
    for row in data:
        extra = len(row) - n_columns

        if extra < 0:
            # Too short to be a full table row (e.g. a blank line): skip it
            continue

        if extra == 0:
            # Already the right length: keep the row untouched
            cleaned_list.append(row)
            continue

        # The team name occupies one token plus every surplus token
        name_end = 2 + extra
        team_name = ' '.join(row[1:name_end])
        cleaned_list.append([row[0], team_name, *row[name_end:]])

    return cleaned_list

In [None]:
# Create a function that takes the season and scrapes the league table
def table(season):
    """Scrape the understat EPL table for ``season`` and return Arsenal's row.

    Parameters
    ----------
    season : int
        Season identifier used both in the understat URL and as the key
        into ``season_mapping``.

    Returns
    -------
    pandas.DataFrame
        The table rows whose 'Team' is 'Arsenal', with renamed columns,
        'Position' cast to int, the xG/xGA/xPTS columns reduced to the part
        before the first '+' or '-', and a 'season_name' column added.

    Raises
    ------
    KeyError
        If ``season`` is not a key of ``season_mapping``.
    """
    # Resolve the label first so an unknown season fails before a browser starts
    season_name = season_mapping[season]

    driver = webdriver.Chrome()
    try:
        driver.get(f"https://understat.com/league/EPL/{season}")
        league_table = driver.find_elements(By.ID, 'league-chemp')[0]
        lines = league_table.text.split('\n')
    finally:
        # Always close the browser, even when loading or the lookup fails
        driver.quit()

    # Break the table into a list of lists and even out the row lengths
    data = [line.split() for line in lines]
    cleaned_list = check(data)

    # Transform to a dataframe, using the first row as the column header
    df = pd.DataFrame(cleaned_list)
    df.columns = df.iloc[0]
    df = df[1:]

    # Keep only Arsenal; copy so the edits below do not act on a slice of df
    df_arsenal = df[df['Team'] == 'Arsenal'].copy()

    # Keep only the part of each value that precedes the first '+' or '-'
    split_columns = ['xG', 'xGA', 'xPTS']
    for col in split_columns:
        df_arsenal[col + '_1'] = df_arsenal[col].str.split(r'[+-]', expand=True)[0]
    # Drop original columns
    df_arsenal = df_arsenal.drop(columns=split_columns)

    # Rename the columns
    df_arsenal = df_arsenal.rename(columns={'№': 'Position',
                                            'M': 'MatchesPlayed',
                                            'W': 'Wins',
                                            'D': 'Draw',
                                            'L': 'Loss',
                                            'G': 'GoalsScored',
                                            'GA': 'GoalsAgainst',
                                            'PTS': 'Points',
                                            'xG_1': 'xG',
                                            'xGA_1': 'xGA',
                                            'xPTS_1': 'xPTS'})

    # Change 'Position' to an integer
    df_arsenal['Position'] = df_arsenal['Position'].astype(int)

    # Add the season label to the table
    df_arsenal['season_name'] = season_name

    return df_arsenal


### Historical Data

In [None]:
# Scrape the historical seasons 2019-2022, oldest first
previous_season = [table(year) for year in (2019, 2020, 2021, 2022)]
df_arsenal_19, df_arsenal_20, df_arsenal_21, df_arsenal_22 = previous_season

# Stack the per-season rows into a single table with a fresh index
df_league_1922_table = pd.concat(previous_season, ignore_index=True)

# Save the combined historical table as CSV
df_league_1922_table.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_leaguetable19-22_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

### Current Season Data
Run the code below to add new data for the current season to the existing data above, then export the updated data.

In [None]:
# Scrape the season that is still changing
df_arsenal_23 = table(2023)

# Load the previously exported old-season table from its CSV file
previous_table = pd.read_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_leaguetable19-22_raw.csv"
)

# Append the new season to the old seasons, renumbering the index
joined_season = pd.concat(
    [previous_table, df_arsenal_23],
    ignore_index=True,
)

# Save the combined (old + new) table as the updated CSV
joined_season.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Updated tables/temp_leaguetable_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

In [57]:
# Preview the first rows of the combined table
joined_season.head()

Unnamed: 0,Position,Team,MatchesPlayed,Wins,Draw,Loss,GoalsScored,GoalsAgainst,Points,xG,xGA,xPTS,season_name
0,8,Arsenal,38,14,14,10,56,48,56,50.82,57.25,50.15,2019-2020
1,8,Arsenal,38,18,7,13,55,39,61,52.25,43.23,58.72,2020-2021
2,5,Arsenal,38,22,3,13,61,48,69,63.39,48.39,64.76,2021-2022
3,2,Arsenal,38,26,6,6,88,43,84,76.51,45.16,72.53,2022-2023
4,2,Arsenal,25,17,4,4,58,22,55,54.53,21.7,52.58,2023-2024
