# Club Transfers

In [22]:
# Import required libraries
import requests
import re
from bs4 import BeautifulSoup
import datetime as dt
import pandas as pd

In [23]:
# Map each season's starting year to its 'YYYY-YYYY' label (2023 down to 2019)
season_mapping = {
    start_year: f'{start_year}-{start_year + 1}'
    for start_year in range(2023, 2018, -1)
}

## Function to scrape - Departures


In [24]:
def departure(season, timeout=30):
    """Scrape one season of Arsenal departures from Transfermarkt.

    Parameters
    ----------
    season : int
        Season start year; must be a key of ``season_mapping``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``tr.odd`` / ``tr.even`` row found inside ``div#yw2``
        (all odd rows first, then all even rows), with the first six text
        fields named Player, Position, Age, Club, League and
        Transfer_Type_Amount, plus constant ``Status`` ('Out') and
        ``season_name`` columns.

    Raises
    ------
    KeyError
        If ``season`` is not in ``season_mapping``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Resolve the label first so an unknown season fails before any request is sent
    season_name = season_mapping[season]

    headers = {
        "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    url = f'https://www.transfermarkt.com/arsenal-fc/transfers/verein/11/plus/?saison_id={season}&pos=&detailpos=&w_s='

    # A timeout keeps the cell from hanging forever; raise_for_status stops us
    # from silently parsing an error/blocked page into an empty table.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Departure HTML classes
    odd_dept = soup.select('div[id="yw2"] tr[class="odd"]')
    even_dept = soup.select('div[id="yw2"] tr[class="even"]')

    # Split each row's text on newlines and keep only the non-blank, stripped fields.
    # NOTE(review): blank cells are dropped, so the remaining fields shift left
    # if a row has an empty cell - confirm this cannot happen on the page.
    dept_list = [
        [value.strip() for value in row.text.split('\n') if value.strip()]
        for row in [*odd_dept, *even_dept]
    ]

    # Name the positional fields
    column_names = {0: 'Player',
                    1: 'Position',
                    2: 'Age',
                    3: 'Club',
                    4: 'League',
                    5: 'Transfer_Type_Amount'}
    df_departures = pd.DataFrame(dept_list).rename(columns=column_names)

    # Keep the schema stable even when no row supplied a given field
    # (e.g. no rows were found), so later column selections do not fail.
    for name in column_names.values():
        if name not in df_departures.columns:
            df_departures[name] = None

    # Tag every row with its direction and season label
    df_departures['Status'] = 'Out'
    df_departures['season_name'] = season_name

    return df_departures

In [25]:
# Scrape the departures of the four completed seasons, oldest first
df_departures_19, df_departures_20, df_departures_21, df_departures_22 = map(
    departure, (2019, 2020, 2021, 2022)
)

In [26]:
# Preview the first rows of the 2022 departures table
df_departures_22.head()

Unnamed: 0,Player,Position,Age,Club,League,Transfer_Type_Amount,Status,season_name
0,Mattéo Guendouzi,Central Midfield,23,Marseille,Ligue 1,€11.00m,Out,2022-2023
1,Bernd Leno,Goalkeeper,30,Fulham,Premier League,€3.60m,Out,2022-2023
2,Héctor Bellerín,Right-Back,27,Barcelona,LaLiga,free transfer,Out,2022-2023
3,Nicolas Pépé,Right Winger,27,OGC Nice,Ligue 1,loan transfer,Out,2022-2023
4,Nuno Tavares,Left-Back,22,Marseille,Ligue 1,loan transfer,Out,2022-2023


## Function to scrape - Arrivals


In [27]:
def arrivals(season, timeout=30):
    """Scrape one season of Arsenal arrivals from Transfermarkt.

    Parameters
    ----------
    season : int
        Season start year; must be a key of ``season_mapping``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``tr.odd`` / ``tr.even`` row found inside ``div#yw1``
        (all odd rows first, then all even rows), with the first six text
        fields named Player, Position, Age, Club, League and
        Transfer_Type_Amount, plus constant ``Status`` ('In') and
        ``season_name`` columns.

    Raises
    ------
    KeyError
        If ``season`` is not in ``season_mapping``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Resolve the label first so an unknown season fails before any request is sent
    season_name = season_mapping[season]

    headers = {
        "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    url = f'https://www.transfermarkt.com/arsenal-fc/transfers/verein/11/plus/?saison_id={season}&pos=&detailpos=&w_s='

    # A timeout keeps the cell from hanging forever; raise_for_status stops us
    # from silently parsing an error/blocked page into an empty table.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Arrival HTML classes
    odd_arrive = soup.select('div[id="yw1"] tr[class="odd"]')
    even_arrive = soup.select('div[id="yw1"] tr[class="even"]')

    # Split each row's text on newlines and keep only the non-blank, stripped fields.
    # NOTE(review): blank cells are dropped, so the remaining fields shift left
    # if a row has an empty cell - confirm this cannot happen on the page.
    arrival_list = [
        [value.strip() for value in row.text.split('\n') if value.strip()]
        for row in [*odd_arrive, *even_arrive]
    ]

    # Name the positional fields
    column_names = {0: 'Player',
                    1: 'Position',
                    2: 'Age',
                    3: 'Club',
                    4: 'League',
                    5: 'Transfer_Type_Amount'}
    df_arrivals = pd.DataFrame(arrival_list).rename(columns=column_names)

    # Keep the schema stable even when no row supplied a given field
    # (e.g. no rows were found), so later column selections do not fail.
    for name in column_names.values():
        if name not in df_arrivals.columns:
            df_arrivals[name] = None

    # Tag every row with its direction and season label
    df_arrivals['Status'] = 'In'
    df_arrivals['season_name'] = season_name

    return df_arrivals

In [28]:
# Scrape the arrivals of the four completed seasons, oldest first
df_arrivals_19, df_arrivals_20, df_arrivals_21, df_arrivals_22 = map(
    arrivals, (2019, 2020, 2021, 2022)
)

In [29]:
# Function to join all transfers
def transfers(departure, arrival):
    """Stack a departures frame on top of an arrivals frame.

    The rows of ``departure`` come first, followed by the rows of ``arrival``;
    the result is re-indexed from 0 so the original row labels are discarded.
    """
    stacked_frames = [departure, arrival]
    return pd.concat(stacked_frames, ignore_index=True)

In [30]:
# Combine each season's departures with the matching arrivals
df_transfers_19, df_transfers_20, df_transfers_21, df_transfers_22 = map(
    transfers,
    (df_departures_19, df_departures_20, df_departures_21, df_departures_22),
    (df_arrivals_19, df_arrivals_20, df_arrivals_21, df_arrivals_22),
)

# Stack the four seasons into one table and put the columns in export order
previous_season_transfers = [
    df_transfers_19,
    df_transfers_20,
    df_transfers_21,
    df_transfers_22,
]
df_transfers_1922_table = pd.concat(previous_season_transfers, ignore_index=True)[
    ['Player', 'Position', 'Age', 'Transfer_Type_Amount',
     'Status', 'Club', 'League', 'season_name']
]

# Write the historical table to disk (the BOM-prefixed UTF-8 encoding is kept as-is)
df_transfers_1922_table.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_transfers19-22_raw.csv",
    index=False,
    encoding='utf-8-sig',
)


In [31]:
# Scrape the current season's departures and arrivals
df_departures_23 = departure(2023)
df_arrivals_23 = arrivals(2023)

# Combine them and put the columns in the same order as the historical export
df_transfers_23 = transfers(df_departures_23, df_arrivals_23)
new_table = df_transfers_23.loc[:,['Player','Position','Age','Transfer_Type_Amount',
                                               'Status','Club','League','season_name']]

# Read the historical table written by the previous cell
previous_table = pd.read_csv("/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_transfers19-22_raw.csv")

# Append the new season to the historical rows.
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# both inputs contribute labels starting at 0, leaving duplicate index labels.
joined_transfers = pd.concat([previous_table, new_table], ignore_index=True)

# Export the updated table
joined_transfers.to_csv("/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Updated tables/temp_updatedtransfers_raw.csv", index=False, encoding = 'utf-8-sig')


In [35]:
# Display the 2023 departures table scraped above
df_departures_23

Unnamed: 0,Player,Position,Age,Club,League,Transfer_Type_Amount,Status,season_name
0,Folarin Balogun,Centre-Forward,22,Monaco,Ligue 1,€30.00m,Out,2023-2024
1,Matt Turner,Goalkeeper,29,Nottm Forest,Premier League,€8.15m,Out,2023-2024
2,Pablo Marí,Centre-Back,29,Monza,Serie A,€4.90m,Out,2023-2024
3,Rob Holding,Centre-Back,27,Crystal Palace,Premier League,€1.20m,Out,2023-2024
4,Marquinhos,Right Winger,20,FC Nantes,Ligue 1,Loan fee:€300k,Out,2023-2024
5,Ainsley Maitland-Niles,Central Midfield,25,Olympique Lyon,Ligue 1,free transfer,Out,2023-2024
6,Albert Sambi Lokonga,Central Midfield,23,Luton,Premier League,loan transfer,Out,2023-2024
7,Rúnar Alex Rúnarsson,Goalkeeper,28,Cardiff,Championship,loan transfer,Out,2023-2024
8,Granit Xhaka,Defensive Midfield,30,B. Leverkusen,Bundesliga,€15.00m,Out,2023-2024
9,Auston Trusty,Centre-Back,24,Sheff Utd,Premier League,€5.80m,Out,2023-2024


# Scrape Players Information 

In [32]:
def players(season, timeout=30):
    """Scrape one season of the Arsenal squad list from Transfermarkt.

    Parameters
    ----------
    season : int
        Season start year; must be a key of ``season_mapping``.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    pandas.DataFrame
        One row per ``tr.odd`` / ``tr.even`` row found inside
        ``table.items`` (all odd rows first, then all even rows). Only the
        first three text fields are kept, named ShirtNumber, PlayerName and
        Position, plus a constant ``season_name`` column.

    Raises
    ------
    KeyError
        If ``season`` is not in ``season_mapping``.
    requests.exceptions.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # Resolve the label first so an unknown season fails before any request is sent
    season_name = season_mapping[season]

    headers = {
        "User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
    }

    url = f'https://www.transfermarkt.com/arsenal-fc/kader/verein/11/plus/0/galerie/0?saison_id={season}'

    # A timeout keeps the cell from hanging forever; raise_for_status stops us
    # from silently parsing an error/blocked page into an empty table.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()

    soup = BeautifulSoup(response.content, "html.parser")

    # Squad rows alternate between the "odd" and "even" row classes
    odd_player = soup.select('table[class="items"] tr[class="odd"]')
    even_player = soup.select('table[class="items"] tr[class="even"]')

    # Split each row's text on newlines and keep only the non-blank, stripped fields.
    # NOTE(review): blank cells are dropped, so the remaining fields shift left
    # if a row has an empty cell (e.g. no shirt number) - confirm against the page.
    player_list = [
        [value.strip() for value in row.text.split('\n') if value.strip()]
        for row in [*odd_player, *even_player]
    ]

    # Keep the first three fields and name them; rename() returns a new frame,
    # so the column assignments below do not write into a slice of the raw table.
    column_names = {0: 'ShirtNumber',
                    1: 'PlayerName',
                    2: 'Position'}
    df_players = (
        pd.DataFrame(player_list)
        .iloc[:, 0:3]
        .rename(columns=column_names)
    )

    # Keep the schema stable even when no row supplied a given field
    for name in column_names.values():
        if name not in df_players.columns:
            df_players[name] = None

    df_players['season_name'] = season_name

    return df_players


### Historical Data

In [33]:
# Scrape the squad lists of the four completed seasons, oldest first
df_players_19, df_players_20, df_players_21, df_players_22 = map(
    players, (2019, 2020, 2021, 2022)
)

# Stack the four seasons into a single table with a fresh index
previous_season = [
    df_players_19,
    df_players_20,
    df_players_21,
    df_players_22,
]
df_players_1922_table = pd.concat(previous_season, ignore_index=True)

# Write the historical squad table to disk
df_players_1922_table.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_players19-22_raw.csv",
    index=False,
    encoding='utf-8-sig',
)

### Current Season Data
Run the code below to add new data for the current season to the existing data above, then export the updated data.

In [34]:
# Scrape the current season's squad (re-run this cell to refresh it)
df_players_23 = players(2023)

# Load the historical squad table from its CSV export
previous_table = pd.read_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Old Season/temp_players19-22_raw.csv"
)

# Append the new season to the old ones, renumbering the rows from 0
joined_season = pd.concat(
    [previous_table, df_players_23],
    ignore_index=True,
)

# Save the combined (old + new) table
joined_season.to_csv(
    "/Users/Razak/Desktop/Arsenal Database Project/scraped csv files/Raw csv/Updated tables/temp_updatedplayers_raw.csv",
    index=False,
    encoding='utf-8-sig',
)