## Transfer Expenditure and Income

In [3]:
# imports
import bs4 as bs
import requests
import pandas as pd
from tqdm import tqdm
import numpy as np
import pickle
import sys
import pyarrow as pa

In [4]:
def create_soup(url, timeout=30):
    """Download *url* and return its HTML parsed as a BeautifulSoup tree.

    The request is sent with a desktop Safari ``User-Agent`` header.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``bs.BeautifulSoup`` object built from the response body with the
        ``'html.parser'`` backend.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within *timeout* seconds.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                             "AppleWebKit/605.1.15 (KHTML, like Gecko) "
                             "Version/15.4 Safari/605.1.15"}
    page = requests.get(url, headers=headers, timeout=timeout)
    # Fail here on an error response rather than parsing an error page and
    # hitting a confusing "table not found" failure further downstream.
    page.raise_for_status()
    return bs.BeautifulSoup(page.content, 'html.parser')

In [2]:
# read the transfer records from CSV, then preview the first five rows
transfer_df = pd.read_csv(
    filepath_or_buffer='data/all_transfers_no_duplicates_updated.csv',
)
transfer_df.head(5)

Unnamed: 0,Season,Player,From_Club,To_Club,From_League,To_League,Fee
0,18/19,Riyad Mahrez,leicester-city,manchester-city,GB1,GB1,67800000.0
1,18/19,Ante Palaversa,hnk-hajduk-split,manchester-city,KR1,GB1,6300000.0
2,18/19,Philippe Sandler,pec-zwolle,manchester-city,NL1,GB1,2500000.0
3,18/19,Ko Itakura,kawasaki-frontale,manchester-city,JAP1,GB1,1100000.0
4,18/19,Daniel Arzani,melbourne-city-fc,manchester-city,AUS1,GB1,890000.0


In [5]:
# Transfermarkt overview page for competition GB1 with saison_id=2018,
# downloaded and parsed via create_soup
url = 'https://www.transfermarkt.com/premier-league/startseite/wettbewerb/GB1/plus/?saison_id=2018'
soup = create_soup(url=url)

In [21]:
# Club slug for the first row of the first 'items' table: take the club
# link's href, split it on '/', and keep the element at index 1.
(
    soup.find_all('table', class_='items')[0]
    .find('tbody')
    .find_all('tr')[0]
    .find('td', class_='hauptlink no-border-links')
    .find('a')
    .get('href')
    .split('/')[1]
)

'manchester-city'

In [41]:
# Build one DataFrame per season holding, for every club listed on the
# league's Transfermarkt overview page, the summed transfer fees paid
# (Expenditure) and received (Income) according to transfer_df.
#
# years[i] is the saison_id used in the URL; seasons[i] is the matching
# value in transfer_df['Season'].
years = ["2018", "2019", "2020", "2021", "2022"]
seasons = ["18/19", "19/20", "20/21", "21/22", "22/23"]
dfs = []
league_id = 'FR1'
league_code = 'ligue-1'

for year, main_season in zip(years, seasons):

    url = f'https://www.transfermarkt.com/{league_code}/startseite/wettbewerb/{league_id}/plus/?saison_id={year}'
    soup = create_soup(url)
    # rows of the first 'items' table; one <tr> per club
    table = soup.find_all('table', class_='items')[0].find('tbody').find_all('tr')

    # The season filter and the fee totals do not depend on the club, so
    # compute them once per season instead of rebuilding two boolean masks
    # over the whole transfer table for every club row.
    season_transfers = transfer_df.loc[transfer_df['Season'] == main_season]
    spent_by_club = season_transfers.groupby('To_Club')['Fee'].sum()
    earned_by_club = season_transfers.groupby('From_Club')['Fee'].sum()

    club_codes = []
    club_names = []
    expenditures = []
    incomes = []

    for row in tqdm(table):
        club_section = row.find('td', class_='hauptlink no-border-links').find('a')
        club_name = club_section.string
        club_link = club_section.get('href')
        # href is split on '/'; index 1 holds the club slug that is matched
        # against From_Club / To_Club in transfer_df
        club_code = club_link.split('/')[1]

        # a club with no recorded transfers in this season totals 0.0,
        # matching what summing an empty selection would give
        exp = spent_by_club.get(club_code, 0.0)
        inc = earned_by_club.get(club_code, 0.0)

        club_codes.append(club_code)
        club_names.append(club_name)
        expenditures.append(exp)
        incomes.append(inc)

    data = {'Season': [main_season]*len(club_codes),
            'League': [league_id]*len(club_codes),
            'club_code': club_codes,
            'club_name': club_names,
            'Expenditure': expenditures,
            'Income': incomes}
    df = pd.DataFrame(data)
    dfs.append(df)

100%|██████████| 20/20 [00:00<00:00, 614.11it/s]
100%|██████████| 20/20 [00:00<00:00, 526.08it/s]
100%|██████████| 20/20 [00:00<00:00, 643.62it/s]
100%|██████████| 20/20 [00:00<00:00, 295.33it/s]
100%|██████████| 20/20 [00:00<00:00, 366.56it/s]


In [42]:
# Stack the per-season frames. ignore_index=True gives every row a unique
# label instead of repeating each season frame's own 0..n-1 labels.
main_df = pd.concat(dfs, ignore_index=True)

# Preview the first rows of the combined frame
main_df.head()

Unnamed: 0,Season,League,club_code,club_name,Expenditure,Income
0,18/19,FR1,fc-paris-saint-germain,Paris Saint-Germain,262000000.0,109500000.0
1,18/19,FR1,olympique-lyon,Olympique Lyon,46450000.0,90100000.0
2,18/19,FR1,as-monaco,AS Monaco,145400000.0,361100000.0
3,18/19,FR1,losc-lille,LOSC Lille,29100000.0,67700000.0
4,18/19,FR1,olympique-marseille,Olympique Marseille,66000000.0,32850000.0


In [43]:
# persist main_df as a CSV file, leaving out the row index
main_df.to_csv(path_or_buf='L1_exp_inc.csv', index=False)

In [44]:
# read each *_exp_inc.csv file and write them back out as one combined CSV
file_names = [
    'PL_exp_inc.csv',
    'LL_exp_inc.csv',
    'BL_exp_inc.csv',
    'SA_exp_inc.csv',
    'L1_exp_inc.csv',
]
dfs = [pd.read_csv(file_name) for file_name in file_names]

# renumber rows so labels are unique across the stacked files
combined_df = pd.concat(dfs, ignore_index=True)
combined_df.to_csv('exp_inc.csv', index=False)