In [1]:
import pandas as pd

# Read the league-table statistics and the transfer statistics from their CSV files
df_seasons_statistics_table, df_seasons_statistics_transfers = (
    pd.read_csv(csv_path)
    for csv_path in ('seasons_statistics_table.csv', 'seasons_statistics_transfers.csv')
)

In [2]:
# Normalise the 'Teams' column of the league table: remove embedded newlines
# and non-breaking spaces, then trim leading/trailing whitespace
cleaned_team_names = df_seasons_statistics_table['Teams']
cleaned_team_names = cleaned_team_names.str.replace('\n', '', regex=False)
cleaned_team_names = cleaned_team_names.str.replace('\xa0', '', regex=False)
df_seasons_statistics_table['Teams'] = cleaned_team_names.str.strip()

df_seasons_statistics_table

Unnamed: 0,Teams,Places,Matches,Wins,Draws,Loses,Goals,Goals Diff,Points,Year
0,Man City,1,38,28,7,3,96:34,62,91,23/24
1,Arsenal,2,38,28,5,5,91:29,62,89,23/24
2,Liverpool,3,38,24,10,4,86:41,45,82,23/24
3,Aston Villa,4,38,20,8,10,76:61,15,68,23/24
4,Tottenham,5,38,20,6,12,74:61,13,66,23/24
...,...,...,...,...,...,...,...,...,...,...
395,Portsmouth,16,38,10,9,19,43:59,-16,39,04/05
396,West Brom,17,38,6,16,16,36:61,-25,34,04/05
397,Crystal Palace,18,38,7,12,19,41:62,-21,33,04/05
398,Norwich,19,38,7,12,19,42:77,-35,33,04/05


In [3]:
# Rename 'AFC Bournemouth' to 'Bournemouth' in the transfer data so that the
# name sorts into the same alphabetical position as in the league table
# (the name mapping built later pairs the two sorted name lists by position)
df_seasons_statistics_transfers['Teams'] = df_seasons_statistics_transfers['Teams'].replace(
    {'AFC Bournemouth': 'Bournemouth'}
)


In [4]:
# Collect the distinct team names that appear in each DataFrame
unique_teams_table, unique_teams_transfers = (
    frame['Teams'].unique()
    for frame in (df_seasons_statistics_table, df_seasons_statistics_transfers)
)

In [5]:
# Sort both name arrays in place so that position i in one array lines up
# with position i in the other
for team_names in (unique_teams_table, unique_teams_transfers):
    team_names.sort()

In [6]:
# Build a lookup from the transfer-data team names (keys) to the league-table
# team names (values).
# The pairing is purely positional: the i-th sorted transfer name is matched
# with the i-th sorted table name. zip() silently stops at the shorter input,
# so a single extra or missing team on either side would shift later pairs
# and produce wrong matches without any error. Fail loudly instead.
if len(unique_teams_transfers) != len(unique_teams_table):
    raise ValueError(
        "Cannot pair team names by position: "
        f"{len(unique_teams_transfers)} unique names in the transfer data vs "
        f"{len(unique_teams_table)} unique names in the league table"
    )

mapping_dict = dict(zip(unique_teams_transfers, unique_teams_table))
# NOTE(review): equal lengths do not prove every pair is correct — inspect the
# displayed dictionary below to confirm each key maps to the intended club.
mapping_dict

{'Arsenal FC': 'Arsenal',
 'Aston Villa': 'Aston Villa',
 'Birmingham City': 'Birmingham',
 'Blackburn Rovers': 'Blackburn',
 'Blackpool FC': 'Blackpool',
 'Bolton Wanderers': 'Bolton',
 'Bournemouth': 'Bournemouth',
 'Brentford FC': 'Brentford',
 'Brighton & Hove Albion': 'Brighton',
 'Burnley FC': 'Burnley',
 'Cardiff City': 'Cardiff',
 'Charlton Athletic': 'Charlton',
 'Chelsea FC': 'Chelsea',
 'Crystal Palace': 'Crystal Palace',
 'Derby County': 'Derby',
 'Everton FC': 'Everton',
 'Fulham FC': 'Fulham',
 'Huddersfield Town': 'Huddersfield',
 'Hull City': 'Hull City',
 'Leeds United': 'Leeds',
 'Leicester City': 'Leicester',
 'Liverpool FC': 'Liverpool',
 'Luton Town': 'Luton',
 'Manchester City': 'Man City',
 'Manchester United': 'Man Utd',
 'Middlesbrough FC': 'Middlesbrough',
 'Newcastle United': 'Newcastle',
 'Norwich City': 'Norwich',
 'Nottingham Forest': 'Nottm Forest',
 'Portsmouth FC': 'Portsmouth',
 'Queens Park Rangers': 'QPR',
 'Reading FC': 'Reading',
 'Sheffield United

In [7]:
# Translate the transfer-data team names through mapping_dict; any name that
# has no entry in the dictionary keeps its current value
current_team_names = df_seasons_statistics_transfers['Teams']
translated_team_names = current_team_names.map(mapping_dict)
df_seasons_statistics_transfers['Teams'] = translated_team_names.fillna(current_team_names)
df_seasons_statistics_transfers

Unnamed: 0,Teams,Spend,Balance,Year
0,Chelsea,€464.10m,€-186.60m,23/24
1,Tottenham,€272.10m,€-151.40m,23/24
2,Man City,€259.60m,€-133.80m,23/24
3,Arsenal,€235.10m,€-165.90m,23/24
4,Man Utd,€202.30m,€-143.96m,23/24
...,...,...,...,...
395,Blackburn,€7.88m,€-175k,04/05
396,Middlesbrough,€6.75m,€-3.30m,04/05
397,Crystal Palace,€5.15m,€-5.05m,04/05
398,Man City,€1.50m,€6.44m,04/05


In [8]:
# Inner-join the league table with the transfer data on team and season,
# keeping only (Teams, Year) combinations present in both DataFrames
df_merged_data = df_seasons_statistics_table.merge(
    df_seasons_statistics_transfers,
    how='inner',
    on=['Teams', 'Year'],
)
df_merged_data

Unnamed: 0,Teams,Places,Matches,Wins,Draws,Loses,Goals,Goals Diff,Points,Year,Spend,Balance
0,Man City,1,38,28,7,3,96:34,62,91,23/24,€259.60m,€-133.80m
1,Arsenal,2,38,28,5,5,91:29,62,89,23/24,€235.10m,€-165.90m
2,Liverpool,3,38,24,10,4,86:41,45,82,23/24,€172.00m,€-111.30m
3,Aston Villa,4,38,20,8,10,76:61,15,68,23/24,€111.50m,€-78.95m
4,Tottenham,5,38,20,6,12,74:61,13,66,23/24,€272.10m,€-151.40m
...,...,...,...,...,...,...,...,...,...,...,...,...
395,Portsmouth,16,38,10,9,19,43:59,-16,39,04/05,€10.85m,€-4.25m
396,West Brom,17,38,6,16,16,36:61,-25,34,04/05,€15.85m,€-13.90m
397,Crystal Palace,18,38,7,12,19,41:62,-21,33,04/05,€5.15m,€-5.05m
398,Norwich,19,38,7,12,19,42:77,-35,33,04/05,€8.53m,€-8.08m


In [9]:
# Function to convert spend and balance values
def convert_currency(value):
    """Convert a formatted currency value into a plain float amount.

    Parameters
    ----------
    value : str, int, float or None
        A string such as '€7.88m', '€-175k', '€42' or '-'. The '€' symbol is
        removed; an 'm' suffix multiplies by 1e6 and a 'k' suffix by 1e3; a
        lone '-' means zero. Non-string numeric values (including NaN) are
        passed through as floats, and None becomes NaN.

    Returns
    -------
    float
        The numeric amount.

    Raises
    ------
    ValueError
        If a string is still not parseable as a number after the symbol and
        suffix have been removed.
    """
    if value is None:
        return float('nan')
    if not isinstance(value, str):
        # Already numeric: happens when the conversion cell is run a second
        # time on columns that were converted before, or for NaN placeholders.
        # Without this guard the .replace call below raises AttributeError.
        return float(value)

    # Drop the currency symbol and surrounding whitespace before inspecting
    value = value.replace('€', '').strip()
    if value == '-':
        return 0.0
    elif 'm' in value:
        return float(value.replace('m', '')) * 1e6
    elif 'k' in value:
        return float(value.replace('k', '')) * 1e3
    else:
        return float(value)

# Convert the formatted money strings in both monetary columns to floats
for money_column in ('Spend', 'Balance'):
    df_merged_data[money_column] = df_merged_data[money_column].apply(convert_currency)
df_merged_data

Unnamed: 0,Teams,Places,Matches,Wins,Draws,Loses,Goals,Goals Diff,Points,Year,Spend,Balance
0,Man City,1,38,28,7,3,96:34,62,91,23/24,259600000.0,-133800000.0
1,Arsenal,2,38,28,5,5,91:29,62,89,23/24,235100000.0,-165900000.0
2,Liverpool,3,38,24,10,4,86:41,45,82,23/24,172000000.0,-111300000.0
3,Aston Villa,4,38,20,8,10,76:61,15,68,23/24,111500000.0,-78950000.0
4,Tottenham,5,38,20,6,12,74:61,13,66,23/24,272100000.0,-151400000.0
...,...,...,...,...,...,...,...,...,...,...,...,...
395,Portsmouth,16,38,10,9,19,43:59,-16,39,04/05,10850000.0,-4250000.0
396,West Brom,17,38,6,16,16,36:61,-25,34,04/05,15850000.0,-13900000.0
397,Crystal Palace,18,38,7,12,19,41:62,-21,33,04/05,5150000.0,-5050000.0
398,Norwich,19,38,7,12,19,42:77,-35,33,04/05,8530000.0,-8080000.0
