In [1]:
import pandas as pd
# Read the SVC predictions CSV into a DataFrame and show it.
predictions_csv = 'data/machine_learning/predictions/SVC.csv'
df = pd.read_csv(predictions_csv)
df

Unnamed: 0,team_country,team,opponent,is_home,predicted_result
0,ENG,Burnley,Manchester City,1,L
1,ENG,Arsenal,Nottingham,1,W
2,ENG,Bournemouth,West Ham,1,D
3,ENG,Sheffield Utd,Crystal Palace,1,L
4,ENG,Brighton,Luton,1,W
...,...,...,...,...,...
3497,FRA,Clermont,Lorient,0,L
3498,FRA,Strasbourg,Lyon,0,L
3499,FRA,PSG,Metz,0,W
3500,FRA,Nantes,Monaco,0,L


In [2]:
# Order-independent fixture key: both team names sorted alphabetically and
# joined with '_', so "A vs B" and "B vs A" share the same match_id.
df['match_id'] = df.apply(lambda x: '_'.join(sorted([x['team'], x['opponent']])), axis=1)

# Split the rows by the is_home flag; .copy() so the edits below never touch df.
home_df = df[df['is_home'] == 1].copy()
away_df = df[df['is_home'] == 0].copy()

# Home rows: the predicted result is already expressed from the home side.
home_df = home_df.rename(columns={'team_country': 'country', 'team': 'home_team', 'opponent': 'away_team', 'predicted_result': 'home_predicted_result'}).drop(['is_home'], axis=1)

# Away rows: flip W <-> L so the result is expressed from the home side too
# ('D' is left as is). Assign the result back instead of calling
# replace(..., inplace=True) on the selected column: the in-place form is a
# chained assignment that pandas warns about and that does not update
# away_df when Copy-on-Write is enabled.
away_df['predicted_result'] = away_df['predicted_result'].replace({'W': 'L', 'L': 'W'})
away_df = away_df.rename(columns={'team_country': 'country', 'team': 'away_team', 'opponent': 'home_team', 'predicted_result': 'home_predicted_result'}).drop(['is_home'], axis=1)

### Use `how="outer"` in the next cell to show conflicting scores

In [3]:
# Inner join on every column the two frames share (no explicit key is given),
# which includes home_predicted_result: only rows present in both frames with
# identical values survive.
# Alternative that also keeps the non-matching rows:
#   full_game_df = home_df.merge(away_df, how='outer')
full_game_df = home_df.merge(away_df, how='inner')
full_game_df

Unnamed: 0,country,home_team,away_team,home_predicted_result,match_id
0,ENG,Burnley,Manchester City,L,Burnley_Manchester City
1,ENG,Arsenal,Nottingham,W,Arsenal_Nottingham
2,ENG,Bournemouth,West Ham,D,Bournemouth_West Ham
3,ENG,Sheffield Utd,Crystal Palace,L,Crystal Palace_Sheffield Utd
4,ENG,Brighton,Luton,W,Brighton_Luton
...,...,...,...,...,...
1624,FRA,Lorient,Clermont,W,Clermont_Lorient
1625,FRA,Lyon,Strasbourg,W,Lyon_Strasbourg
1626,FRA,Metz,PSG,L,Metz_PSG
1627,FRA,Monaco,Nantes,W,Monaco_Nantes


In [4]:
# Sanity check: count how many merged rows carry each match_id and list any
# id that occurs more than twice, together with the rows involved.
match_id_counts = full_game_df.groupby('match_id').size()
duplicate_match_ids = match_id_counts.loc[match_id_counts.gt(2)]

print("Match IDs that appear more than twice:")
print(duplicate_match_ids)

is_duplicate = full_game_df['match_id'].isin(duplicate_match_ids.index)
duplicate_rows = full_game_df.loc[is_duplicate]
display(duplicate_rows)

Match IDs that appear more than twice:
Series([], dtype: int64)


Unnamed: 0,country,home_team,away_team,home_predicted_result,match_id


In [6]:
def assign_points(row):
    """Return the (home, away) points for one fixture as a two-element Series.

    Reads ``row['home_predicted_result']``:
    'W' gives 3/0 (home win), 'L' gives 0/3 (home loss), and any other
    value is scored as a draw, 1/1.
    """
    outcome = row['home_predicted_result']
    if outcome == 'W':
        home_pts, away_pts = 3, 0
    elif outcome == 'L':
        home_pts, away_pts = 0, 3
    else:
        home_pts, away_pts = 1, 1
    return pd.Series([home_pts, away_pts])
    
# Score every fixture: adds the home_points / away_points columns.
full_game_df[['home_points', 'away_points']] = full_game_df.apply(assign_points, axis=1)

# Reshape to one row per team appearance so that home and away fixtures
# can be aggregated together.
home_games = full_game_df[['country', 'home_team', 'home_points']].rename(
    columns={'home_team': 'team', 'home_points': 'points'}
)
away_games = full_game_df[['country', 'away_team', 'away_points']].rename(
    columns={'away_team': 'team', 'away_points': 'points'}
)
all_games = pd.concat([home_games, away_games])

# Per (country, team): sum of points and number of fixtures counted,
# ordered by country and then by points, highest first.
league_table = (
    all_games
    .groupby(['country', 'team'])
    .agg(
        total_points=('points', 'sum'),
        matches_played=('points', 'size'),
    )
    .sort_values(by=['country', 'total_points'], ascending=[True, False])
)

# Show one table per country, ranked from 1.
for country, country_group in league_table.groupby(level='country'):
    print(f"League Table for {country}:")
    # Drop the country level and turn 'team' back into a regular column.
    country_group = country_group.droplevel('country').reset_index()
    country_group.index += 1  # positions start at 1 instead of 0
    display(country_group[['team', 'total_points', 'matches_played']])
    print("\n")

League Table for ENG:


Unnamed: 0,team,total_points,matches_played
1,Manchester City,94,38
2,Arsenal,68,32
3,Tottenham,66,38
4,Liverpool,60,32
5,Manchester Utd,60,36
6,Chelsea,56,28
7,Newcastle,56,38
8,Aston Villa,44,34
9,Brighton,44,34
10,Wolves,44,34




League Table for FRA:


Unnamed: 0,team,total_points,matches_played
1,PSG,96,32
2,Lyon,72,34
3,Monaco,72,32
4,Rennes,72,34
5,Marseille,60,30
6,Lille,52,30
7,Nice,52,30
8,Lens,46,32
9,Lorient,36,32
10,Reims,36,32




League Table for GER:


Unnamed: 0,team,total_points,matches_played
1,Bayern Munich,88,32
2,RB Leipzig,86,34
3,Bayer Leverkusen,72,32
4,Dortmund,68,28
5,Eintracht Frankfurt,56,30
6,Wolfsburg,52,32
7,B. Monchengladbach,46,30
8,Freiburg,46,30
9,Hoffenheim,38,34
10,Mainz,36,32




League Table for ITA:


Unnamed: 0,team,total_points,matches_played
1,Napoli,102,38
2,Inter,86,34
3,AC Milan,82,34
4,Juventus,82,38
5,Atalanta,65,35
6,AS Roma,64,34
7,Lazio,64,34
8,Sassuolo,58,36
9,Fiorentina,57,33
10,Udinese,42,38




League Table for SPA:


Unnamed: 0,team,total_points,matches_played
1,Barcelona,102,38
2,Real Madrid,100,36
3,Atl. Madrid,82,34
4,Real Sociedad,78,34
5,Villarreal,62,34
6,Ath Bilbao,54,34
7,Betis,50,34
8,Osasuna,46,38
9,Celta Vigo,44,36
10,Girona,44,36




