In [64]:
import pandas as pd
import matplotlib.pyplot as plt
import csv

In [65]:
# Load the historical results table and the raw 2023-24 match log.
df = pd.read_csv("../Datasets/premier-league-matches.csv")
matches_23_24 = pd.read_csv("../Datasets/matches_23-24.csv")

# "Round" labels that are converted to a numeric week; anything else is ignored.
round_to_week = {f'Matchweek {i}': i for i in range(1, 39)}

# Keep only rows recorded from the home side's point of view.
# NOTE(review): presumably the log holds one row per team per fixture, so this
# keeps each fixture exactly once -- confirm against the dataset.
home_rows = matches_23_24[matches_23_24['Venue'] == 'Home']
home_rows = home_rows[home_rows['Round'].isin(list(round_to_week))]
# Rows without a result would otherwise be stored as draws ('D'); skip them.
home_rows = home_rows[home_rows['Result'].notna()]

# Translate the home team's W/L/D into the H/A/D convention of the main table
# (any value other than 'W' or 'L' is treated as a draw).
season_2024 = pd.DataFrame({
    'Season_End_Year': 2024,
    'Wk': home_rows['Round'].map(round_to_week),
    'Date': home_rows['Date'],
    'Home': home_rows['Team'],
    'Away': home_rows['Opponent'],
    'HomeGoals': home_rows['GF'],
    'AwayGoals': home_rows['GA'],
    'FTR': home_rows['Result'].map({'W': 'H', 'L': 'A'}).fillna('D'),
}).sort_values('Wk', kind='mergesort')  # stable: keeps file order within a week

# This cell overwrites the file it reads from, so drop any 2024 rows left by a
# previous run before appending; re-running must not duplicate the season.
# A single concat also replaces the quadratic row-by-row concat.
df = pd.concat([df[df['Season_End_Year'] != 2024], season_2024], ignore_index=True)

df.to_csv("../Datasets/premier-league-matches.csv", index=False)
# Split the full results table into one frame per season.
# Keys are 1, 2, 3, ... in the order the seasons first appear in df; each frame
# keeps only the columns listed below.
season_columns = ['Date', 'Home', 'HomeGoals', 'AwayGoals', 'Away', 'FTR']
seasons = df['Season_End_Year'].unique()
season_data = {
    position: df.loc[df['Season_End_Year'] == end_year, season_columns]
    for position, end_year in enumerate(seasons, start=1)
}


    

In [66]:
# Seed every club that appears as a home side anywhere in df (all seasons, not
# just the first one) with a starting Elo of 1500 and a "not yet played" flag.
team_rating = {
    club: {"rating": 1500, "played": False}
    for club in df['Home'].unique()
}

print(team_rating.keys())


dict_keys(['Coventry City', 'Leeds United', 'Sheffield Utd', 'Crystal Palace', 'Arsenal', 'Ipswich Town', 'Everton', 'Southampton', 'Chelsea', "Nott'ham Forest", 'Manchester City', 'Blackburn', 'Wimbledon', 'Tottenham', 'Liverpool', 'Aston Villa', 'Oldham Athletic', 'Middlesbrough', 'Norwich City', 'QPR', 'Manchester Utd', 'Sheffield Weds', 'Newcastle Utd', 'West Ham', 'Swindon Town', 'Leicester City', 'Bolton', 'Sunderland', 'Derby County', 'Barnsley', 'Charlton Ath', 'Watford', 'Bradford City', 'Fulham', 'Birmingham City', 'West Brom', 'Portsmouth', 'Wolves', 'Wigan Athletic', 'Reading', 'Hull City', 'Stoke City', 'Burnley', 'Blackpool', 'Swansea City', 'Cardiff City', 'Bournemouth', 'Brighton', 'Huddersfield', 'Brentford', 'Luton Town'])


In [67]:
def update_elo(home_team, away_team, ftr, k=32):
    """Apply one match result to the module-level ``team_rating`` table.

    Parameters
    ----------
    home_team, away_team : keys of ``team_rating``
        The two sides of the fixture.
    ftr : str
        Full-time result: 'H' for a home win, 'D' for a draw; any other value
        is handled as an away win.
    k : number, default 32
        Elo K-factor, i.e. the largest possible rating change from one match.

    Returns
    -------
    None
        ``team_rating`` is modified in place: both ratings are replaced by
        their rounded new values and both "played" flags are set to True.
    """
    home_entry = team_rating[home_team]
    away_entry = team_rating[away_team]
    rating_home = home_entry["rating"]
    rating_away = away_entry["rating"]

    # Logistic expectation of each side's score given the rating gap.
    expect_home = 1 / (1 + 10**((rating_away - rating_home) / 400))
    expect_away = 1 / (1 + 10**((rating_home - rating_away) / 400))

    # Actual score from the home side's perspective: win 1, draw 0.5, loss 0.
    score_home = 1 if ftr == 'H' else 0.5 if ftr == 'D' else 0
    score_away = 1 - score_home

    # Compute both new ratings before storing either of them.
    updated_home = rating_home + k * (score_home - expect_home)
    updated_away = rating_away + k * (score_away - expect_away)

    home_entry["rating"] = round(updated_home)
    away_entry["rating"] = round(updated_away)
    home_entry["played"] = True
    away_entry["played"] = True


In [68]:
# Replay every match season by season, logging one CSV row per match and then
# applying its result to the ratings. Each row is written BEFORE update_elo is
# called, so the two rating columns hold the pre-match ratings.
# NOTE: update_elo mutates team_rating and this cell does not reset it, so
# re-running this cell alone continues from the already-updated ratings;
# re-run the team_rating initialisation cell first.

# Numeric encoding of the result column: home win 1, away win -1, otherwise 0.
winner_code = {'H': 1, 'A': -1}
match_columns = ["Date", "Home", "Away", "HomeGoals", "AwayGoals", "FTR"]

# newline="" is required by the csv module; without it extra blank rows can be
# written on platforms that translate line endings.
with open("../Datasets/ELO_ratings.csv", mode="w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(['Date', 'Home Team', 'Away Team', 'Home Team Rating', 'Away Team Rating', 'Home Goals', 'Away Goals', 'Winner'])

    # Seasons are visited in dict insertion order, matches in frame order.
    for season_df in season_data.values():
        for date, home_team, away_team, home_goals, away_goals, ftr in season_df[match_columns].itertuples(index=False, name=None):
            csv_writer.writerow([
                date,
                home_team,
                away_team,
                team_rating[home_team]["rating"],
                team_rating[away_team]["rating"],
                home_goals,
                away_goals,
                winner_code.get(ftr, 0),
            ])

            # Update Elo ratings based on match result
            update_elo(home_team, away_team, ftr)

# Final ratings after the last replayed match; written once the match log file
# has been closed.
with open('../Datasets/ELO_Team_Ratings.csv', mode='w', newline='') as file:
    writer2 = csv.writer(file)
    writer2.writerow(['Team', 'Rating'])
    for team, data in team_rating.items():
        writer2.writerow([team, data['rating']])
