In [None]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

In [None]:
###GLOBAL VARIABLES####
# K-factor of the Elo update: a single match moves a rating by K * (score - expected score),
# so by less than K points.
K = 32
# Rating every team holds before its first match is processed.
starting_ELO = 1000
# History of all-time record ratings, one [rating, team, season, week] entry per record.
# It must be a list of records (not one flat record): calc_new_ELO compares against
# top_rating[-1][0] and appends new records to the list. The single entry below is a
# placeholder that sets the bar a team has to beat to hold the first real record.
highest_ELO_ever = [[starting_ELO, "team", "season", "week"]]

In [None]:
def calc_new_ELO(
    season: str,
    week: str,
    home_team: str,
    away_team: str,
    ELO_dict: dict,
    result: str,
    K: int,
    top_rating,
    performance_data: dict,
):
    """Apply the outcome of one match to the ratings and the bookkeeping structures.

    Args:
        season: Season label, stored with a new record entry.
        week: Match-week label, stored with a new record entry.
        home_team: Key of the home side in ELO_dict and performance_data.
        away_team: Key of the away side in ELO_dict and performance_data.
        ELO_dict: Maps team -> current rating. Updated in place.
        result: "H" for a home win, "A" for an away win. Any other value is
            scored as a draw.
        K: Factor that scales the rating change of a match.
        top_rating: List of [rating, team, season, week] records whose last
            entry is the rating to beat. A new entry is appended in place when
            one of the two updated ratings exceeds it.
        performance_data: Maps team -> [opponent_ratings, [points]]. Updated in
            place with the opponent's pre-match rating and the points scored.

    Returns:
        The tuple (ELO_dict, top_rating, performance_data), i.e. the same
        objects that were passed in.
    """
    home_before = ELO_dict[home_team]
    away_before = ELO_dict[away_team]

    # Expected score of each side, from the rating difference on a 400-point scale.
    home_expected = 1 / (1 + np.power(10, ((away_before - home_before) / 400)))
    away_expected = 1 / (1 + np.power(10, ((home_before - away_before) / 400)))

    # Each side remembers the rating its opponent had going into the match.
    performance_data[home_team][0].append(away_before)
    performance_data[away_team][0].append(home_before)

    # S is the score of the match from the home side's point of view.
    if result not in ("H", "A"):
        performance_data[home_team][1][0] += 0.5
        performance_data[away_team][1][0] += 0.5
        S = 0.5
    elif result == "H":
        performance_data[home_team][1][0] += 1
        S = 1.
    else:
        performance_data[away_team][1][0] += 1
        S = 0.

    home_after = home_before + K * (S - home_expected)
    away_after = away_before + K * (1 - S - away_expected)

    # At most one new record is stored per match; the home side is checked first.
    record_to_beat = top_rating[-1][0]
    if home_after > record_to_beat:
        top_rating.append([home_after, home_team, season, week])
    elif away_after > record_to_beat:
        top_rating.append([away_after, away_team, season, week])

    ELO_dict[home_team] = home_after
    ELO_dict[away_team] = away_after

    return ELO_dict, top_rating, performance_data

In [None]:
def expected_score(opponent_ratings: list[float], own_rating: float) -> float:
    """Return the total score a player rated own_rating is expected to make.

    Every opponent contributes the expected score of a single game against
    them (a value between 0 and 1); the contributions are added up over all
    entries of opponent_ratings.
    """
    def single_game_expectation(opponent_rating):
        return 1 / (1 + 10**((opponent_rating - own_rating) / 400))

    return sum(map(single_game_expectation, opponent_ratings))


def performance_rating(opponent_ratings: list[float], score: float) -> int:
    """Return the rating whose expected score against these opponents equals score.

    The rating is found by bisection on the interval [0, 4000]: the interval is
    halved until it is narrower than 0.001 and the last midpoint is returned,
    rounded to the nearest integer. The result therefore never leaves the
    0..4000 range.
    """
    lower, upper = 0, 4000

    while upper - lower > 0.001:
        midpoint = (lower + upper) / 2
        expectation_too_low = expected_score(opponent_ratings, midpoint) < score

        # Too low an expectation means the rating we look for is higher than midpoint.
        if expectation_too_low:
            lower = midpoint
        else:
            upper = midpoint

    return round(midpoint)


# Worked example used as a sanity check for performance_rating.
print(
    performance_rating(
        opponent_ratings=[1851, 2457, 1989, 2379, 2407],
        score=4,
    )
)  # should be 2551

In [None]:
# Read the match table; the first line (the header) is skipped and every field is kept as a string.
raw_data = np.genfromtxt(
    "premier_league_data/premier-league-matches.csv",
    delimiter=",",
    skip_header=1,
    dtype=str,
)


In [None]:
# We only need the season, round, home team, away team and result.
# The Elo engine doesn't consider the margin by which a team won.
# We choose the round instead of the date, because it is easier to work with.
# We therefore pretend that a match in round 4 always happened before a match in round 5,
# even though this isn't always true.
needed_data = raw_data[:, [0, 1, 3, 6, 7]]

In [None]:
# Column 2 of needed_data holds the home team and column 3 the away team (this is
# the order in which the match rows are later passed to calc_new_ELO), so the home
# teams have to be read from column 2.
all_teams = np.unique(needed_data[:, 2])
print("All the teams who ever play on home ground ", all_teams)
#We look at all the unique teams, and are blessed with the fact, that there are no duplicates or weird names
#The data seems good. 

In [None]:
# Every team begins with the same starting rating.
ELO_of_teams = dict.fromkeys(all_teams, starting_ELO)

In [None]:
# Take out only the first season (season column equal to "1993") to test on.
in_first_season = needed_data[:, 0] == "1993"
first_season = needed_data[in_first_season]

In [None]:
# Put the first-season matches in ascending round order; the round column is
# text, so it is converted to integers before sorting.
round_numbers = first_season[:, 1].astype(int)
first_season_sorted = first_season[np.argsort(round_numbers)]

In [None]:
# Start from a clean state so that re-running this cell always gives the same result.
ELO_of_teams = {value: starting_ELO for value in all_teams}
highest_ELO_ever = [[starting_ELO, "team", "season", "week"]]
# Per team: [ratings the opponents had before each game, [total points scored]].
performance_data = {value : [[],[0]] for value in all_teams}


for match in first_season_sorted:
    # Row layout of needed_data: season, round, home team, away team, result.
    season, week, home_team, away_team, result = match
    # Use the notebook-wide K so that changing the constant actually changes the simulation.
    ELO_of_teams, highest_ELO_ever, performance_data = calc_new_ELO(
        season, week, home_team, away_team, ELO_of_teams, result, K, highest_ELO_ever, performance_data
    )

In [None]:
# Performance rating of every team, computed from the opponents it faced and the points it scored.
performance_of_teams = {
    team: performance_rating(opponent_ratings, points[0])
    for team, (opponent_ratings, points) in performance_data.items()
}

In [None]:
# Look the best team up once and reuse it for both parts of the message.
best_team = max(performance_of_teams, key=performance_of_teams.get)
best_rating = performance_of_teams[best_team]
print("The team with the highest performance rating over the 1992/93 season is", best_team,
      "with a performance rating of", best_rating)


In [None]:
def _season_then_week(match_row):
    """Sort key for a match row: (season, match-week), both as integers."""
    return match_row[0].astype(int), match_row[1].astype(int)


# Sort all games by season first and by match-week within a season.
sorted_matches = sorted(needed_data, key=_season_then_week)
sorted_matches_np = np.array(sorted_matches)

In [None]:
def _store_season_performance(season_data, history):
    """Append every team's performance rating for one finished season to history."""
    for team, (opponent_ratings, points) in season_data.items():
        history[team].append(performance_rating(opponent_ratings, points[0]))


# Start from a clean state so that re-running this cell always gives the same result.
ELO_of_teams = {team: starting_ELO for team in all_teams}
highest_ELO_ever = [[starting_ELO, "team", "season", "week"]]
current_year = int(sorted_matches_np[0, 0])
performance_data = {team: [[], [0]] for team in all_teams}  #Holds the data necessary for calculating the performance rating after a season
historic_performance = {team: [] for team in all_teams}  #Saves the performance ratings for the different seasons

for match in sorted_matches_np:
    if int(match[0]) != current_year:
        # A new season starts: store the finished season and reset the per-season data.
        # Every team gets an entry for every season, so the position in each list
        # identifies the season.
        _store_season_performance(performance_data, historic_performance)
        performance_data = {team: [[], [0]] for team in all_teams}
        current_year = int(match[0])
    # Use the notebook-wide K so that changing the constant actually changes the simulation.
    ELO_of_teams, highest_ELO_ever, performance_data = calc_new_ELO(match[0], match[1], match[2], match[3], ELO_of_teams, match[4], K, highest_ELO_ever, performance_data)

# The loop only stores a season when the following one begins, so the last
# season in the data has to be stored explicitly; otherwise it would be missing
# from historic_performance.
_store_season_performance(performance_data, historic_performance)




        


In [None]:
# Flatten the per-team histories into (rating, team, position in the team's list) tuples.
value_key_index_list = [
    (value, key, i)
    for key, array in historic_performance.items()
    for i, value in enumerate(array)
]

# Highest rating first; ties are ordered by the remaining tuple fields, also descending.
value_key_index_list = sorted(value_key_index_list, reverse=True)

# Print the ten best entries; the list position is turned into a year by adding 1993.
print("Top Ten Performance ratings:")
top_ten = value_key_index_list[:10]
for rank, (value, key, index) in enumerate(top_ten, start=1):
    print(f"{rank}. Performance rating: {value}, Team: {key}, Year: {index + 1993}")

In [None]:

#Fixing the highest ELO ever data, such that it can be plotted

# This cell replaces the list of records by an array under the same name, so it
# can only run once per simulation run. Fail with a clear message instead of an
# obscure error (or silently wrong data) when it is executed a second time.
if not isinstance(highest_ELO_ever, list):
    raise RuntimeError(
        "highest_ELO_ever has already been converted for plotting; "
        "re-run the simulation cell before running this cell again"
    )

# Drop the placeholder at index 0 and add a closing point that carries the last
# record forward to the final match-week, so the current holder's line reaches
# the right-hand edge of the plot.
closing_point = [highest_ELO_ever[-1][0], highest_ELO_ever[-1][1], '2023', '38']
record_history = highest_ELO_ever[1:] + [closing_point]

# Build the plot points in chronological order. Whenever the record changes
# hands, the previous holder gets one extra point at the moment of the takeover
# (with its last record rating). The extra point is placed right before the new
# holder's record rather than at the end of the list: a team that regains the
# record later would otherwise have its points out of time order and its line
# would double back on the plot.
plot_points = []
current_team = record_history[0][1]
for position, record in enumerate(record_history):
    if record[1] != current_team:
        previous_rating = record_history[position - 1][0]
        plot_points.append([previous_rating, current_team, record[2], record[3]])
        current_team = record[1]
    plot_points.append(record)
highest_ELO_ever = np.array(plot_points)


In [None]:
# Define colors for each team
team_colors = {
    'Arsenal' : 'darkred',
    'Chelsea': 'blue',
    'Coventry City': 'pink',
    'Liverpool': 'purple',
    'Manchester City': 'lightblue',
    'Manchester Utd' : 'red',
    'Norwich City' : 'yellow',
    'QPR' : 'green',    # Add more teams as needed
}

# Plotting
# Column 1 of the record array holds the team name of every point.
teams = highest_ELO_ever[:, 1]

# Create a new figure with a single set of axes
fig, ax = plt.subplots()

# Loop over each team
for team in np.unique(teams):
    # Get the data for this team
    team_data = highest_ELO_ever[teams == team]
    # Extract the elo, week, and year (the array stores everything as text)
    elo = team_data[:, 0].astype(float)
    week = team_data[:, 3].astype(float)
    year = team_data[:, 2].astype(float)
    # Plot this team's data with its corresponding color. A team without an
    # entry in team_colors yields None, which makes matplotlib pick the next
    # color of its default cycle instead of the cell failing with a KeyError.
    ax.plot(52 * year + week, elo, color=team_colors.get(team), label=team)

# Add labels, title, and legend
ax.set_xlabel('Weeks since year 0')
ax.set_ylabel('Highest Elo')
ax.set_title('Highest Elo over time')
ax.legend()

# Show the plot
plt.show()
