<a href="https://colab.research.google.com/github/MetalTanuj/FPL-Analysis/blob/main/FPL-ELO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!git clone https://github.com/vaastav/Fantasy-Premier-League.git

Cloning into 'Fantasy-Premier-League'...
remote: Enumerating objects: 335941, done.[K
remote: Counting objects: 100% (94992/94992), done.[K
remote: Compressing objects: 100% (34403/34403), done.[K
remote: Total 335941 (delta 50625), reused 93204 (delta 49931), pack-reused 240949 (from 1)[K
Receiving objects: 100% (335941/335941), 134.97 MiB | 16.04 MiB/s, done.
Resolving deltas: 100% (171596/171596), done.
Updating files: 100% (14264/14264), done.


In [7]:
import pandas as pd
import numpy as np
import plotly.express as px

# Define team colors
team_colors = {
    'Arsenal': '#EF0107',      # Red
    'Aston Villa': '#670E36',  # Claret
    'Bournemouth': '#DA291C',  # Red
    'Brentford': '#E30613',    # Red
    'Brighton': '#0057B8',     # Blue
    'Burnley': '#6C1D45',      # Claret
    'Chelsea': '#034694',      # Blue
    'Crystal Palace': '#1B458F', # Blue
    'Everton': '#003399',      # Blue
    'Fulham': '#CC0000',       # Red
    'Leeds': '#FFCD00',        # Yellow
    'Leicester': '#003090',    # Blue
    'Liverpool': '#C8102E',    # Red
    'Luton': '#FF6600',        # Orange
    'Man City': '#6CABDD',     # Sky Blue
    'Man Utd': '#DA291C',      # Red
    'Newcastle': '#241F20',    # Black
    'Norwich': '#FFF200',      # Yellow
    "Nott'm Forest": '#DD0000',# Red
    'Sheffield Utd': '#EE2737',# Red
    'Southampton': '#D71920',  # Red
    'Spurs': '#132257',        # Navy Blue
    'Watford': '#FFEE00',      # Yellow
    'West Brom': '#122F67',    # Navy Blue
    'West Ham': '#7A263A',     # Claret
    'Wolves': '#FDB913',       # Gold
}


def get_team_name(year):
    team_name = pd.read_csv("/content/Fantasy-Premier-League/data/" + year + "/teams.csv")
    team_name['id'] = [i for i in range(1, 21)]
    return team_name

raw_team = {'id': [], 'name': [], 'year': []}
for i in ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24','2024-25']:
    data = get_team_name(i)
    raw_team['id'] += list(data['id'].values)
    raw_team['name'] += list(data['name'].values)
    raw_team['year'] += [i for j in range(20)]

teams = pd.DataFrame(raw_team)
teams.sort_values('name').name.unique()
columns = teams.sort_values('name').name.unique()
teams_agg = pd.DataFrame(data=np.array([1500 for i in columns]), columns=['elo'])
teams_agg['name'] = columns
teams_agg['date'] = ['2019-08-08T19:00:00Z' for i in columns]
teams_agg['year'] = ['2019' for i in columns]

master_team = teams_agg.reset_index(names='Master_id')[['Master_id', 'name']]
master_team = pd.merge(teams, master_team, on=['name'])

def update_elo(team_h_elo, team_a_elo, team_h_score, team_a_score,proba_only=False):
    SA = 0.5
    SB = 0.5
    PA = 1 / (1 + 10**((team_a_elo - team_h_elo) / 400))
    PB = 1 / (1 + 10**((team_h_elo - team_a_elo) / 400))
    if proba_only:
      return PA,PB
    if team_h_score > team_a_score:
        SA = 1
        SB = 0
    elif team_h_score < team_a_score:
        SA = 0
        SB = 1

    team_h_elo = team_h_elo + 50 * (SA - PA)
    team_a_elo = team_a_elo + 50 * (SB - PB)
    return team_h_elo, team_a_elo

def update_elo_df(row, year):
    team_h_name = master_team[(master_team['id'] == row['team_h']) & (master_team['year'] == year)]['name'].values[0]
    team_a_name = master_team[(master_team['id'] == row['team_a']) & (master_team['year'] == year)]['name'].values[0]
    op = update_elo(teams_agg[teams_agg['name'] == team_h_name]['elo'].values[-1],
                    teams_agg[teams_agg['name'] == team_a_name]['elo'].values[-1],
                    row['team_h_score'], row['team_a_score'])
    data_dict = {
        'elo': [op[0], op[1]],
        'name': [team_h_name, team_a_name],
        'date': [row['kickoff_time'], row['kickoff_time']],
        'year': [year, year]
    }
    return pd.DataFrame(data_dict)

elo_list = pd.DataFrame()
for year in ['2019-20', '2020-21', '2021-22', '2022-23', '2023-24']:
    fixtures = pd.read_csv("/content/Fantasy-Premier-League/data/" + year + "/fixtures.csv")
    fixtures = fixtures.where((fixtures['started'] == True) & (fixtures['finished'] == True))
    for i in fixtures['event'].unique():
        nth_round = fixtures[fixtures['event'] == i]
        x = nth_round.apply(update_elo_df, args=(year,), axis=1)
        teams_agg = pd.concat([teams_agg, pd.DataFrame(np.array(x.to_list()).reshape(-1, 4), columns=['elo', 'name', 'date', 'year'])]).reset_index(drop=True)

# Plot with team colors
teams_agg['color'] = teams_agg['name'].map(team_colors)

fig = px.line(teams_agg, y='elo', x='date', color='name', line_dash='name', line_shape='linear',
              color_discrete_map=team_colors)
fig.update_layout(
    title='Elo Rating Vs Time',

    height=650,
    width=625*2
)
fig.show()

In [None]:
latest=pd.read_csv("/content/Fantasy-Premier-League/data/2024-25/fixtures.csv")
latest=latest[latest['event']==1]
predictions=pd.DataFrame(columns=['Home Team vs Away Team','Probability of A','Probability of B'])
for fix in range(latest.shape[0]):
  row=latest.iloc[fix]
  year='2024-25'
  team_h_name = master_team[(master_team['id'] == row['team_h']) & (master_team['year'] == year)]['name'].values[0]
  team_a_name = master_team[(master_team['id'] == row['team_a']) & (master_team['year'] == year)]['name'].values[0]
  op = update_elo(teams_agg[teams_agg['name'] == team_h_name]['elo'].values[-1],
                  teams_agg[teams_agg['name'] == team_a_name]['elo'].values[-1],
                  row['team_h_score'], row['team_a_score'],True)
  predictions=pd.concat([predictions,pd.DataFrame([[team_h_name+' vs ' +team_a_name,op[0],op[1]]],columns=['Home Team vs Away Team','Probability of A','Probability of B'])])



The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [None]:
import plotly.graph_objects as go

total_prob = predictions['Probability of A'].sum() + predictions['Probability of B'].sum()

fig = go.Figure()

fig.add_trace(go.Bar(
    y=predictions['Home Team vs Away Team'],
    x=predictions['Probability of A'],
    orientation='h',
    name='Home Team Win',
    marker=dict(color='blue')
))

fig.add_trace(go.Bar(
    y=predictions['Home Team vs Away Team'],
    x=predictions['Probability of B'],
    orientation='h',
    name='Away Team Win',
    marker=dict(color='red')
))


fig.update_layout(
    barmode='relative',
    title='Match Outcome Probabilities Home VS Away',
    xaxis_title='Probability',
    yaxis_title='Match',
    xaxis=dict(range=[0, 1]),
    height=600,
    width=800
)

fig.show()
