#### OBJECTIVES
- Use a football API to get English Premier League data for the 2024/2025 season
- Calculate win probabilities of the teams
- Calculate the binomial probability of each team winning the same number of games

In [None]:
import requests 
import pandas as pd 
import numpy as np
from scipy import stats 
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Fetch the Premier League standings from the football-data.org v4 API.
# Replace "API KEY" with your own football-data.org token before running.

url = "https://api.football-data.org/v4/competitions/PL/standings"
headers = {"X-Auth-Token": "API KEY"}
params = {"season": 2024}  # requests the 2024/2025 season named in the objectives

# A timeout keeps the notebook from hanging forever on a stalled connection.
response = requests.get(url, headers=headers, params=params, timeout=30)

# Fail right here with the HTTP status (e.g. a rejected API token) instead of
# with a confusing KeyError when 'standings' is looked up in a later cell.
response.raise_for_status()
data = response.json()

# Uncomment to view all the JSON data
# data

In [None]:
# The league table lives under the first entry of the 'standings' list.
standings_data = data['standings'][0]['table']
# Inspect standings_data to view the extracted entries.

# Output column name -> key of the same value in each table entry.
# 'Position' and 'Team' are handled separately below ('Team' is nested).
stat_columns = {
    'Played': 'playedGames',
    'Won': 'won',
    'Drawn': 'draw',
    'Lost': 'lost',
    'Goals For': 'goalsFor',
    'Goals Against': 'goalsAgainst',
    'Points': 'points',
}

# One flat record per team, then a data frame from the list of records.
standings_df = pd.DataFrame([
    {
        'Position': entry['position'],
        'Team': entry['team']['name'],
        **{column: entry[field] for column, field in stat_columns.items()},
    }
    for entry in standings_data
])

In [None]:
# FUNCTION TO CALCULATE THE WIN, DRAW AND LOSS RATE DURING THE SEASON AND THE BINOMIAL PROBABILITY OF WINNING THE SAME NO OF GAMES

def win_probability(df):
    """Add season rate columns and a binomial win probability to ``df``.

    Reads the 'Played', 'Won', 'Drawn' and 'Lost' columns and writes four new
    columns onto the same frame:

    - 'win_rate', 'draw_rate', 'loss_rate': the matching count divided by
      'Played'.
    - 'win': the binomial probability mass of exactly 'Won' successes in
      'Played' trials when the success probability is 'win_rate'.

    The frame is modified in place and the same object is returned.
    """
    games_played = df['Played']

    # Season rates: share of played games that ended in each outcome.
    rate_sources = (
        ('win_rate', 'Won'),
        ('draw_rate', 'Drawn'),
        ('loss_rate', 'Lost'),
    )
    for rate_column, count_column in rate_sources:
        df[rate_column] = df[count_column] / games_played

    # Binomial probability of winning exactly the observed number of games.
    df['win'] = stats.binom.pmf(df['Won'], games_played, df['win_rate'])
    return df

In [None]:
# Add the rate and binomial columns to the standings. win_probability writes
# the new columns onto the frame it is given and returns that same frame, so
# win_probability1 and standings_df refer to the same object afterwards.
win_probability1= win_probability(standings_df)

In [None]:
# Visualization of each team's season win rate and of the binomial
# probability of winning exactly the same number of games.

# Set the theme before creating the figure: the style is applied through
# matplotlib rcParams, which axes pick up when they are created.
sns.set_theme(style='whitegrid')
f, ax = plt.subplots(figsize=(14, 6))

# Teams ordered from highest to lowest win rate.
df_sort = standings_df.sort_values('win_rate', ascending=False)

# Win rate over the season
sns.barplot(
    x=df_sort['win_rate'],
    y=df_sort['Team'],
    color='orange',
    label='Win rate',
    ax=ax,
)

# Binomial win probability, drawn over the win-rate bars on the same axes.
# set_color_codes makes the shorthand 'b' resolve to the colorblind palette.
sns.set_color_codes('colorblind')
sns.barplot(
    x=df_sort['win'],
    y=df_sort['Team'],
    color='b',
    label='Binomial probability of the same number of wins',
    ax=ax,
)

# The legend tells the two overlaid bar series apart.
ax.legend(loc='lower right', frameon=True)
ax.set(ylabel="Teams", xlabel="Probability")