In [None]:
import requests
import os
from dotenv import load_dotenv
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Load API key from .env file
load_dotenv()
api_key = os.getenv("Football_API_TOKEN")

# API endpoint for EPL 2024/2025 standings
url = "https://api.football-data.org/v4/competitions/PL/standings"
headers = {"X-Auth-Token": api_key}

# Fetch data from API
response = requests.get(url, headers=headers)
data = response.json()

# Extract standings data
standings = data['standings'][0]['table']  # Assuming 'total' standings table
teams_data = []
for team in standings:
    team_info = {
        'Team': team['team']['name'],
        'Played': team['playedGames'],
        'Won': team['won'],
        'Points': team['points']
    }
    teams_data.append(team_info)

# Create DataFrame
df = pd.DataFrame(teams_data)

# Calculate remaining games (total 38 matches per team in EPL)
total_games = 38
df['Remaining'] = total_games - df['Played']

# Calculate win probability using Bernoulli distribution
def calculate_win_probability(wins, played, remaining):
    if played == 0:  #
        return 0
    p = wins / played  # Current win rate
    # Expected wins in remaining games
    expected_wins = p * remaining
    total_expected_wins = wins + expected_wins
    # Use Bernoulli to estimate probability of achieving a high win total (e.g., 30 wins for EPL title)
    target_wins = 30  # Typical wins needed to win EPL
    if played + remaining < target_wins:
        return 0
    probability = math.comb(remaining, int(target_wins - wins)) * (p ** (target_wins - wins)) * ((1 - p) ** (remaining - (target_wins - wins)))
    return round(probability, 6)

# Apply win probability calculation
df['Win_Probability'] = df.apply(lambda x: calculate_win_probability(x['Won'], x['Played'], x['Remaining']), axis=1)

# Sort DataFrame by points and win probability
df = df.sort_values(by=['Points', 'Win_Probability'], ascending=False)

# Visualization 1: Team Standings Table
plt.figure(figsize=(12, 8))
sns.set(style="whitegrid")
table_data = df[['Team', 'Points', 'Played', 'Won']].copy()
table_data = table_data.reset_index(drop=True)
table_data.index += 1  # Start index at 1
plt.table(cellText=table_data.values,
          colLabels=table_data.columns,
          loc='center',
          cellLoc='center',
          colWidths=[0.4, 0.15, 0.15, 0.15])
plt.axis('off')
plt.title('EPL 2024/2025 Team Standings', fontsize=14)
plt.savefig('epl_standings_table.png', bbox_inches='tight', dpi=300)
plt.close()

# Visualization 2: Win Probability Bar Chart
plt.figure(figsize=(10, 6))
sns.barplot(x='Win_Probability', y='Team', data=df, palette='viridis')
plt.title('EPL 2024/2025 Title Win Probabilities', fontsize=14)
plt.xlabel('Probability of Winning EPL', fontsize=12)
plt.ylabel('Team', fontsize=12)
plt.tight_layout()
plt.savefig('epl_win_probabilities.png', dpi=300)
plt.close()

# Save standings and probabilities to CSV
df.to_csv('epl_standings_probabilities.csv', index=False)

print("Analysis complete. Check 'epl_standings_table.png' and 'epl_win_probabilities.png' for visualizations, and 'epl_standings_probabilities.csv' for data.")


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Win_Probability', y='Team', data=df, palette='viridis')


Analysis complete. Check 'epl_standings_table.png' and 'epl_win_probabilities.png' for visualizations, and 'epl_standings_probabilities.csv' for data.


In [4]:
import json
import pandas as pd
import os
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

# Load data from api_response.json
try:
    with open("api_response.json", "r") as f:
        data = json.load(f)
except FileNotFoundError:
    print("Error: 'api_response.json' not found. Please ensure the file exists.")
    exit()
except json.JSONDecodeError:
    print("Error: Invalid JSON in 'api_response.json'. Please check the file.")
    exit()

# Extract standings data
try:
    standings = data['standings'][0]['table']
except (KeyError, IndexError):
    print("Error: Unexpected data structure in 'api_response.json'. Expected 'standings[0][table]'.")
    print("Available keys:", list(data.keys()))
    exit()

# Process team data
teams_data = []
for team in standings:
    team_info = {
        'Team': team['team'].get('name', 'Unknown'),
        'Played': team.get('playedGames', 0),
        'Won': team.get('won', 0),
        'Points': team.get('points', 0)
    }
    teams_data.append(team_info)

# Create DataFrame/Table
df = pd.DataFrame(teams_data)
print("Raw DataFrame:")
print(df[['Team', 'Played', 'Won', 'Points']].head())

# Check for zero points or games played
if df['Played'].sum() == 0 or df['Points'].sum() == 0:
    print("Warning: All teams have zero points or games played. Check if season data is populated.")
    print("Consider using historical data or adjusting the season parameter.")

# Calculate remaining games (total 38 matches per team in EPL)
total_games = 38
df['Remaining'] = total_games - df['Played']

# Calculate win probability using Poisson distribution
def calculate_win_probability_poisson(wins, played, remaining):
    if played == 0:
        # Use a prior for teams with no games played (e.g., average win rate)
        win_rate = 0.3  # Assume average EPL win rate ~0.3
    else:
        win_rate = wins / (played + 1e-6)  # Avoid division by zero
    expected_wins = win_rate * remaining + wins
    # Estimate probability of achieving 25+ wins (adjusted for EPL title)
    prob = 1 - stats.poisson.cdf(24, expected_wins)
    return round(prob, 6)

df['Win_Probability'] = df.apply(lambda x: calculate_win_probability_poisson(x['Won'], x['Played'], x['Remaining']), axis=1)

# Normalize probabilities to sum to 1
total_prob = df['Win_Probability'].sum()
if total_prob > 0:
    df['Win_Probability'] = df['Win_Probability'] / total_prob
else:
    print("Warning: All probabilities are zero. Using uniform probabilities as fallback.")
    df['Win_Probability'] = 1 / len(df)  # Uniform distribution if all zero

# Sort DataFrame by points and win probability
df = df.sort_values(by=['Points', 'Win_Probability'], ascending=False)

# Save DataFrame to CSV
df.to_csv('epl_standings_probabilities.csv', index=False)

# Visualization 1: Team Standings Table
plt.figure(figsize=(12, 8))
sns.set(style="whitegrid")
table_data = df[['Team', 'Points', 'Played', 'Won']].copy()
table_data = table_data.reset_index(drop=True)
table_data.index += 1  # Start index at 1
plt.table(cellText=table_data.values,
          colLabels=table_data.columns,
          loc='center',
          cellLoc='center',
          colWidths=[0.4, 0.15, 0.15, 0.15])
plt.axis('off')
plt.title('EPL 2024/2025 Team Standings', fontsize=14)
plt.savefig('epl_standings_table.png', bbox_inches='tight', dpi=300)
plt.close()

# Visualization 2: Win Probability Bar Chart
plt.figure(figsize=(10, 6))
sns.barplot(x='Win_Probability', y='Team', data=df, palette='viridis')
plt.title('EPL 2024/2025 Title Win Probabilities', fontsize=14)
plt.xlabel('Probability of Winning EPL', fontsize=12)
plt.ylabel('Team', fontsize=12)
plt.tight_layout()
plt.savefig('epl_win_probabilities.png', dpi=300)
plt.close()

print("Analysis complete. Check 'epl_standings_table.png' and 'epl_win_probabilities.png' for visualizations, and 'epl_standings_probabilities.csv' for data.")

Raw DataFrame:
                  Team  Played  Won  Points
0         Liverpool FC      38   25      84
1           Arsenal FC      38   20      74
2   Manchester City FC      38   21      71
3           Chelsea FC      38   20      69
4  Newcastle United FC      38   20      66



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `y` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Win_Probability', y='Team', data=df, palette='viridis')


Analysis complete. Check 'epl_standings_table.png' and 'epl_win_probabilities.png' for visualizations, and 'epl_standings_probabilities.csv' for data.
