In [1]:
#LOAD GOAL STATS

import soccerdata as sd
understat = sd.Understat(leagues="ENG-Premier League", seasons="2023/2024")
team_match_stats = understat.read_team_match_stats()
team_match_stats.head()

import pandas as pd

# Assuming 'team_match_stats' is your DataFrame loaded from the code you've provided

# Calculating total xG for teams when they are at home
home_goals = team_match_stats.groupby('home_team')['home_goals'].sum().rename('total_home_goals_scored')

# Calculating total xG for teams when they are away
away_goals = team_match_stats.groupby('away_team')['away_goals'].sum().rename('total_away_goals_scored')

# Calculating total xGA (expected goals against) for teams when they are at home
home_goals_allowed = team_match_stats.groupby('home_team')['away_goals'].sum().rename('total_home_goals_allowed')

# Calculating total xGA for teams when they are away
away_goals_allowed = team_match_stats.groupby('away_team')['home_goals'].sum().rename('total_away_goals_allowed')

# Merging the above series into a single DataFrame
teams_goals_goalsallowed = pd.concat([home_goals, away_goals, home_goals_allowed, away_goals_allowed], axis=1)

# Filling NaN values with 0, in case some teams didn't play home or away games yet
teams_goals_goalsallowed = teams_goals_goalsallowed.fillna(0)

# Calculating overall total xG and xGA
teams_goals_goalsallowed['total_goals'] = teams_goals_goalsallowed['total_home_goals_scored'] + teams_goals_goalsallowed['total_away_goals_scored']
teams_goals_goalsallowed['total_goals_allowed'] = teams_goals_goalsallowed['total_home_goals_allowed'] + teams_goals_goalsallowed['total_away_goals_allowed']

teams_goals_goalsallowed


Unnamed: 0,total_home_goals_scored,total_away_goals_scored,total_home_goals_allowed,total_away_goals_allowed,total_goals,total_goals_allowed
Arsenal,46,42,15,13,88,28
Aston Villa,45,28,25,28,73,53
Bournemouth,26,26,26,37,52,63
Brentford,27,25,30,30,52,60
Brighton,29,24,23,34,53,57
Burnley,18,21,41,33,39,74
Chelsea,42,28,25,34,70,59
Crystal Palace,32,17,26,31,49,57
Everton,21,17,18,31,38,49
Fulham,31,20,20,35,51,55


In [2]:
#SET UP LAST 5 GAME GOAL STATS

import pandas as pd

# Assuming team_match_stats already loaded and contains the necessary columns

# Identify all unique teams
teams = pd.concat([team_match_stats['home_team'], team_match_stats['away_team']]).unique()

# Prepare a list to collect each team's average goals scored and allowed for home and away games
team_averages = []

# Iterate through each team
for team in teams:
    # Filter home games involving the current team
    team_home_games = team_match_stats[team_match_stats['home_team'] == team]
    # Sort home games by date in descending order and select the last 5
    team_last_5_home_games = team_home_games.sort_values(by='date', ascending=False).head(5)
    
    # Filter away games involving the current team
    team_away_games = team_match_stats[team_match_stats['away_team'] == team]
    # Sort away games by date in descending order and select the last 5
    team_last_5_away_games = team_away_games.sort_values(by='date', ascending=False).head(5)
    
    # Calculate average goals scored and allowed for home games
    team_home_goals_avg = team_last_5_home_games['home_goals'].mean()
    team_home_goals_allowed_avg = team_last_5_home_games['away_goals'].mean()
    
    # Calculate average goals scored and allowed for away games
    team_away_goals_avg = team_last_5_away_games['away_goals'].mean()
    team_away_goals_allowed_avg = team_last_5_away_games['home_goals'].mean()
    
    # Append the averages to the list
    team_averages.append({
        'Team': team,
        'Average Home Goals Last 5': team_home_goals_avg,
        'Average Home Goals Allowed Last 5': team_home_goals_allowed_avg,
        'Average Away Goals Last 5': team_away_goals_avg,
        'Average Away Goals Allowed Last 5': team_away_goals_allowed_avg
    })

# Convert the list of averages into a DataFrame
teams_goals_df = pd.DataFrame(team_averages)

# Display the DataFrame
teams_goals_df


Unnamed: 0,Team,Average Home Goals Last 5,Average Home Goals Allowed Last 5,Average Away Goals Last 5,Average Away Goals Allowed Last 5
0,Burnley,1.0,1.8,1.8,1.4
1,Arsenal,2.4,0.6,2.8,0.4
2,Bournemouth,2.4,1.2,1.0,1.6
3,Brighton,0.6,1.6,0.4,1.8
4,Everton,1.4,0.6,0.6,2.4
5,Newcastle United,3.4,1.0,1.6,2.0
6,Sheffield United,1.4,3.6,1.2,3.2
7,Brentford,1.0,0.6,2.0,1.8
8,Chelsea,3.8,1.0,1.4,2.4
9,Manchester United,2.0,1.4,1.4,2.8


In [3]:
#CALCULATE TOTAL MATCHES PLAYED FOR EACH TEAM

# Count the number of home matches played by each team
home_matches_played = team_match_stats.groupby('home_team')['home_ppda'].count().rename('home_matches_played')

# Count the number of away matches played by each team
away_matches_played = team_match_stats.groupby('away_team')['away_ppda'].count().rename('away_matches_played')

# Merging these counts into the teams_xg_xga DataFrame
teams_goals_goalsallowed = teams_goals_goalsallowed.merge(home_matches_played, how='left', left_index=True, right_index=True)
teams_goals_goalsallowed = teams_goals_goalsallowed.merge(away_matches_played, how='left', left_index=True, right_index=True)

# Filling NaN values with 0 in case some teams didn't play home or away games
teams_goals_goalsallowed['home_matches_played'] = teams_goals_goalsallowed['home_matches_played'].fillna(0)
teams_goals_goalsallowed['away_matches_played'] = teams_goals_goalsallowed['away_matches_played'].fillna(0)

# Calculating the total matches played by adding home and away matches
teams_goals_goalsallowed['matches_played'] = teams_goals_goalsallowed['home_matches_played'] + teams_goals_goalsallowed['away_matches_played']

teams_goals_goalsallowed


Unnamed: 0,total_home_goals_scored,total_away_goals_scored,total_home_goals_allowed,total_away_goals_allowed,total_goals,total_goals_allowed,home_matches_played,away_matches_played,matches_played
Arsenal,46,42,15,13,88,28,18,18,36
Aston Villa,45,28,25,28,73,53,18,18,36
Bournemouth,26,26,26,37,52,63,18,18,36
Brentford,27,25,30,30,52,60,18,18,36
Brighton,29,24,23,34,53,57,17,18,35
Burnley,18,21,41,33,39,74,18,18,36
Chelsea,42,28,25,34,70,59,18,17,35
Crystal Palace,32,17,26,31,49,57,18,18,36
Everton,21,17,18,31,38,49,18,18,36
Fulham,31,20,20,35,51,55,18,18,36


In [4]:
#CALCULATE LEAGUE WIDE AVERAGES


# Calculate the averages for each specified column
averages = teams_goals_goalsallowed.mean()

# Creating a new DataFrame with the averages to append it properly with a label
averages_df = pd.DataFrame([averages], index=['Average'])

# Append the averages row to your original DataFrame
teams_goals_goalsallowed_averaged = pd.concat([teams_goals_goalsallowed, averages_df])

teams_goals_goalsallowed_averaged

Unnamed: 0,total_home_goals_scored,total_away_goals_scored,total_home_goals_allowed,total_away_goals_allowed,total_goals,total_goals_allowed,home_matches_played,away_matches_played,matches_played
Arsenal,46.0,42.0,15.0,13.0,88.0,28.0,18.0,18.0,36.0
Aston Villa,45.0,28.0,25.0,28.0,73.0,53.0,18.0,18.0,36.0
Bournemouth,26.0,26.0,26.0,37.0,52.0,63.0,18.0,18.0,36.0
Brentford,27.0,25.0,30.0,30.0,52.0,60.0,18.0,18.0,36.0
Brighton,29.0,24.0,23.0,34.0,53.0,57.0,17.0,18.0,35.0
Burnley,18.0,21.0,41.0,33.0,39.0,74.0,18.0,18.0,36.0
Chelsea,42.0,28.0,25.0,34.0,70.0,59.0,18.0,17.0,35.0
Crystal Palace,32.0,17.0,26.0,31.0,49.0,57.0,18.0,18.0,36.0
Everton,21.0,17.0,18.0,31.0,38.0,49.0,18.0,18.0,36.0
Fulham,31.0,20.0,20.0,35.0,51.0,55.0,18.0,18.0,36.0


In [5]:
#CALCULATE PER GAME AVERAGES FOR GOAL STATS

import numpy as np

# Create a new DataFrame for per match averages
teams_per_match_averages = pd.DataFrame(index=teams_goals_goalsallowed_averaged.index)

# Calculate per match averages for home and away xG and xGA
teams_per_match_averages['per_match_home_goals'] = teams_goals_goalsallowed_averaged['total_home_goals_scored'] / teams_goals_goalsallowed_averaged['home_matches_played']
teams_per_match_averages['per_match_away_goals'] = teams_goals_goalsallowed_averaged['total_away_goals_scored'] / teams_goals_goalsallowed_averaged['away_matches_played']
teams_per_match_averages['per_match_home_goals_allowed'] = teams_goals_goalsallowed_averaged['total_home_goals_allowed'] / teams_goals_goalsallowed_averaged['home_matches_played']
teams_per_match_averages['per_match_away_goals_allowed'] = teams_goals_goalsallowed_averaged['total_away_goals_allowed'] / teams_goals_goalsallowed_averaged['away_matches_played']

# Calculate overall total xG and xGA per match
teams_per_match_averages['total_goals_per_match'] = teams_goals_goalsallowed_averaged['total_goals'] / teams_goals_goalsallowed_averaged['matches_played']
teams_per_match_averages['total_goals_allowed_per_match'] = teams_goals_goalsallowed_averaged['total_goals_allowed'] / teams_goals_goalsallowed_averaged['matches_played']

# Replace any potential infinite values with NaN (in case of division by zero) and then fill with 0
teams_per_match_averages.replace([np.inf, -np.inf], np.nan, inplace=True)
teams_per_match_averages.fillna(0, inplace=True)

# Calculate the average for each column
average_stats = teams_per_match_averages.mean()

# Append the 'Average' row with these averages to the DataFrame
teams_per_match_averages.loc['Average'] = average_stats

teams_per_match_averages


Unnamed: 0,per_match_home_goals,per_match_away_goals,per_match_home_goals_allowed,per_match_away_goals_allowed,total_goals_per_match,total_goals_allowed_per_match
Arsenal,2.555556,2.333333,0.833333,0.722222,2.444444,0.777778
Aston Villa,2.5,1.555556,1.388889,1.555556,2.027778,1.472222
Bournemouth,1.444444,1.444444,1.444444,2.055556,1.444444,1.75
Brentford,1.5,1.388889,1.666667,1.666667,1.444444,1.666667
Brighton,1.705882,1.333333,1.352941,1.888889,1.514286,1.628571
Burnley,1.0,1.166667,2.277778,1.833333,1.083333,2.055556
Chelsea,2.333333,1.647059,1.388889,2.0,2.0,1.685714
Crystal Palace,1.777778,0.944444,1.444444,1.722222,1.361111,1.583333
Everton,1.166667,0.944444,1.0,1.722222,1.055556,1.361111
Fulham,1.722222,1.111111,1.111111,1.944444,1.416667,1.527778


In [9]:
#GET LIST OF UPCOMING GAMES

# Load the full schedule
schedule_df = understat.read_schedule()

# Convert the 'date' column to datetime format, then format it to keep only the date part
schedule_df['date'] = pd.to_datetime(schedule_df['date']).dt.date

import pandas as pd

# Assuming 'schedule_df' is your DataFrame and it's already loaded

# Make sure the 'date' column is in datetime format
schedule_df['date'] = pd.to_datetime(schedule_df['date'])

# Define the start and end dates for filtering
start_date = pd.to_datetime('2024-05-10')
end_date = pd.to_datetime('2024-05-14')

# Filter the DataFrame for games within the date range, and use .copy() to avoid SettingWithCopyWarning
filtered_games = schedule_df[(schedule_df['date'] >= start_date) & (schedule_df['date'] <= end_date)].copy()

# Create a new column 'game_matchup' by concatenating 'home_team', 'vs.', and 'away_team'
filtered_games['game_matchup'] = filtered_games['home_team'] + ' vs. ' + filtered_games['away_team']

# Now, create a separate DataFrame that contains only the 'game_matchup' column
games_list_df = filtered_games[['game_matchup']].reset_index(drop=True)

# Display the new DataFrame to verify
print(games_list_df)

                                  game_matchup
0                    Bournemouth vs. Brentford
1                 Everton vs. Sheffield United
2                   Fulham vs. Manchester City
3                Newcastle United vs. Brighton
4                Nottingham Forest vs. Chelsea
5                        Tottenham vs. Burnley
6                           West Ham vs. Luton
7   Wolverhampton Wanderers vs. Crystal Palace
8                Manchester United vs. Arsenal
9                    Aston Villa vs. Liverpool
10               Tottenham vs. Manchester City


In [10]:
games_list_df

Unnamed: 0,game_matchup
0,Bournemouth vs. Brentford
1,Everton vs. Sheffield United
2,Fulham vs. Manchester City
3,Newcastle United vs. Brighton
4,Nottingham Forest vs. Chelsea
5,Tottenham vs. Burnley
6,West Ham vs. Luton
7,Wolverhampton Wanderers vs. Crystal Palace
8,Manchester United vs. Arsenal
9,Aston Villa vs. Liverpool


In [11]:
#CREATE HOME AND AWAY ATTACK "SCORES" BASED ON THEIR AVERAGES COMAPRED TO LEAGUE WIDE

import pandas as pd

# Assuming 'teams_per_match_averages' and 'games_list_df' are already defined

# Initialize an empty list for storing game metrics along with the matchup information
games_metrics_list = []

# Retrieve the 'Average' row for league-wide average stats
avg_stats = teams_per_match_averages.loc['Average']

# Looping through each game to calculate metrics using 'teams_per_match_averages'
for index, row in games_list_df.iterrows():
    # Extract home and away team names
    home_team, away_team = row['game_matchup'].split(' vs. ')
    
    # Retrieve team stats from 'teams_per_match_averages'
    home_team_stats = teams_per_match_averages.loc[home_team]
    away_team_stats = teams_per_match_averages.loc[away_team]
    
    # Calculate the metrics using the per match averages
    home_attack = home_team_stats['per_match_home_goals'] / avg_stats['per_match_home_goals']
    away_defence = away_team_stats['per_match_away_goals_allowed'] / avg_stats['per_match_away_goals_allowed']
    away_attack = away_team_stats['per_match_away_goals'] / avg_stats['per_match_away_goals']
    home_defence = home_team_stats['per_match_home_goals_allowed'] / avg_stats['per_match_home_goals_allowed']
    
    # Append the calculated metrics for this game to the list along with the game_matchup
    games_metrics_list.append({
        'Game Matchup': row['game_matchup'],
        'Home Attack': home_attack, 
        'Away Defence': away_defence,
        'Away Attack': away_attack, 
        'Home Defence': home_defence
    })

# Once all games are processed, create the DataFrame from the list
games_metrics_df = pd.DataFrame(games_metrics_list)
# Assuming 'games_metrics_df' already exists and contains the 'Game Matchup' column

# Splitting 'Game Matchup' into 'Home Team' and 'Away Team'
games_metrics_df[['Home Team', 'Away Team']] = games_metrics_df['Game Matchup'].str.split(' vs. ', expand=True)

# Merge home team averages
games_metrics_df = games_metrics_df.merge(
    teams_goals_df[['Team', 'Average Home Goals Last 5', 'Average Home Goals Allowed Last 5']],
    left_on='Home Team',
    right_on='Team',
    how='left'
).drop(columns=['Team'])  # Dropping the redundant 'Team' column after merge

# Merge away team averages
games_metrics_df = games_metrics_df.merge(
    teams_goals_df[['Team', 'Average Away Goals Last 5', 'Average Away Goals Allowed Last 5']],
    left_on='Away Team',
    right_on='Team',
    how='left',
    suffixes=('_home', '_away')
).drop(columns=['Team'])  # Dropping the redundant 'Team' column after merge

games_metrics_df


Unnamed: 0,Game Matchup,Home Attack,Away Defence,Away Attack,Home Defence,Home Team,Away Team,Average Home Goals Last 5,Average Home Goals Allowed Last 5,Average Away Goals Last 5,Average Away Goals Allowed Last 5
0,Bournemouth vs. Brentford,0.796973,0.920058,0.953031,0.993805,Bournemouth,Brentford,2.4,1.2,2.0,1.8
1,Everton vs. Sheffield United,0.643709,1.410755,0.60994,0.688019,Everton,Sheffield United,1.4,0.6,1.2,3.2
2,Fulham vs. Manchester City,0.950237,0.584507,1.574183,0.764466,Fulham,Manchester City,1.6,1.0,2.4,0.6
3,Newcastle United vs. Brighton,1.471334,1.042732,0.91491,0.802689,Newcastle United,Brighton,3.4,1.0,0.4,1.8
4,Nottingham Forest vs. Chelsea,0.76632,1.10407,1.130183,1.032029,Nottingham Forest,Chelsea,1.2,1.4,1.4,2.4
5,Tottenham vs. Burnley,1.168413,1.012064,0.800546,0.971321,Tottenham,Burnley,2.2,1.6,1.8,1.4
6,West Ham vs. Luton,0.858278,1.380087,0.876789,1.032029,West Ham,Luton,1.2,1.6,1.2,3.0
7,Wolverhampton Wanderers vs. Crystal Palace,0.76632,0.950727,0.648061,1.032029,Wolverhampton Wanderers,Crystal Palace,1.0,1.4,0.8,1.2
8,Manchester United vs. Arsenal,0.908765,0.398692,1.601092,1.011793,Manchester United,Arsenal,2.0,1.4,2.8,0.4
9,Aston Villa vs. Liverpool,1.379376,0.644041,1.296122,0.955582,Aston Villa,Liverpool,2.0,2.0,1.6,1.4


In [12]:
#FACTOR IN LAST 5 GAME AVERAGES AS ITS OWN SEPARATE WIDE COMPARED TO SEASONAL AVERAGES

import pandas as pd

# Assuming 'teams_per_match_averages', 'games_list_df', and 'teams_xg_xga_df' are already defined

# Retrieve season-long averages
season_avg_home_goals = teams_per_match_averages['per_match_home_goals'].mean()
season_avg_away_goals = teams_per_match_averages['per_match_away_goals'].mean()
season_avg_home_goals_allowed = teams_per_match_averages['per_match_home_goals_allowed'].mean()
season_avg_away_goals_allowed = teams_per_match_averages['per_match_away_goals_allowed'].mean()

# Initialize an empty list for storing game metrics along with the matchup information
games_metrics_list = []

# Loop through each game to calculate metrics using 'teams_per_match_averages'
for index, row in games_list_df.iterrows():
    home_team, away_team = row['game_matchup'].split(' vs. ')
    
    # Retrieve team stats from 'teams_per_match_averages'
    home_team_stats = teams_per_match_averages.loc[home_team]
    away_team_stats = teams_per_match_averages.loc[away_team]
    
    # Retrieve recent form from 'teams_xg_xga_df'
    home_recent_goals_scored = teams_goals_df.loc[teams_goals_df['Team'] == home_team, 'Average Home Goals Last 5'].values[0]
    away_recent_goals_scored = teams_goals_df.loc[teams_goals_df['Team'] == away_team, 'Average Away Goals Last 5'].values[0]
    home_recent_goals_allowed = teams_goals_df.loc[teams_goals_df['Team'] == home_team, 'Average Home Goals Allowed Last 5'].values[0]
    away_recent_goals_allowed = teams_goals_df.loc[teams_goals_df['Team'] == away_team, 'Average Away Goals Allowed Last 5'].values[0]
    
    # Compare recent form to season-long averages to get adjustment factors
    home_attack_factor = home_recent_goals_scored / home_team_stats['per_match_home_goals'] if home_team_stats['per_match_home_goals'] != 0 else 1
    away_defence_factor = away_recent_goals_allowed / away_team_stats['per_match_away_goals_allowed'] if away_team_stats['per_match_away_goals_allowed'] != 0 else 1
    away_attack_factor = away_recent_goals_scored / away_team_stats['per_match_away_goals'] if away_team_stats['per_match_away_goals'] != 0 else 1
    home_defence_factor = home_recent_goals_allowed / home_team_stats['per_match_home_goals_allowed'] if home_team_stats['per_match_home_goals_allowed'] != 0 else 1
    
    # Adjust metrics using the factors
    home_attack_adjusted = home_team_stats['per_match_home_goals'] * home_attack_factor / season_avg_home_goals
    away_defence_adjusted = away_team_stats['per_match_away_goals_allowed'] * away_defence_factor / season_avg_away_goals_allowed
    away_attack_adjusted = away_team_stats['per_match_away_goals'] * away_attack_factor / season_avg_away_goals
    home_defence_adjusted = home_team_stats['per_match_home_goals_allowed'] * home_defence_factor / season_avg_home_goals_allowed
    
    # Append the adjusted metrics for this game to the list
    games_metrics_list.append({
        'Game Matchup': row['game_matchup'],
        'Home Attack': home_attack_adjusted,
        'Away Defence': away_defence_adjusted,
        'Away Attack': away_attack_adjusted,
        'Home Defence': home_defence_adjusted
    })

# Create the DataFrame from the list
games_metrics_df = pd.DataFrame(games_metrics_list)

# Split 'Game Matchup' into 'Home Team' and 'Away Team'
games_metrics_df[['Home Team', 'Away Team']] = games_metrics_df['Game Matchup'].str.split(' vs. ', expand=True)

games_metrics_df


Unnamed: 0,Game Matchup,Home Attack,Away Defence,Away Attack,Home Defence,Home Team,Away Team
0,Bournemouth vs. Brentford,1.324198,0.993685,1.372205,0.825632,Bournemouth,Brentford
1,Everton vs. Sheffield United,0.772449,1.76655,0.823323,0.412816,Everton,Sheffield United
2,Fulham vs. Manchester City,0.882799,0.331228,1.646646,0.688027,Fulham,Manchester City
3,Newcastle United vs. Brighton,1.875947,0.993685,0.274441,0.688027,Newcastle United,Brighton
4,Nottingham Forest vs. Chelsea,0.662099,1.324913,0.960544,0.963237,Nottingham Forest,Chelsea
5,Tottenham vs. Burnley,1.213848,0.772866,1.234985,1.100843,Tottenham,Burnley
6,West Ham vs. Luton,0.662099,1.656141,0.823323,1.100843,West Ham,Luton
7,Wolverhampton Wanderers vs. Crystal Palace,0.551749,0.662456,0.548882,0.963237,Wolverhampton Wanderers,Crystal Palace
8,Manchester United vs. Arsenal,1.103498,0.220819,1.921087,0.963237,Manchester United,Arsenal
9,Aston Villa vs. Liverpool,1.103498,0.772866,1.097764,1.376053,Aston Villa,Liverpool


In [13]:
#GENERATE PROJECTED GOALS LIST

# Initialize an empty list for storing projected goals data
projected_goals_list = []

# Retrieve the league-wide average per match home and away xG
league_avg_per_match_home_goals = teams_per_match_averages.loc['Average', 'per_match_home_goals']
league_avg_per_match_away_goals = teams_per_match_averages.loc['Average', 'per_match_away_goals']

# Iterate over each row in the games_metrics_df to calculate projected goals
for index, row in games_metrics_df.iterrows():
    # Extract home and away team names from the game matchup
    home_team, away_team = row['Game Matchup'].split(' vs. ')

    # Calculate metrics using the teams' attack and defence strengths
    home_attack = row['Home Attack']
    away_defence = row['Away Defence']
    away_attack = row['Away Attack']
    home_defence = row['Home Defence']

    # Calculate projected home and away goals using the league-wide average per game xG
    projected_home_goals = home_attack * away_defence * league_avg_per_match_home_goals
    projected_away_goals = away_attack * home_defence * league_avg_per_match_away_goals

    # Calculate total projected goals
    total_projected_goals = projected_home_goals + projected_away_goals

    # Append the results to the list
    projected_goals_list.append({
        'Game Matchup': row['Game Matchup'],
        'Projected Home Goals': projected_home_goals,
        'Projected Away Goals': projected_away_goals,
        'Total Projected Goals': total_projected_goals
    })

# Convert the list to a DataFrame
projected_goals_df = pd.DataFrame(projected_goals_list)

# Display the DataFrame
projected_goals_df


Unnamed: 0,Game Matchup,Projected Home Goals,Projected Away Goals,Total Projected Goals
0,Bournemouth vs. Brentford,2.384838,1.651072,4.035909
1,Everton vs. Sheffield United,2.473165,0.495322,2.968486
2,Fulham vs. Manchester City,0.529964,1.651072,2.181036
3,Newcastle United vs. Brighton,3.37852,0.275179,3.653698
4,Nottingham Forest vs. Chelsea,1.589892,1.348375,2.938267
5,Tottenham vs. Burnley,1.700301,1.981286,3.681587
6,West Ham vs. Luton,1.987365,1.320858,3.308222
7,Wolverhampton Wanderers vs. Crystal Palace,0.662455,0.7705,1.432955
8,Manchester United vs. Arsenal,0.441637,2.696751,3.138387
9,Aston Villa vs. Liverpool,1.545728,2.201429,3.747157


In [14]:
#LOADING IN XG STATS NOW

import soccerdata as sd
understat = sd.Understat(leagues="ENG-Premier League", seasons="2023/2024")
team_match_stats = understat.read_team_match_stats()
import pandas as pd

# Assuming 'team_match_stats' is your DataFrame loaded from the code you've provided

# Calculating total xG for teams when they are at home
home_xg = team_match_stats.groupby('home_team')['home_xg'].sum().rename('total_home_xg')

# Calculating total xG for teams when they are away
away_xg = team_match_stats.groupby('away_team')['away_xg'].sum().rename('total_away_xg')

# Calculating total xGA (expected goals against) for teams when they are at home
home_xga = team_match_stats.groupby('home_team')['away_xg'].sum().rename('total_home_xga')

# Calculating total xGA for teams when they are away
away_xga = team_match_stats.groupby('away_team')['home_xg'].sum().rename('total_away_xga')

# Merging the above series into a single DataFrame
teams_xg_xga = pd.concat([home_xg, away_xg, home_xga, away_xga], axis=1)

# Filling NaN values with 0, in case some teams didn't play home or away games yet
teams_xg_xga = teams_xg_xga.fillna(0)

# Calculating overall total xG and xGA
teams_xg_xga['total_xg'] = teams_xg_xga['total_home_xg'] + teams_xg_xga['total_away_xg']
teams_xg_xga['total_xga'] = teams_xg_xga['total_home_xga'] + teams_xg_xga['total_away_xga']


import pandas as pd

# Identify all unique teams
teams = pd.concat([team_match_stats['home_team'], team_match_stats['away_team']]).unique()

# Prepare a list to collect each team's average xG and xGA for home and away games
xg_team_averages = []

# Iterate through each team
for team in teams:
    # Filter home games involving the current team
    team_home_games = team_match_stats[team_match_stats['home_team'] == team]
    # Sort home games by date in descending order and select the last 5
    team_last_5_home_games = team_home_games.sort_values(by='date', ascending=False).head(5)
    
    # Filter away games involving the current team
    team_away_games = team_match_stats[team_match_stats['away_team'] == team]
    # Sort away games by date in descending order and select the last 5
    team_last_5_away_games = team_away_games.sort_values(by='date', ascending=False).head(5)
    
    # Calculate average xG and xGA for home games
    team_home_xg_avg = team_last_5_home_games['home_xg'].mean()
    team_home_xga_avg = team_last_5_home_games['away_xg'].mean()
    
    # Calculate average xG and xGA for away games
    team_away_xg_avg = team_last_5_away_games['away_xg'].mean()
    team_away_xga_avg = team_last_5_away_games['home_xg'].mean()
    
    # Append the averages to the list
    xg_team_averages.append({
        'Team': team,
        'Average Home xG Last 5': team_home_xg_avg,
        'Average Home xGA Last 5': team_home_xga_avg,
        'Average Away xG Last 5': team_away_xg_avg,
        'Average Away xGA Last 5': team_away_xga_avg
    })

# Convert the list of averages into a DataFrame
teams_xg_xga_df = pd.DataFrame(xg_team_averages)

# Display the DataFrame

# Count the number of home matches played by each team
home_matches_played = team_match_stats.groupby('home_team')['home_ppda'].count().rename('home_matches_played')

# Count the number of away matches played by each team
away_matches_played = team_match_stats.groupby('away_team')['away_ppda'].count().rename('away_matches_played')

# Merging these counts into the teams_xg_xga DataFrame
teams_xg_xga = teams_xg_xga.merge(home_matches_played, how='left', left_index=True, right_index=True)
teams_xg_xga = teams_xg_xga.merge(away_matches_played, how='left', left_index=True, right_index=True)

# Filling NaN values with 0 in case some teams didn't play home or away games
teams_xg_xga['home_matches_played'] = teams_xg_xga['home_matches_played'].fillna(0)
teams_xg_xga['away_matches_played'] = teams_xg_xga['away_matches_played'].fillna(0)

# Calculating the total matches played by adding home and away matches
teams_xg_xga['matches_played'] = teams_xg_xga['home_matches_played'] + teams_xg_xga['away_matches_played']

teams_xg_xga



Unnamed: 0,total_home_xg,total_away_xg,total_home_xga,total_away_xga,total_xg,total_xga,home_matches_played,away_matches_played,matches_played
Arsenal,44.208918,35.240821,14.620855,15.944697,79.449739,30.565552,18,18,36
Aston Villa,37.207586,26.899611,26.789028,34.155214,64.107197,60.944242,18,18,36
Bournemouth,30.784509,29.896567,26.434013,35.548727,60.681076,61.98274,18,18,36
Brentford,33.265252,27.39764,28.078518,27.459331,60.662892,55.537849,18,18,36
Brighton,31.318533,25.380379,24.072274,31.86524,56.698912,55.937514,17,18,35
Burnley,23.25671,18.152825,31.085736,40.050569,41.409535,71.136305,18,18,36
Chelsea,47.35239,27.976734,23.846537,32.981976,75.329124,56.828513,18,17,35
Crystal Palace,29.789882,19.399162,21.384037,34.85185,49.189044,56.235887,18,18,36
Everton,34.108784,24.156378,22.80254,33.777329,58.265162,56.579869,18,18,36
Fulham,29.631866,21.972225,26.284413,35.400109,51.604091,61.684522,18,18,36


In [15]:
#SAME CLEANING OF DATA WE DID FOR GOAL STATS


# Calculate the averages for each specified column
xg_averages = teams_xg_xga.mean()

# Creating a new DataFrame with the averages to append it properly with a label
xg_averages_df = pd.DataFrame([xg_averages], index=['Average'])

# Append the averages row to your original DataFrame
teams_xg_xga_with_averages = pd.concat([teams_xg_xga, xg_averages_df])

teams_xg_xga_with_averages

Unnamed: 0,total_home_xg,total_away_xg,total_home_xga,total_away_xga,total_xg,total_xga,home_matches_played,away_matches_played,matches_played
Arsenal,44.208918,35.240821,14.620855,15.944697,79.449739,30.565552,18.0,18.0,36.0
Aston Villa,37.207586,26.899611,26.789028,34.155214,64.107197,60.944242,18.0,18.0,36.0
Bournemouth,30.784509,29.896567,26.434013,35.548727,60.681076,61.98274,18.0,18.0,36.0
Brentford,33.265252,27.39764,28.078518,27.459331,60.662892,55.537849,18.0,18.0,36.0
Brighton,31.318533,25.380379,24.072274,31.86524,56.698912,55.937514,17.0,18.0,35.0
Burnley,23.25671,18.152825,31.085736,40.050569,41.409535,71.136305,18.0,18.0,36.0
Chelsea,47.35239,27.976734,23.846537,32.981976,75.329124,56.828513,18.0,17.0,35.0
Crystal Palace,29.789882,19.399162,21.384037,34.85185,49.189044,56.235887,18.0,18.0,36.0
Everton,34.108784,24.156378,22.80254,33.777329,58.265162,56.579869,18.0,18.0,36.0
Fulham,29.631866,21.972225,26.284413,35.400109,51.604091,61.684522,18.0,18.0,36.0


In [16]:
#MORE REPETITIVE NONSENSE

import numpy as np

# Create a new DataFrame for per match averages
xg_teams_per_match_averages = pd.DataFrame(index=teams_xg_xga.index)

# Calculate per match averages for home and away xG and xGA
xg_teams_per_match_averages['per_match_home_xg'] = teams_xg_xga['total_home_xg'] / teams_xg_xga['home_matches_played']
xg_teams_per_match_averages['per_match_away_xg'] = teams_xg_xga['total_away_xg'] / teams_xg_xga['away_matches_played']
xg_teams_per_match_averages['per_match_home_xga'] = teams_xg_xga['total_home_xga'] / teams_xg_xga['home_matches_played']
xg_teams_per_match_averages['per_match_away_xga'] = teams_xg_xga['total_away_xga'] / teams_xg_xga['away_matches_played']

# Calculate overall total xG and xGA per match
xg_teams_per_match_averages['total_xg_per_match'] = teams_xg_xga['total_xg'] / teams_xg_xga['matches_played']
xg_teams_per_match_averages['total_xga_per_match'] = teams_xg_xga['total_xga'] / teams_xg_xga['matches_played']

# Replace any potential infinite values with NaN (in case of division by zero) and then fill with 0
xg_teams_per_match_averages.replace([np.inf, -np.inf], np.nan, inplace=True)
xg_teams_per_match_averages.fillna(0, inplace=True)

# Calculate the average for each column
xg_average_stats = xg_teams_per_match_averages.mean()

# Append the 'Average' row with these averages to the DataFrame
xg_teams_per_match_averages.loc['Average'] = xg_average_stats

xg_teams_per_match_averages


# Display the new DataFrame to verify
print(games_list_df)

                                  game_matchup
0                    Bournemouth vs. Brentford
1                 Everton vs. Sheffield United
2                   Fulham vs. Manchester City
3                Newcastle United vs. Brighton
4                Nottingham Forest vs. Chelsea
5                        Tottenham vs. Burnley
6                           West Ham vs. Luton
7   Wolverhampton Wanderers vs. Crystal Palace
8                Manchester United vs. Arsenal
9                    Aston Villa vs. Liverpool
10               Tottenham vs. Manchester City


In [17]:
#LITTLE BIT MORE REPETETITVE NONSENSE

import pandas as pd

# Assuming 'teams_per_match_averages' and 'games_list_df' are already defined

# Initialize an empty list for storing game metrics along with the matchup information
xg_games_metrics_list = []

# Retrieve the 'Average' row for league-wide average stats
xg_avg_stats = xg_teams_per_match_averages.loc['Average']

# Looping through each game to calculate metrics using 'teams_per_match_averages'
for index, row in games_list_df.iterrows():
    # Extract home and away team names
    home_team, away_team = row['game_matchup'].split(' vs. ')
    
    # Retrieve team stats from 'teams_per_match_averages'
    xg_home_team_stats = xg_teams_per_match_averages.loc[home_team]
    xg_away_team_stats = xg_teams_per_match_averages.loc[away_team]
    
    # Calculate the metrics using the per match averages
    xg_home_attack = xg_home_team_stats['per_match_home_xg'] / xg_avg_stats['per_match_home_xg']
    xg_away_defence = xg_away_team_stats['per_match_away_xga'] / xg_avg_stats['per_match_away_xga']
    xg_away_attack = xg_away_team_stats['per_match_away_xg'] / xg_avg_stats['per_match_away_xg']
    xg_home_defence = xg_home_team_stats['per_match_home_xga'] / xg_avg_stats['per_match_home_xga']
    
    # Append the calculated metrics for this game to the list along with the game_matchup
    xg_games_metrics_list.append({
        'Game Matchup': row['game_matchup'],
        'Home Attack': xg_home_attack, 
        'Away Defence': xg_away_defence,
        'Away Attack': xg_away_attack, 
        'Home Defence': xg_home_defence
    })

# Once all games are processed, create the DataFrame from the list
xg_games_metrics_df = pd.DataFrame(xg_games_metrics_list)
# Assuming 'games_metrics_df' already exists and contains the 'Game Matchup' column

# Splitting 'Game Matchup' into 'Home Team' and 'Away Team'
xg_games_metrics_df[['Home Team', 'Away Team']] = xg_games_metrics_df['Game Matchup'].str.split(' vs. ', expand=True)

# Merge home team averages
xg_games_metrics_df = xg_games_metrics_df.merge(
    teams_xg_xga_df[['Team', 'Average Home xG Last 5', 'Average Home xGA Last 5']],
    left_on='Home Team',
    right_on='Team',
    how='left'
).drop(columns=['Team'])  # Dropping the redundant 'Team' column after merge

# Merge away team averages
xg_games_metrics_df = xg_games_metrics_df.merge(
    teams_xg_xga_df[['Team', 'Average Away xG Last 5', 'Average Away xGA Last 5']],
    left_on='Away Team',
    right_on='Team',
    how='left',
    suffixes=('_home', '_away')
).drop(columns=['Team'])  # Dropping the redundant 'Team' column after merge


xg_games_metrics_df


Unnamed: 0,Game Matchup,Home Attack,Away Defence,Away Attack,Home Defence,Home Team,Away Team,Average Home xG Last 5,Average Home xGA Last 5,Average Away xG Last 5,Average Away xGA Last 5
0,Bournemouth vs. Brentford,0.89213,0.796715,1.052144,1.016351,Bournemouth,Brentford,2.082582,1.356384,1.628473,1.433314
1,Everton vs. Sheffield United,0.988466,1.177276,0.585776,0.876726,Everton,Sheffield United,1.471785,1.308707,1.300443,2.796576
2,Fulham vs. Manchester City,0.858726,0.640712,1.56257,1.010599,Fulham,Manchester City,1.383195,1.311212,1.654039,1.484134
3,Newcastle United vs. Brighton,1.431077,0.92455,0.974676,0.901341,Newcastle United,Brighton,3.175466,1.152474,1.012301,1.86887
4,Nottingham Forest vs. Chelsea,0.718535,1.013242,1.137582,0.769512,Nottingham Forest,Chelsea,1.527574,1.131109,1.534112,2.212763
5,Tottenham vs. Burnley,1.161915,1.162042,0.697118,1.121315,Tottenham,Burnley,2.189954,1.194422,1.530362,2.408109
6,West Ham vs. Luton,0.712741,1.526052,0.748969,1.184198,West Ham,Luton,1.383939,1.818785,1.04056,2.517132
7,Wolverhampton Wanderers vs. Crystal Palace,0.716843,1.011204,0.744981,1.176063,Wolverhampton Wanderers,Crystal Palace,1.043676,1.810426,0.907182,1.839198
8,Manchester United vs. Arsenal,0.997788,0.462625,1.353343,1.259634,Manchester United,Arsenal,2.45031,2.09911,2.0652,0.923815
9,Aston Villa vs. Liverpool,1.078269,0.752621,1.282248,1.030001,Aston Villa,Liverpool,1.383567,1.790578,2.189228,1.061992


In [18]:
#BUILDING HOME AND AWAY ATTACK "SCORES" FOR XG NOW USING BOTH SEASONAL AND LAST 5 AVERAGES IN WEIGHTING

import pandas as pd

# Assuming 'teams_per_match_averages', 'games_list_df', and 'teams_xg_xga_df' are already defined

# Retrieve season-long averages
season_avg_home_xg = xg_teams_per_match_averages['per_match_home_xg'].mean()
season_avg_away_xg = xg_teams_per_match_averages['per_match_away_xg'].mean()
season_avg_home_xga = xg_teams_per_match_averages['per_match_home_xga'].mean()
season_avg_away_xga = xg_teams_per_match_averages['per_match_away_xga'].mean()

# Initialize an empty list for storing game metrics along with the matchup information
xg_games_metrics_list = []

# Loop through each game to calculate metrics using 'teams_per_match_averages'
for index, row in games_list_df.iterrows():
    home_team, away_team = row['game_matchup'].split(' vs. ')
    
    # Retrieve team stats from 'teams_per_match_averages'
    xg_home_team_stats = xg_teams_per_match_averages.loc[home_team]
    xg_away_team_stats = xg_teams_per_match_averages.loc[away_team]
    
    # Retrieve recent form from 'teams_xg_xga_df'
    home_recent_xg = teams_xg_xga_df.loc[teams_xg_xga_df['Team'] == home_team, 'Average Home xG Last 5'].values[0]
    away_recent_xg = teams_xg_xga_df.loc[teams_xg_xga_df['Team'] == away_team, 'Average Away xG Last 5'].values[0]
    home_recent_xga = teams_xg_xga_df.loc[teams_xg_xga_df['Team'] == home_team, 'Average Home xGA Last 5'].values[0]
    away_recent_xga = teams_xg_xga_df.loc[teams_xg_xga_df['Team'] == away_team, 'Average Away xGA Last 5'].values[0]
    
    # Compare recent form to season-long averages to get adjustment factors
    xg_home_attack_factor = home_recent_xg / xg_home_team_stats['per_match_home_xg'] if xg_home_team_stats['per_match_home_xg'] != 0 else 1
    xg_away_defence_factor = away_recent_xga / xg_away_team_stats['per_match_away_xga'] if xg_away_team_stats['per_match_away_xga'] != 0 else 1
    xg_away_attack_factor = away_recent_xg / xg_away_team_stats['per_match_away_xg'] if xg_away_team_stats['per_match_away_xg'] != 0 else 1
    xg_home_defence_factor = home_recent_xga / xg_home_team_stats['per_match_home_xga'] if xg_home_team_stats['per_match_home_xga'] != 0 else 1
    
    # Adjust metrics using the factors
    xg_home_attack_adjusted = xg_home_team_stats['per_match_home_xg'] * xg_home_attack_factor / season_avg_home_xg
    xg_away_defence_adjusted = xg_away_team_stats['per_match_away_xga'] * xg_away_defence_factor / season_avg_away_xga
    xg_away_attack_adjusted = xg_away_team_stats['per_match_away_xg'] * xg_away_attack_factor / season_avg_away_xg
    xg_home_defence_adjusted = xg_home_team_stats['per_match_home_xga'] * xg_home_defence_factor / season_avg_home_xga
    
    # Append the adjusted metrics for this game to the list
    xg_games_metrics_list.append({
        'Game Matchup': row['game_matchup'],
        'XG Home Attack': xg_home_attack_adjusted,
        'XG Away Defence': xg_away_defence_adjusted,
        'XG Away Attack': xg_away_attack_adjusted,
        'XG Home Defence': xg_home_defence_adjusted
    })

# Create the DataFrame from the list
xg_games_metrics_df = pd.DataFrame(xg_games_metrics_list)

# Split 'Game Matchup' into 'Home Team' and 'Away Team'
xg_games_metrics_df[['Home Team', 'Away Team']] = xg_games_metrics_df['Game Matchup'].str.split(' vs. ', expand=True)

xg_games_metrics_df


Unnamed: 0,Game Matchup,XG Home Attack,XG Away Defence,XG Away Attack,XG Home Defence,Home Team,Away Team
0,Bournemouth vs. Brentford,1.086351,0.74856,1.12568,0.938719,Bournemouth,Brentford
1,Everton vs. Sheffield United,0.767737,1.460536,0.89893,0.905723,Everton,Sheffield United
2,Fulham vs. Manchester City,0.721525,0.775102,1.143352,0.907457,Fulham,Manchester City
3,Newcastle United vs. Brighton,1.65644,0.976033,0.699752,0.797598,Newcastle United,Brighton
4,Nottingham Forest vs. Chelsea,0.796839,1.155634,1.060453,0.782812,Nottingham Forest,Chelsea
5,Tottenham vs. Burnley,1.14236,1.257655,1.057861,0.826629,Tottenham,Burnley
6,West Ham vs. Luton,0.721913,1.314594,0.719286,1.258735,West Ham,Luton
7,Wolverhampton Wanderers vs. Crystal Palace,0.54442,0.960537,0.627088,1.25295,Wolverhampton Wanderers,Crystal Palace
8,Manchester United vs. Arsenal,1.278172,0.48247,1.427568,1.452741,Manchester United,Arsenal
9,Aston Villa vs. Liverpool,0.72172,0.554634,1.513302,1.239214,Aston Villa,Liverpool


In [19]:
#GENERATE XG PROJECTED LIST

# Initialize an empty list for storing projected goals data
projected_xg_list = []

# Retrieve the league-wide average per match home and away xG
league_avg_per_match_home_xg = xg_teams_per_match_averages.loc['Average', 'per_match_home_xg']
league_avg_per_match_away_xg = xg_teams_per_match_averages.loc['Average', 'per_match_away_xg']

# Iterate over each row in the games_metrics_df to calculate projected goals
for index, row in xg_games_metrics_df.iterrows():
    # Extract home and away team names from the game matchup
    home_team, away_team = row['Game Matchup'].split(' vs. ')

    # Calculate metrics using the teams' attack and defence strengths
    xg_home_attack_adjusted = row['XG Home Attack']
    xg_away_defence_adjusted = row['XG Away Defence']
    xg_away_attack_adjusted = row['XG Away Attack']
    xg_home_defence_adjusted = row['XG Home Defence']

    # Calculate projected home and away goals using the league-wide average per game xG
    projected_home_xg = xg_home_attack_adjusted * xg_away_defence_adjusted * league_avg_per_match_home_xg
    projected_away_xg = xg_away_attack_adjusted * xg_home_defence_adjusted * league_avg_per_match_away_xg

    # Calculate total projected goals
    total_projected_xg = projected_home_xg + projected_away_xg

    # Append the results to the list
    projected_xg_list.append({
        'Game Matchup': row['Game Matchup'],
        'Projected Home Goals': projected_home_xg,
        'Projected Away Goals': projected_away_xg,
        'Total Projected Goals': total_projected_xg
    })

# Convert the list to a DataFrame
projected_xg_df = pd.DataFrame(projected_xg_list)

# Display the DataFrame
projected_xg_df


Unnamed: 0,Game Matchup,Projected Home Goals,Projected Away Goals,Total Projected Goals
0,Bournemouth vs. Brentford,1.558938,1.528679,3.087617
1,Everton vs. Sheffield United,2.149595,1.177842,3.327437
2,Fulham vs. Manchester City,1.072116,1.500969,2.573085
3,Newcastle United vs. Brighton,3.09936,0.807409,3.906769
4,Nottingham Forest vs. Chelsea,1.765317,1.200921,2.966238
5,Tottenham vs. Burnley,2.754207,1.265042,4.019249
6,West Ham vs. Luton,1.819317,1.309789,3.129106
7,Wolverhampton Wanderers vs. Crystal Palace,1.002489,1.136653,2.139143
8,Manchester United vs. Arsenal,1.182201,3.000202,4.182403
9,Aston Villa vs. Liverpool,0.767374,2.712922,3.480296


In [20]:
import soccerdata as sd
understat = sd.Understat(leagues="ENG-Premier League", seasons="2023/2024")
team_match_stats = understat.read_team_match_stats()
team_match_stats.head()

import pandas as pd

# Calculate total home_expected_points for each home_team
home_points = team_match_stats.groupby('home_team')['home_expected_points'].sum().reset_index(name='total_home_expected_points')
# Calculate total away_expected_points for each away_team
away_points = team_match_stats.groupby('away_team')['away_expected_points'].sum().reset_index(name='total_away_expected_points')
# Rename columns for a consistent merge
home_points.rename(columns={'home_team': 'team'}, inplace=True)
away_points.rename(columns={'away_team': 'team'}, inplace=True)

# Merge the two DataFrames on team names
total_points = pd.merge(home_points, away_points, on='team', how='outer').fillna(0)

# Calculate total expected points by summing home and away expected points
total_points['total_expected_points'] = total_points['total_home_expected_points'] + total_points['total_away_expected_points']

# Calculate total home matches for each team
home_matches = team_match_stats.groupby('home_team').size().reset_index(name='home_matches')

# Calculate total away matches for each team
away_matches = team_match_stats.groupby('away_team').size().reset_index(name='away_matches')

# Rename columns for a consistent merge
home_matches.rename(columns={'home_team': 'team'}, inplace=True)
away_matches.rename(columns={'away_team': 'team'}, inplace=True)

# Merge matches data with the total_points DataFrame
total_points = total_points.merge(home_matches, on='team', how='outer').merge(away_matches, on='team', how='outer').fillna(0)
# Calculate total possible points (3 points per match played)
total_points['total_possible_points'] = (total_points['home_matches'] + total_points['away_matches']) * 3

# Calculate the efficiency: points earned vs. possible points
total_points['points_efficiency'] = (total_points['total_expected_points'] / total_points['total_possible_points']) * 100
# Assuming total_points already includes 'home_matches' and 'away_matches' columns

# Calculate total possible home points (3 points per home match)
total_points['total_possible_home_points'] = total_points['home_matches'] * 3

# Calculate total possible away points (3 points per away match)
total_points['total_possible_away_points'] = total_points['away_matches'] * 3

# Calculate home points efficiency: home expected points vs. possible home points
total_points['home_points_efficiency'] = (total_points['total_home_expected_points'] / total_points['total_possible_home_points']) * 100

# Calculate away points efficiency: away expected points vs. possible away points
total_points['away_points_efficiency'] = (total_points['total_away_expected_points'] / total_points['total_possible_away_points']) * 100



In [21]:
# Assuming your DataFrame is named total_points

# Specify the new order of columns
new_column_order = [
    'team',
    'total_home_expected_points', 'total_possible_home_points', 'home_points_efficiency', 'total_away_expected_points', 'total_possible_away_points', 'away_points_efficiency', 'total_expected_points', 
    'total_possible_points', 
    'points_efficiency', 'home_matches', 'away_matches',
]

# Reorder the DataFrame according to the new column order
total_points = total_points[new_column_order]

# Now total_points has the columns organized in the order you specified.
total_points

Unnamed: 0,team,total_home_expected_points,total_possible_home_points,home_points_efficiency,total_away_expected_points,total_possible_away_points,away_points_efficiency,total_expected_points,total_possible_points,points_efficiency,home_matches,away_matches
0,Arsenal,41.9024,54,77.597037,34.7815,54,64.410185,76.6839,108,71.003611,18,18
1,Aston Villa,31.3104,54,57.982222,21.8427,54,40.449444,53.1531,108,49.215833,18,18
2,Bournemouth,28.8913,54,53.502407,22.1425,54,41.00463,51.0338,108,47.253519,18,18
3,Brentford,27.0377,54,50.069815,23.5261,54,43.566852,50.5638,108,46.818333,18,18
4,Brighton,29.1695,51,57.195098,20.9464,54,38.78963,50.1159,105,47.729429,17,18
5,Burnley,20.61,54,38.166667,11.8496,54,21.943704,32.4596,108,30.055185,18,18
6,Chelsea,39.2224,54,72.634074,21.0281,51,41.231569,60.2505,105,57.381429,18,17
7,Crystal Palace,29.7083,54,55.01537,15.8684,54,29.385926,45.5767,108,42.200648,18,18
8,Everton,31.4844,54,58.304444,19.0423,54,35.263519,50.5267,108,46.783981,18,18
9,Fulham,27.6713,54,51.243148,16.6116,54,30.762222,44.2829,108,41.002685,18,18


In [22]:
import pandas as pd

# Assuming team_match_stats is loaded and contains columns for expected points and dates

# Identify all unique teams
teams = pd.concat([team_match_stats['home_team'], team_match_stats['away_team']]).unique()

# Prepare a list to collect each team's average expected points for home and away games
xpts_team_averages = []

# Iterate through each team
for team in teams:
    # Filter home games involving the current team
    team_home_games = team_match_stats[team_match_stats['home_team'] == team]
    # Sort home games by date in descending order and select the last 5
    team_last_5_home_games = team_home_games.sort_values(by='date', ascending=False).head(5)
    
    # Filter away games involving the current team
    team_away_games = team_match_stats[team_match_stats['away_team'] == team]
    # Sort away games by date in descending order and select the last 5
    team_last_5_away_games = team_away_games.sort_values(by='date', ascending=False).head(5)
    
    # Calculate average expected points earned for home games
    team_home_xpts_avg = team_last_5_home_games['home_expected_points'].mean()
    
    # Calculate average expected points earned for away games
    team_away_xpts_avg = team_last_5_away_games['away_expected_points'].mean()
    
    # Append the averages to the list
    xpts_team_averages.append({
        'Team': team,
        'Average Home xPts Last 5': team_home_xpts_avg,
        'Average Away xPts Last 5': team_away_xpts_avg
    })

# Convert the list of averages into a DataFrame
teams_xpts_df = pd.DataFrame(xpts_team_averages)


# Ensure the 'team' columns in both DataFrames are in the same case to match correctly
teams_xpts_df['team'] = teams_xpts_df['Team'].str.lower()
total_points['team'] = total_points['team'].str.lower()
merged_xpts_df = pd.merge(teams_xpts_df, total_points, on='team', how='left')
merged_xpts_df.drop(columns=['team'], inplace=True)

# Display the merged DataFrame
print(merged_xpts_df.head())


          Team  Average Home xPts Last 5  Average Away xPts Last 5  \
0      Burnley                   1.52106                   0.87438   
1      Arsenal                   2.33520                   1.91384   
2  Bournemouth                   1.94864                   1.11478   
3     Brighton                   1.38822                   0.91480   
4      Everton                   1.43920                   0.81898   

   total_home_expected_points  total_possible_home_points  \
0                       20.61                          54   
1                     41.9024                          54   
2                     28.8913                          54   
3                     29.1695                          51   
4                     31.4844                          54   

   home_points_efficiency  total_away_expected_points  \
0               38.166667                     11.8496   
1               77.597037                     34.7815   
2               53.502407                

In [23]:
# Ensure you have pandas imported
import pandas as pd

# Your name_replacements dictionary
name_replacements = {
    'Brighton and Hove Albion': 'Brighton',
    'West Ham United': 'West Ham',
    'Tottenham Hotspur': 'Tottenham'
    # Add any other specific replacements you need here
}

# Apply the replacements to the 'Team' column in merged_xpts_df
# Use .replace() with the dictionary and specify inplace=True to modify the DataFrame in place
merged_xpts_df['Team'].replace(name_replacements, inplace=True)

# If 'Team' is not the column name, replace 'Team' with the correct column name in your DataFrame


In [24]:
import pandas as pd

# Assuming you have already created the following DataFrames:
# 1. games_metrics_df: Contains the original game matchups with xG and goal stats.
# 2. teams_goals_df: Contains the average goals scored and allowed for the last 5 games for each team.
# 3. merged_xpts_df: Contains the average expected points for the last 5 home and away games for each team, merged with other relevant team stats.

# Let's also assume you have these variables defined:
# league_avg_per_match_home_goals, league_avg_per_match_away_goals

# Weighting factors for goals and xG (their sum should be 1)
weight_goals = 0.3
weight_xg = 0.7

# Assuming merged_xpts_df exists and contains the necessary xPts data.

# Initialize additional lists for storing individual projected goals data
xpts_projected_goals_list = []
xpts_projected_xg_list = []
xpts_combined_projected_goals_list = []

# Iterate over each row in the games_metrics_df to calculate individual and combined projected goals
for index, row in games_metrics_df.iterrows():
    game_matchup = row['Game Matchup']
    
    # Extract team names from the game matchup
    home_team, away_team = game_matchup.split(' vs. ')  # Adjust based on your actual game matchup format

    # Find the corresponding xG metrics for the same game matchup
    xg_row = xg_games_metrics_df[xg_games_metrics_df['Game Matchup'] == game_matchup].iloc[0]
    
    # Retrieve the xPts data for both teams
    home_xpts_row = merged_xpts_df[merged_xpts_df['Team'].str.lower() == home_team.lower()].iloc[0]
    away_xpts_row = merged_xpts_df[merged_xpts_df['Team'].str.lower() == away_team.lower()].iloc[0]
    
    # Adjust attack and defense metrics based on xPts data
    home_attack_modifier = 1 + home_xpts_row['Average Home xPts Last 5'] / 100
    away_defense_modifier = 1 + away_xpts_row['Average Away xPts Last 5'] / 100
    away_attack_modifier = 1 + away_xpts_row['Average Away xPts Last 5'] / 100
    home_defense_modifier = 1 + home_xpts_row['Average Home xPts Last 5'] / 100

    # Individual projections
    projected_goals_home = row['Home Attack'] * row['Away Defence'] * league_avg_per_match_home_goals * home_attack_modifier
    projected_goals_away = row['Away Attack'] * row['Home Defence'] * league_avg_per_match_away_goals * away_attack_modifier
    projected_xg_home = xg_row['XG Home Attack'] * xg_row['XG Away Defence'] * league_avg_per_match_home_goals * home_attack_modifier
    projected_xg_away = xg_row['XG Away Attack'] * xg_row['XG Home Defence'] * league_avg_per_match_away_goals * away_attack_modifier
    
    # Combine metrics using the weights for both goals and xG, then adjust based on xPts modifiers
    combined_home_attack = (row['Home Attack'] * weight_goals + xg_row['XG Home Attack'] * weight_xg) * home_attack_modifier
    combined_away_defence = (row['Away Defence'] * weight_goals + xg_row['XG Away Defence'] * weight_xg) * away_defense_modifier
    combined_away_attack = (row['Away Attack'] * weight_goals + xg_row['XG Away Attack'] * weight_xg) * away_attack_modifier
    combined_home_defence = (row['Home Defence'] * weight_goals + xg_row['XG Home Defence'] * weight_xg) * home_defense_modifier
    
    # Calculate combined projected home and away goals
    combined_projected_home_goals = combined_home_attack * combined_away_defence * league_avg_per_match_home_goals
    combined_projected_away_goals = combined_away_attack * combined_home_defence * league_avg_per_match_away_goals

    # Append the results to the lists
    xpts_projected_goals_list.append({
        'Game Matchup': game_matchup,
        'Projected Goals Home': projected_goals_home,
        'Projected Goals Away': projected_goals_away,
        'Total Projected Goals': projected_goals_home + projected_goals_away
    })
    xpts_projected_xg_list.append({
        'Game Matchup': game_matchup,
        'Projected XG Home': projected_xg_home,
        'Projected XG Away': projected_xg_away,
        'Total Projected XG': projected_xg_home + projected_xg_away
    })
    xpts_combined_projected_goals_list.append({
        'Game Matchup': game_matchup,
        'Combined Projected Home Goals': combined_projected_home_goals,
        'Combined Projected Away Goals': combined_projected_away_goals,
        'Combined Total Projected Goals': combined_projected_home_goals + combined_projected_away_goals
    })

# Convert the lists to DataFrames
xpts_projected_goals_df = pd.DataFrame(xpts_projected_goals_list)
xpts_projected_xg_df = pd.DataFrame(xpts_projected_xg_list)
xpts_combined_projected_goals_df = pd.DataFrame(xpts_combined_projected_goals_list)




In [25]:
#SIMULATE RESULTS 1000 TIMES TO GENERATE WIN PROBABILITY PERCENTAGES

import numpy as np

# Number of simulations to run for each game
n_simulations = 10000

# Initialize lists to store the win/draw probabilities
home_win_probs = []
away_win_probs = []
draw_probs = []

for index, row in xpts_combined_projected_goals_df.iterrows():
    home_goals_proj = row['Combined Projected Home Goals']
    away_goals_proj = row['Combined Projected Away Goals']

    # Simulate match outcomes
    home_goals_sim = np.random.poisson(home_goals_proj, n_simulations)
    away_goals_sim = np.random.poisson(away_goals_proj, n_simulations)

    # Calculate outcomes
    home_wins = np.sum(home_goals_sim > away_goals_sim)
    away_wins = np.sum(away_goals_sim > home_goals_sim)
    draws = np.sum(home_goals_sim == away_goals_sim)

    # Calculate probabilities
    home_win_prob = home_wins / n_simulations
    away_win_prob = away_wins / n_simulations
    draw_prob = draws / n_simulations

    # Append probabilities to lists
    home_win_probs.append(home_win_prob)
    away_win_probs.append(away_win_prob)
    draw_probs.append(draw_prob)

# Add the calculated probabilities to the projected_goals_df DataFrame
xpts_combined_projected_goals_df['Home Win Probability'] = home_win_probs
xpts_combined_projected_goals_df['Away Win Probability'] = away_win_probs
xpts_combined_projected_goals_df['Draw Probability'] = draw_probs

# Convert probabilities to percentage format and append '%' sign
xpts_combined_projected_goals_df['Home Win Probability'] = (xpts_combined_projected_goals_df['Home Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
xpts_combined_projected_goals_df['Away Win Probability'] = (xpts_combined_projected_goals_df['Away Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
xpts_combined_projected_goals_df['Draw Probability'] = (xpts_combined_projected_goals_df['Draw Probability'] * 100).apply(lambda x: f'{x:.2f}%')

# Display the updated DataFrame with formatted probabilities
xpts_combined_projected_goals_df



Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Brentford,1.782646,1.63473,3.417376,41.97%,35.41%,22.62%
1,Everton vs. Sheffield United,2.20727,0.987123,3.194393,65.25%,15.91%,18.84%
2,Fulham vs. Manchester City,0.922267,1.634542,2.556809,21.35%,54.32%,24.33%
3,Newcastle United vs. Brighton,3.168963,0.659662,3.828625,86.06%,4.77%,9.17%
4,Nottingham Forest vs. Chelsea,1.69815,1.290488,2.988638,47.31%,29.06%,23.63%
5,Tottenham vs. Burnley,2.413756,1.51409,3.927846,57.34%,23.51%,19.15%
6,West Ham vs. Luton,1.840311,1.348584,3.188895,48.54%,29.04%,22.42%
7,Wolverhampton Wanderers vs. Crystal Palace,0.87783,1.043362,1.921193,30.12%,38.11%,31.77%
8,Manchester United vs. Arsenal,0.931085,3.110917,4.042002,7.89%,80.71%,11.40%
9,Aston Villa vs. Liverpool,0.968103,2.668778,3.636881,11.42%,72.56%,16.02%


In [26]:
import requests
import pandas as pd

# API key and endpoint configuration
api_key = 'fbfe4cd6eb74292626a2d30469872e96'  # Replace 'your_api_key' with the actual API key
url = 'https://api.the-odds-api.com/v4/sports/soccer_epl/odds/'

# Define the date range
start_date = '2024-04-20'
end_date = '2024-04-24'

params = {
    'apiKey': api_key,
    'regions': 'us',
    'markets': 'h2h',
    'bookmakers': 'pinnacle',
    'oddsFormat': 'decimal',
    'from': start_date,
    'to': end_date
}

response = requests.get(url, params=params)
odds_data = response.json() if response.status_code == 200 else []

# Initialize an empty list to hold the structured data for the DataFrame
odds_data_list = []

# Iterate over the fetched odds data
for event in odds_data:
    home_team = event.get('home_team')
    away_team = event.get('away_team')
    start_time = event.get('commence_time')
    home_odds = away_odds = draw_odds = 'N/A'  # Initialize odds as 'N/A'

    # Check if there are bookmakers and markets available
    for bookmaker in event.get('bookmakers', []):
        if bookmaker['key'].lower() == 'pinnacle':
            markets = bookmaker.get('markets', [])
            if markets:
                outcomes = markets[0].get('outcomes', [])
                for outcome in outcomes:
                    if outcome['name'] == home_team:
                        home_odds = outcome.get('price', 'N/A')
                    elif outcome['name'] == away_team:
                        away_odds = outcome.get('price', 'N/A')
                    elif outcome['name'].lower() == 'draw':
                        draw_odds = outcome.get('price', 'N/A')
    
    # Append structured data to the list
    odds_data_list.append({
        'Game Matchup': f"{home_team} vs. {away_team}",
        'Home Odds': home_odds,
        'Away Odds': away_odds,
        'Draw Odds': draw_odds,
        'Start Time': start_time
    })

# Convert the list to a DataFrame
odds_df = pd.DataFrame(odds_data_list)

name_replacements = {
    'Brighton and Hove Albion': 'Brighton',
    'West Ham United': 'West Ham',
    'Tottenham Hotspur': 'Tottenham'
    # Add any other specific replacements you need here
}
import pandas as pd

# Assuming odds_df is already defined

# Apply replacements in the 'Game Matchup' column
for original_name, new_name in name_replacements.items():
    odds_df['Game Matchup'] = odds_df['Game Matchup'].str.replace(original_name, new_name, regex=False)

# Assuming 'projected_goals_df' exists and has a 'Game Matchup' column
xpts_merged_df = pd.merge(xpts_combined_projected_goals_df, odds_df, on='Game Matchup', how='left')

# Now 'merged_df' contains both the projected probabilities and the odds
xpts_merged_df



Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability,Home Odds,Away Odds,Draw Odds,Start Time
0,Bournemouth vs. Brentford,1.782646,1.63473,3.417376,41.97%,35.41%,22.62%,2.0,3.49,4.01,2024-05-11T14:00:00Z
1,Everton vs. Sheffield United,2.20727,0.987123,3.194393,65.25%,15.91%,18.84%,1.42,7.0,5.18,2024-05-11T14:00:00Z
2,Fulham vs. Manchester City,0.922267,1.634542,2.556809,21.35%,54.32%,24.33%,12.84,1.22,7.02,2024-05-11T11:30:00Z
3,Newcastle United vs. Brighton,3.168963,0.659662,3.828625,86.06%,4.77%,9.17%,1.61,4.77,4.88,2024-05-11T14:00:00Z
4,Nottingham Forest vs. Chelsea,1.69815,1.290488,2.988638,47.31%,29.06%,23.63%,3.49,2.03,3.91,2024-05-11T16:30:00Z
5,Tottenham vs. Burnley,2.413756,1.51409,3.927846,57.34%,23.51%,19.15%,1.36,7.06,5.98,2024-05-11T14:00:00Z
6,West Ham vs. Luton,1.840311,1.348584,3.188895,48.54%,29.04%,22.42%,1.83,3.73,4.49,2024-05-11T14:00:00Z
7,Wolverhampton Wanderers vs. Crystal Palace,0.87783,1.043362,1.921193,30.12%,38.11%,31.77%,2.88,2.49,3.51,2024-05-11T14:00:00Z
8,Manchester United vs. Arsenal,0.931085,3.110917,4.042002,7.89%,80.71%,11.40%,6.9,1.4,5.55,2024-05-12T15:30:00Z
9,Aston Villa vs. Liverpool,0.968103,2.668778,3.636881,11.42%,72.56%,16.02%,4.01,1.73,4.8,2024-05-13T19:00:00Z


In [27]:
# Drop rows where 'Home Odds' column has NaN values
xpts_merged_df = xpts_merged_df.dropna(subset=['Home Odds'])


In [28]:
xpts_merged_df

Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability,Home Odds,Away Odds,Draw Odds,Start Time
0,Bournemouth vs. Brentford,1.782646,1.63473,3.417376,41.97%,35.41%,22.62%,2.0,3.49,4.01,2024-05-11T14:00:00Z
1,Everton vs. Sheffield United,2.20727,0.987123,3.194393,65.25%,15.91%,18.84%,1.42,7.0,5.18,2024-05-11T14:00:00Z
2,Fulham vs. Manchester City,0.922267,1.634542,2.556809,21.35%,54.32%,24.33%,12.84,1.22,7.02,2024-05-11T11:30:00Z
3,Newcastle United vs. Brighton,3.168963,0.659662,3.828625,86.06%,4.77%,9.17%,1.61,4.77,4.88,2024-05-11T14:00:00Z
4,Nottingham Forest vs. Chelsea,1.69815,1.290488,2.988638,47.31%,29.06%,23.63%,3.49,2.03,3.91,2024-05-11T16:30:00Z
5,Tottenham vs. Burnley,2.413756,1.51409,3.927846,57.34%,23.51%,19.15%,1.36,7.06,5.98,2024-05-11T14:00:00Z
6,West Ham vs. Luton,1.840311,1.348584,3.188895,48.54%,29.04%,22.42%,1.83,3.73,4.49,2024-05-11T14:00:00Z
7,Wolverhampton Wanderers vs. Crystal Palace,0.87783,1.043362,1.921193,30.12%,38.11%,31.77%,2.88,2.49,3.51,2024-05-11T14:00:00Z
8,Manchester United vs. Arsenal,0.931085,3.110917,4.042002,7.89%,80.71%,11.40%,6.9,1.4,5.55,2024-05-12T15:30:00Z
9,Aston Villa vs. Liverpool,0.968103,2.668778,3.636881,11.42%,72.56%,16.02%,4.01,1.73,4.8,2024-05-13T19:00:00Z


In [29]:
#CREATING IMPLIED ODDS BASED OFF THE WIN PROBABILITY

# Convert percentage strings to decimal probabilities
xpts_merged_df['Home Win Probability'] = xpts_merged_df['Home Win Probability'].str.rstrip('%').astype(float) / 100
xpts_merged_df['Away Win Probability'] = xpts_merged_df['Away Win Probability'].str.rstrip('%').astype(float) / 100
xpts_merged_df['Draw Probability'] = xpts_merged_df['Draw Probability'].str.rstrip('%').astype(float) / 100
# Calculate implied odds
xpts_merged_df['Implied Home Odds'] = 1 / xpts_merged_df['Home Win Probability']
xpts_merged_df['Implied Away Odds'] = 1 / xpts_merged_df['Away Win Probability']
xpts_merged_df['Implied Draw Odds'] = 1 / xpts_merged_df['Draw Probability']
# Create a new DataFrame with implied odds
xpts_implied_odds_df = xpts_merged_df[['Game Matchup', 'Implied Home Odds', 'Implied Away Odds', 'Implied Draw Odds', 'Home Odds', 'Away Odds', 'Draw Odds']].copy()


In [30]:
#CALCULATING EDGES BY COMPARING IMPLIED ODDS VS REAL ODDS

# Calculate edge for home win
xpts_implied_odds_df['Home Win Edge'] = round((1 / xpts_implied_odds_df['Implied Home Odds']) * xpts_implied_odds_df['Home Odds'] - 1, 2)

# Calculate edge for away win
xpts_implied_odds_df['Away Win Edge'] = round((1 / xpts_implied_odds_df['Implied Away Odds']) * xpts_implied_odds_df['Away Odds'] - 1, 2)

# Calculate edge for draw
xpts_implied_odds_df['Draw Edge'] = round((1 / xpts_implied_odds_df['Implied Draw Odds']) * xpts_implied_odds_df['Draw Odds'] - 1, 2)

# Round implied odds to 2 decimal points
xpts_implied_odds_df['Implied Home Odds'] = round(xpts_implied_odds_df['Implied Home Odds'], 2)
xpts_implied_odds_df['Implied Away Odds'] = round(xpts_implied_odds_df['Implied Away Odds'], 2)
xpts_implied_odds_df['Implied Draw Odds'] = round(xpts_implied_odds_df['Implied Draw Odds'], 2)

xpts_implied_odds_df


Unnamed: 0,Game Matchup,Implied Home Odds,Implied Away Odds,Implied Draw Odds,Home Odds,Away Odds,Draw Odds,Home Win Edge,Away Win Edge,Draw Edge
0,Bournemouth vs. Brentford,2.38,2.82,4.42,2.0,3.49,4.01,-0.16,0.24,-0.09
1,Everton vs. Sheffield United,1.53,6.29,5.31,1.42,7.0,5.18,-0.07,0.11,-0.02
2,Fulham vs. Manchester City,4.68,1.84,4.11,12.84,1.22,7.02,1.74,-0.34,0.71
3,Newcastle United vs. Brighton,1.16,20.96,10.91,1.61,4.77,4.88,0.39,-0.77,-0.55
4,Nottingham Forest vs. Chelsea,2.11,3.44,4.23,3.49,2.03,3.91,0.65,-0.41,-0.08
5,Tottenham vs. Burnley,1.74,4.25,5.22,1.36,7.06,5.98,-0.22,0.66,0.15
6,West Ham vs. Luton,2.06,3.44,4.46,1.83,3.73,4.49,-0.11,0.08,0.01
7,Wolverhampton Wanderers vs. Crystal Palace,3.32,2.62,3.15,2.88,2.49,3.51,-0.13,-0.05,0.12
8,Manchester United vs. Arsenal,12.67,1.24,8.77,6.9,1.4,5.55,-0.46,0.13,-0.37
9,Aston Villa vs. Liverpool,8.76,1.38,6.24,4.01,1.73,4.8,-0.54,0.26,-0.23


In [31]:
#MERGING ODDS DATA BACK WITH ORIGINAL DATAFRAME FOR PROJECTIONS

# Merge the implied odds DataFrame with the projected goals DataFrame on the 'Game Matchup' column
xpts_implied_odds_with_goals = xpts_implied_odds_df.merge(xpts_combined_projected_goals_df, on='Game Matchup', how='left')

# Display the DataFrame with projected goals added
xpts_implied_odds_with_goals

Unnamed: 0,Game Matchup,Implied Home Odds,Implied Away Odds,Implied Draw Odds,Home Odds,Away Odds,Draw Odds,Home Win Edge,Away Win Edge,Draw Edge,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Brentford,2.38,2.82,4.42,2.0,3.49,4.01,-0.16,0.24,-0.09,1.782646,1.63473,3.417376,41.97%,35.41%,22.62%
1,Everton vs. Sheffield United,1.53,6.29,5.31,1.42,7.0,5.18,-0.07,0.11,-0.02,2.20727,0.987123,3.194393,65.25%,15.91%,18.84%
2,Fulham vs. Manchester City,4.68,1.84,4.11,12.84,1.22,7.02,1.74,-0.34,0.71,0.922267,1.634542,2.556809,21.35%,54.32%,24.33%
3,Newcastle United vs. Brighton,1.16,20.96,10.91,1.61,4.77,4.88,0.39,-0.77,-0.55,3.168963,0.659662,3.828625,86.06%,4.77%,9.17%
4,Nottingham Forest vs. Chelsea,2.11,3.44,4.23,3.49,2.03,3.91,0.65,-0.41,-0.08,1.69815,1.290488,2.988638,47.31%,29.06%,23.63%
5,Tottenham vs. Burnley,1.74,4.25,5.22,1.36,7.06,5.98,-0.22,0.66,0.15,2.413756,1.51409,3.927846,57.34%,23.51%,19.15%
6,West Ham vs. Luton,2.06,3.44,4.46,1.83,3.73,4.49,-0.11,0.08,0.01,1.840311,1.348584,3.188895,48.54%,29.04%,22.42%
7,Wolverhampton Wanderers vs. Crystal Palace,3.32,2.62,3.15,2.88,2.49,3.51,-0.13,-0.05,0.12,0.87783,1.043362,1.921193,30.12%,38.11%,31.77%
8,Manchester United vs. Arsenal,12.67,1.24,8.77,6.9,1.4,5.55,-0.46,0.13,-0.37,0.931085,3.110917,4.042002,7.89%,80.71%,11.40%
9,Aston Villa vs. Liverpool,8.76,1.38,6.24,4.01,1.73,4.8,-0.54,0.26,-0.23,0.968103,2.668778,3.636881,11.42%,72.56%,16.02%


In [32]:
#MAKING THE ODDS AMERICAN FORMAT

def decimal_to_american(decimal_odds):
    """
    Convert decimal odds to American odds, adding a '+' sign for positive odds.
    
    Parameters:
    - decimal_odds: float representing the decimal odds.
    
    Returns:
    - str: The American odds as a string, with '+' for positive odds.
    """
    if decimal_odds >= 2.00:
        american_odds = int((decimal_odds - 1) * 100)
    else:
        american_odds = int(-100 / (decimal_odds - 1))
    
    # Add '+' for positive odds
    if american_odds > 0:
        return f'+{american_odds}'
    else:
        return str(american_odds)

# Example usage on a DataFrame column
xpts_implied_odds_with_goals['Implied Home Odds American'] = xpts_implied_odds_with_goals['Implied Home Odds'].apply(decimal_to_american)
xpts_implied_odds_with_goals['Implied Away Odds American'] = xpts_implied_odds_with_goals['Implied Away Odds'].apply(decimal_to_american)
xpts_implied_odds_with_goals['Implied Draw Odds American'] = xpts_implied_odds_with_goals['Implied Draw Odds'].apply(decimal_to_american)
xpts_implied_odds_with_goals['Draw Odds American'] = xpts_implied_odds_with_goals['Draw Odds'].apply(decimal_to_american)
xpts_implied_odds_with_goals['Home Odds American'] = xpts_implied_odds_with_goals['Home Odds'].apply(decimal_to_american)
xpts_implied_odds_with_goals['Away Odds American'] = xpts_implied_odds_with_goals['Away Odds'].apply(decimal_to_american)
# Optionally, display the DataFrame to verify the conversion
xpts_implied_odds_with_goals[['Implied Home Odds', 'Implied Home Odds American', 
        'Implied Away Odds', 'Implied Away Odds American', 
        'Implied Draw Odds', 'Implied Draw Odds American']]


Unnamed: 0,Implied Home Odds,Implied Home Odds American,Implied Away Odds,Implied Away Odds American,Implied Draw Odds,Implied Draw Odds American
0,2.38,138,2.82,181,4.42,342
1,1.53,-188,6.29,529,5.31,430
2,4.68,368,1.84,-119,4.11,311
3,1.16,-625,20.96,1996,10.91,991
4,2.11,110,3.44,244,4.23,323
5,1.74,-135,4.25,325,5.22,422
6,2.06,106,3.44,244,4.46,346
7,3.32,231,2.62,162,3.15,215
8,12.67,1167,1.24,-416,8.77,777
9,8.76,776,1.38,-263,6.24,524


In [33]:
#SETTING UP AND CLEANING NEW DF

# Create a new DataFrame by copying the original DataFrame
xpts_new_df = xpts_implied_odds_with_goals.copy()

# Replace decimal odds columns with their American odds counterparts
xpts_new_df['Implied Home Odds'] = xpts_new_df['Implied Home Odds American']
xpts_new_df['Implied Away Odds'] = xpts_new_df['Implied Away Odds American']
xpts_new_df['Implied Draw Odds'] = xpts_new_df['Implied Draw Odds American']
xpts_new_df['Draw Odds'] = xpts_new_df['Draw Odds American']
xpts_new_df['Home Odds'] = xpts_new_df['Home Odds American']
xpts_new_df['Away Odds'] = xpts_new_df['Away Odds American']

# Drop the American odds columns as they've been swapped in, if desired
xpts_new_df.drop(columns=['Implied Home Odds American', 'Implied Away Odds American', 
                     'Implied Draw Odds American', 'Draw Odds American', 
                     'Home Odds American', 'Away Odds American'], inplace=True)

# Optionally, rename columns back to original if preferred, for clarity
# This step is optional and may be desired for presentation or further processing
column_renames = {
    'Implied Home Odds': 'Implied Home Odds',
    'Implied Away Odds': 'Implied Away Odds',
    'Implied Draw Odds': 'Implied Draw Odds',
    'Home Odds': 'Home Odds',
    'Away Odds': 'Away Odds',
    'Draw Odds': 'Draw Odds',
}
xpts_new_df.rename(columns=column_renames, inplace=True)

# Define the new column order
new_column_order = [
    'Game Matchup',
    'Combined Projected Home Goals',
    'Combined Projected Away Goals',
    'Combined Total Projected Goals',
    'Home Win Probability',
    'Away Win Probability',
    'Draw Probability',
    'Implied Home Odds',
    'Implied Away Odds',
    'Implied Draw Odds',
    'Home Odds',
    'Away Odds',
    'Draw Odds',
    'Home Win Edge',
    'Away Win Edge',
    'Draw Edge'
]

# Reorder the DataFrame according to the new column order
xpts_new_df = xpts_new_df[new_column_order]

# Round the "Combined Total Projected Goals" column to two decimal places
xpts_new_df['Combined Total Projected Goals'] = xpts_new_df['Combined Total Projected Goals'].round(2)
xpts_new_df['Combined Projected Home Goals'] = xpts_new_df['Combined Projected Home Goals'].round(2)
xpts_new_df['Combined Projected Away Goals'] = xpts_new_df['Combined Projected Away Goals'].round(2)
xpts_new_df['League'] = 'Premier League'

# Display the DataFrame to verify the changes
print(xpts_new_df)


# Now new_df contains the American odds in place of the decimal odds,
# with all other columns unchanged.


                                  Game Matchup  Combined Projected Home Goals  \
0                    Bournemouth vs. Brentford                           1.78   
1                 Everton vs. Sheffield United                           2.21   
2                   Fulham vs. Manchester City                           0.92   
3                Newcastle United vs. Brighton                           3.17   
4                Nottingham Forest vs. Chelsea                           1.70   
5                        Tottenham vs. Burnley                           2.41   
6                           West Ham vs. Luton                           1.84   
7   Wolverhampton Wanderers vs. Crystal Palace                           0.88   
8                Manchester United vs. Arsenal                           0.93   
9                    Aston Villa vs. Liverpool                           0.97   
10               Tottenham vs. Manchester City                           1.40   

    Combined Projected Away

In [34]:
# Save the DataFrame to a CSV file
xpts_new_df.to_csv('xpts_epl_odds_5-11.csv', index=False)


In [32]:
xpts_projected_goals_df

Unnamed: 0,Game Matchup,Projected Goals Home,Projected Goals Away,Total Projected Goals
0,Luton vs. Everton,1.889723,0.986984,2.876706
1,Arsenal vs. Bournemouth,2.075651,0.7735,2.849151
2,Brentford vs. Fulham,1.220452,1.742686,2.963138
3,Burnley vs. Newcastle United,0.995888,1.922125,2.918013
4,Manchester City vs. Wolverhampton Wanderers,2.677311,0.413562,3.090873
5,Sheffield United vs. Nottingham Forest,1.487721,2.216288,3.704009
6,Brighton vs. Aston Villa,1.084258,2.228001,3.312259
7,Chelsea vs. West Ham,5.048243,1.916153,6.964395
8,Liverpool vs. Tottenham,2.283139,0.689057,2.972196
9,Crystal Palace vs. Manchester United,3.259515,1.731362,4.990877


In [33]:
xpts_projected_xg_df

Unnamed: 0,Game Matchup,Projected XG Home,Projected XG Away,Total Projected XG
0,Luton vs. Everton,1.602664,2.005883,3.608547
1,Arsenal vs. Bournemouth,1.829232,0.914642,2.743875
2,Brentford vs. Fulham,1.508482,2.304145,3.812628
3,Burnley vs. Newcastle United,1.172843,0.576859,1.749701
4,Manchester City vs. Wolverhampton Wanderers,2.77673,0.571491,3.348221
5,Sheffield United vs. Nottingham Forest,0.88386,2.259134,3.142994
6,Brighton vs. Aston Villa,1.12995,1.368044,2.497994
7,Chelsea vs. West Ham,4.723247,1.162415,5.885662
8,Liverpool vs. Tottenham,3.169777,1.233736,4.403513
9,Crystal Palace vs. Manchester United,3.322228,1.168304,4.490533


In [34]:
xpts_combined_projected_goals_df

Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Luton vs. Everton,1.696018,1.712949,3.408967,39.04%,38.41%,22.55%
1,Arsenal vs. Bournemouth,1.941869,0.894344,2.836213,61.99%,16.65%,21.36%
2,Brentford vs. Fulham,1.454415,2.157711,3.612126,26.25%,53.41%,20.34%
3,Burnley vs. Newcastle United,1.1859,0.923265,2.109165,42.67%,28.47%,28.86%
4,Manchester City vs. Wolverhampton Wanderers,2.780039,0.536243,3.316282,84.34%,4.77%,10.89%
5,Sheffield United vs. Nottingham Forest,1.081158,2.516482,3.597639,14.86%,67.75%,17.39%
6,Brighton vs. Aston Villa,1.13937,1.626205,2.765574,26.86%,49.07%,24.07%
7,Chelsea vs. West Ham,4.855961,1.418559,6.27452,88.74%,5.40%,5.86%
8,Liverpool vs. Tottenham,2.936181,1.084586,4.020767,74.66%,10.87%,14.47%
9,Crystal Palace vs. Manchester United,3.340045,1.356808,4.696853,75.57%,11.53%,12.90%


In [37]:
#GENERATING PROJECTED GOALS LIST NOW USING BOTH SETS OF STATS, 70% WEIGHT ON XG STATS AND 30% ON GOALS

# Weighting factors
weight_goals = 0.3
weight_xg = 0.7

# Extension to include separate projections for goals and xG

# Initialize additional lists for storing individual projected goals data
combined_projected_goals_list = []
projected_goals_list = []
projected_xg_list = []

# Iterate over each row in the games_metrics_df to calculate individual and combined projected goals
for index, row in games_metrics_df.iterrows():
    game_matchup = row['Game Matchup']
    
    # Find the corresponding xG metrics for the same game matchup
    xg_row = xg_games_metrics_df[xg_games_metrics_df['Game Matchup'] == game_matchup].iloc[0]
    
    # Individual projections
    projected_goals_home = row['Home Attack'] * row['Away Defence'] * league_avg_per_match_home_goals
    projected_goals_away = row['Away Attack'] * row['Home Defence'] * league_avg_per_match_away_goals
    projected_xg_home = xg_row['XG Home Attack'] * xg_row['XG Away Defence'] * league_avg_per_match_home_goals
    projected_xg_away = xg_row['XG Away Attack'] * xg_row['XG Home Defence'] * league_avg_per_match_away_goals
    
    # Combine metrics using the weights for both goals and xG
    combined_home_attack = (row['Home Attack'] * weight_goals) + (xg_row['XG Home Attack'] * weight_xg)
    combined_away_defence = (row['Away Defence'] * weight_goals) + (xg_row['XG Away Defence'] * weight_xg)
    combined_away_attack = (row['Away Attack'] * weight_goals) + (xg_row['XG Away Attack'] * weight_xg)
    combined_home_defence = (row['Home Defence'] * weight_goals) + (xg_row['XG Home Defence'] * weight_xg)
    
    # Calculate combined projected home and away goals
    combined_projected_home_goals = combined_home_attack * combined_away_defence * league_avg_per_match_home_goals
    combined_projected_away_goals = combined_away_attack * combined_home_defence * league_avg_per_match_away_goals

    # Calculate total projected goals for individual and combined projections
    total_projected_goals_combined = combined_projected_home_goals + combined_projected_away_goals
    total_projected_goals = projected_goals_home + projected_goals_away
    total_projected_xg = projected_xg_home + projected_xg_away

    # Append the results to the lists
    projected_goals_list.append({'Game Matchup': game_matchup, 'Projected Goals Home': projected_goals_home, 'Projected Goals Away': projected_goals_away, 'Total Projected Goals': total_projected_goals})
    projected_xg_list.append({'Game Matchup': game_matchup, 'Projected XG Home': projected_xg_home, 'Projected XG Away': projected_xg_away, 'Total Projected XG': total_projected_xg})
    combined_projected_goals_list.append({'Game Matchup': game_matchup, 'Combined Projected Home Goals': combined_projected_home_goals, 'Combined Projected Away Goals': combined_projected_away_goals, 'Combined Total Projected Goals': total_projected_goals_combined})

# Convert the lists to DataFrames
projected_goals_df = pd.DataFrame(projected_goals_list)
projected_xg_df = pd.DataFrame(projected_xg_list)
combined_projected_goals_df = pd.DataFrame(combined_projected_goals_list)



In [38]:
#SIMULATE RESULTS 1000 TIMES TO GENERATE WIN PROBABILITY PERCENTAGES

import numpy as np

# Number of simulations to run for each game
n_simulations = 10000

# Initialize lists to store the win/draw probabilities
home_win_probs = []
away_win_probs = []
draw_probs = []

for index, row in combined_projected_goals_df.iterrows():
    home_goals_proj = row['Combined Projected Home Goals']
    away_goals_proj = row['Combined Projected Away Goals']

    # Simulate match outcomes
    home_goals_sim = np.random.poisson(home_goals_proj, n_simulations)
    away_goals_sim = np.random.poisson(away_goals_proj, n_simulations)

    # Calculate outcomes
    home_wins = np.sum(home_goals_sim > away_goals_sim)
    away_wins = np.sum(away_goals_sim > home_goals_sim)
    draws = np.sum(home_goals_sim == away_goals_sim)

    # Calculate probabilities
    home_win_prob = home_wins / n_simulations
    away_win_prob = away_wins / n_simulations
    draw_prob = draws / n_simulations

    # Append probabilities to lists
    home_win_probs.append(home_win_prob)
    away_win_probs.append(away_win_prob)
    draw_probs.append(draw_prob)

# Add the calculated probabilities to the projected_goals_df DataFrame
combined_projected_goals_df['Home Win Probability'] = home_win_probs
combined_projected_goals_df['Away Win Probability'] = away_win_probs
combined_projected_goals_df['Draw Probability'] = draw_probs

# Convert probabilities to percentage format and append '%' sign
combined_projected_goals_df['Home Win Probability'] = (combined_projected_goals_df['Home Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
combined_projected_goals_df['Away Win Probability'] = (combined_projected_goals_df['Away Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
combined_projected_goals_df['Draw Probability'] = (combined_projected_goals_df['Draw Probability'] * 100).apply(lambda x: f'{x:.2f}%')

# Display the updated DataFrame with formatted probabilities
combined_projected_goals_df



Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Manchester United,2.552363,1.841363,4.393726,53.45%,27.74%,18.81%
1,Brentford vs. Sheffield United,1.607848,1.802242,3.410089,34.81%,42.38%,22.81%
2,Burnley vs. Brighton,0.960513,1.744156,2.704668,21.35%,55.85%,22.80%
3,Manchester City vs. Luton,3.23151,0.610372,3.841882,87.05%,3.77%,9.18%
4,Newcastle United vs. Tottenham,2.65293,1.99889,4.65182,51.38%,30.86%,17.76%
5,Nottingham Forest vs. Wolverhampton Wanderers,1.478228,0.986145,2.464372,48.93%,25.03%,26.04%
6,Arsenal vs. Aston Villa,2.635808,0.63978,3.275588,79.48%,7.21%,13.31%
7,Liverpool vs. Crystal Palace,2.76939,0.374017,3.143406,86.89%,2.97%,10.14%
8,West Ham vs. Fulham,1.342045,2.132862,3.474907,23.50%,55.72%,20.78%
9,Chelsea vs. Everton,2.642745,1.039042,3.681787,71.82%,12.56%,15.62%


In [39]:
projected_goals_df

Unnamed: 0,Game Matchup,Projected Goals Home,Projected Goals Away,Total Projected Goals
0,Bournemouth vs. Manchester United,2.038297,1.716896,3.755193
1,Brentford vs. Sheffield United,1.132387,2.180185,3.312573
2,Burnley vs. Brighton,0.792671,2.098428,2.8911
3,Manchester City vs. Luton,2.649786,0.490542,3.140328
4,Newcastle United vs. Tottenham,2.536548,2.452709,4.989256
5,Nottingham Forest vs. Wolverhampton Wanderers,1.630638,1.144597,2.775235
6,Arsenal vs. Aston Villa,2.898911,0.981083,3.879995
7,Liverpool vs. Crystal Palace,2.944207,0.545046,3.489253
8,West Ham vs. Fulham,1.993002,2.94325,4.936252
9,Chelsea vs. Everton,2.174184,0.899326,3.07351


In [40]:
projected_xg_df

Unnamed: 0,Game Matchup,Projected XG Home,Projected XG Away,Total Projected XG
0,Bournemouth vs. Manchester United,2.785685,1.894337,4.680022
1,Brentford vs. Sheffield United,1.809814,1.643821,3.453635
2,Burnley vs. Brighton,1.034418,1.567334,2.601752
3,Manchester City vs. Luton,3.471223,0.658136,4.129359
4,Newcastle United vs. Tottenham,2.700753,1.816562,4.517315
5,Nottingham Forest vs. Wolverhampton Wanderers,1.412235,0.910153,2.322388
6,Arsenal vs. Aston Villa,2.457262,0.515612,2.972874
7,Liverpool vs. Crystal Palace,2.685916,0.304742,2.990657
8,West Ham vs. Fulham,1.099293,1.821125,2.920419
9,Chelsea vs. Everton,2.8563,1.04182,3.89812


In [41]:
combined_projected_goals_df

Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Manchester United,2.552363,1.841363,4.393726,53.45%,27.74%,18.81%
1,Brentford vs. Sheffield United,1.607848,1.802242,3.410089,34.81%,42.38%,22.81%
2,Burnley vs. Brighton,0.960513,1.744156,2.704668,21.35%,55.85%,22.80%
3,Manchester City vs. Luton,3.23151,0.610372,3.841882,87.05%,3.77%,9.18%
4,Newcastle United vs. Tottenham,2.65293,1.99889,4.65182,51.38%,30.86%,17.76%
5,Nottingham Forest vs. Wolverhampton Wanderers,1.478228,0.986145,2.464372,48.93%,25.03%,26.04%
6,Arsenal vs. Aston Villa,2.635808,0.63978,3.275588,79.48%,7.21%,13.31%
7,Liverpool vs. Crystal Palace,2.76939,0.374017,3.143406,86.89%,2.97%,10.14%
8,West Ham vs. Fulham,1.342045,2.132862,3.474907,23.50%,55.72%,20.78%
9,Chelsea vs. Everton,2.642745,1.039042,3.681787,71.82%,12.56%,15.62%


In [42]:
#SIMULATE RESULTS 1000 TIMES TO GENERATE WIN PROBABILITY PERCENTAGES

import numpy as np

# Number of simulations to run for each game
n_simulations = 10000

# Initialize lists to store the win/draw probabilities
home_win_probs = []
away_win_probs = []
draw_probs = []

for index, row in combined_projected_goals_df.iterrows():
    home_goals_proj = row['Combined Projected Home Goals']
    away_goals_proj = row['Combined Projected Away Goals']

    # Simulate match outcomes
    home_goals_sim = np.random.poisson(home_goals_proj, n_simulations)
    away_goals_sim = np.random.poisson(away_goals_proj, n_simulations)

    # Calculate outcomes
    home_wins = np.sum(home_goals_sim > away_goals_sim)
    away_wins = np.sum(away_goals_sim > home_goals_sim)
    draws = np.sum(home_goals_sim == away_goals_sim)

    # Calculate probabilities
    home_win_prob = home_wins / n_simulations
    away_win_prob = away_wins / n_simulations
    draw_prob = draws / n_simulations

    # Append probabilities to lists
    home_win_probs.append(home_win_prob)
    away_win_probs.append(away_win_prob)
    draw_probs.append(draw_prob)

# Add the calculated probabilities to the projected_goals_df DataFrame
combined_projected_goals_df['Home Win Probability'] = home_win_probs
combined_projected_goals_df['Away Win Probability'] = away_win_probs
combined_projected_goals_df['Draw Probability'] = draw_probs

# Convert probabilities to percentage format and append '%' sign
combined_projected_goals_df['Home Win Probability'] = (combined_projected_goals_df['Home Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
combined_projected_goals_df['Away Win Probability'] = (combined_projected_goals_df['Away Win Probability'] * 100).apply(lambda x: f'{x:.2f}%')
combined_projected_goals_df['Draw Probability'] = (combined_projected_goals_df['Draw Probability'] * 100).apply(lambda x: f'{x:.2f}%')

# Display the updated DataFrame with formatted probabilities
combined_projected_goals_df



Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Manchester United,2.552363,1.841363,4.393726,53.91%,27.55%,18.54%
1,Brentford vs. Sheffield United,1.607848,1.802242,3.410089,34.65%,42.83%,22.52%
2,Burnley vs. Brighton,0.960513,1.744156,2.704668,20.19%,55.64%,24.17%
3,Manchester City vs. Luton,3.23151,0.610372,3.841882,87.27%,3.79%,8.94%
4,Newcastle United vs. Tottenham,2.65293,1.99889,4.65182,52.25%,29.72%,18.03%
5,Nottingham Forest vs. Wolverhampton Wanderers,1.478228,0.986145,2.464372,48.60%,24.54%,26.86%
6,Arsenal vs. Aston Villa,2.635808,0.63978,3.275588,80.29%,6.48%,13.23%
7,Liverpool vs. Crystal Palace,2.76939,0.374017,3.143406,86.44%,3.10%,10.46%
8,West Ham vs. Fulham,1.342045,2.132862,3.474907,23.40%,55.66%,20.94%
9,Chelsea vs. Everton,2.642745,1.039042,3.681787,71.53%,12.66%,15.81%


In [43]:
#LOADING IN ODDS DATA FOR LIST OF GAMES IN GAMES LIST DF

import requests
# An api key is emailed to you when you sign up to a plan
# Get a free API key at https://api.the-odds-api.com/
API_KEY = '6ea2753ab77393e63fe86bb67324fc40'

SPORT = 'soccer' # use the sport_key from the /sports endpoint below, or use 'upcoming' to see the next 8 games across all sports

REGIONS = 'us' # uk | us | eu | au. Multiple can be specified if comma delimited

MARKETS = 'h2h,spreads' # h2h | spreads | totals. Multiple can be specified if comma delimited

ODDS_FORMAT = 'decimal' # decimal | american

DATE_FORMAT = 'iso' # iso | unix

import requests

# Your API key for the odds API
api_key = '6ea2753ab77393e63fe86bb67324fc40'

# Define the endpoint URL
url = 'https://api.the-odds-api.com/v4/sports/soccer_epl/odds/'

# Parameters for the API request
params = {
    'apiKey': api_key,
    'regions': 'us',  # Adjust if targeting a different region
    'markets': 'h2h',  # Head-to-head odds; adjust if looking for different market types
    'bookmakers': 'bovada',
    'oddsFormat': 'decimal'
}

# Make the GET request
response = requests.get(url, params=params)

# Check if the request was successful
if response.status_code == 200:
    # Convert the response to JSON
    odds_data = response.json()
    # Process your data here
    print(odds_data)
else:
    print(f"Error fetching data: {response.status_code}")

# Initialize an empty list to hold the simplified odds data
simplified_odds_list = []

# Iterate over the odds data
for event in odds_data:
    # Extract the essential information
    event_name = event.get('sport_title', 'No sport title')
    home_team = event.get('home_team', 'No home team')
    away_team = event.get('away_team', 'No away team')
    start_time = event.get('commence_time', 'No start time')
    
    # Initialize default odds
    home_team_odds = 'N/A'
    away_team_odds = 'N/A'
    draw_odds = 'N/A'  # Initialize draw odds

    # If there are bookmakers data available
    if event.get('bookmakers'):
        # Look for the Bovada bookmaker in the list
        for bookmaker in event['bookmakers']:
            if bookmaker['key'].lower() == 'bovada':  # Ensure case-insensitive matching
                # Assuming 'markets' is a list and we're interested in the first item (h2h)
                markets = bookmaker.get('markets', [])
                if markets:
                    # Assuming the structure of 'outcomes' in the first market
                    outcomes = markets[0].get('outcomes', [])
                    for outcome in outcomes:
                        # Assign odds based on the team or if it's a draw
                        if outcome['name'] == home_team:
                            home_team_odds = outcome.get('price', 'N/A')
                        elif outcome['name'] == away_team:
                            away_team_odds = outcome.get('price', 'N/A')
                        elif outcome['name'].lower() == 'draw':  # Checking for a draw
                            draw_odds = outcome.get('price', 'N/A')
                
                # Once we've found Bovada and extracted the odds, no need to check other bookmakers
                break

    # Create a simplified representation of the event including draw odds
    simplified_event = f"{event_name}: {home_team} vs. {away_team}, Date: {start_time}, Home Odds: {home_team_odds}, Away Odds: {away_team_odds}, Draw Odds: {draw_odds}"
    
    # Add to the list
    simplified_odds_list.append(simplified_event)

# Now, 'simplified_odds_list' contains a more readable version of the odds data, including draw odds
for item in simplified_odds_list:
    print(item)

odds_data_list = []

for event in odds_data:
    home_team = event.get('home_team')
    away_team = event.get('away_team')
    start_time = event.get('commence_time')  # Assuming datetime format or additional parsing might be needed
    
    # Initialize odds
    home_odds = 'N/A'
    away_odds = 'N/A'
    draw_odds = 'N/A'  # Initialize draw odds variable

    # Iterate through bookmakers to find Bovada and extract the odds
    for bookmaker in event.get('bookmakers', []):
        if bookmaker['key'].lower() == 'bovada':  # Ensure case-insensitive matching
            markets = bookmaker.get('markets', [])
            if markets:
                outcomes = markets[0].get('outcomes', [])
                for outcome in outcomes:
                    # Check and assign odds based on the team or if it's a draw
                    if outcome['name'] == home_team:
                        home_odds = outcome.get('price', 'N/A')
                    elif outcome['name'] == away_team:
                        away_odds = outcome.get('price', 'N/A')
                    elif outcome['name'].lower() == 'draw':  # Check for draw odds
                        draw_odds = outcome.get('price', 'N/A')

    # Append the data including draw odds to the list
    odds_data_list.append({
        'Game Matchup': f"{home_team} vs. {away_team}",
        'Home Odds': home_odds,
        'Away Odds': away_odds,
        'Draw Odds': draw_odds,  # Include draw odds
        'Start Time': start_time
    })

# Convert the list to a DataFrame
odds_df = pd.DataFrame(odds_data_list)
name_replacements = {
    'Brighton and Hove Albion': 'Brighton',
    'West Ham United': 'West Ham',
    'Tottenham Hotspur': 'Tottenham'
    # Add any other specific replacements you need here
}
import pandas as pd

# Assuming odds_df is already defined

# Apply replacements in the 'Game Matchup' column
for original_name, new_name in name_replacements.items():
    odds_df['Game Matchup'] = odds_df['Game Matchup'].str.replace(original_name, new_name, regex=False)

# Assuming 'projected_goals_df' exists and has a 'Game Matchup' column
merged_df = pd.merge(combined_projected_goals_df, odds_df, on='Game Matchup', how='left')

# Now 'merged_df' contains both the projected probabilities and the odds
merged_df




[{'id': '6a75389b427acafacbd3404e0ca1b64e', 'sport_key': 'soccer_epl', 'sport_title': 'EPL', 'commence_time': '2024-04-13T11:30:00Z', 'home_team': 'Newcastle United', 'away_team': 'Tottenham Hotspur', 'bookmakers': [{'key': 'bovada', 'title': 'Bovada', 'last_update': '2024-04-11T05:17:27Z', 'markets': [{'key': 'h2h', 'last_update': '2024-04-11T05:17:27Z', 'outcomes': [{'name': 'Newcastle United', 'price': 2.75}, {'name': 'Tottenham Hotspur', 'price': 2.35}, {'name': 'Draw', 'price': 3.85}]}]}]}, {'id': '3d1fed4cc501d8791a1d7aee45ea37fd', 'sport_key': 'soccer_epl', 'sport_title': 'EPL', 'commence_time': '2024-04-13T14:00:00Z', 'home_team': 'Brentford', 'away_team': 'Sheffield United', 'bookmakers': [{'key': 'bovada', 'title': 'Bovada', 'last_update': '2024-04-11T05:17:27Z', 'markets': [{'key': 'h2h', 'last_update': '2024-04-11T05:17:27Z', 'outcomes': [{'name': 'Brentford', 'price': 1.43}, {'name': 'Sheffield United', 'price': 6.5}, {'name': 'Draw', 'price': 5.0}]}]}]}, {'id': '738fa0d34

Unnamed: 0,Game Matchup,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability,Home Odds,Away Odds,Draw Odds,Start Time
0,Bournemouth vs. Manchester United,2.552363,1.841363,4.393726,53.91%,27.55%,18.54%,2.4,2.65,3.85,2024-04-13T16:30:00Z
1,Brentford vs. Sheffield United,1.607848,1.802242,3.410089,34.65%,42.83%,22.52%,1.43,6.5,5.0,2024-04-13T14:00:00Z
2,Burnley vs. Brighton,0.960513,1.744156,2.704668,20.19%,55.64%,24.17%,3.4,2.0,3.9,2024-04-13T14:00:00Z
3,Manchester City vs. Luton,3.23151,0.610372,3.841882,87.27%,3.79%,8.94%,1.08,24.0,12.0,2024-04-13T14:00:00Z
4,Newcastle United vs. Tottenham,2.65293,1.99889,4.65182,52.25%,29.72%,18.03%,2.75,2.35,3.85,2024-04-13T11:30:00Z
5,Nottingham Forest vs. Wolverhampton Wanderers,1.478228,0.986145,2.464372,48.60%,24.54%,26.86%,2.2,3.2,3.55,2024-04-13T14:00:00Z
6,Arsenal vs. Aston Villa,2.635808,0.63978,3.275588,80.29%,6.48%,13.23%,1.29,9.0,6.0,2024-04-14T15:30:00Z
7,Liverpool vs. Crystal Palace,2.76939,0.374017,3.143406,86.44%,3.10%,10.46%,1.2,12.0,7.25,2024-04-14T13:00:00Z
8,West Ham vs. Fulham,1.342045,2.132862,3.474907,23.40%,55.66%,20.94%,2.3,2.85,3.75,2024-04-14T13:00:00Z
9,Chelsea vs. Everton,2.642745,1.039042,3.681787,71.53%,12.66%,15.81%,1.67,4.6,4.2,2024-04-15T19:00:00Z


In [44]:
#CREATING IMPLIED ODDS BASED OFF THE WIN PROBABILITY

# Convert percentage strings to decimal probabilities
merged_df['Home Win Probability'] = merged_df['Home Win Probability'].str.rstrip('%').astype(float) / 100
merged_df['Away Win Probability'] = merged_df['Away Win Probability'].str.rstrip('%').astype(float) / 100
merged_df['Draw Probability'] = merged_df['Draw Probability'].str.rstrip('%').astype(float) / 100
# Calculate implied odds
merged_df['Implied Home Odds'] = 1 / merged_df['Home Win Probability']
merged_df['Implied Away Odds'] = 1 / merged_df['Away Win Probability']
merged_df['Implied Draw Odds'] = 1 / merged_df['Draw Probability']
# Create a new DataFrame with implied odds
implied_odds_df = merged_df[['Game Matchup', 'Implied Home Odds', 'Implied Away Odds', 'Implied Draw Odds', 'Home Odds', 'Away Odds', 'Draw Odds']].copy()


In [45]:
#CALCULATING EDGES BY COMPARING IMPLIED ODDS VS REAL ODDS

# Calculate edge for home win
implied_odds_df['Home Win Edge'] = round((1 / implied_odds_df['Implied Home Odds']) * implied_odds_df['Home Odds'] - 1, 2)

# Calculate edge for away win
implied_odds_df['Away Win Edge'] = round((1 / implied_odds_df['Implied Away Odds']) * implied_odds_df['Away Odds'] - 1, 2)

# Calculate edge for draw
implied_odds_df['Draw Edge'] = round((1 / implied_odds_df['Implied Draw Odds']) * implied_odds_df['Draw Odds'] - 1, 2)

# Round implied odds to 2 decimal points
implied_odds_df['Implied Home Odds'] = round(implied_odds_df['Implied Home Odds'], 2)
implied_odds_df['Implied Away Odds'] = round(implied_odds_df['Implied Away Odds'], 2)
implied_odds_df['Implied Draw Odds'] = round(implied_odds_df['Implied Draw Odds'], 2)

implied_odds_df


Unnamed: 0,Game Matchup,Implied Home Odds,Implied Away Odds,Implied Draw Odds,Home Odds,Away Odds,Draw Odds,Home Win Edge,Away Win Edge,Draw Edge
0,Bournemouth vs. Manchester United,1.85,3.63,5.39,2.4,2.65,3.85,0.29,-0.27,-0.29
1,Brentford vs. Sheffield United,2.89,2.33,4.44,1.43,6.5,5.0,-0.5,1.78,0.13
2,Burnley vs. Brighton,4.95,1.8,4.14,3.4,2.0,3.9,-0.31,0.11,-0.06
3,Manchester City vs. Luton,1.15,26.39,11.19,1.08,24.0,12.0,-0.06,-0.09,0.07
4,Newcastle United vs. Tottenham,1.91,3.36,5.55,2.75,2.35,3.85,0.44,-0.3,-0.31
5,Nottingham Forest vs. Wolverhampton Wanderers,2.06,4.07,3.72,2.2,3.2,3.55,0.07,-0.21,-0.05
6,Arsenal vs. Aston Villa,1.25,15.43,7.56,1.29,9.0,6.0,0.04,-0.42,-0.21
7,Liverpool vs. Crystal Palace,1.16,32.26,9.56,1.2,12.0,7.25,0.04,-0.63,-0.24
8,West Ham vs. Fulham,4.27,1.8,4.78,2.3,2.85,3.75,-0.46,0.59,-0.21
9,Chelsea vs. Everton,1.4,7.9,6.33,1.67,4.6,4.2,0.19,-0.42,-0.34


In [46]:
#MERGING ODDS DATA BACK WITH ORIGINAL DATAFRAME FOR PROJECTIONS

# Merge the implied odds DataFrame with the projected goals DataFrame on the 'Game Matchup' column
implied_odds_with_goals = implied_odds_df.merge(combined_projected_goals_df, on='Game Matchup', how='left')

# Display the DataFrame with projected goals added
implied_odds_with_goals


Unnamed: 0,Game Matchup,Implied Home Odds,Implied Away Odds,Implied Draw Odds,Home Odds,Away Odds,Draw Odds,Home Win Edge,Away Win Edge,Draw Edge,Combined Projected Home Goals,Combined Projected Away Goals,Combined Total Projected Goals,Home Win Probability,Away Win Probability,Draw Probability
0,Bournemouth vs. Manchester United,1.85,3.63,5.39,2.4,2.65,3.85,0.29,-0.27,-0.29,2.552363,1.841363,4.393726,53.91%,27.55%,18.54%
1,Brentford vs. Sheffield United,2.89,2.33,4.44,1.43,6.5,5.0,-0.5,1.78,0.13,1.607848,1.802242,3.410089,34.65%,42.83%,22.52%
2,Burnley vs. Brighton,4.95,1.8,4.14,3.4,2.0,3.9,-0.31,0.11,-0.06,0.960513,1.744156,2.704668,20.19%,55.64%,24.17%
3,Manchester City vs. Luton,1.15,26.39,11.19,1.08,24.0,12.0,-0.06,-0.09,0.07,3.23151,0.610372,3.841882,87.27%,3.79%,8.94%
4,Newcastle United vs. Tottenham,1.91,3.36,5.55,2.75,2.35,3.85,0.44,-0.3,-0.31,2.65293,1.99889,4.65182,52.25%,29.72%,18.03%
5,Nottingham Forest vs. Wolverhampton Wanderers,2.06,4.07,3.72,2.2,3.2,3.55,0.07,-0.21,-0.05,1.478228,0.986145,2.464372,48.60%,24.54%,26.86%
6,Arsenal vs. Aston Villa,1.25,15.43,7.56,1.29,9.0,6.0,0.04,-0.42,-0.21,2.635808,0.63978,3.275588,80.29%,6.48%,13.23%
7,Liverpool vs. Crystal Palace,1.16,32.26,9.56,1.2,12.0,7.25,0.04,-0.63,-0.24,2.76939,0.374017,3.143406,86.44%,3.10%,10.46%
8,West Ham vs. Fulham,4.27,1.8,4.78,2.3,2.85,3.75,-0.46,0.59,-0.21,1.342045,2.132862,3.474907,23.40%,55.66%,20.94%
9,Chelsea vs. Everton,1.4,7.9,6.33,1.67,4.6,4.2,0.19,-0.42,-0.34,2.642745,1.039042,3.681787,71.53%,12.66%,15.81%


In [47]:
#MAKING THE ODDS AMERICAN FORMAT

def decimal_to_american(decimal_odds):
    """
    Convert decimal odds to American odds, adding a '+' sign for positive odds.
    
    Parameters:
    - decimal_odds: float representing the decimal odds.
    
    Returns:
    - str: The American odds as a string, with '+' for positive odds.
    """
    if decimal_odds >= 2.00:
        american_odds = int((decimal_odds - 1) * 100)
    else:
        american_odds = int(-100 / (decimal_odds - 1))
    
    # Add '+' for positive odds
    if american_odds > 0:
        return f'+{american_odds}'
    else:
        return str(american_odds)

# Example usage on a DataFrame column
implied_odds_with_goals['Implied Home Odds American'] = implied_odds_with_goals['Implied Home Odds'].apply(decimal_to_american)
implied_odds_with_goals['Implied Away Odds American'] = implied_odds_with_goals['Implied Away Odds'].apply(decimal_to_american)
implied_odds_with_goals['Implied Draw Odds American'] = implied_odds_with_goals['Implied Draw Odds'].apply(decimal_to_american)
implied_odds_with_goals['Draw Odds American'] = implied_odds_with_goals['Draw Odds'].apply(decimal_to_american)
implied_odds_with_goals['Home Odds American'] = implied_odds_with_goals['Home Odds'].apply(decimal_to_american)
implied_odds_with_goals['Away Odds American'] = implied_odds_with_goals['Away Odds'].apply(decimal_to_american)
# Optionally, display the DataFrame to verify the conversion
implied_odds_with_goals[['Implied Home Odds', 'Implied Home Odds American', 
        'Implied Away Odds', 'Implied Away Odds American', 
        'Implied Draw Odds', 'Implied Draw Odds American']]


Unnamed: 0,Implied Home Odds,Implied Home Odds American,Implied Away Odds,Implied Away Odds American,Implied Draw Odds,Implied Draw Odds American
0,1.85,-117,3.63,263,5.39,438
1,2.89,189,2.33,133,4.44,344
2,4.95,395,1.8,-125,4.14,313
3,1.15,-666,26.39,2539,11.19,1019
4,1.91,-109,3.36,236,5.55,455
5,2.06,106,4.07,307,3.72,272
6,1.25,-400,15.43,1443,7.56,656
7,1.16,-625,32.26,3126,9.56,856
8,4.27,326,1.8,-125,4.78,378
9,1.4,-250,7.9,690,6.33,533


In [48]:
#SETTING UP AND CLEANING NEW DF

# Create a new DataFrame by copying the original DataFrame
new_df = implied_odds_with_goals.copy()

# Replace decimal odds columns with their American odds counterparts
new_df['Implied Home Odds'] = new_df['Implied Home Odds American']
new_df['Implied Away Odds'] = new_df['Implied Away Odds American']
new_df['Implied Draw Odds'] = new_df['Implied Draw Odds American']
new_df['Draw Odds'] = new_df['Draw Odds American']
new_df['Home Odds'] = new_df['Home Odds American']
new_df['Away Odds'] = new_df['Away Odds American']

# Drop the American odds columns as they've been swapped in, if desired
new_df.drop(columns=['Implied Home Odds American', 'Implied Away Odds American', 
                     'Implied Draw Odds American', 'Draw Odds American', 
                     'Home Odds American', 'Away Odds American'], inplace=True)

# Optionally, rename columns back to original if preferred, for clarity
# This step is optional and may be desired for presentation or further processing
column_renames = {
    'Implied Home Odds': 'Implied Home Odds',
    'Implied Away Odds': 'Implied Away Odds',
    'Implied Draw Odds': 'Implied Draw Odds',
    'Home Odds': 'Home Odds',
    'Away Odds': 'Away Odds',
    'Draw Odds': 'Draw Odds',
}
new_df.rename(columns=column_renames, inplace=True)

# Define the new column order
new_column_order = [
    'Game Matchup',
    'Combined Projected Home Goals',
    'Combined Projected Away Goals',
    'Combined Total Projected Goals',
    'Home Win Probability',
    'Away Win Probability',
    'Draw Probability',
    'Implied Home Odds',
    'Implied Away Odds',
    'Implied Draw Odds',
    'Home Odds',
    'Away Odds',
    'Draw Odds',
    'Home Win Edge',
    'Away Win Edge',
    'Draw Edge'
]

# Reorder the DataFrame according to the new column order
new_df = new_df[new_column_order]

# Round the "Combined Total Projected Goals" column to two decimal places
new_df['Combined Total Projected Goals'] = new_df['Combined Total Projected Goals'].round(2)
new_df['Combined Projected Home Goals'] = new_df['Combined Projected Home Goals'].round(2)
new_df['Combined Projected Away Goals'] = new_df['Combined Projected Away Goals'].round(2)
new_df['League'] = 'Premier League'

# Display the DataFrame to verify the changes
print(new_df)


# Now new_df contains the American odds in place of the decimal odds,
# with all other columns unchanged.


                                    Game Matchup  \
0              Bournemouth vs. Manchester United   
1                 Brentford vs. Sheffield United   
2                           Burnley vs. Brighton   
3                      Manchester City vs. Luton   
4                 Newcastle United vs. Tottenham   
5  Nottingham Forest vs. Wolverhampton Wanderers   
6                        Arsenal vs. Aston Villa   
7                   Liverpool vs. Crystal Palace   
8                            West Ham vs. Fulham   
9                            Chelsea vs. Everton   

   Combined Projected Home Goals  Combined Projected Away Goals  \
0                           2.55                           1.84   
1                           1.61                           1.80   
2                           0.96                           1.74   
3                           3.23                           0.61   
4                           2.65                           2.00   
5                        

In [49]:
# Save the DataFrame to a CSV file
new_df.to_csv('epl_odds_4-13.csv', index=False)
