In [71]:
import os
import warnings

# Credentials must not live in the notebook: read the Odds API key from the
# ODDS_API_KEY environment variable instead. The key that was previously
# committed here should be treated as leaked and rotated.
API_KEY = os.environ.get('ODDS_API_KEY', '')
if not API_KEY:
    # Warn instead of raising so the notebook can still be opened and inspected;
    # API calls made with an empty key will not be authenticated.
    warnings.warn("ODDS_API_KEY is not set; Odds API requests will not be authenticated.")

In [72]:
import requests
import pandas as pd

# Fetch head-to-head odds for the English Premier League from The Odds API,
# flatten them to one row per (match, bookmaker, outcome) and save them as CSV.
# API_KEY is defined in the previous cell.

SPORT = 'soccer_epl'  # Sport key for English Premier League
BASE_URL = f'https://api.the-odds-api.com/v4/sports/{SPORT}/odds'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can wait indefinitely
ODDS_COLUMNS = ['match', 'bookmaker', 'outcome', 'odds']

# Parameters for the API request
params = {
    'apiKey': API_KEY,
    'regions': 'us',  # Specify the region for the bookmakers
    'markets': 'h2h',  # We're focusing on head-to-head odds
    'oddsFormat': 'decimal',  # Use decimal odds format
    'dateFormat': 'iso'  # ISO format for dates
}

# Make the API request (bounded by REQUEST_TIMEOUT so the cell cannot hang)
response = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT)

if response.status_code == 200:
    data = response.json()

    # Collect one record per (match, bookmaker, outcome)
    odds_list = []

    for event in data:
        match = f"{event['home_team']} vs {event['away_team']}"

        for bookmaker in event['bookmakers']:
            for market in bookmaker['markets']:
                for outcome in market['outcomes']:
                    odds_list.append({
                        'match': match,
                        'bookmaker': bookmaker['title'],
                        'outcome': outcome['name'],
                        'odds': outcome['price']
                    })

    # Passing the column names explicitly keeps the schema (and the CSV header)
    # intact even when the API returns no events, so later cells that read the
    # CSV and group by 'match'/'outcome' still work on an empty result.
    df_odds = pd.DataFrame(odds_list, columns=ODDS_COLUMNS)

    # Save the odds data to a CSV file
    df_odds.to_csv('epl_odds.csv', index=False)
    print("Odds data saved to epl_odds.csv")

else:
    # Best-effort: report the HTTP status; df_odds is not (re)defined in this case.
    print(f"Failed to retrieve data: {response.status_code}")


Odds data saved to epl_odds.csv


In [73]:
# Inspect the flattened odds table (one row per match / bookmaker / outcome)
df_odds

Unnamed: 0,match,bookmaker,outcome,odds
0,Manchester United vs Fulham,BetRivers,Fulham,5.50
1,Manchester United vs Fulham,BetRivers,Manchester United,1.60
2,Manchester United vs Fulham,BetRivers,Draw,4.10
3,Manchester United vs Fulham,DraftKings,Fulham,4.90
4,Manchester United vs Fulham,DraftKings,Manchester United,1.67
...,...,...,...,...
253,Leicester City vs Tottenham Hotspur,BetUS,Tottenham Hotspur,1.57
254,Leicester City vs Tottenham Hotspur,BetUS,Draw,4.50
255,Leicester City vs Tottenham Hotspur,Bovada,Leicester City,5.20
256,Leicester City vs Tottenham Hotspur,Bovada,Tottenham Hotspur,1.57


In [74]:
import pandas as pd

# Re-read the per-bookmaker odds written to disk by the fetch cell
df_odds = pd.read_csv('epl_odds.csv')

# Mean price across bookmakers for every (match, outcome) pair,
# returned as a flat frame with columns: match, outcome, odds
average_odds = (
    df_odds
    .groupby(['match', 'outcome'])['odds']
    .agg('mean')
    .reset_index()
)

# Persist the averaged odds so they can be fed into the model
average_odds.to_csv('epl_average_odds.csv', index=False)
print("Average odds data saved to epl_average_odds.csv")


Average odds data saved to epl_average_odds.csv


In [75]:
# Inspect the mean odds per (match, outcome) computed in the previous cell
average_odds

Unnamed: 0,match,outcome,odds
0,Arsenal vs Wolverhampton Wanderers,Arsenal,1.172222
1,Arsenal vs Wolverhampton Wanderers,Draw,7.716667
2,Arsenal vs Wolverhampton Wanderers,Wolverhampton Wanderers,15.055556
3,Brentford vs Crystal Palace,Brentford,2.425556
4,Brentford vs Crystal Palace,Crystal Palace,2.914444
5,Brentford vs Crystal Palace,Draw,3.4
6,Chelsea vs Manchester City,Chelsea,3.944444
7,Chelsea vs Manchester City,Draw,3.922222
8,Chelsea vs Manchester City,Manchester City,1.857778
9,Everton vs Brighton and Hove Albion,Brighton and Hove Albion,2.6575


In [76]:
# Split the 'match' column ("<home> vs <away>") into 'home_team' and 'away_team'.
# The guard makes the cell safe to re-run: once 'match' has been dropped,
# splitting or dropping it again would raise a KeyError.
if 'match' in df_odds.columns:
    # n=1 splits on the first ' vs ' only, so exactly two columns are produced
    # even if a team name itself were to contain ' vs '.
    df_odds[['home_team', 'away_team']] = df_odds['match'].str.split(' vs ', n=1, expand=True)
    # Drop the 'match' column now that both teams have their own columns
    df_odds = df_odds.drop(columns=['match'])

# Display the updated DataFrame
print(df_odds.head())

    bookmaker            outcome  odds          home_team away_team
0   BetRivers             Fulham  5.50  Manchester United    Fulham
1   BetRivers  Manchester United  1.60  Manchester United    Fulham
2   BetRivers               Draw  4.10  Manchester United    Fulham
3  DraftKings             Fulham  4.90  Manchester United    Fulham
4  DraftKings  Manchester United  1.67  Manchester United    Fulham


In [77]:
# Check column dtypes and non-null counts after replacing 'match' with the team columns
df_odds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258 entries, 0 to 257
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   bookmaker  258 non-null    object 
 1   outcome    258 non-null    object 
 2   odds       258 non-null    float64
 3   home_team  258 non-null    object 
 4   away_team  258 non-null    object 
dtypes: float64(1), object(4)
memory usage: 10.2+ KB


In [78]:
import pandas as pd

# Assuming df_odds is your original DataFrame
# Step 1: Create a new column 'result_type' to classify the outcome
def classify_result(row):
    """Return the odds-column label for a single odds row.

    `row` must support item access for 'outcome', 'home_team' and 'away_team'
    (e.g. a DataFrame row passed by `apply(..., axis=1)`).

    The home team is checked first, then the away team; any other outcome
    value is labelled as draw odds.
    """
    picked = row['outcome']
    if picked == row['home_team']:
        return 'home_win_odds'
    if picked == row['away_team']:
        return 'away_win_odds'
    return 'draw_odds'

# Label every row as home-win, away-win or draw odds
df_odds['result_type'] = df_odds.apply(classify_result, axis=1)

# Step 2: reshape to one row per fixture with one column per
# (bookmaker, result_type) pair; duplicate entries are averaged by pivot_table
df_pivot = pd.pivot_table(
    df_odds,
    values='odds',
    index=['home_team', 'away_team'],
    columns=['bookmaker', 'result_type'],
).reset_index()

# Step 3: collapse the two-level column header into single strings.
# The index columns have an empty second level and keep their plain name.
df_pivot.columns = [
    '_'.join((top, bottom)).strip() if bottom else top
    for top, bottom in df_pivot.columns.values
]

# Step 4: make the names easier to read by replacing spaces with underscores
# and then collapsing any doubled underscores
df_pivot.columns = (
    df_pivot.columns
    .str.replace(' ', '_')
    .str.replace('__', '_')
)

# Show the first rows of the reshaped frame
print(df_pivot.head())

# Write the reshaped frame to a new CSV
df_pivot.to_csv('epl_reshaped_odds.csv', index=False)


      home_team                 away_team  BetMGM_away_win_odds  \
0       Arsenal   Wolverhampton Wanderers                 13.50   
1     Brentford            Crystal Palace                  2.85   
2       Chelsea           Manchester City                  1.83   
3       Everton  Brighton and Hove Albion                   NaN   
4  Ipswich Town                 Liverpool                  1.33   

   BetMGM_draw_odds  BetMGM_home_win_odds  BetOnline.ag_away_win_odds  \
0              7.25                   1.2                       15.00   
1              3.40                   2.4                        2.90   
2              4.00                   3.9                        1.88   
3               NaN                   NaN                        2.70   
4              5.75                   8.0                        1.33   

   BetOnline.ag_draw_odds  BetOnline.ag_home_win_odds  \
0                    8.40                        1.18   
1                    3.45                   

In [79]:
# Inspect the reshaped wide table (one row per home_team / away_team pair)
df_pivot

Unnamed: 0,home_team,away_team,BetMGM_away_win_odds,BetMGM_draw_odds,BetMGM_home_win_odds,BetOnline.ag_away_win_odds,BetOnline.ag_draw_odds,BetOnline.ag_home_win_odds,BetRivers_away_win_odds,BetRivers_draw_odds,...,Caesars_home_win_odds,DraftKings_away_win_odds,DraftKings_draw_odds,DraftKings_home_win_odds,FanDuel_away_win_odds,FanDuel_draw_odds,FanDuel_home_win_odds,LowVig.ag_away_win_odds,LowVig.ag_draw_odds,LowVig.ag_home_win_odds
0,Arsenal,Wolverhampton Wanderers,13.5,7.25,1.2,15.0,8.4,1.18,17.0,7.5,...,1.17,14.0,8.0,1.18,15.0,7.5,1.15,15.0,8.4,1.18
1,Brentford,Crystal Palace,2.85,3.4,2.4,2.9,3.45,2.5,2.88,3.4,...,2.35,2.95,3.4,2.45,2.95,3.4,2.4,2.9,3.45,2.5
2,Chelsea,Manchester City,1.83,4.0,3.9,1.88,3.9,4.05,1.85,3.8,...,4.0,1.87,3.9,3.95,1.87,3.9,3.9,1.88,3.9,4.05
3,Everton,Brighton and Hove Albion,,,,2.7,3.39,2.7,2.6,3.45,...,2.7,2.7,3.35,2.65,2.65,3.3,2.65,2.7,3.39,2.7
4,Ipswich Town,Liverpool,1.33,5.75,8.0,1.33,5.85,8.8,1.3,6.25,...,9.0,1.32,6.0,8.0,1.31,5.9,8.5,1.33,5.85,8.8
5,Leicester City,Tottenham Hotspur,1.62,4.2,4.8,1.62,4.35,5.25,1.56,4.5,...,5.25,1.57,4.4,5.25,1.57,4.5,5.5,1.62,4.35,5.25
6,Manchester United,Fulham,4.8,4.2,1.62,,,,5.5,4.1,...,1.59,4.9,4.1,1.67,5.2,4.0,1.67,,,
7,Newcastle United,Southampton,,,,8.38,5.67,1.35,8.5,5.8,...,1.31,8.0,5.75,1.32,8.0,5.7,1.32,8.38,5.67,1.35
8,Nottingham Forest,Bournemouth,2.8,3.4,2.45,2.95,3.46,2.46,2.88,3.45,...,2.35,2.95,3.35,2.45,2.9,3.4,2.4,2.95,3.46,2.46
9,West Ham United,Aston Villa,2.7,3.6,2.45,2.8,3.7,2.45,2.75,3.7,...,2.4,2.75,3.65,2.45,2.75,3.6,2.4,2.8,3.7,2.45


In [80]:
# Check dtypes and non-null counts per column; nulls mark bookmakers with no odds for a fixture
df_pivot.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 29 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   home_team                   10 non-null     object 
 1   away_team                   10 non-null     object 
 2   BetMGM_away_win_odds        8 non-null      float64
 3   BetMGM_draw_odds            8 non-null      float64
 4   BetMGM_home_win_odds        8 non-null      float64
 5   BetOnline.ag_away_win_odds  9 non-null      float64
 6   BetOnline.ag_draw_odds      9 non-null      float64
 7   BetOnline.ag_home_win_odds  9 non-null      float64
 8   BetRivers_away_win_odds     10 non-null     float64
 9   BetRivers_draw_odds         10 non-null     float64
 10  BetRivers_home_win_odds     10 non-null     float64
 11  BetUS_away_win_odds         10 non-null     float64
 12  BetUS_draw_odds             10 non-null     float64
 13  BetUS_home_win_odds         10 non-nul