# Applying the Model to Betting

Finally we will take the results of our model, specifically the predicted outcome probabilites of the test set, and use them to test different betting strategies.

### Calculate Probabilities from Odds

Here we will take the closing odds from Pinnacle for all the games in the test set and calculate the implied odds. These implied odds will always add up to a number slightly above 100, which is the bookie tax. The bookie tax ensures that the bookies always make a profit(but not with us!).

In [25]:
import pandas as pd
import numpy as np

In [26]:
df = pd.read_csv('logreg_preds.csv')

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2280 entries, 0 to 2279
Data columns (total 27 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Unnamed: 0                      2280 non-null   int64  
 1   Pinnacle Closing Home Win Odds  2280 non-null   float64
 2   Pinnacle Closing Draw Odds      2280 non-null   float64
 3   Pinnacle Closing Away Win Odds  2280 non-null   float64
 4   home_team_elo                   2280 non-null   float64
 5   away_team_elo                   2280 non-null   float64
 6   home_xG_to_date                 2280 non-null   float64
 7   away_xG_to_date                 2280 non-null   float64
 8   home_xG_against_to_date         2280 non-null   float64
 9   away_xG_against_to_date         2280 non-null   float64
 10  home_goals_scored_to_date       2280 non-null   int64  
 11  away_goals_scored_to_date       2280 non-null   int64  
 12  home_goals_conceded_to_date     22

In [28]:
# Create the new columns with the calculated values
df['implied home win prob'] = (1 / df['Pinnacle Closing Home Win Odds']) * 100
df['implied draw prob'] = (1 / df['Pinnacle Closing Draw Odds']) * 100
df['implied away win prob'] = (1 / df['Pinnacle Closing Away Win Odds']) * 100

In [29]:
selected_columns = df[['home_team','away_team','Pinnacle Closing Home Win Odds', 'Pinnacle Closing Draw Odds', 'Pinnacle Closing Away Win Odds', 'implied home win prob', 'implied draw prob', 'implied away win prob', 'match_result']]

In [30]:
selected_columns

Unnamed: 0,home_team,away_team,Pinnacle Closing Home Win Odds,Pinnacle Closing Draw Odds,Pinnacle Closing Away Win Odds,implied home win prob,implied draw prob,implied away win prob,match_result
0,arsenal,leicester,1.49,4.73,7.25,67.114094,21.141649,13.793103,0
1,brighton,manchester city,11.75,6.15,1.29,8.510638,16.260163,77.519380,2
2,chelsea,burnley,1.33,5.40,12.25,75.187970,18.518519,8.163265,2
3,crystal palace,huddersfield,1.79,3.56,5.51,55.865922,28.089888,18.148820,2
4,everton,stoke,1.82,3.49,5.42,54.945055,28.653295,18.450185,0
...,...,...,...,...,...,...,...,...,...
2275,everton,bournemouth,1.65,4.26,5.31,60.606061,23.474178,18.832392,0
2276,leeds,tottenham,2.80,3.95,2.39,35.714286,25.316456,41.841004,2
2277,leicester,west ham,2.11,4.01,3.29,47.393365,24.937656,30.395137,0
2278,manchester utd,fulham,1.49,5.09,6.14,67.114094,19.646365,16.286645,0


In [31]:
df['Total_implied_probability']= df['implied home win prob'] + df['implied draw prob'] + df['implied away win prob']

In [32]:
selected_columns = df[['home_team','away_team','Pinnacle Closing Home Win Odds', 'Pinnacle Closing Draw Odds', 'Pinnacle Closing Away Win Odds', 'implied home win prob', 'implied draw prob', 'implied away win prob', 'Total_implied_probability', 'match_result']]

In [33]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2280 entries, 0 to 2279
Data columns (total 31 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   Unnamed: 0                      2280 non-null   int64  
 1   Pinnacle Closing Home Win Odds  2280 non-null   float64
 2   Pinnacle Closing Draw Odds      2280 non-null   float64
 3   Pinnacle Closing Away Win Odds  2280 non-null   float64
 4   home_team_elo                   2280 non-null   float64
 5   away_team_elo                   2280 non-null   float64
 6   home_xG_to_date                 2280 non-null   float64
 7   away_xG_to_date                 2280 non-null   float64
 8   home_xG_against_to_date         2280 non-null   float64
 9   away_xG_against_to_date         2280 non-null   float64
 10  home_goals_scored_to_date       2280 non-null   int64  
 11  away_goals_scored_to_date       2280 non-null   int64  
 12  home_goals_conceded_to_date     22

In [34]:
# Assuming 'df' is your DataFrame
EV_calc_df = df.dropna()

In [35]:
EV_calc_df.columns

Index(['Unnamed: 0', 'Pinnacle Closing Home Win Odds',
       'Pinnacle Closing Draw Odds', 'Pinnacle Closing Away Win Odds',
       'home_team_elo', 'away_team_elo', 'home_xG_to_date', 'away_xG_to_date',
       'home_xG_against_to_date', 'away_xG_against_to_date',
       'home_goals_scored_to_date', 'away_goals_scored_to_date',
       'home_goals_conceded_to_date', 'away_goals_conceded_to_date',
       'home_points_to_date', 'away_points_to_date', 'home_form', 'away_form',
       'match_result', 'Probability_Home_win', 'Probability_Draw',
       'Probability_Away_win', 'week', 'date', 'score', 'home_team',
       'away_team', 'implied home win prob', 'implied draw prob',
       'implied away win prob', 'Total_implied_probability'],
      dtype='object')

In [36]:
# Assuming 'df' is your DataFrame
# List the names of the columns you want to drop
columns_to_drop = ['Unnamed: 0', 'home_xG_to_date', 'away_xG_to_date',
       'home_xG_against_to_date', 'away_xG_against_to_date',
       'home_goals_scored_to_date', 'away_goals_scored_to_date',
       'home_goals_conceded_to_date', 'away_goals_conceded_to_date',
       'home_points_to_date', 'away_points_to_date', 'home_form', 'away_form','implied home win prob', 'implied draw prob',
       'implied away win prob', 'Total_implied_probability', 'week', 'date', 'score']

# Dropping the specified columns
EV_calc_df = EV_calc_df.drop(columns=columns_to_drop)

# If you want to modify the original DataFrame in place
# df.drop(columns=columns_to_drop, inplace=True)


In [37]:
EV_calc_df

Unnamed: 0,Pinnacle Closing Home Win Odds,Pinnacle Closing Draw Odds,Pinnacle Closing Away Win Odds,home_team_elo,away_team_elo,match_result,Probability_Home_win,Probability_Draw,Probability_Away_win,home_team,away_team
0,1.49,4.73,7.25,1848.286499,1716.994873,0,0.276477,0.259282,0.464240,arsenal,leicester
1,11.75,6.15,1.29,1583.799805,1866.807007,2,0.226818,0.236149,0.537033,brighton,manchester city
2,1.33,5.40,12.25,1909.399658,1628.988403,2,0.206330,0.245506,0.548164,chelsea,burnley
3,1.79,3.56,5.51,1642.862427,1475.799316,2,0.110661,0.195446,0.693893,crystal palace,huddersfield
4,1.82,3.49,5.42,1751.501343,1662.613770,0,0.392357,0.274732,0.332910,everton,stoke
...,...,...,...,...,...,...,...,...,...,...,...
375,4.85,3.72,1.80,1651.556885,1858.670288,0,0.516922,0.221536,0.261542,newcastle utd,chelsea
376,6.32,4.78,1.51,1657.315552,1973.498779,2,0.319765,0.209411,0.470824,southampton,manchester city
377,2.08,3.56,3.82,1630.464355,1620.356567,2,0.370552,0.228762,0.400686,swansea,stoke
378,1.38,5.50,8.15,1913.709106,1700.989502,0,0.680317,0.173513,0.146170,tottenham,leicester


## Calculating Expected Value

Expected Value(EV) shows us how much we can expect to win per bet(on average). In other words, it is a great indicator of what is a good bet.

In [38]:
# Recalculating EV using the correct formula

# Constants
stake = 10

# Recalculate EV for each outcome using the correct formula
EV_calc_df['Correct_EV_Home_Win'] = (EV_calc_df['Probability_Home_win'] * ((EV_calc_df['Pinnacle Closing Home Win Odds']-1) * stake)) - ((1 - EV_calc_df['Probability_Home_win']) * stake)
EV_calc_df['Correct_EV_Draw'] = (EV_calc_df['Probability_Draw'] * ((EV_calc_df['Pinnacle Closing Draw Odds']-1) * stake)) - ((1 - EV_calc_df['Probability_Draw']) * stake)
EV_calc_df['Correct_EV_Away_Win'] = (EV_calc_df['Probability_Away_win'] * ((EV_calc_df['Pinnacle Closing Away Win Odds']-1) * stake)) - ((1 - EV_calc_df['Probability_Away_win']) * stake)

# Display the updated DataFrame with corrected EV columns
EV_calc_df[['home_team', 'away_team', 'Correct_EV_Home_Win', 'Correct_EV_Draw', 'Correct_EV_Away_Win']].head()

Unnamed: 0,home_team,away_team,Correct_EV_Home_Win,Correct_EV_Draw,Correct_EV_Away_Win
0,arsenal,leicester,-5.880488,2.264049,23.657433
1,brighton,manchester city,16.651097,4.523188,-3.072278
2,chelsea,burnley,-7.255814,3.25734,57.150082
3,crystal palace,huddersfield,-8.019174,-3.042109,28.233502
4,everton,stoke,-2.859097,-0.411842,8.043743


## Simulating Betting Strategies

Now we will attempt a couple of different of betting strategies, and calculate the total profit and the return on investment(ROI) of each.

#### Betting on best EV outcome for each match

In [39]:
def find_highest_ev_bet(df):
    """
    Function to find the highest EV bet for each match.

    Parameters:
    df (DataFrame): A DataFrame containing EV calculations for each outcome.

    Returns:
    DataFrame: The original DataFrame with two new columns:
               'Highest_EV_Bet' indicating the type of bet (Home Win, Draw, Away Win),
               'Highest_EV_Value' indicating the value of the highest EV.
    """
    # Identifying the highest EV bet for each match
    bet_columns = ['Correct_EV_Home_Win', 'Correct_EV_Draw', 'Correct_EV_Away_Win']
    df['Highest_EV_Bet'] = df[bet_columns].idxmax(axis=1)
    df['Highest_EV_Value'] = df[bet_columns].max(axis=1)
    return df

# Applying the function to the DataFrame
EV_calc_df = find_highest_ev_bet(EV_calc_df)
EV_calc_df[['home_team', 'away_team', 'Highest_EV_Bet', 'Highest_EV_Value']].head()


Unnamed: 0,home_team,away_team,Highest_EV_Bet,Highest_EV_Value
0,arsenal,leicester,Correct_EV_Away_Win,23.657433
1,brighton,manchester city,Correct_EV_Home_Win,16.651097
2,chelsea,burnley,Correct_EV_Away_Win,57.150082
3,crystal palace,huddersfield,Correct_EV_Away_Win,28.233502
4,everton,stoke,Correct_EV_Away_Win,8.043743


In [40]:
# Constants
stake = 10

# Define a function to find the best EV bet for each match
def best_ev_bet(row):
    # Extracting EV values and corresponding odds
    ev_values = {
        'Correct_EV_Home_Win': (row['Correct_EV_Home_Win'], row['Pinnacle Closing Home Win Odds']),
        'Correct_EV_Draw': (row['Correct_EV_Draw'], row['Pinnacle Closing Draw Odds']),
        'Correct_EV_Away_Win': (row['Correct_EV_Away_Win'], row['Pinnacle Closing Away Win Odds'])
    }

    # Filter only positive EV bets
    positive_ev_bets = {k: v for k, v in ev_values.items() if v[0] > 0}

    # If no positive EV bets, return no bet
    if not positive_ev_bets:
        return None, "No Bet"

    # Find the bet with the highest EV
    best_bet = max(positive_ev_bets, key=lambda x: positive_ev_bets[x][0])
    return positive_ev_bets[best_bet][1], best_bet  # Return the odds and type of bet

# Apply the function to find the best bet for each match
EV_calc_df[['Best_Bet_Odds', 'Best_Bet_Type']] = EV_calc_df.apply(best_ev_bet, axis=1, result_type='expand')

# Define a function to calculate the outcome of the best bet
def calculate_best_bet_outcome(row):
    bet_type_to_result_mapping = {
        'Correct_EV_Home_Win': 0,
        'Correct_EV_Draw': 1,
        'Correct_EV_Away_Win': 2
    }
    bet_type = row['Best_Bet_Type']

    if bet_type != "No Bet" and bet_type_to_result_mapping[bet_type] == row['match_result']:
        # Bet won: Calculate profit using fractional odds
        profit = (row['Best_Bet_Odds'] - 1) * stake
        return profit
    elif bet_type != "No Bet":
        # Bet lost
        return -stake
    else:
        # No bet placed
        return 0

# Calculate the outcome of the best bet for each match
EV_calc_df['Best_Bet_Outcome'] = EV_calc_df.apply(calculate_best_bet_outcome, axis=1)

# Calculate the total profit/loss
total_profit_loss_best_bet = EV_calc_df['Best_Bet_Outcome'].sum()

# Displaying total profit/loss and a snippet of the DataFrame
total_profit_loss_best_bet, EV_calc_df[['Best_Bet_Type', 'Best_Bet_Odds', 'Best_Bet_Outcome']].head()



(-363.5999999999999,
          Best_Bet_Type  Best_Bet_Odds  Best_Bet_Outcome
 0  Correct_EV_Away_Win           7.25             -10.0
 1  Correct_EV_Home_Win          11.75             -10.0
 2  Correct_EV_Away_Win          12.25             112.5
 3  Correct_EV_Away_Win           5.51              45.1
 4  Correct_EV_Away_Win           5.42             -10.0)

In [41]:
# Define a function to find the best EV bet for each match
def best_ev_bet(row):
    # Extracting EV values and corresponding odds
    ev_values = {
        'Correct_EV_Home_Win': (row['Correct_EV_Home_Win'], row['Pinnacle Closing Home Win Odds']),
        'Correct_EV_Draw': (row['Correct_EV_Draw'], row['Pinnacle Closing Draw Odds']),
        'Correct_EV_Away_Win': (row['Correct_EV_Away_Win'], row['Pinnacle Closing Away Win Odds'])
    }

    # Filter only positive EV bets
    positive_ev_bets = {k: v for k, v in ev_values.items() if v[0] > 0}

    # If no positive EV bets, return no bet
    if not positive_ev_bets:
        return 0, None

    # Find the bet with the highest EV
    best_bet = max(positive_ev_bets, key=lambda x: positive_ev_bets[x][0])
    return positive_ev_bets[best_bet][1], best_bet  # Return the odds and type of bet

# Apply the function to find the best bet for each match
EV_calc_df['Best_Bet_Odds'], EV_calc_df['Best_Bet_Type'] = zip(*EV_calc_df.apply(best_ev_bet, axis=1))

# Define a function to calculate the outcome of the best bet
def calculate_best_bet_outcome(row):
    bet_type_to_result_mapping = {
        'Correct_EV_Home_Win': 0,
        'Correct_EV_Draw': 1,
        'Correct_EV_Away_Win': 2
    }
    bet_type = row['Best_Bet_Type']

    if bet_type and bet_type_to_result_mapping[bet_type] == row['match_result']:
        # Bet won
        return ((row['Best_Bet_Odds']) * stake) - stake
    elif bet_type:
        # Bet lost
        return -stake
    else:
        # No bet placed
        return 0

# Calculate the outcome of the best bet for each match
EV_calc_df['Best_Bet_Outcome'] = EV_calc_df.apply(calculate_best_bet_outcome, axis=1)

# Calculate the total profit/loss
total_profit_loss_best_bet = EV_calc_df['Best_Bet_Outcome'].sum()

total_profit_loss_best_bet, EV_calc_df[['Best_Bet_Type', 'Best_Bet_Odds', 'Best_Bet_Outcome']]


(-363.5999999999999,
            Best_Bet_Type  Best_Bet_Odds  Best_Bet_Outcome
 0    Correct_EV_Away_Win           7.25             -10.0
 1    Correct_EV_Home_Win          11.75             -10.0
 2    Correct_EV_Away_Win          12.25             112.5
 3    Correct_EV_Away_Win           5.51              45.1
 4    Correct_EV_Away_Win           5.42             -10.0
 ..                   ...            ...               ...
 375  Correct_EV_Home_Win           4.85              38.5
 376  Correct_EV_Home_Win           6.32             -10.0
 377  Correct_EV_Away_Win           3.82              28.2
 378  Correct_EV_Away_Win           8.15             -10.0
 379  Correct_EV_Away_Win           3.28             -10.0
 
 [380 rows x 3 columns])

In [42]:
EV_calc_df.head()

Unnamed: 0,Pinnacle Closing Home Win Odds,Pinnacle Closing Draw Odds,Pinnacle Closing Away Win Odds,home_team_elo,away_team_elo,match_result,Probability_Home_win,Probability_Draw,Probability_Away_win,home_team,away_team,Correct_EV_Home_Win,Correct_EV_Draw,Correct_EV_Away_Win,Highest_EV_Bet,Highest_EV_Value,Best_Bet_Odds,Best_Bet_Type,Best_Bet_Outcome
0,1.49,4.73,7.25,1848.286499,1716.994873,0,0.276477,0.259282,0.46424,arsenal,leicester,-5.880488,2.264049,23.657433,Correct_EV_Away_Win,23.657433,7.25,Correct_EV_Away_Win,-10.0
1,11.75,6.15,1.29,1583.799805,1866.807007,2,0.226818,0.236149,0.537033,brighton,manchester city,16.651097,4.523188,-3.072278,Correct_EV_Home_Win,16.651097,11.75,Correct_EV_Home_Win,-10.0
2,1.33,5.4,12.25,1909.399658,1628.988403,2,0.20633,0.245506,0.548164,chelsea,burnley,-7.255814,3.25734,57.150082,Correct_EV_Away_Win,57.150082,12.25,Correct_EV_Away_Win,112.5
3,1.79,3.56,5.51,1642.862427,1475.799316,2,0.110661,0.195446,0.693893,crystal palace,huddersfield,-8.019174,-3.042109,28.233502,Correct_EV_Away_Win,28.233502,5.51,Correct_EV_Away_Win,45.1
4,1.82,3.49,5.42,1751.501343,1662.61377,0,0.392357,0.274732,0.33291,everton,stoke,-2.859097,-0.411842,8.043743,Correct_EV_Away_Win,8.043743,5.42,Correct_EV_Away_Win,-10.0


#### Betting on All positive EV outcomes

Using Updated EV

In [43]:
# Adjusting the function to bet on all positive EV bets

# Define a function to calculate the outcome of positive EV bets
def calculate_positive_ev_outcomes(row):
    outcomes = {
        'Correct_EV_Home_Win': (0, row['Correct_EV_Home_Win'], row['Pinnacle Closing Home Win Odds']),
        'Correct_EV_Draw': (1, row['Correct_EV_Draw'], row['Pinnacle Closing Draw Odds']),
        'Correct_EV_Away_Win': (2, row['Correct_EV_Away_Win'], row['Pinnacle Closing Away Win Odds'])
    }

    total_outcome = 0
    for bet_type, (result_code, ev, odds) in outcomes.items():
        if ev > 0:
            if result_code == row['match_result']:
                # Bet won
                total_outcome += (odds * stake) - stake
            else:
                # Bet lost
                total_outcome -= stake

    return total_outcome

# Apply the function to calculate outcomes for all positive EV bets
EV_calc_df['Positive_EV_Outcomes'] = EV_calc_df.apply(calculate_positive_ev_outcomes, axis=1)

# Calculate the total profit/loss for all positive EV bets
total_profit_loss_positive_ev = EV_calc_df['Positive_EV_Outcomes'].sum()

total_profit_loss_positive_ev

-392.0

In [44]:
EV_calc_df

Unnamed: 0,Pinnacle Closing Home Win Odds,Pinnacle Closing Draw Odds,Pinnacle Closing Away Win Odds,home_team_elo,away_team_elo,match_result,Probability_Home_win,Probability_Draw,Probability_Away_win,home_team,away_team,Correct_EV_Home_Win,Correct_EV_Draw,Correct_EV_Away_Win,Highest_EV_Bet,Highest_EV_Value,Best_Bet_Odds,Best_Bet_Type,Best_Bet_Outcome,Positive_EV_Outcomes
0,1.49,4.73,7.25,1848.286499,1716.994873,0,0.276477,0.259282,0.464240,arsenal,leicester,-5.880488,2.264049,23.657433,Correct_EV_Away_Win,23.657433,7.25,Correct_EV_Away_Win,-10.0,-20.0
1,11.75,6.15,1.29,1583.799805,1866.807007,2,0.226818,0.236149,0.537033,brighton,manchester city,16.651097,4.523188,-3.072278,Correct_EV_Home_Win,16.651097,11.75,Correct_EV_Home_Win,-10.0,-20.0
2,1.33,5.40,12.25,1909.399658,1628.988403,2,0.206330,0.245506,0.548164,chelsea,burnley,-7.255814,3.257340,57.150082,Correct_EV_Away_Win,57.150082,12.25,Correct_EV_Away_Win,112.5,102.5
3,1.79,3.56,5.51,1642.862427,1475.799316,2,0.110661,0.195446,0.693893,crystal palace,huddersfield,-8.019174,-3.042109,28.233502,Correct_EV_Away_Win,28.233502,5.51,Correct_EV_Away_Win,45.1,45.1
4,1.82,3.49,5.42,1751.501343,1662.613770,0,0.392357,0.274732,0.332910,everton,stoke,-2.859097,-0.411842,8.043743,Correct_EV_Away_Win,8.043743,5.42,Correct_EV_Away_Win,-10.0,-10.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,4.85,3.72,1.80,1651.556885,1858.670288,0,0.516922,0.221536,0.261542,newcastle utd,chelsea,15.070697,-1.758846,-5.292244,Correct_EV_Home_Win,15.070697,4.85,Correct_EV_Home_Win,38.5,38.5
376,6.32,4.78,1.51,1657.315552,1973.498779,2,0.319765,0.209411,0.470824,southampton,manchester city,10.209148,0.009864,-2.890563,Correct_EV_Home_Win,10.209148,6.32,Correct_EV_Home_Win,-10.0,-20.0
377,2.08,3.56,3.82,1630.464355,1620.356567,2,0.370552,0.228762,0.400686,swansea,stoke,-2.292510,-1.856086,5.306204,Correct_EV_Away_Win,5.306204,3.82,Correct_EV_Away_Win,28.2,28.2
378,1.38,5.50,8.15,1913.709106,1700.989502,0,0.680317,0.173513,0.146170,tottenham,leicester,-0.611629,-0.456782,1.912875,Correct_EV_Away_Win,1.912875,8.15,Correct_EV_Away_Win,-10.0,-10.0


#### Calculating the ROI for all EV bets strategy

In [45]:
# Counting the number of bets placed based on positive EV in any of the EV columns

# Counting positive EV bets in each column
num_bets_home_win = EV_calc_df[EV_calc_df['Correct_EV_Home_Win'] > 0].shape[0]
num_bets_draw = EV_calc_df[EV_calc_df['Correct_EV_Draw'] > 0].shape[0]
num_bets_away_win = EV_calc_df[EV_calc_df['Correct_EV_Away_Win'] > 0].shape[0]

# Total number of positive EV bets
total_positive_ev_bets = num_bets_home_win + num_bets_draw + num_bets_away_win

# Recalculate Total Investment
total_investment_corrected = total_positive_ev_bets * stake

# Recalculate ROI with the corrected total investment
roi_corrected = (total_profit_loss_positive_ev / total_investment_corrected) * 100

roi_corrected, total_positive_ev_bets, total_investment_corrected


(-7.495219885277246, 523, 5230)

#### Calculating the ROI for best EV bets strategy

In [46]:
# Calculating ROI for the strategy of betting on the best EV outcome per match

# Count the number of matches where a bet was placed (i.e., Best_Bet_Type is not None)
num_best_ev_bets = EV_calc_df[EV_calc_df['Best_Bet_Type'].notna()].shape[0]

# Total investment for best EV bet per match
total_investment_best_ev = num_best_ev_bets * stake

# Total profit from the best EV bet per match strategy
total_profit_best_ev = EV_calc_df['Best_Bet_Outcome'].sum()

# Calculate ROI for best EV bet per match strategy
roi_best_ev = (total_profit_best_ev / total_investment_best_ev) * 100

roi_best_ev, num_best_ev_bets, total_investment_best_ev


(-9.593667546174139, 379, 3790)

As we can see from our ROI results, betting the most positive EV bet for each game using pinnacle's odd is very effective, giving us a 43.78% return on our investment of 4560$, over the course of 456 matches.

#### Sanity checking

Here are going to check how good Pinnacle is at predicting the winner of soccer matches across the test set. For reference, our model had an accuracy of .528.

In [47]:
def determine_implied_result(row):
    if row['implied home win prob'] > max(row['implied draw prob'], row['implied away win prob']):
        return 0
    elif row['implied draw prob'] > max(row['implied home win prob'], row['implied away win prob']):
        return 1
    else:
        return 2

EV_calc_df['implied_match_result'] = EV_calc_df.apply(determine_implied_result, axis=1)


KeyError: 'implied home win prob'

In [None]:
matching_results_count = (EV_calc_df['implied_match_result'] == EV_calc_df['match_result']).sum()

matching_results_count 

In [None]:
matching_results_count/(len(EV_calc_df))

As we can see here if we takes Pinnacle's favorite, according to their odds, as they pick for the match winner, they have an accuracy of only 47.15%. This means that our betting strategies are not out of whack and we do have an advantage over the bookie.

In [None]:
# Counting positive vs. negative outcomes in the 'Best_Bet_Outcome' column
positive_outcomes = (EV_calc_df['Best_Bet_Outcome'] > 0).sum()
negative_outcomes = (EV_calc_df['Best_Bet_Outcome'] < 0).sum()

positive_outcomes, negative_outcomes


In [None]:
# Counting positive vs. negative outcomes in the 'Best_Bet_Outcome' column
positive_outcomes = (EV_calc_df['Positive_EV_Outcomes'] > 0).sum()
negative_outcomes = (EV_calc_df['Positive_EV_Outcomes'] < 0).sum()

positive_outcomes, negative_outcomes


In [None]:
csv_file_path = '/Users/lkimball/Desktop/Flatiron/CapstoneProject/EV_calc_df.csv'
EV_calc_df.to_csv(csv_file_path, index=True)

In [None]:
# Since we're using the index to represent time, we'll calculate the cumulative sum of the 'Best_Bet_Outcome' column
EV_calc_df['Cumulative_Best_Bet_Outcome'] = EV_calc_df['Best_Bet_Outcome'].cumsum()

# Importing matplotlib for plotting
import matplotlib.pyplot as plt

# Plotting the data
plt.figure(figsize=(12, 6))
plt.plot(EV_calc_df.index, EV_calc_df['Cumulative_Best_Bet_Outcome'], marker='o', linestyle='-')
plt.title('Profit of Best Bet Outcome')
plt.xlabel('Match #')
plt.ylabel('Cumulative Sum of Best Bet Outcome')
plt.grid(True)
plt.show()


In [None]:
#Since we're using the index to represent time, we'll calculate the cumulative sum of the 'Best_Bet_Outcome' column
EV_calc_df['Cumulative_Positive_EV_Outcomes'] = EV_calc_df['Positive_EV_Outcomes'].cumsum()

# Importing matplotlib for plotting
import matplotlib.pyplot as plt

# Plotting the data
plt.figure(figsize=(12, 6))
plt.plot(EV_calc_df.index, EV_calc_df['Cumulative_Positive_EV_Outcomes'], marker='o', linestyle='-')
plt.title('Profit of +EV Outcome')
plt.xlabel('Match #')
plt.ylabel('Cumulative Sum of +EV Outcomes')
plt.grid(True)
plt.show()


In [None]:
EV_calc_df.columns