In [None]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cs231n/assignments/assignment3/'
FOLDERNAME = 'CS229/Project/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

# now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))
%cd drive/My\ Drive/$FOLDERNAME

Mounted at /content/drive
/content/drive/My Drive/CS229/Project


In [None]:
import numpy as np
import pandas as pd

from util import load_dataset
from util import place_bets
from util import evaluate_bets

# for auto-reloading external modules
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Setting pandas options
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', None)

In [None]:
# In contrast to custom_loss implemented in custom_loss.py, this function returns a vector with the loss for each individual bet, as opposed to a sum total

def custom_loss(true_label, predict_label, odds_home, odds_away):
    """ Computes the per-bet result of 1-unit bets given the outcomes and the odds at each time point

    Positive entries are winnings and negative entries are losses. The result
    is NOT summed: callers aggregate it themselves (e.g. with ``.sum()``).

    Args:
        true_label: outcomes of the match, 0 is for home team loses, 1 is for home team wins, size (n,)
        predict_label: predictions of the model (1 = bet on home, 0 = bet on away), size (n,)
        odds_home: odds that bookies pay for a win at home, size (n,)
        odds_away: odds that bookies pay for a win away, size (n,)

    Returns:
        loss_vec: result of each individual bet, size (n,)
    """
    # Home team won: a home bet nets (odds_home - 1), an away bet loses the stake (-1).
    payoff_if_home_wins = predict_label * (odds_home - 1) + predict_label - 1
    # Home team lost: an away bet nets (odds_away - 1), a home bet loses the stake (-1).
    payoff_if_home_loses = (1 - predict_label) * (odds_away - 1) - predict_label
    # true_label selects which of the two payoffs applies to each row.
    return true_label * payoff_if_home_wins + (1 - true_label) * payoff_if_home_loses

# New Section

In [None]:
# Read the full moneyline table; the CSV's first column is used as the DataFrame index.
df = pd.read_csv('Load_Data/Moneyline_alldata.csv', index_col=0)

In [None]:
# Each split is read with the project helper `load_dataset` (imported from util).
# Features are requested with intercept=True; labels are converted to a flat
# 1-D NumPy array via .to_numpy().flatten().

# Training
x_train = load_dataset("Load_Data/x_train.csv", intercept=True)
y_train = load_dataset("Load_Data/y_train.csv").to_numpy().flatten()

# Validation
x_val = load_dataset("Load_Data/x_val.csv", intercept=True)
y_val = load_dataset("Load_Data/y_val.csv").to_numpy().flatten()

# Test
x_test = load_dataset("Load_Data/x_test.csv", intercept=True)
y_test = load_dataset("Load_Data/y_test.csv").to_numpy().flatten()

In [None]:
# Show the training feature frame as this cell's output.
x_train

Unnamed: 0,Intercept,Time_left,Spread,Odds_Home,Odds_Away,Initial_odds_home,Initial_odds_away,Q_2.0,Q_3.0,Q_4.0,Team_Home_BKN Nets,Team_Home_Boston Celtics,Team_Home_CHA Hornets,Team_Home_Chicago Bulls,Team_Home_Cleveland Cavaliers,Team_Home_DAL Mavericks,Team_Home_Denver Nuggets,Team_Home_Detroit Pistons,Team_Home_Golden State Warriors,Team_Home_Houston Rockets,Team_Home_Indiana Pacers,Team_Home_LA Lakers,Team_Home_Los Angeles Clippers,Team_Home_MIA Heat,Team_Home_Memphis Grizzlies,Team_Home_Milwaukee Bucks,Team_Home_Minnesota Timberwolves,Team_Home_New Orleans Pelicans,Team_Home_New York Knicks,Team_Home_Oklahoma City Thunder,Team_Home_Orlando Magic,Team_Home_Philadelphia 76ers,Team_Home_Phoenix Suns,Team_Home_Portland Trail Blazers,Team_Home_Sacramento Kings,Team_Home_San Antonio Spurs,Team_Home_Toronto Raptors,Team_Home_Utah Jazz,Team_Home_Washington Wizards,Team_Away_BKN Nets,Team_Away_Boston Celtics,Team_Away_CHA Hornets,Team_Away_Chicago Bulls,Team_Away_Cleveland Cavaliers,Team_Away_DAL Mavericks,Team_Away_Denver Nuggets,Team_Away_Detroit Pistons,Team_Away_Golden State Warriors,Team_Away_Houston Rockets,Team_Away_Indiana Pacers,Team_Away_LA Lakers,Team_Away_Los Angeles Clippers,Team_Away_MIA Heat,Team_Away_Memphis Grizzlies,Team_Away_Milwaukee Bucks,Team_Away_Minnesota Timberwolves,Team_Away_New Orleans Pelicans,Team_Away_New York Knicks,Team_Away_Oklahoma City Thunder,Team_Away_Orlando Magic,Team_Away_Philadelphia 76ers,Team_Away_Phoenix Suns,Team_Away_Portland Trail Blazers,Team_Away_Sacramento Kings,Team_Away_San Antonio Spurs,Team_Away_Toronto Raptors,Team_Away_Utah Jazz,Team_Away_Washington Wizards
0,1,48.000000,0,1.95,1.860,1.95,1.86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,1,48.000000,0,2.30,1.660,1.95,1.86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,1,48.000000,0,2.25,1.680,1.95,1.86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,1,48.000000,0,2.30,1.660,1.95,1.86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,1,48.000000,0,2.35,1.640,1.95,1.86,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299741,1,0.716667,-7,17.50,1.012,4.20,1.25,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
299742,1,0.716667,-7,15.50,1.028,4.20,1.25,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
299743,1,0.716667,-7,17.50,1.012,4.20,1.25,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0
299744,1,0.550000,-6,19.00,1.006,4.20,1.25,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0


## Baseline - Betting on the home team

In [None]:
# Question 1: In what percentage does the home team win?

# One entry per match ID. Select 'Winner' before aggregating so only that
# column is reduced (instead of taking the max of every column in df).
winners = df.groupby('ID')['Winner'].max()
wins = winners.sum()          # matches won by the home team
loss = len(winners) - wins    # remaining matches, i.e. won by the away team

print(f"Number of wins from home team: {wins}, {100 * wins / (wins + loss):.2f}%")
print(f"Number of wins from away team: {loss}, {100 * loss / (wins + loss):.2f}%")

Number of wins from home team: 579, 55.41%
Number of wins from away team: 466, 44.59%


In [None]:
# Question 2: How much do we make if we always bet on the home team?

# Training set: a prediction of 1 on every row means "always bet on the home team"
y_pred_tr = np.ones(len(x_train))
profit_bs1_tr = custom_loss(y_train, y_pred_tr, x_train['Odds_Home'], x_train['Odds_Away'])

# Validation set: same strategy
y_pred = np.ones(len(x_val))
profit_bs1 = custom_loss(y_val, y_pred, x_val['Odds_Home'], x_val['Odds_Away'])

# Aggregate once, then report total profit and profit per bet (in %)
total_bs1_tr, n_bs1_tr = profit_bs1_tr.sum(), len(y_pred_tr)
total_bs1_val, n_bs1_val = profit_bs1.sum(), len(y_pred)

print(f"In the training set, we make profit of {total_bs1_tr:.2f} over {n_bs1_tr} bets. Yielding profit of {100 * total_bs1_tr / n_bs1_tr:.4f}%")
print(f"In the validation set, we make profit of {total_bs1_val:.2f} over {n_bs1_val} bets. Yielding profit of {100 * total_bs1_val / n_bs1_val:.4f}%")

In the training set, we make profit of -12181.34 over 211591 bets. Yielding profit of -5.7570%
In the validation set, we make profit of -2927.90 over 46749 bets. Yielding profit of -6.2630%


## Baseline - Betting on the favorite at beginning of each match

In [None]:
# Question 1: How often does the favorite win the match?

# fav_* is 1 when the opening payout is lower for the home team (higher
# probability of winning) and 0 otherwise. The comparison is strict, so
# equal opening odds count as 0.

# On training data
fav_tr = np.where((x_train.Initial_odds_home < x_train.Initial_odds_away), 1, 0)
score_fav_tr = np.mean(fav_tr == y_train)

# On validation data
fav_val = np.where((x_val.Initial_odds_home < x_val.Initial_odds_away), 1, 0)
# Score the validation picks defined on the line above; every name used in
# this cell is defined in the notebook, so it runs on a fresh kernel.
score_fav_val = np.mean(fav_val == y_val)

# Printing results
print(f"Always betting on the favorite yields {100*score_fav_tr:.2f}% accuracy on training data")
print(f"Always betting on the favorite yields {100*score_fav_val:.2f}% accuracy on validation data")

Always betting on the favorite yields 63.84% accuracy on training data
Always betting on the favorite yields 62.30% accuracy on validation data


In [None]:
# Question 2: How much do we make if we always bet on the favorite?

# Training set: bets follow the opening favorite, settled with the odds on each row
profit_bs2_tr = custom_loss(
    true_label=y_train,
    predict_label=fav_tr,
    odds_home=x_train['Odds_Home'],
    odds_away=x_train['Odds_Away'],
)

# Validation set: same strategy
profit_bs2_val = custom_loss(
    true_label=y_val,
    predict_label=fav_val,
    odds_home=x_val['Odds_Home'],
    odds_away=x_val['Odds_Away'],
)

# Aggregate once, then report total profit and profit per bet (in %)
total_bs2_tr, n_bs2_tr = profit_bs2_tr.sum(), len(fav_tr)
total_bs2_val, n_bs2_val = profit_bs2_val.sum(), len(fav_val)

print(f"In the training set, we make profit of {total_bs2_tr:.2f} over {n_bs2_tr} bets. Yielding profit of {100 * total_bs2_tr / n_bs2_tr:.4f}%")
print(f"In the validation set, we make profit of {total_bs2_val:.2f} over {n_bs2_val} bets. Yielding profit of {100*total_bs2_val / n_bs2_val:.4f}% ")

In the training set, we make profit of 2704.24 over 211591 bets. Yielding profit of 1.2780%
In the validation set, we make profit of -5423.83 over 46749 bets. Yielding profit of -11.6020% 


## Baseline - Betting on the favorite at each timestamp $t$

In [None]:
# Question 1: How often does the team that has the better odds win the match?

# Training set: the pick is 1 when the current home odds are strictly lower than the away odds
home_has_better_odds_tr = x_train['Odds_Home'] < x_train['Odds_Away']
adv_tr = np.where(home_has_better_odds_tr, 1, 0)
score_adv_tr = (adv_tr == y_train).mean()

# Validation set: same rule
home_has_better_odds_val = x_val['Odds_Home'] < x_val['Odds_Away']
adv_val = np.where(home_has_better_odds_val, 1, 0)
score_adv_val = (adv_val == y_val).mean()

# Report the share of rows where the pick matches the outcome
print(f"Betting on favorite at each timestep yields {100*score_adv_tr:.2f}% accuracy on training data")
print(f"Betting on favorite at each timestep yields {100*score_adv_val:.2f}% accuracy on validation data")

Betting on favorite at each timestep yields 73.61% accuracy on training data
Betting on favorite at each timestep yields 77.66% accuracy on validation data


In [None]:
# Question 2: How much do we make if we bet on the favorite at each timestep t?

# On training set
profit_bs3_tr = custom_loss(y_train, adv_tr, x_train['Odds_Home'], x_train['Odds_Away'])

# On validation set
profit_bs3_val = custom_loss(y_val, adv_val, x_val['Odds_Home'], x_val['Odds_Away'])

# Output: the bet counts come from this baseline's own picks (adv_*), so the
# cell does not depend on variables from the opening-favorite baseline.
print(f"In the training set, we make profit of {profit_bs3_tr.sum():.2f} over {len(adv_tr)} bets. Yielding profit of {100*profit_bs3_tr.sum() / len(adv_tr):.4f}%")
print(f"In the validation set, we make profit of {profit_bs3_val.sum():.2f} over {len(adv_val)} bets. Yielding profit of {100*profit_bs3_val.sum() / len(adv_val):.4f}%")

In the training set, we make profit of -9924.12 over 211591 bets. Yielding profit of -4.6902%
In the validation set, we make profit of -469.42 over 46749 bets. Yielding profit of -1.0041%
