In [258]:
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
import time
import matplotlib.pyplot as plt

In [259]:
year = 2010
filename = 'Rolling Average Stats/' + str(year) + ".csv"
df = pd.read_csv(filename)

In [260]:
#I dont actually use the betting lines in the model. I don't drop it here because I need it to calculate vegas probabilities
X = df.drop(['date', 'home_team', 'away_team', 'home_score', 'away_score', 'home_pitcher', 'away_pitcher'], 1)
y = df.home_score > df.away_score # 1 if home team wins, 0 otherwise
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

In [261]:
def calcVegasprobs(X_test):
    vegas = []
    homeline = X_test.homeLine
    for i in range(len(homeline)):
        if (homeline.iloc[i] < 0): #home team is the favorite
            vegas.append(homeline.iloc[i] / (homeline.iloc[i] - 100))
        else:
            vegas.append(100 / (100 + homeline.iloc[i]))
    return vegas

In [262]:
vegas = calcVegasprobs(X_test)

In [263]:
X_test_temp = X_test.drop(['homeLine', 'awayLine'], 1) #this is what I will use as X_test
X_train_temp = X_train.drop(['homeLine', 'awayLine'], 1)
params = {'gamma': 10,
 'learning_rate': 0.01,
 'max_depth': 6,
 'min_child_weight': 5,
 'n_estimators': 200,
 'subsample': 0.5}

num_models = 10
us = np.zeros(len(y_test))
seeds = [1, 3, 5, 7, 11, 13, 17, 19, 23, 47]
for seed in seeds:
    clfi =  XGBClassifier(**params, silent=False, seed=seed)
    clfi.fit(X_train_temp, y_train.values.ravel())
    clfi_pred = clfi.predict_proba(X_test_temp) [:,1]
    us = us + (1/num_models)*clfi_pred


In [264]:
games_to_bet = []
threshold = 0.20
for i in range(len(us)):
    if us[i] >= (vegas[i] + threshold):
        games_to_bet.append((i, 1))
    elif us[i] <= (vegas[i] - threshold):
        games_to_bet.append((i, 0))


In [265]:
gains = 0
for game, team_to_bet in games_to_bet:
    current_game = X_test.iloc[game]
    homeWin = y_test.iloc[game]
    homeLine = current_game['homeLine']
    awayLine = current_game['awayLine']
    #bet on home team
    if team_to_bet:
        if homeWin:
            if homeLine < 0:
                gains += (100*100)/homeLine
            else:
                gains += homeLine
        else:
            gains -= 100
    #bet on away team
    else:
        if not homeWin:
            if awayLine < 0:
                gains += (100*100)/awayLine
            else:
                gains += awayLine
        else:
            gains -= 100
            
gains

-100