In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, roc_auc_score
from sklearn import preprocessing
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV

# Pre Processing

In [2]:
def extract_x(file):
    
    df = pd.read_csv(file)
    
    if file == 'epl.csv':
        x = df.drop(['fixture_date', 'fixture_id', 'Home Team', 'Away Team', 'Full Time Result',
                    'Result No Draw', 'A Win', 'B Win'], axis=1)
        x = pd.get_dummies(x, columns=["Promoted"], prefix="Promoted")

    elif file == 'nba.csv':
        x = df.drop([ 'Home Win', 'MATCH_DATE', 'HOME_TEAM', 'AWAY_TEAM', 
                                        'A Win', 'B Win'], axis=1)
        
    else:
        x = df.drop(['FIGHTER_A', 'FIGHTER_B', 'Result', 'A Win', 'B Win', 'A_CUMULATIVE_RDS', 'B_CUMULATIVE_RDS'], axis=1)
    
    return x

In [3]:
def extract_y(file):
    
    df = pd.read_csv(file)
    
    if file == 'epl.csv':
        y = df['Result No Draw']
    elif file == 'nba.csv':
        y = df['Home Win']
    else:
        y = df['Result']
    
    return y

In [4]:
def extract_odds(file):
    
    df = pd.read_csv(file)
    odds = df[['A Win', 'B Win']]
    
    return odds

In [39]:
epl_x = extract_x('epl.csv')
epl_y = extract_y('epl.csv')
epl_odds = extract_odds('epl.csv')

nba_x = extract_x('nba.csv')
nba_y = extract_y('nba.csv')
nba_odds = extract_odds('nba.csv')

ufc_x = extract_x('ufc.csv')
ufc_y = extract_y('ufc.csv')
ufc_odds = extract_odds('ufc.csv')

# Static Variables

In [6]:
scoring = {'accuracy': make_scorer(accuracy_score),
           'precision': make_scorer(precision_score),
           'recall': make_scorer(recall_score),
           'f1_score': make_scorer(f1_score),
           'auc': make_scorer(roc_auc_score)}

In [7]:
logreg = LogisticRegression(max_iter=10000)
knn = KNeighborsClassifier(n_neighbors=5)
forest = RandomForestClassifier()
svm = CalibratedClassifierCV(LinearSVC(dual=False))

# Cross Validation

In [8]:
def cross_validation_results(model):
    
    epl = cross_validate(model, epl_x, epl_y, cv=5, scoring=scoring)
    nba = cross_validate(model, nba_x, nba_y, cv=5, scoring=scoring)
    ufc = cross_validate(model, ufc_x, ufc_y, cv=5, scoring=scoring)
    
    epl_results = pd.DataFrame(epl)
    nba_results = pd.DataFrame(nba)
    ufc_results = pd.DataFrame(ufc)
    
    epl_results = epl_results.describe()
    nba_results = nba_results.describe()
    ufc_results = ufc_results.describe()
    
    combined = epl_results.join(nba_results, lsuffix='_epl', rsuffix='_nba').join(ufc_results)
    
    return combined

### Logistic Regression

In [9]:
cross_validation_results(logreg)

Unnamed: 0,fit_time_epl,score_time_epl,test_accuracy_epl,test_precision_epl,test_recall_epl,test_f1_score_epl,test_auc_epl,fit_time_nba,score_time_nba,test_accuracy_nba,...,test_recall_nba,test_f1_score_nba,test_auc_nba,fit_time,score_time,test_accuracy,test_precision,test_recall,test_f1_score,test_auc
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.270543,0.004486,0.65885,0.644656,0.526279,0.579295,0.646035,0.194826,0.005245,0.643054,...,0.767295,0.707816,0.623475,0.210914,0.004976,0.60046,0.599902,0.615472,0.607371,0.600384
std,0.013179,0.000155,0.005045,0.009644,0.016055,0.008556,0.004948,0.021207,5e-05,0.019882,...,0.083504,0.033705,0.015938,0.039762,0.000134,0.014009,0.016312,0.019699,0.012713,0.014035
min,0.254757,0.004341,0.652655,0.634731,0.5,0.567416,0.640376,0.16025,0.005166,0.621943,...,0.635623,0.65625,0.604902,0.155321,0.004863,0.585731,0.584071,0.58945,0.594595,0.585662
25%,0.261437,0.004381,0.654867,0.635294,0.524752,0.574526,0.643327,0.193123,0.005229,0.630535,...,0.732247,0.69235,0.614594,0.188703,0.004875,0.589183,0.586813,0.605505,0.599327,0.589103
50%,0.273432,0.004402,0.659292,0.64497,0.529703,0.580645,0.644,0.197658,0.005259,0.635403,...,0.818605,0.72093,0.619799,0.217409,0.004896,0.598389,0.594017,0.612385,0.602579,0.59817
75%,0.274055,0.004621,0.663717,0.652439,0.534653,0.584699,0.650851,0.207476,0.005273,0.65786,...,0.82093,0.731606,0.632178,0.23775,0.00512,0.609896,0.616307,0.633867,0.614365,0.609967
max,0.289036,0.004684,0.663717,0.655844,0.542289,0.589189,0.651622,0.215622,0.005296,0.669531,...,0.82907,0.737945,0.645903,0.255388,0.005124,0.619102,0.618304,0.636156,0.625989,0.619017


### K Nearest Neighbours

In [10]:
cross_validation_results(knn)

Unnamed: 0,fit_time_epl,score_time_epl,test_accuracy_epl,test_precision_epl,test_recall_epl,test_f1_score_epl,test_auc_epl,fit_time_nba,score_time_nba,test_accuracy_nba,...,test_recall_nba,test_f1_score_nba,test_auc_nba,fit_time,score_time,test_accuracy,test_precision,test_recall,test_f1_score,test_auc
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.001996,0.028435,0.600885,0.557444,0.507394,0.530911,0.59182,0.002168,0.184568,0.592042,...,0.676096,0.65195,0.578795,0.002164,0.075521,0.551899,0.551815,0.571939,0.561543,0.551805
std,0.00039,0.003062,0.029912,0.034503,0.053635,0.043789,0.031912,9.7e-05,0.021548,0.023216,...,0.063819,0.034086,0.018552,0.000316,0.007628,0.014317,0.013506,0.029179,0.019791,0.014271
min,0.001648,0.026006,0.55531,0.502762,0.450495,0.475196,0.545248,0.002023,0.1679,0.564442,...,0.592549,0.611779,0.555475,0.001839,0.07116,0.530495,0.531963,0.534404,0.533181,0.530481
25%,0.001704,0.026351,0.59292,0.549708,0.467662,0.505376,0.580444,0.002146,0.167987,0.573034,...,0.621653,0.618413,0.569975,0.001908,0.071438,0.548907,0.550691,0.548165,0.549425,0.548909
50%,0.00185,0.026662,0.603982,0.564246,0.5,0.530184,0.594,0.002171,0.175133,0.595112,...,0.722093,0.669542,0.575114,0.002185,0.072059,0.552359,0.551724,0.585812,0.568257,0.552165
75%,0.002197,0.030106,0.617257,0.57754,0.534653,0.55527,0.609327,0.002217,0.193573,0.607662,...,0.722093,0.676471,0.58964,0.002254,0.073919,0.558113,0.554852,0.588101,0.578022,0.557957
max,0.00258,0.033052,0.634956,0.592965,0.584158,0.588529,0.630079,0.002285,0.218249,0.61996,...,0.722093,0.683544,0.603772,0.002634,0.089031,0.56962,0.569845,0.603211,0.578829,0.569513


### Random Forest Classifier

In [11]:
cross_validation_results(forest)

Unnamed: 0,fit_time_epl,score_time_epl,test_accuracy_epl,test_precision_epl,test_recall_epl,test_f1_score_epl,test_auc_epl,fit_time_nba,score_time_nba,test_accuracy_nba,...,test_recall_nba,test_f1_score_nba,test_auc_nba,fit_time,score_time,test_accuracy,test_precision,test_recall,test_f1_score,test_auc
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.309956,0.018535,0.632301,0.608312,0.493508,0.544462,0.618856,1.205084,0.03287,0.633011,...,0.744035,0.696024,0.615518,0.728585,0.023337,0.603682,0.605978,0.603568,0.604696,0.603674
std,0.010078,0.001227,0.018404,0.023261,0.043297,0.033443,0.020506,0.009469,0.000338,0.024311,...,0.069944,0.034053,0.019604,0.006796,0.000254,0.016488,0.017677,0.016303,0.015154,0.016504
min,0.299113,0.017184,0.610619,0.581699,0.442786,0.502825,0.593903,1.19111,0.032586,0.600132,...,0.63213,0.642224,0.595117,0.720743,0.022895,0.583429,0.585034,0.590389,0.587699,0.583389
25%,0.303537,0.017903,0.615044,0.588608,0.460396,0.516667,0.600198,1.202788,0.032593,0.626156,...,0.718277,0.686318,0.600791,0.724081,0.023383,0.592635,0.594037,0.594037,0.594037,0.59263
50%,0.306522,0.017989,0.641593,0.60989,0.49505,0.552486,0.627525,1.204106,0.032838,0.627231,...,0.786047,0.705208,0.612961,0.728213,0.023392,0.601841,0.603687,0.600917,0.602299,0.601844
75%,0.317021,0.019409,0.641593,0.625,0.519802,0.572207,0.632752,1.211653,0.032921,0.646631,...,0.787209,0.716481,0.624675,0.731634,0.023502,0.619102,0.618834,0.600917,0.614302,0.61903
max,0.323586,0.020189,0.652655,0.636364,0.549505,0.578125,0.639901,1.21576,0.033415,0.664904,...,0.796512,0.729888,0.644044,0.738255,0.023512,0.621404,0.628297,0.631579,0.625142,0.621475


### Support Vector Machine

In [12]:
cross_validation_results(svm)

Unnamed: 0,fit_time_epl,score_time_epl,test_accuracy_epl,test_precision_epl,test_recall_epl,test_f1_score_epl,test_auc_epl,fit_time_nba,score_time_nba,test_accuracy_nba,...,test_recall_nba,test_f1_score_nba,test_auc_nba,fit_time,score_time,test_accuracy,test_precision,test_recall,test_f1_score,test_auc
count,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,...,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0
mean,0.07015,0.008271,0.661504,0.650867,0.522314,0.579375,0.648053,0.180939,0.009665,0.640411,...,0.771948,0.707519,0.619683,0.200202,0.009637,0.602532,0.602689,0.613181,0.607703,0.602471
std,0.005672,0.000462,0.003832,0.009702,0.014051,0.006737,0.003614,0.039376,0.000124,0.022498,...,0.084068,0.035408,0.017507,0.009472,0.000113,0.011085,0.013391,0.018742,0.010894,0.011104
min,0.064669,0.007809,0.654867,0.640244,0.5,0.570621,0.641901,0.135272,0.009456,0.614673,...,0.640279,0.653595,0.600315,0.189344,0.009466,0.589183,0.588764,0.594037,0.594779,0.589142
25%,0.06645,0.007912,0.661504,0.642857,0.519802,0.57377,0.648,0.162557,0.009671,0.627892,...,0.734575,0.691507,0.61066,0.19195,0.0096,0.593786,0.593258,0.600917,0.599319,0.593745
50%,0.067351,0.008246,0.663717,0.652439,0.524752,0.582418,0.649135,0.180648,0.00968,0.63144,...,0.825581,0.71875,0.611171,0.200754,0.009656,0.602992,0.598712,0.605505,0.607268,0.602787
75%,0.074498,0.008419,0.663717,0.654321,0.529703,0.584699,0.650376,0.183878,0.009752,0.661823,...,0.82907,0.736082,0.6353,0.20905,0.009694,0.612198,0.611607,0.627002,0.61794,0.612112
max,0.07778,0.008969,0.663717,0.664474,0.537313,0.585366,0.650851,0.242338,0.009767,0.666226,...,0.830233,0.737662,0.640968,0.209911,0.009767,0.614499,0.621103,0.638444,0.619209,0.61457


# Betting Odds Analysis

In [13]:
def run_model(model, sport):
    
    if sport == 'epl':
        x_train, x_test, y_train, y_test = train_test_split(epl_x, epl_y, test_size = 0.2, random_state=42)
    elif sport == 'nba':
        x_train, x_test, y_train, y_test = train_test_split(nba_x, nba_y, test_size = 0.2, random_state=42)
    elif sport == 'ufc':
        x_train, x_test, y_train, y_test = train_test_split(ufc_x, ufc_y, test_size = 0.2, random_state=42)
    else:
        pass
    
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    y_pred_prob = model.predict_proba(x_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    
    prediction_values_a = []
    prediction_values_b = []
    
    for i in y_pred_prob:
        prediction_values_a.append(i[0])
        prediction_values_b.append(i[1])

    return accuracy, precision, recall, f1, auc, y_pred, y_test, prediction_values_a, prediction_values_b

In [14]:
def show_stats(results):
    
    print("Accuracy:", results[0])
    print("Precision:", results[1])
    print("Recall", results[2])
    print("F1 Score:", results[3])
    print("AUC:", results[4])

In [15]:
def calc_basic_returns(row):
    
    if row['predicted'] == row['actual']:
        if row['predicted'] == 1:
            return (100 * row['A Win'])
        else:
            return (100 * row['B Win'])
    else:
        return 0

In [48]:
def calc_returns_70(row):
    
    if row['A Win Prob'] > 0.70:
        if row['predicted'] == row['actual']:
            return (100 * row['A Win'])
        else:
            return 0
    elif row['B Win Prob'] > 0.70:
        if row['predicted'] == row['actual']:
            return (100 * row['B Win'])
        else:
            return 0
    else:
        return np.nan

In [17]:
def underdog_only(row):
    
    if row['predicted'] == 1:
        if row['A Win'] > row['B Win']:
            if row['predicted'] == row['actual']:
                return (100 * row['A Win'])
            else:
                return 0
        else:
            return np.nan
    else:
        if row['B Win'] > row['A Win']:
            if row['predicted'] == row['actual']:
                return (100 * row['B Win'])
            else:
                return 0
        else:
            return np.nan

In [40]:
def predictions_dataframe(results, odds):
    
    df = pd.DataFrame(data={'predicted': results[5], 'actual': results[6], 'A Win Prob': results[8], 'B Win Prob': results[7]})
    
    df = df.join(odds)
    
    df['returns'] = df.apply (lambda row: calc_basic_returns(row), axis=1)
    df['returns_70pct'] = df.apply (lambda row: calc_returns_70(row), axis=1)
    df['returns_dogs'] = df.apply (lambda row: underdog_only(row), axis=1)
    
    
    return df

In [19]:
def betting_summary(df):
    
    base_returns = df['returns'].sum()
    total_bet_base = len(df.index) * 100
    profit_base = base_returns - total_bet_base
    
    returns_70 = df['returns_70pct'].sum()
    total_bets_70 = (len(df.index) - df['returns_70pct'].isnull().sum()) * 100
    profit_70 = returns_70 - total_bets_70
    
    
    returns_dogs = df['returns_dogs'].sum()
    total_bets_dogs = (len(df.index) - df['returns_dogs'].isnull().sum()) * 100
    profit_dogs = returns_dogs - total_bets_dogs
    
    print('Basic Strategy - Returns:', base_returns, 'Total Bet:', total_bet_base, 'ROI%:', (profit_base / total_bet_base) * 100) 
    print('70pct Strategy - Returns:', returns_70, 'Total Bet:', total_bets_70, 'ROI%:', (profit_70 / total_bets_70) * 100)
    print('Dogs Strategy - Returns:', returns_dogs, 'Total Bet:', total_bets_dogs, 'ROI%', (profit_dogs / total_bets_dogs) * 100)

## EPL

### Logistic Regression

In [49]:
regression_epl = run_model(logreg, 'epl')

In [50]:
show_stats(regression_epl)

Accuracy: 0.668141592920354
Precision: 0.6962025316455697
Recall 0.5188679245283019
F1 Score: 0.5945945945945946
AUC: 0.6594339622641511


In [51]:
epl_predictions = predictions_dataframe(regression_epl, epl_odds)

In [52]:
epl_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
1574,1,1,0.720672,0.279328,1.46,2.80,146.0,146.0,
771,0,0,0.458596,0.541404,1.95,1.89,189.0,,
1190,0,1,0.498048,0.501952,1.82,2.04,0.0,,0.0
1128,1,1,0.775087,0.224913,1.31,3.52,131.0,131.0,
56,0,1,0.233461,0.766539,3.94,1.26,0.0,0.0,
...,...,...,...,...,...,...,...,...,...
1375,0,0,0.363351,0.636649,2.04,1.81,181.0,,
8,1,1,0.584450,0.415550,1.38,3.08,138.0,,
1512,0,1,0.319444,0.680556,2.19,1.70,0.0,,
2096,0,0,0.295981,0.704019,3.58,1.31,131.0,131.0,


In [53]:
betting_summary(epl_predictions)

Basic Strategy - Returns: 43285.0 Total Bet: 45200 ROI%: -4.236725663716815
70pct Strategy - Returns: 15735.0 Total Bet: 16300 ROI%: -3.4662576687116564
Dogs Strategy - Returns: 4445.0 Total Bet: 5600 ROI% -20.625


### K Nearest Neighbours

In [54]:
knn_epl = run_model(knn, 'epl')

In [55]:
show_stats(knn_epl)

Accuracy: 0.6128318584070797
Precision: 0.5989304812834224
Recall 0.5283018867924528
F1 Score: 0.5614035087719297
AUC: 0.6079009433962264


In [57]:
epl_predictions = predictions_dataframe(knn_epl, epl_odds)

In [58]:
epl_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
1574,1,1,0.6,0.4,1.46,2.80,146.0,,
771,0,0,0.4,0.6,1.95,1.89,189.0,,
1190,0,1,0.4,0.6,1.82,2.04,0.0,,0.0
1128,1,1,1.0,0.0,1.31,3.52,131.0,131.0,
56,0,1,0.4,0.6,3.94,1.26,0.0,,
...,...,...,...,...,...,...,...,...,...
1375,1,0,0.8,0.2,2.04,1.81,0.0,0.0,0.0
8,1,1,0.6,0.4,1.38,3.08,138.0,,
1512,1,1,0.6,0.4,2.19,1.70,219.0,,219.0
2096,0,0,0.2,0.8,3.58,1.31,131.0,131.0,


In [59]:
betting_summary(epl_predictions)

Basic Strategy - Returns: 41546.0 Total Bet: 45200 ROI%: -8.084070796460177
70pct Strategy - Returns: 21297.0 Total Bet: 22200 ROI%: -4.0675675675675675
Dogs Strategy - Returns: 8595.0 Total Bet: 11000 ROI% -21.863636363636363


### Random Forest

In [61]:
forest_epl = run_model(forest, 'epl')

In [62]:
show_stats(forest_epl)

Accuracy: 0.6305309734513275
Precision: 0.6573426573426573
Recall 0.44339622641509435
F1 Score: 0.5295774647887325
AUC: 0.6196147798742139


In [63]:
epl_predictions = predictions_dataframe(forest_epl, epl_odds)

In [64]:
epl_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
1574,1,1,0.68,0.32,1.46,2.80,146.0,,
771,0,0,0.46,0.54,1.95,1.89,189.0,,
1190,1,1,0.52,0.48,1.82,2.04,182.0,,
1128,1,1,0.68,0.32,1.31,3.52,131.0,,
56,0,1,0.30,0.70,3.94,1.26,0.0,,
...,...,...,...,...,...,...,...,...,...
1375,0,0,0.31,0.69,2.04,1.81,181.0,,
8,1,1,0.61,0.39,1.38,3.08,138.0,,
1512,0,1,0.35,0.65,2.19,1.70,0.0,,
2096,0,0,0.32,0.68,3.58,1.31,131.0,,


In [65]:
betting_summary(epl_predictions)

Basic Strategy - Returns: 41079.0 Total Bet: 45200 ROI%: -9.117256637168142
70pct Strategy - Returns: 14916.0 Total Bet: 15100 ROI%: -1.218543046357616
Dogs Strategy - Returns: 5452.0 Total Bet: 7800 ROI% -30.1025641025641


### Support Vector Machine

In [66]:
svm_epl = run_model(svm, 'epl')

In [67]:
show_stats(svm_epl)

Accuracy: 0.661504424778761
Precision: 0.6903225806451613
Recall 0.5047169811320755
F1 Score: 0.5831062670299729
AUC: 0.6523584905660378


In [68]:
epl_predictions = predictions_dataframe(svm_epl, epl_odds)

In [69]:
epl_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
1574,1,1,0.687509,0.312491,1.46,2.80,146.0,,
771,0,0,0.457610,0.542390,1.95,1.89,189.0,,
1190,0,1,0.490072,0.509928,1.82,2.04,0.0,,0.0
1128,1,1,0.751362,0.248638,1.31,3.52,131.0,131.0,
56,0,1,0.251034,0.748966,3.94,1.26,0.0,0.0,
...,...,...,...,...,...,...,...,...,...
1375,0,0,0.367310,0.632690,2.04,1.81,181.0,,
8,1,1,0.559630,0.440370,1.38,3.08,138.0,,
1512,0,1,0.329191,0.670809,2.19,1.70,0.0,,
2096,0,0,0.310390,0.689610,3.58,1.31,131.0,,


In [70]:
betting_summary(epl_predictions)

Basic Strategy - Returns: 42688.0 Total Bet: 45200 ROI%: -5.557522123893805
70pct Strategy - Returns: 13935.0 Total Bet: 14600 ROI%: -4.554794520547945
Dogs Strategy - Returns: 4018.0 Total Bet: 5500 ROI% -26.945454545454545


## NBA

### Logistic Regression

In [71]:
regression_nba = run_model(logreg, 'nba')

In [72]:
show_stats(regression_nba)

Accuracy: 0.6281373844121533
Precision: 0.6448692152917505
Recall 0.7532314923619271
F1 Score: 0.694850948509485
AUC: 0.6104015682020797


In [73]:
nba_predictions = predictions_dataframe(regression_nba, nba_odds)

In [74]:
nba_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
7401,1,1,0.510667,0.489333,3.75,1.30,375.0,,375.0
1277,1,1,0.960940,0.039060,1.07,9.76,107.0,107.0,
2312,0,0,0.419013,0.580987,3.10,1.38,138.0,,
586,1,1,0.596293,0.403707,1.67,2.30,167.0,,
6607,1,1,0.695254,0.304746,1.33,3.50,133.0,,
...,...,...,...,...,...,...,...,...,...
6702,1,1,0.664005,0.335995,1.11,7.00,111.0,,
5893,0,0,0.429882,0.570118,1.74,2.15,215.0,,215.0
347,1,1,0.708016,0.291984,1.29,3.85,129.0,129.0,
6680,1,0,0.548654,0.451346,2.65,1.54,0.0,,0.0


In [75]:
betting_summary(nba_predictions)

Basic Strategy - Returns: 143320.0 Total Bet: 151400 ROI%: -5.336856010568032
70pct Strategy - Returns: 35264.0 Total Bet: 38200 ROI%: -7.68586387434555
Dogs Strategy - Returns: 27352.0 Total Bet: 27100 ROI% 0.9298892988929889


### K Nearest Neighbours

In [78]:
knn_nba = run_model(knn, 'nba')

In [79]:
show_stats(knn_nba)

Accuracy: 0.5766182298546896
Precision: 0.6164079822616408
Recall 0.6533490011750881
F1 Score: 0.6343411294922989
AUC: 0.5657393572994596


In [80]:
nba_predictions = predictions_dataframe(knn_nba, nba_odds)

In [81]:
nba_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
7401,0,1,0.4,0.6,3.75,1.30,0.0,,
1277,1,1,1.0,0.0,1.07,9.76,107.0,107.0,
2312,0,0,0.4,0.6,3.10,1.38,138.0,,
586,0,1,0.4,0.6,1.67,2.30,0.0,,0.0
6607,1,1,0.6,0.4,1.33,3.50,133.0,,
...,...,...,...,...,...,...,...,...,...
6702,1,1,0.6,0.4,1.11,7.00,111.0,,
5893,1,0,0.6,0.4,1.74,2.15,0.0,,
347,1,1,0.8,0.2,1.29,3.85,129.0,129.0,
6680,1,0,0.8,0.2,2.65,1.54,0.0,0.0,0.0


In [82]:
betting_summary(nba_predictions)

Basic Strategy - Returns: 142214.0 Total Bet: 151400 ROI%: -6.067371202113606
70pct Strategy - Returns: 75086.0 Total Bet: 78400 ROI%: -4.2270408163265305
Dogs Strategy - Returns: 42793.0 Total Bet: 43600 ROI% -1.8509174311926606


### Random Forest

In [83]:
forest_nba = run_model(forest, 'nba')

In [84]:
show_stats(forest_nba)

Accuracy: 0.6136063408190224
Precision: 0.639412997903564
Recall 0.7168037602820212
F1 Score: 0.6759002770083102
AUC: 0.5989750324788687


In [85]:
nba_predictions = predictions_dataframe(forest_nba, nba_odds)

In [86]:
nba_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
7401,1,1,0.60,0.40,3.75,1.30,375.0,,375.0
1277,1,1,0.90,0.10,1.07,9.76,107.0,107.0,
2312,0,0,0.31,0.69,3.10,1.38,138.0,,
586,1,1,0.65,0.35,1.67,2.30,167.0,,
6607,1,1,0.60,0.40,1.33,3.50,133.0,,
...,...,...,...,...,...,...,...,...,...
6702,1,1,0.71,0.29,1.11,7.00,111.0,111.0,
5893,0,0,0.43,0.57,1.74,2.15,215.0,,215.0
347,1,1,0.69,0.31,1.29,3.85,129.0,,
6680,1,0,0.66,0.34,2.65,1.54,0.0,,0.0


In [87]:
betting_summary(nba_predictions)

Basic Strategy - Returns: 141328.0 Total Bet: 151400 ROI%: -6.652575957727873
70pct Strategy - Returns: 31995.0 Total Bet: 37100 ROI%: -13.760107816711589
Dogs Strategy - Returns: 29543.0 Total Bet: 30400 ROI% -2.8190789473684212


### Support Vector Machine

In [89]:
svm_nba = run_model(svm, 'nba')

In [90]:
show_stats(svm_nba)

Accuracy: 0.630779392338177
Precision: 0.6445544554455446
Recall 0.7649823736780259
F1 Score: 0.6996238581407846
AUC: 0.6117521219823011


In [91]:
nba_predictions = predictions_dataframe(svm_nba, nba_odds)

In [92]:
nba_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
7401,1,1,0.519231,0.480769,3.75,1.30,375.0,,375.0
1277,1,1,0.941304,0.058696,1.07,9.76,107.0,107.0,
2312,0,0,0.428273,0.571727,3.10,1.38,138.0,,
586,1,1,0.592285,0.407715,1.67,2.30,167.0,,
6607,1,1,0.689586,0.310414,1.33,3.50,133.0,,
...,...,...,...,...,...,...,...,...,...
6702,1,1,0.655647,0.344353,1.11,7.00,111.0,,
5893,0,0,0.444816,0.555184,1.74,2.15,215.0,,215.0
347,1,1,0.696337,0.303663,1.29,3.85,129.0,,
6680,1,0,0.547997,0.452003,2.65,1.54,0.0,,0.0


In [93]:
betting_summary(nba_predictions)

Basic Strategy - Returns: 144080.0 Total Bet: 151400 ROI%: -4.834874504623514
70pct Strategy - Returns: 32256.0 Total Bet: 34600 ROI%: -6.774566473988439
Dogs Strategy - Returns: 27957.0 Total Bet: 27300 ROI% 2.4065934065934065


## UFC

### Logistic Regression

In [95]:
regression_ufc = run_model(logreg, 'ufc')

In [96]:
show_stats(regression_ufc)

Accuracy: 0.6075949367088608
Precision: 0.6122448979591837
Recall 0.6136363636363636
F1 Score: 0.6129398410896709
AUC: 0.6075174825174826


In [97]:
ufc_predictions = predictions_dataframe(regression_ufc, ufc_odds)

In [98]:
ufc_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
2860,1,1,0.611442,0.388558,1.59,2.60,159.0,,
1627,1,1,0.640535,0.359465,1.38,3.20,138.0,,
903,1,0,0.698101,0.301899,2.20,1.71,0.0,,0.0
598,1,1,0.676576,0.323424,1.29,3.85,129.0,,
4048,0,0,0.483328,0.516672,4.35,1.24,124.0,,
...,...,...,...,...,...,...,...,...,...
2588,1,0,0.547745,0.452255,3.15,1.39,0.0,,0.0
322,1,0,0.580346,0.419654,2.25,1.69,0.0,,0.0
805,0,0,0.436420,0.563580,2.45,1.61,161.0,,
4051,0,0,0.000726,0.999274,3.20,1.38,138.0,138.0,


In [99]:
betting_summary(ufc_predictions)

Basic Strategy - Returns: 89864.0 Total Bet: 86900 ROI%: 3.410817031070196
70pct Strategy - Returns: 14805.0 Total Bet: 11700 ROI%: 26.53846153846154
Dogs Strategy - Returns: 30365.0 Total Bet: 27600 ROI% 10.018115942028984


### K Nearest Neighbours

In [101]:
knn_ufc = run_model(knn, 'ufc')

In [102]:
show_stats(knn_ufc)

Accuracy: 0.5466052934407365
Precision: 0.5520361990950227
Recall 0.5545454545454546
F1 Score: 0.5532879818594105
AUC: 0.5465034965034965


In [103]:
ufc_predictions = predictions_dataframe(knn_ufc, ufc_odds)

In [104]:
ufc_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
2860,0,1,0.2,0.8,1.59,2.60,0.0,0.0,0.0
1627,1,1,0.6,0.4,1.38,3.20,138.0,,
903,1,0,0.6,0.4,2.20,1.71,0.0,,0.0
598,1,1,0.6,0.4,1.29,3.85,129.0,,
4048,0,0,0.4,0.6,4.35,1.24,124.0,,
...,...,...,...,...,...,...,...,...,...
2588,1,0,0.8,0.2,3.15,1.39,0.0,0.0,0.0
322,0,0,0.4,0.6,2.25,1.69,169.0,,
805,0,0,0.4,0.6,2.45,1.61,161.0,,
4051,0,0,0.0,1.0,3.20,1.38,138.0,138.0,


In [105]:
betting_summary(ufc_predictions)

Basic Strategy - Returns: 83827.0 Total Bet: 86900 ROI%: -3.5362485615650177
70pct Strategy - Returns: 38695.0 Total Bet: 36400 ROI%: 6.304945054945055
Dogs Strategy - Returns: 31655.0 Total Bet: 33000 ROI% -4.075757575757575


### Random Forest

In [106]:
ufc_forest = run_model(forest, 'ufc')

In [107]:
show_stats(ufc_forest)

Accuracy: 0.620253164556962
Precision: 0.6368159203980099
Recall 0.5818181818181818
F1 Score: 0.6080760095011876
AUC: 0.6207459207459207


In [108]:
ufc_predictions = predictions_dataframe(ufc_forest, ufc_odds)

In [109]:
ufc_predictions

Unnamed: 0,predicted,actual,A Win Prob,B Win Prob,A Win,B Win,returns,returns_70pct,returns_dogs
2860,1,1,0.57,0.43,1.59,2.60,159.0,,
1627,1,1,0.75,0.25,1.38,3.20,138.0,138.0,
903,0,0,0.42,0.58,2.20,1.71,171.0,,
598,1,1,0.76,0.24,1.29,3.85,129.0,129.0,
4048,0,0,0.50,0.50,4.35,1.24,124.0,,
...,...,...,...,...,...,...,...,...,...
2588,0,0,0.50,0.50,3.15,1.39,139.0,,
322,1,0,0.62,0.38,2.25,1.69,0.0,,0.0
805,0,0,0.48,0.52,2.45,1.61,161.0,,
4051,0,0,0.36,0.64,3.20,1.38,138.0,,


In [110]:
betting_summary(ufc_predictions)

Basic Strategy - Returns: 93123.0 Total Bet: 86900 ROI%: 7.161104718066744
70pct Strategy - Returns: 10792.0 Total Bet: 9800 ROI%: 10.122448979591837
Dogs Strategy - Returns: 33801.0 Total Bet: 29000 ROI% 16.555172413793105


### Support Vector Machines

In [112]:
svm_ufc = run_model(svm, 'ufc')

In [113]:
show_stats(svm_ufc)

Accuracy: 0.6029919447640967
Precision: 0.6072234762979684
Recall 0.6113636363636363
F1 Score: 0.6092865232163079
AUC: 0.6028846153846154


In [114]:
ufc_predictions = predictions_dataframe(svm_ufc, ufc_odds)

In [115]:
betting_summary(ufc_predictions)

Basic Strategy - Returns: 89439.0 Total Bet: 86900 ROI%: 2.9217491369390105
70pct Strategy - Returns: 7893.0 Total Bet: 5900 ROI%: 33.779661016949156
Dogs Strategy - Returns: 30862.0 Total Bet: 28400 ROI% 8.669014084507044
