In [120]:
# Imports dependencies
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import numpy as np
import pandas as pd
import seaborn as sns

sns.set(font_scale=2)

import warnings
warnings.filterwarnings('ignore')

plt.style.use('ggplot')

%matplotlib inline

In [121]:
full_matches = pd.read_csv("data/afl_2018_test.csv", index_col = 0)

In [122]:
full_matches.head(3)

Unnamed: 0,Date,h_team,a_team,Home Score,Away Score,play_off,home_odds,away_odds,season,margin,h_prevladder_score,a_prevladder_score,margins_prev_match1,margins_prev_match2,margins_prev_match3,margins_prev_match4,margins_prev_match5,win_loc
0,2018-03-25,West Coast,Sydney,0,0,0,2.6,1.5,2018,0,48.0,56.0,26.0,-39.0,52.0,-28.0,-34.0,0
1,2018-03-25,Melbourne,Geelong,0,0,0,2.35,1.6,2018,0,48.0,62.0,-29.0,-111.0,24.0,-66.0,-68.0,0
2,2018-03-25,GWS Giants,Western Bulldogs,0,0,0,1.45,2.75,2018,0,60.0,44.0,48.0,2.0,-6.0,25.0,-45.0,0


In [123]:
# Sets the year of the model: year
year = 2018

In [124]:
import patsy

features = '~ season + h_prevladder_score + a_prevladder_score + play_off + margins_prev_match1 + margins_prev_match2 + \
            + margins_prev_match3 + margins_prev_match4 + margins_prev_match5 + home_odds + away_odds + C(h_team) + C(a_team) - 1'

In [125]:
# Initiates the predictors: X
X = patsy.dmatrix(features, data=full_matches, return_type='dataframe')

# Initiates the target as winloc: y
y = full_matches[["season", "win_loc"]]

In [126]:
X.shape

(959, 46)

In [127]:
y.shape

(959, 2)

In [128]:
X_train = X[(X["season"] >= year-5) & (X["season"] <= year-1)].drop(["season"], axis=1)
X_test  = X[X["season"] == year].drop(["season"], axis=1)
y_train = y[(y["season"] >= year-5) & (y["season"] <= year-1)]["win_loc"]
y_test  = y[y["season"] == year]["win_loc"]

In [129]:
def five_year_pred(clf, X, y, year):
    
    global baseline
    baseline = full_matches[full_matches["season"] == year]["win_loc"].mean()
    
    global X_train
    X_train = X[(X["season"] >= year-5) & (X["season"] <= year-1)].drop(["season"], axis=1)
    global X_test
    X_test  = X[X["season"] == year].drop(["season"], axis=1)
    global y_train
    y_train = y[(y["season"] >= year-5) & (y["season"] <= year-1)]["win_loc"]
    global y_test
    y_test  = y[y["season"] == year]["win_loc"]
  
    clf.fit(X_train, y_train)

    predictions =  clf.predict_proba(X_test)
    
    year_data = full_matches[full_matches["season"] == year]

    cols_to_keep = ["win_loc", "season", "h_team", "a_team", "home_odds", "away_odds"]
    year_data = year_data[cols_to_keep].reset_index(drop=True)
    
    probs_df = pd.DataFrame(predictions, columns = ["a_prob_us", "h_prob_us"]).round(2)
    probs_df = probs_df[["h_prob_us", "a_prob_us"]]
    
    year_odds_comparison = year_data.join(probs_df, how="outer")

    return year_odds_comparison

In [130]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier 
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.naive_bayes import BernoulliNB

clf_lr      = LogisticRegression(penalty='l2', solver="liblinear")
clf_svc     = SVC(kernel = 'rbf', probability = True)
clf_xgboost = GradientBoostingClassifier()
clf_forest   = RandomForestClassifier()
clf_tree    = DecisionTreeClassifier()

clf = clf_lr

probs_df = five_year_pred(clf, X, y, year = year)

In [131]:
probs_df.drop("win_loc", errors="ignore", inplace=True, axis=1)

In [132]:
betting_outcomes = probs_df[["h_team", "a_team", "home_odds", "away_odds", "h_prob_us", "a_prob_us"]]
betting_outcomes.head(3)

Unnamed: 0,h_team,a_team,home_odds,away_odds,h_prob_us,a_prob_us
0,West Coast,Sydney,2.6,1.5,0.24,0.76
1,Melbourne,Geelong,2.35,1.6,0.42,0.58
2,GWS Giants,Western Bulldogs,1.45,2.75,0.66,0.34


In [133]:
def team_to_bet(home_odds, away_odds, h_prob_us, a_prob_us, bet_type="home"):
    """
    Advises on which team to bet on.
    
    By comparing the odds of a model and public odds, returns which team should be bet on.

    Parameters
    ----------
    
    Returns
    -------
    int: The return value.
    0 for away team.
    1 for home team. 
    """
    
    # This bet type plays the odds, in an ideal case where we have an accurate model, playing the odds will have a greater
    # net gain compared to all other models.
    if bet_type == "odds":
        # Calculates bookie probabilities based on their odds: home_prob_bookie, away_prob_bookie
        h_prob_bookie = 1/home_odds
        a_prob_bookie = 1/away_odds

        # Calculates differences in probabilities: home_prob_diff, away_prob_diff
        home_prob_diff = h_prob_us - h_prob_bookie
        away_prob_diff = a_prob_us - a_prob_bookie

        # Returns 1 if we are more sure that the home team will win.
        if (home_prob_diff > away_prob_diff):
            return 1

        # Returns -1 if we are more sure that the away team will win.
        if (away_prob_diff > home_prob_diff):
            return 0    
    
    # This bet type only predominantly bets on the home teams
    if bet_type == "home":
        
        # Returns 1 if home team are favored
        if (h_prob_us > a_prob_us):
            return 1
        
        # Returns 0 if away team are favored
        else:
            return 0

    
        
def kelly_criterion(prob, odds):
    """
    Applies the kelly criterion to advise what percentage of bank to bet.
    
    Uses the probability of team winning with the expected odds to 
    
    Returns
    -------
    float: The return value. 2 d.p.
    The percentage of ones bank to bet on a game.
    """
    
    # Calculates the percentage to bet based on probability and odds
    bet_perc = (prob * (odds + 1)-1) / odds
    
    # Returns bet_perc
    return abs(round(bet_perc,2))

def calc_win(bet, odds):
    """
    Calculates the amount won from a winning bet given odds and the bet_size
    """
    return (bet) * (odds-1)

In [134]:
# Creates a column that advises which team to bet on
betting_outcomes["bet_on"] = betting_outcomes.apply(lambda row: team_to_bet(row["home_odds"], row["away_odds"],
                                                                            row["h_prob_us"], row["a_prob_us"],
                                                                            bet_type = "home"), axis = 1)
# Creates a column that advises how much to bet
betting_outcomes["bet_perc"] = betting_outcomes.apply(lambda row: kelly_criterion(row["h_prob_us"], row["home_odds"]), axis = 1)/3


In [135]:
betting_outcomes

Unnamed: 0,h_team,a_team,home_odds,away_odds,h_prob_us,a_prob_us,bet_on,bet_perc
0,West Coast,Sydney,2.6,1.5,0.24,0.76,0,0.016667
1,Melbourne,Geelong,2.35,1.6,0.42,0.58,0,0.056667
2,GWS Giants,Western Bulldogs,1.45,2.75,0.66,0.34,1,0.143333
3,Hawthorn,Collingwood,1.95,1.85,0.73,0.27,1,0.196667
4,Gold Coast,North Melbourne,1.85,1.95,0.42,0.58,0,0.036667
5,Port Adelaide,Fremantle,1.25,4.0,0.66,0.34,1,0.13
6,St Kilda,Brisbane,1.31,3.5,0.67,0.33,1,0.14
7,Essendon,Adelaide,2.05,1.78,0.75,0.25,1,0.21
8,Richmond,Carlton,1.22,4.35,0.84,0.16,1,0.236667
