# AFL Fantasy and Tipping Machine Learning Models

In [1]:
import pandas as pd
import numpy as np

#Suppresses scientific notation
pd.set_option('display.float_format', lambda x: '%.2f' % x)

import warnings
warnings.filterwarnings('ignore')

print(pd.__version__)

1.4.2


## Importing Dataframe

In [2]:
# Input: AFL_dataset.csv, produced by the AFL_DataCleaning notebook.
# The first CSV column is used as the row index.
df_final = pd.read_csv(
    'AFL_dataset.csv',
    index_col=0,
    skipinitialspace=True,
    low_memory=False,
)

df_final.head()

Unnamed: 0,year,round,Supercoach Points,AFL Fantasy Points,height,Age,weight,team,gameNumber,venue,...,Total Points Scored_prevous_year_mean_round_norm,Total Points Scored_prevous_year_mean_game_norm,Game Result_prevous_year_mean_round_norm,Game Result_prevous_year_mean_game_norm,gameNumber_prevous_year_mean_round_norm,gameNumber_prevous_year_mean_game_norm,AFL Fantasy Points_prevous_year_mean_round_norm,AFL Fantasy Points_prevous_year_mean_game_norm,Supercoach Points_prevous_year_mean_round_norm,Supercoach Points_prevous_year_mean_game_norm
80362,2012,1,129.0,99,171,29.28,70,Adelaide,4,Carrara,...,,,,,,,,,,
80363,2012,1,108.0,89,189,21.99,92,Adelaide,65,Carrara,...,,,,,,,,,,
80364,2012,1,82.0,65,177,32.66,81,Adelaide,208,Carrara,...,,,,,,,,,,
80365,2012,1,146.0,109,181,25.15,79,Adelaide,96,Carrara,...,,,,,,,,,,
80366,2012,1,34.0,28,188,23.55,89,Adelaide,31,Carrara,...,,,,,,,,,,


In [3]:
# Append 'homeTeam' and 'team' as extra index levels so they can be used as
# group keys later. 'team' is also kept as a regular column (drop=False).
df_final = (
    df_final
    .set_index('homeTeam', append=True)
    .set_index('team', drop=False, append=True)
)

## Creating dummy columns and eliminating NaN and infinities

In [4]:
df_final_w_dummies = pd.get_dummies(df_final)

In [5]:
# Names of every numeric column; the NaN/inf clean-up below iterates over these.
numerical_list = df_final_w_dummies.select_dtypes(include='number').columns.tolist()

In [6]:
total_rows = df_final_w_dummies.shape[0]

for col in numerical_list:
    column = df_final_w_dummies[col]

    # One boolean mask per kind of unusable value; the key is the suffix of
    # the indicator column created for it.
    bad_value_masks = {
        'nan': column.isna(),
        'inf': column.isin([np.inf]),
        'neg_inf': column.isin([-np.inf]),
    }

    # A column consisting entirely of one kind of bad value is removed.
    if any(mask.sum() == total_rows for mask in bad_value_masks.values()):
        df_final_w_dummies.drop(columns=col, inplace=True)
        continue

    # Record where values were missing/infinite before they are zeroed below.
    for suffix, mask in bad_value_masks.items():
        if mask.any():
            df_final_w_dummies[f'{col}_{suffix}'] = np.where(mask, 1, 0)

# Replace every remaining NaN and +/-inf with 0.
df_final_w_dummies.fillna(0, inplace=True)
df_final_w_dummies.replace([np.inf, -np.inf], 0, inplace=True)

In [7]:
df_final_w_dummies.isin([np.inf, -np.inf, np.nan]).sum().where(lambda x: x > 0).dropna()

Series([], dtype: float64)

In [8]:
df_final_w_dummies.shape

(72864, 4702)

## Splitting datasets into train and test for AFL Fantasy, Supercoach, and tipping

In [9]:
# Seasons 2020 and 2021 form the test set; every other season is training data.
is_test_season = df_final_w_dummies['year'].isin([2020, 2021])

train_df = df_final_w_dummies.loc[~is_test_season]
test_df = df_final_w_dummies.loc[is_test_season]

In [10]:
# The three target columns are removed from the feature matrices.
target_columns = ['AFL Fantasy Points', 'Supercoach Points', 'Target Margin']

X_train = train_df.drop(columns=target_columns)
X_test = test_df.drop(columns=target_columns)

# Targets for the AFL Fantasy model.
y_train_fantasy = train_df['AFL Fantasy Points']
y_test_fantasy = test_df['AFL Fantasy Points']

# Targets for the Supercoach model.
y_train_super = train_df['Supercoach Points']
y_test_super = test_df['Supercoach Points']

### Aggregating datasets for tipping

In [11]:
def create_tipping_df(df, cols=None, target='Target Margin'):
    """Aggregate per-row data into one home-minus-away feature row per game.

    Rows are grouped by (year, round, homeTeam, team). The per-group mean and
    standard deviation are joined side by side, and for each
    (year, round, homeTeam) the away group's values are subtracted from the
    home group's values. The target column is then restored to the home
    group's mean, so it is not a difference.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame in which 'year', 'round', 'homeTeam' and 'team' are each
        available as a column or an index level, and which contains the
        ``target`` column.
    cols : list-like of column labels, optional
        Columns whose per-group standard deviation is kept. When None, every
        column whose std differs from 0 in at least one group is kept (a NaN
        std counts as different from 0). Pass the second return value of an
        earlier call to give another frame the same std columns.
    target : str, default 'Target Margin'
        Name of the label column.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.Index)
        The per-game frame indexed by (year, round, homeTeam), and the labels
        of the columns whose std was used.

    Raises
    ------
    KeyError
        If ``target`` is not a column of ``df``, or ``cols`` names a column
        that has no per-group std.
    """
    group_keys = ['year', 'round', 'homeTeam', 'team']
    grouped = df.groupby(group_keys)

    team_mean = grouped.mean()
    team_std = grouped.std()

    if cols is None:
        # Keep only columns that vary inside at least one group.
        team_std = team_std.loc[:, (team_std != 0).any(axis=0)]
    else:
        team_std = team_std[cols]

    # The target is a label, not a feature. If it stayed in the std table the
    # merge below would rename it to '<target>_mean' and the lookup by its
    # plain name would raise KeyError (this happens when a group has a single
    # row, because its std is NaN and NaN != 0).
    team_std = team_std.drop(columns=[target], errors='ignore')

    # Columns present in both tables get the suffixes; columns that only have
    # a mean keep their original name.
    combined = team_mean.merge(team_std, on=group_keys,
                               suffixes=('_mean', '_std'))

    home_rows = combined.query('homeTeam == team').reset_index('team', drop=True)
    away_rows = combined.query('homeTeam != team').reset_index('team', drop=True)

    home_target = home_rows[target]

    # Both frames are indexed by (year, round, homeTeam), so the subtraction
    # aligns the two sides of the same game.
    game_diff = home_rows.subtract(away_rows)

    # Restore the label: it must be the home side's value, not a difference.
    game_diff[target] = home_target

    return game_diff, team_std.columns

In [12]:
# Every column whose name mentions either fantasy score is left out of the
# tipping data. The list is derived from the training columns and applied to
# both frames.
score_related_cols = [
    name for name in train_df.columns
    if 'AFL Fantasy Points' in name or 'Supercoach Points' in name
]

train_tipping = train_df.drop(columns=score_related_cols)
test_tipping = test_df.drop(columns=score_related_cols)

In [13]:
train_tipping_final, cols = create_tipping_df(train_tipping)
test_tipping_final, _ = create_tipping_df(test_tipping, cols)

In [14]:
train_tipping_final.shape

(1655, 7745)

In [15]:
train_tipping_final

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,height_mean,Age_mean,weight_mean,gameNumber_mean,Start Time Hour,Start Time Minute,Is Home Team,Month,Day of Year,Weekday,...,Opponent Team Score_prevous_year_mean_round_norm_nan_std,Opponent Team Score_prevous_year_mean_game_norm_nan_std,Margin_prevous_year_mean_round_norm_nan_std,Margin_prevous_year_mean_game_norm_nan_std,Total Points Scored_prevous_year_mean_round_norm_nan_std,Total Points Scored_prevous_year_mean_game_norm_nan_std,Game Result_prevous_year_mean_round_norm_nan_std,Game Result_prevous_year_mean_game_norm_nan_std,gameNumber_prevous_year_mean_round_norm_nan_std,gameNumber_prevous_year_mean_game_norm_nan_std
year,round,homeTeam,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
2012,1,Fremantle,2.00,-0.52,0.89,-27.94,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2012,1,Gold Coast,2.22,-2.55,0.61,-31.00,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2012,1,Greater Western Sydney,1.67,-2.92,0.39,-62.39,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2012,1,Hawthorn,-0.44,2.37,-2.11,35.89,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2012,1,Melbourne,-1.28,0.42,-1.22,1.33,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019,25,Brisbane Lions,-0.44,-0.88,0.72,-7.06,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2019,25,Geelong,0.94,-1.15,2.33,-25.39,0.00,0.00,1.00,0.00,0.00,0.00,...,0.32,0.32,0.32,0.32,0.32,0.32,0.32,0.32,0.32,0.32
2019,26,Collingwood,-0.67,0.33,-1.50,14.67,0.00,0.00,1.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00
2019,26,Richmond,-3.61,-0.16,-4.17,0.44,0.00,0.00,1.00,0.00,0.00,0.00,...,-0.32,-0.32,-0.32,-0.32,-0.32,-0.32,-0.32,-0.32,-0.32,-0.32


In [16]:
# Features: everything except the margin label.
X_train_tipping = train_tipping_final.drop(columns='Target Margin')
X_test_tipping = test_tipping_final.drop(columns='Target Margin')

# Label: the 'Target Margin' column of each aggregated frame.
y_train_tipping = train_tipping_final['Target Margin']
y_test_tipping = test_tipping_final['Target Margin']

## Creating models

In [17]:
import xgboost as xgb
from sklearn import metrics

### Fantasy Predictions

In [18]:
rf_fantasy = xgb.XGBRegressor()

In [19]:
rf_fantasy.fit(X_train, y_train_fantasy)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [20]:
predict_X_train_fantasy = rf_fantasy.predict(X_train)

In [21]:
predict_X_test_fantasy = rf_fantasy.predict(X_test)

In [22]:
metrics.mean_absolute_error(y_train_fantasy, predict_X_train_fantasy)

13.098651552824576

In [23]:
metrics.mean_absolute_error(y_test_fantasy, predict_X_test_fantasy)

18.91577383740341

In [24]:
metrics.r2_score(y_test_fantasy, predict_X_test_fantasy)

0.20842311849315787

In [25]:
y_train_fantasy.describe()

count   59580.00
mean       73.64
std        27.09
min        -3.00
25%        55.00
50%        72.00
75%        91.00
max       204.00
Name: AFL Fantasy Points, dtype: float64

### Supercoach Predictions

In [26]:
rf_super = xgb.XGBRegressor()

In [27]:
rf_super.fit(X_train, y_train_super)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [28]:
predict_X_train_super = rf_super.predict(X_train)

In [29]:
predict_X_test_super = rf_super.predict(X_test)

In [30]:
metrics.mean_absolute_error(y_train_super, predict_X_train_super)

19.406725061480014

In [31]:
metrics.mean_absolute_error(y_test_super, predict_X_test_super)

28.055788057678186

In [32]:
metrics.r2_score(y_test_super, predict_X_test_super)

0.3251736870840761

In [33]:
y_train_super.describe()

count   59580.00
mean      102.54
std        46.26
min        -4.00
25%        72.00
50%        97.00
75%       126.00
max       484.00
Name: Supercoach Points, dtype: float64

### Tipping Predictions

In [34]:
from sklearn.linear_model import ElasticNet

In [35]:
rf_tipping = ElasticNet()

In [36]:
rf_tipping.fit(X_train_tipping, y_train_tipping)

ElasticNet()

In [37]:
predict_X_train_tipping = rf_tipping.predict(X_train_tipping)

In [38]:
predict_X_test_tipping = rf_tipping.predict(X_test_tipping)

In [39]:
metrics.mean_absolute_error(y_train_tipping, predict_X_train_tipping)

25.166105050503866

In [40]:
metrics.mean_absolute_error(y_test_tipping, predict_X_test_tipping)

27.465875998467737

In [41]:
metrics.r2_score(y_test_tipping, predict_X_test_tipping)

0.028518166517410015

In [42]:
# A tip counts as correct when the predicted and actual margins are both
# positive, both negative, or both exactly zero.
tip_hits = [
    bool(
        (actual > 0 and predicted > 0)
        or (actual < 0 and predicted < 0)
        or (actual == 0 and predicted == 0)
    )
    for actual, predicted in zip(y_test_tipping, predict_X_test_tipping)
]

correct = sum(tip_hits)
incorrect = len(tip_hits) - correct

correct / (correct + incorrect)

0.6233062330623306

In [43]:
y_train_tipping.describe()

count   1655.00
mean       6.35
std       44.48
min     -138.00
25%      -23.00
50%        5.00
75%       35.00
max      162.00
Name: Target Margin, dtype: float64

### Exporting Models

In [44]:
import joblib

In [45]:
# Write each fitted model to its own file at the highest compression level.
for fitted_model, filename in (
    (rf_fantasy, 'fantasy model.pkl'),
    (rf_super, 'supercoach model.pkl'),
    (rf_tipping, 'tipping model.pkl'),
):
    joblib.dump(fitted_model, filename, compress=9)