# AFL Fantasy and Tipping Machine Learning Models

In [1]:
import pandas as pd
import numpy as np

# Show floats with two fixed decimal places instead of scientific notation.
def _two_decimal_places(value):
    """Format a float for display with exactly two digits after the decimal point."""
    return '%.2f' % value

pd.set_option('display.float_format', _two_decimal_places)

import warnings
# NOTE(review): this hides every warning category for the rest of the session.
warnings.filterwarnings('ignore')

print(pd.__version__)

1.4.2


## Importing Dataframe

In [2]:
# Input: AFL_dataset.csv, which is produced by the AFL_DataCleaning notebook.
# The first CSV column is used as the index, whitespace following a delimiter is
# skipped, and low_memory=False makes pandas parse the file without chunking.
csv_read_options = {
    'index_col': 0,
    'skipinitialspace': True,
    'low_memory': False,
}
df_final = pd.read_csv('AFL_dataset.csv', **csv_read_options)

df_final.head()

Unnamed: 0,Supercoach Points,AFL Fantasy Points,year,round,height,Age,weight,team,gameNumber,venue,...,Opponent Team Score_prevous_year_mean_round_norm,Opponent Team Score_prevous_year_mean_game_norm,Margin_prevous_year_mean_round_norm,Margin_prevous_year_mean_game_norm,Total Points Scored_prevous_year_mean_round_norm,Total Points Scored_prevous_year_mean_game_norm,Game Result_prevous_year_mean_round_norm,Game Result_prevous_year_mean_game_norm,gameNumber_prevous_year_mean_round_norm,gameNumber_prevous_year_mean_game_norm
0,62.0,50,2021,1,181,19.1,81,Adelaide,1,Adelaide Oval,...,,,,,,,,,,
1,24.0,21,2021,1,181,28.49,81,Adelaide,168,Adelaide Oval,...,,,,,,,,,,
2,50.0,35,2021,1,198,21.22,83,Adelaide,3,Adelaide Oval,...,,,,,,,,,,
3,90.0,65,2021,1,188,24.05,88,Adelaide,31,Adelaide Oval,...,,,,,,,,,,
4,147.0,83,2021,1,200,24.33,90,Adelaide,9,Adelaide Oval,...,,,,,,,,,,


## Creating dummy columns and eliminating NaN and infinities

In [3]:
# One-hot encode df_final using pandas' default column selection; the result is a
# new frame and df_final itself is left as loaded.
df_final_w_dummies = pd.get_dummies(df_final)

In [4]:
# Names of every numeric-dtype column in the encoded frame, as a plain Python list.
numerical_list = df_final_w_dummies.select_dtypes(include='number').columns.tolist()

In [5]:
# Clean NaN / +inf / -inf out of the numeric columns listed in `numerical_list`:
#   1. drop any column that is entirely NaN, entirely +inf or entirely -inf;
#   2. for each remaining column, remember where each kind of bad value occurred in
#      a 0/1 indicator column named '<col>_nan', '<col>_inf' or '<col>_neg_inf'
#      (only created when that kind of value actually occurs);
#   3. replace every NaN and infinity left in the frame with 0.
#
# The indicator columns are collected first and attached with a single concat.
# Inserting them one at a time into a frame this wide fragments its internal
# blocks (pandas raises a PerformanceWarning for exactly this pattern). The
# resulting columns, their order and their values are unchanged.
total_rows = df_final_w_dummies.shape[0]

uninformative_cols = []  # columns removed in step 1
indicator_cols = {}      # new column name -> 0/1 array, kept in creation order

for col in numerical_list:
    values = df_final_w_dummies[col]

    # Each mask is computed once and reused for both the count and the indicator.
    bad_value_masks = {
        'nan': values.isna(),
        'inf': values.isin([np.inf]),
        'neg_inf': values.isin([-np.inf]),
    }
    bad_value_counts = {kind: mask.sum() for kind, mask in bad_value_masks.items()}

    # A column made up of a single kind of bad value carries no information.
    if total_rows in bad_value_counts.values():
        uninformative_cols.append(col)
        continue

    for kind, mask in bad_value_masks.items():
        if bad_value_counts[kind] != 0:
            indicator_cols[f'{col}_{kind}'] = np.where(mask, 1, 0)

# Surviving original columns first, then the indicators in the order they were found.
df_final_w_dummies = pd.concat(
    [
        df_final_w_dummies.drop(columns=uninformative_cols),
        pd.DataFrame(indicator_cols, index=df_final_w_dummies.index),
    ],
    axis=1,
)

# Neutralise whatever NaN / infinite entries remain anywhere in the frame.
df_final_w_dummies = df_final_w_dummies.fillna(0).replace([np.inf, -np.inf], 0)

In [6]:
# Sanity check: count the NaN / +inf / -inf entries per column and display only the
# columns whose count is positive.
remaining_bad_values = df_final_w_dummies.isin([np.inf, -np.inf, np.nan]).sum()
remaining_bad_values.where(remaining_bad_values > 0).dropna()

Series([], dtype: float64)

### Dropping rows where the player scored zero or fewer points

In [7]:
# Flag rows where either scoring system recorded zero or fewer points, then remove
# those rows by their index labels.
non_positive_score = (
    df_final_w_dummies['AFL Fantasy Points'].le(0)
    | df_final_w_dummies['Supercoach Points'].le(0)
)
rows_to_drop = df_final_w_dummies.index[non_positive_score.to_numpy()]
df_final_w_dummies = df_final_w_dummies.drop(index=rows_to_drop)

In [8]:
# (rows, columns) of the frame at this point in the notebook.
df_final_w_dummies.shape

(89174, 4408)

## Splitting datasets into train and test for AFL Fantasy and Supercoach

In [9]:
# Split by season: rows from 2020 and 2021 form the test set, every other year is
# used for training.
is_test_season = df_final_w_dummies['year'].isin([2020, 2021])

train_df = df_final_w_dummies.loc[~is_test_season]
test_df = df_final_w_dummies.loc[is_test_season]

In [10]:
# Columns excluded from the feature matrices: the two score targets plus
# 'Target Margin' (presumably the tipping target - it is not used elsewhere in the
# cells shown here).
non_feature_columns = ['AFL Fantasy Points', 'Supercoach Points', 'Target Margin']

# Both models are trained on the same feature matrices.
X_train = train_df.drop(columns=non_feature_columns)
X_test = test_df.drop(columns=non_feature_columns)

# One train/test target pair per scoring system.
y_train_fantasy, y_test_fantasy = train_df['AFL Fantasy Points'], test_df['AFL Fantasy Points']
y_train_super, y_test_super = train_df['Supercoach Points'], test_df['Supercoach Points']

## Creating models

In [11]:
import xgboost as xgb
from sklearn.model_selection import RandomizedSearchCV
from sklearn import metrics

In [12]:
# Candidate values per XGBoost hyperparameter, keyed by parameter name.
# NOTE(review): none of the cells shown in this notebook passes this grid (or the
# imported RandomizedSearchCV) to a search; the models are built with no arguments.
hyperparameter_grid = dict(
    n_estimators=[100, 400, 800],
    max_depth=[3, 6, 9],
    learning_rate=[0.05, 0.1, 0.20],
    min_child_weight=[1, 10, 100],
)

### Fantasy Predictions

In [13]:
# Regressor for AFL Fantasy points, constructed with no arguments (library defaults).
# NOTE(review): despite the `rf_` prefix this is an XGBoost regressor.
rf_fantasy = xgb.XGBRegressor()

In [14]:
# Train on the training feature matrix against the AFL Fantasy target.
rf_fantasy.fit(X_train, y_train_fantasy)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [15]:
# Predictions on the rows the model was trained on (used for the training-set error).
predict_X_train_fantasy = rf_fantasy.predict(X_train)

In [16]:
# Predictions on the test feature matrix.
predict_X_test_fantasy = rf_fantasy.predict(X_test)

In [17]:
# Mean absolute error of the AFL Fantasy model on the training rows.
metrics.mean_absolute_error(y_train_fantasy, predict_X_train_fantasy)

13.613856849023406

In [18]:
# Mean absolute error of the AFL Fantasy model on the test rows.
metrics.mean_absolute_error(y_test_fantasy, predict_X_test_fantasy)

18.326989929486402

In [19]:
# R^2 (coefficient of determination) of the AFL Fantasy model on the test rows.
metrics.r2_score(y_test_fantasy, predict_X_test_fantasy)

0.23049116487275223

### Supercoach Predictions

In [20]:
# Regressor for Supercoach points, constructed with no arguments (library defaults).
# NOTE(review): despite the `rf_` prefix this is an XGBoost regressor.
rf_super = xgb.XGBRegressor()

In [21]:
# Train on the same training feature matrix, this time against the Supercoach target.
rf_super.fit(X_train, y_train_super)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [22]:
# Predictions on the rows the model was trained on (used for the training-set error).
predict_X_train_super = rf_super.predict(X_train)

In [23]:
# Predictions on the test feature matrix.
predict_X_test_super = rf_super.predict(X_test)

In [24]:
# Mean absolute error of the Supercoach model on the training rows.
metrics.mean_absolute_error(y_train_super, predict_X_train_super)

20.10976973156177

In [25]:
# Mean absolute error of the Supercoach model on the test rows.
metrics.mean_absolute_error(y_test_super, predict_X_test_super)

27.135256731839807

In [26]:
# R^2 (coefficient of determination) of the Supercoach model on the test rows.
metrics.r2_score(y_test_super, predict_X_test_super)

0.3511172078582838

### Exporting Models

In [None]:
import joblib

In [None]:
# Write both fitted regressors to disk with joblib, using compression level 9.
# The file names (including the space) are kept exactly as they are, because other
# code may load these paths.
# NOTE(review): XGBoost's Model IO guide recommends save_model() over pickling for
# artifacts that must outlive the installed library version - confirm this is acceptable.
models_by_filename = {
    'fantasy model.pkl': rf_fantasy,
    'supercoach model.pkl': rf_super,
}
for filename, model in models_by_filename.items():
    joblib.dump(model, filename, compress=9)