# AFL Fantasy and Tipping Machine Learning Models

In [1]:
import pandas as pd
import numpy as np

# Suppress scientific notation: floats are displayed with two fixed decimal places
pd.set_option('display.float_format', lambda x: '%.2f' % x)

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including pandas performance and deprecation warnings; consider a narrower filter.
warnings.filterwarnings('ignore')

# Record the pandas version used for this run
print(pd.__version__)

1.4.2


## Importing Dataframe

In [2]:
# Input file: AFL_dataset.csv, produced by the AFL_DataCleaning notebook.
# The first CSV column is used as the row index.
read_options = dict(index_col=0, skipinitialspace=True, low_memory=False)
df_final = pd.read_csv('AFL_dataset.csv', **read_options)

df_final.head()

Unnamed: 0,year,round,Supercoach Points,AFL Fantasy Points,height,Age,weight,team,gameNumber,venue,...,Total Points Scored_prevous_year_mean_round_norm,Total Points Scored_prevous_year_mean_game_norm,Game Result_prevous_year_mean_round_norm,Game Result_prevous_year_mean_game_norm,gameNumber_prevous_year_mean_round_norm,gameNumber_prevous_year_mean_game_norm,AFL Fantasy Points_prevous_year_mean_round_norm,AFL Fantasy Points_prevous_year_mean_game_norm,Supercoach Points_prevous_year_mean_round_norm,Supercoach Points_prevous_year_mean_game_norm
0,2021,1,62.0,50,181,19.1,81,Adelaide,1,Adelaide Oval,...,,,,,,,,,,
2,2021,1,50.0,35,198,21.22,83,Adelaide,3,Adelaide Oval,...,,,,,,,,,,
3,2021,1,90.0,65,188,24.05,88,Adelaide,31,Adelaide Oval,...,,,,,,,,,,
4,2021,1,147.0,83,200,24.33,90,Adelaide,9,Adelaide Oval,...,,,,,,,,,,
5,2021,1,42.0,37,186,20.34,71,Adelaide,9,Adelaide Oval,...,,,,,,,,,,


In [3]:
# Append homeTeam and team to the row index so later group-bys and queries can
# refer to them by name; team is additionally kept as a regular column.
df_final = (
    df_final
    .set_index('homeTeam', append=True)
    .set_index('team', drop=False, append=True)
)

## Creating dummy columns and eliminating NaN and infinities

In [4]:
# One-hot encode the categorical (object/category) columns; numeric columns pass through unchanged
df_final_w_dummies = pd.get_dummies(df_final)

In [5]:
# Names of all numeric columns, captured before any indicator columns are added
numerical_list = df_final_w_dummies.select_dtypes('number').columns.tolist()

In [6]:
total_rows = df_final_w_dummies.shape[0]

# Collect the work while scanning and apply it in one pass afterwards: inserting
# thousands of indicator columns one at a time fragments the frame and is slow.
all_missing_cols = []   # columns that are entirely NaN, +inf or -inf
indicator_data = {}     # new column name -> 0/1 array marking affected rows

for col in numerical_list:
    values = df_final_w_dummies[col]
    # Evaluate each mask once and reuse it for both the count and the flag.
    masks = {
        'nan': values.isna(),
        'inf': values.isin([np.inf]),
        'neg_inf': values.isin([-np.inf]),
    }
    counts = {suffix: mask.sum() for suffix, mask in masks.items()}

    # A column made up entirely of one kind of missing/infinite value carries
    # no information, so it is dropped rather than flagged.
    if total_rows in counts.values():
        all_missing_cols.append(col)
        continue

    # Keep a 0/1 indicator for every kind of problem value the column contains,
    # so the information survives the zero-fill below.
    for suffix, mask in masks.items():
        if counts[suffix] != 0:
            indicator_data[f'{col}_{suffix}'] = np.where(mask, 1, 0)

# Indicator columns are appended after the surviving original columns, in the
# order the source columns were scanned.
df_final_w_dummies = pd.concat(
    [
        df_final_w_dummies.drop(columns=all_missing_cols),
        pd.DataFrame(indicator_data, index=df_final_w_dummies.index),
    ],
    axis=1,
)

# With the indicators recorded, replace NaN and +/-inf by 0.
df_final_w_dummies = df_final_w_dummies.fillna(0).replace([np.inf, -np.inf], 0)

In [7]:
# Sanity check: list every column that still holds NaN or +/-inf (an empty Series means none remain)
df_final_w_dummies.isin([np.inf, -np.inf, np.nan]).sum().where(lambda x: x > 0).dropna()

Series([], dtype: float64)

In [8]:
# (rows, columns) after one-hot encoding and adding the indicator columns
df_final_w_dummies.shape

(86848, 4672)

## Splitting datasets into train and test for AFL Fantasy, Supercoach, and tipping

In [9]:
# Hold out the 2020 and 2021 seasons for testing; every other season is training data
is_holdout_season = df_final_w_dummies['year'].isin([2020, 2021])
train_df = df_final_w_dummies.loc[~is_holdout_season]
test_df = df_final_w_dummies.loc[is_holdout_season]

In [10]:
# The three target columns are removed from the feature matrices; the two
# player-level targets are extracted as separate label vectors.
target_columns = ['AFL Fantasy Points', 'Supercoach Points', 'Target Margin']

X_train = train_df.drop(columns=target_columns)
X_test = test_df.drop(columns=target_columns)

y_train_fantasy, y_test_fantasy = (part['AFL Fantasy Points'] for part in (train_df, test_df))
y_train_super, y_test_super = (part['Supercoach Points'] for part in (train_df, test_df))

### Aggregating datasets for tipping

In [11]:
def create_tipping_df(df, cols=None):
    """Collapse player-level rows into one row per game for the tipping model.

    Rows are grouped by ``year``, ``round``, ``homeTeam`` and ``team``. Every
    remaining column is summarised by its group mean and, for a subset of
    columns, by its group standard deviation. The summary of the home side
    (``homeTeam == team``) is then joined with the summary of the away side of
    the same ``year``/``round``/``homeTeam``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame in which the four grouping keys can be referenced by name (as
        columns or index levels) and which contains ``Target Margin``.
    cols : list-like of str, optional
        Columns whose standard deviation is kept. When omitted, every column
        with at least one group standard deviation not equal to zero is kept
        (NaN compares as not equal to zero).

    Returns
    -------
    tuple
        ``(games, std_columns)``. ``games`` holds the ``_home`` / ``_away``
        suffixed features plus a single ``Target Margin`` column taken from the
        home side. ``std_columns`` are the columns whose standard deviation
        was used, so the same selection can be passed as ``cols`` for another
        frame.

    Raises
    ------
    KeyError
        If ``cols`` names a column that has no standard deviation, or if
        ``Target Margin`` is part of the standard-deviation selection (it then
        gets a ``_mean``/``_std`` suffix and ``Target Margin_away`` is absent).
    """
    group_keys = ['year', 'round', 'homeTeam', 'team']
    grouped = df.groupby(group_keys)
    team_means = grouped.mean()
    team_stds = grouped.std()

    if cols is None:
        # Keep only columns that vary within at least one group.
        varies_somewhere = team_stds.ne(0).any(axis=0)
        team_stds = team_stds.loc[:, varies_somewhere]
    else:
        team_stds = team_stds[cols]

    # Columns present in both summaries get _mean / _std; the rest keep their name.
    per_team = team_means.merge(team_stds, on=group_keys, suffixes=('_mean', '_std'))

    home_side = per_team.query('homeTeam == team')
    away_side = per_team.query('homeTeam != team')
    games = home_side.merge(
        away_side,
        on=['year', 'round', 'homeTeam'],
        suffixes=('_home', '_away'),
    )

    # Keep a single target column: the margin as recorded on the home side.
    games = (
        games
        .rename(columns={'Target Margin_home': 'Target Margin'})
        .drop(columns='Target Margin_away')
    )

    return games, team_stds.columns

In [12]:
# The tipping frames exclude every column derived from the two player-score
# targets (any column whose name contains either target name).
# The columns are collected first and dropped in one call: dropping them one by
# one rebuilds both large frames once per matching column. `drop` already
# returns a new frame, so no explicit copy is needed.
points_columns = [
    col for col in train_df.columns
    if 'AFL Fantasy Points' in col or 'Supercoach Points' in col
]

train_tipping = train_df.drop(columns=points_columns)
test_tipping = test_df.drop(columns=points_columns)

In [13]:
# Aggregate the training frame first, then reuse its standard-deviation column
# selection for the test frame so both frames are built from the same columns.
train_tipping_final, cols = create_tipping_df(train_tipping)
test_tipping_final, _ = create_tipping_df(test_tipping, cols)

In [14]:
# (rows, columns) of the aggregated training frame: one row per home/away pairing, target column included
train_tipping_final.shape

(1655, 16431)

In [15]:
# Separate the game-level target (the margin) from the aggregated features
tipping_target = 'Target Margin'

X_train_tipping = train_tipping_final.drop(columns=tipping_target)
X_test_tipping = test_tipping_final.drop(columns=tipping_target)

y_train_tipping = train_tipping_final[tipping_target]
y_test_tipping = test_tipping_final[tipping_target]

## Creating models

In [16]:
import xgboost as xgb
from sklearn import metrics

### Fantasy Predictions

In [17]:
# XGBoost regressor with the library's default hyperparameters.
# NOTE(review): the `rf_` prefix suggests a random forest, but this is a gradient-boosted model.
rf_fantasy = xgb.XGBRegressor()

In [18]:
# Fit on the training seasons with AFL Fantasy Points as the target
rf_fantasy.fit(X_train, y_train_fantasy)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [19]:
# In-sample predictions, used for the training-error figure
predict_X_train_fantasy = rf_fantasy.predict(X_train)

In [20]:
# Predictions for the held-out rows
predict_X_test_fantasy = rf_fantasy.predict(X_test)

In [21]:
# Training mean absolute error, in AFL Fantasy points
metrics.mean_absolute_error(y_train_fantasy, predict_X_train_fantasy)

13.09391518105097

In [22]:
# Held-out mean absolute error; compare with the training figure to gauge overfitting
metrics.mean_absolute_error(y_test_fantasy, predict_X_test_fantasy)

17.740780678195563

In [23]:
# Held-out R^2 (1.0 is a perfect fit; 0.0 matches always predicting the mean)
metrics.r2_score(y_test_fantasy, predict_X_test_fantasy)

0.26246328533282837

### Supercoach Predictions

In [24]:
# XGBoost regressor with the library's default hyperparameters.
# NOTE(review): the `rf_` prefix suggests a random forest, but this is a gradient-boosted model.
rf_super = xgb.XGBRegressor()

In [25]:
# Fit on the same training features, with Supercoach Points as the target
rf_super.fit(X_train, y_train_super)

XGBRegressor(base_score=0.5, booster='gbtree', callbacks=None,
             colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
             early_stopping_rounds=None, enable_categorical=False,
             eval_metric=None, gamma=0, gpu_id=-1, grow_policy='depthwise',
             importance_type=None, interaction_constraints='',
             learning_rate=0.300000012, max_bin=256, max_cat_to_onehot=4,
             max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
             missing=nan, monotone_constraints='()', n_estimators=100, n_jobs=0,
             num_parallel_tree=1, predictor='auto', random_state=0, reg_alpha=0,
             reg_lambda=1, ...)

In [26]:
# In-sample predictions, used for the training-error figure
predict_X_train_super = rf_super.predict(X_train)

In [27]:
# Predictions for the held-out rows
predict_X_test_super = rf_super.predict(X_test)

In [28]:
# Training mean absolute error, in Supercoach points
metrics.mean_absolute_error(y_train_super, predict_X_train_super)

19.39634601061774

In [29]:
# Held-out mean absolute error; compare with the training figure to gauge overfitting
metrics.mean_absolute_error(y_test_super, predict_X_test_super)

26.547581375573838

In [30]:
# Held-out R^2 (1.0 is a perfect fit; 0.0 matches always predicting the mean)
metrics.r2_score(y_test_super, predict_X_test_super)

0.37095723368508526

### Tipping Predictions

In [31]:
from sklearn.linear_model import Ridge

In [32]:
# Ridge regression with scikit-learn's default settings.
# NOTE(review): the `rf_` prefix suggests a random forest, but this is a linear model.
rf_tipping = Ridge()

In [33]:
# Fit the margin model on the game-level training frame.
# NOTE(review): the training frame printed earlier has 1655 rows and over 16,000
# columns, and the features are passed in unscaled; the large gap between the
# training and held-out errors reported below is consistent with overfitting.
# Consider scaling, stronger regularisation or feature selection.
rf_tipping.fit(X_train_tipping, y_train_tipping)

Ridge()

In [34]:
# In-sample margin predictions, used for the training-error figure
predict_X_train_tipping = rf_tipping.predict(X_train_tipping)

In [35]:
# Margin predictions for the held-out games
predict_X_test_tipping = rf_tipping.predict(X_test_tipping)

In [36]:
# Training mean absolute error of the predicted margin
metrics.mean_absolute_error(y_train_tipping, predict_X_train_tipping)

1.1635977011771375

In [37]:
# Held-out mean absolute error of the predicted margin; compare with the training figure
metrics.mean_absolute_error(y_test_tipping, predict_X_test_tipping)

56.402871511413686

In [38]:
# Held-out R^2; a negative value means the model does worse than always predicting the mean margin
metrics.r2_score(y_test_tipping, predict_X_test_tipping)

-3.0479645719008523

In [39]:
# Tipping accuracy: a tip counts as correct when the predicted margin has the
# same sign as the actual margin (both positive, both negative, or both exactly zero).
same_sign = np.sign(np.asarray(y_test_tipping)) == np.sign(np.asarray(predict_X_test_tipping))

correct = int(same_sign.sum())
incorrect = int(same_sign.size) - correct

correct / (correct + incorrect)

0.5338753387533876

### Exporting Models

In [40]:
import joblib

In [41]:
# Persist each fitted model to its own file with maximum compression.
# The tipping file must be written from rf_tipping; writing rf_super here
# would store a second copy of the supercoach model under the tipping name.
joblib.dump(rf_fantasy, 'fantasy model.pkl', compress=9)
joblib.dump(rf_super, 'supercoach model.pkl', compress=9)
joblib.dump(rf_tipping, 'tipping model.pkl', compress=9);