# Proof of Concept 4. Double Linear Regression with Bootstrap using Normalised Data/Both Teams #
## For Brownlow Predictor Project ##

Trains up 4 models using the 4 Macro Rules of Feature Selection using Normalised Data/Both Team Columns Only (FS_Val = 0.2 and Includes Winloss)

Uses experimental method of bootstrapping test cases so that the 3-votes/2-votes/1-vote rows are equal in number to 0-vote rows together with experimental method of doing two steps of Linear Regression: the first to pick out the three players most likely to get votes, and the second to allocate those three players 3, 2 and 1. (This Proof of Concept only demonstrates adj_votes = 2) 


**Author: `Lang (Ron) Chen` 2021.12-2022.1**

___

**0. Import Libraries**

In [1]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

from BrownlowPredictorTools2.predict import predict1, predict2
from BrownlowPredictorTools2.test import test1, test2
from BrownlowPredictorTools.return_tp import return_tp
from BrownlowPredictorTools2.wholeseason import wholeseason
from BrownlowPredictorTools.feature_selection2 import feature_selection2

In [2]:
choice = 'NormalisedData'

In [3]:
filelist = os.listdir(f'./Data/{choice}')[1:]
# Remove the first file (an ipynb checkpoint file)

**1. Feature Selection**

In [4]:
# Gets list of emperical test games (full 2021 season)
final_test_games = [file for file in filelist if '2021' in file]

In [5]:
# Gathers full games list (except 2021) and performs a single Train-Test Split (note different from previous KFold)
test_train_games = [file for file in filelist if '2021' not in file]
train_games, test_games = train_test_split(test_train_games, train_size = 0.8, test_size = 0.2, random_state = 42)

In [6]:
# Read in pre-prepared sample data of trained data only 
# (the same rows as if we used concatenated all the data from the train_games list)
train_data = pd.read_csv('Train_Data (N).csv')

In [7]:
ADJ_VOTES = 2

# Means replace the all rows with votes's labels (Brownlow Votes) to 2 votes (the mean).
# Alternatives: 1 vote (min), 3 votes (max) 

Bootstrap

In [8]:
# Splits data into zero votes and more than 0 votes
zero = train_data[train_data['Brownlow Votes'] == 0]
tagged1 = train_data[train_data['Brownlow Votes'] > 0]

# Replaces votes with 2
tagged1['Brownlow Votes'] = tagged1['Brownlow Votes'].replace([1, 2, 3], ADJ_VOTES)


# Bootstraps data

# Sample them (using the same random_state (seed) outcome is the same) so tagged has same number of rows as zero
new_tagged1 = tagged1.sample(n = len(zero), replace = True, random_state = 42)
        
# concatenate them together
first_lr_data = pd.concat([zero, new_tagged1], axis = 0)

second_lr_data = train_data[train_data['Brownlow Votes'] > 0]        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


Feature Selection

*First LR*

In [9]:
cols = [col for col in first_lr_data.columns if 'BTN' in col]
cols

['Kicks BTN',
 'Handballs BTN',
 'Disposals BTN',
 'Marks BTN',
 'Goals BTN',
 'Behinds BTN',
 'Tackles BTN',
 'Hitouts BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Clearances BTN',
 'Clangers BTN',
 'Rebound 50s BTN',
 'Frees For BTN',
 'Frees Agains BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Contested Marks BTN',
 'Marks Inside 50 BTN',
 'One Percenters BTN',
 'Bounces BTN',
 'Centre Clearances BTN',
 'Stoppage Clearances BTN',
 'Score Involvements BTN',
 'Metres Gained BTN',
 'Turnovers BTN',
 'Intercepts BTN',
 'Tackles Inside 50 BTN',
 'Time On Ground % BTN',
 'Uncontested Marks BTN',
 'Marks Outside 50 BTN',
 'Tackles Outside 50 BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

In [10]:
corr1 = dict()
for col in cols:
    corr1[col] = first_lr_data[[col, 'Brownlow Votes']].corr(method = 'pearson').loc[col]['Brownlow Votes']
corr1

{'Kicks BTN': 0.589000074736177,
 'Handballs BTN': 0.45483608682845106,
 'Disposals BTN': 0.636653301367667,
 'Marks BTN': 0.3189152599003634,
 'Goals BTN': 0.3524526288548748,
 'Behinds BTN': 0.16923791940912486,
 'Tackles BTN': 0.27188131586910896,
 'Hitouts BTN': 0.0380608008473891,
 'Goal Assists BTN': 0.2136809388293751,
 'Inside 50s BTN': 0.43094260849047583,
 'Clearances BTN': 0.4508083098970477,
 'Clangers BTN': -0.23454267346047292,
 'Rebound 50s BTN': 0.07060220045154116,
 'Frees For BTN': 0.27489825301551696,
 'Frees Agains BTN': -0.06697508470228745,
 'Contested Possessions BTN': 0.5538266933134951,
 'Uncontested Possessions BTN': 0.4834979807852794,
 'Effective Disposals BTN': 0.594316422451107,
 'Contested Marks BTN': 0.15727337905394279,
 'Marks Inside 50 BTN': 0.23800873532671127,
 'One Percenters BTN': -0.11279664409086788,
 'Bounces BTN': 0.1390721199903441,
 'Centre Clearances BTN': 0.37290960444057863,
 'Stoppage Clearances BTN': 0.41052881923720386,
 'Score Involve

In [11]:
corr1 = list(corr1.items())
corr1

[('Kicks BTN', 0.589000074736177),
 ('Handballs BTN', 0.45483608682845106),
 ('Disposals BTN', 0.636653301367667),
 ('Marks BTN', 0.3189152599003634),
 ('Goals BTN', 0.3524526288548748),
 ('Behinds BTN', 0.16923791940912486),
 ('Tackles BTN', 0.27188131586910896),
 ('Hitouts BTN', 0.0380608008473891),
 ('Goal Assists BTN', 0.2136809388293751),
 ('Inside 50s BTN', 0.43094260849047583),
 ('Clearances BTN', 0.4508083098970477),
 ('Clangers BTN', -0.23454267346047292),
 ('Rebound 50s BTN', 0.07060220045154116),
 ('Frees For BTN', 0.27489825301551696),
 ('Frees Agains BTN', -0.06697508470228745),
 ('Contested Possessions BTN', 0.5538266933134951),
 ('Uncontested Possessions BTN', 0.4834979807852794),
 ('Effective Disposals BTN', 0.594316422451107),
 ('Contested Marks BTN', 0.15727337905394279),
 ('Marks Inside 50 BTN', 0.23800873532671127),
 ('One Percenters BTN', -0.11279664409086788),
 ('Bounces BTN', 0.1390721199903441),
 ('Centre Clearances BTN', 0.37290960444057863),
 ('Stoppage Cleara

In [12]:
selected_features1 = [col[0] for col in corr1 if col[1] > 0.2]
selected_features1

['Kicks BTN',
 'Handballs BTN',
 'Disposals BTN',
 'Marks BTN',
 'Goals BTN',
 'Tackles BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Clearances BTN',
 'Frees For BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Marks Inside 50 BTN',
 'Centre Clearances BTN',
 'Stoppage Clearances BTN',
 'Score Involvements BTN',
 'Metres Gained BTN',
 'Uncontested Marks BTN',
 'Marks Outside 50 BTN',
 'Tackles Outside 50 BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

*Second LR*

In [13]:
corr2 = dict()
for col in cols:
    corr2[col] = second_lr_data[[col, 'Brownlow Votes']].corr(method = 'pearson').loc[col]['Brownlow Votes']
corr2

{'Kicks BTN': 0.1680649816551983,
 'Handballs BTN': 0.11410782023284817,
 'Disposals BTN': 0.1926596756665896,
 'Marks BTN': 0.04002990252320002,
 'Goals BTN': 0.1118167497113383,
 'Behinds BTN': 0.06519021127478904,
 'Tackles BTN': 0.031672694163300645,
 'Hitouts BTN': -0.030544084214220304,
 'Goal Assists BTN': 0.08259384485804178,
 'Inside 50s BTN': 0.13029862043562515,
 'Clearances BTN': 0.15623123854389526,
 'Clangers BTN': -0.025923232382053286,
 'Rebound 50s BTN': -0.02314836114480929,
 'Frees For BTN': 0.08207718476473543,
 'Frees Agains BTN': -0.01799170634526773,
 'Contested Possessions BTN': 0.1921419605619774,
 'Uncontested Possessions BTN': 0.0910303167617685,
 'Effective Disposals BTN': 0.17264034262690262,
 'Contested Marks BTN': 0.037821612504544556,
 'Marks Inside 50 BTN': 0.0749445091609433,
 'One Percenters BTN': -0.03729972021031526,
 'Bounces BTN': 0.03641104120931636,
 'Centre Clearances BTN': 0.138043805277034,
 'Stoppage Clearances BTN': 0.13152712739654118,
 'S

In [14]:
sort_corr2 = list(corr2.items())
sort_corr2

[('Kicks BTN', 0.1680649816551983),
 ('Handballs BTN', 0.11410782023284817),
 ('Disposals BTN', 0.1926596756665896),
 ('Marks BTN', 0.04002990252320002),
 ('Goals BTN', 0.1118167497113383),
 ('Behinds BTN', 0.06519021127478904),
 ('Tackles BTN', 0.031672694163300645),
 ('Hitouts BTN', -0.030544084214220304),
 ('Goal Assists BTN', 0.08259384485804178),
 ('Inside 50s BTN', 0.13029862043562515),
 ('Clearances BTN', 0.15623123854389526),
 ('Clangers BTN', -0.025923232382053286),
 ('Rebound 50s BTN', -0.02314836114480929),
 ('Frees For BTN', 0.08207718476473543),
 ('Frees Agains BTN', -0.01799170634526773),
 ('Contested Possessions BTN', 0.1921419605619774),
 ('Uncontested Possessions BTN', 0.0910303167617685),
 ('Effective Disposals BTN', 0.17264034262690262),
 ('Contested Marks BTN', 0.037821612504544556),
 ('Marks Inside 50 BTN', 0.0749445091609433),
 ('One Percenters BTN', -0.03729972021031526),
 ('Bounces BTN', 0.03641104120931636),
 ('Centre Clearances BTN', 0.138043805277034),
 ('Sto

In [15]:
selected_features2 = [col[0] for col in sort_corr2 if col[1] > 0.2]
selected_features2

['Score Involvements BTN', 'Behind Assists BTN']

**2.Trains Models**

0. Demonstration of functions (completely same as those in libraries)

Uses same functions as PoC3_LR(2)-N-BT.ipynb - see that notebook for details

1. Micro Rule of Feature Selection 1: 

*All cols that passed FS_val selected*

In [16]:
# Trains LR model for step 1
traindataf_x_1 = first_lr_data[selected_features1]
traindataf_x_1.index = range(0,len(first_lr_data))
traindataf_y_1 = first_lr_data['Brownlow Votes']
traindataf_y_1.index = range(0,len(first_lr_data))

lm_f_1 = linear_model.LinearRegression()
traindataf_x_1 = traindataf_x_1.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
modelf_1 = lm_f_1.fit(traindataf_x_1, traindataf_y_1)

In [17]:
# Get predictions and observations for step 1
predictionsf_1, testdataf_y_1 = predict1(test_games, lm_f_1, selected_features1, choice)

In [18]:
# Get True Positive/True Negative results for step 1
resultf1_1, resultf2_1 = test1(predictionsf_1, testdataf_y_1, 2)

In [19]:
# TP/TN based on what was predicted for step 1
resultf1_1

[[0.9713494514857812, 0.028650548514218766],
 [0.3915574963609898, 0.6084425036390102]]

In [20]:
# TP/TN based on what was observed for step 1
resultf2_1

[[0.9713494514857812, 0.028650548514218766],
 [0.3915574963609898, 0.6084425036390102]]

In [21]:
# Only the True Positive Values for step 1
return_tp(resultf1_1)

(0.9713494514857812, 0.6084425036390102)

In [22]:
# Trains LR model for step 2
traindatas_x_1 = second_lr_data[selected_features2]
traindatas_x_1.index = range(0,len(second_lr_data))
traindatas_y_1 = second_lr_data['Brownlow Votes']
traindatas_y_1.index = range(0,len(second_lr_data))

lm_s_1 = linear_model.LinearRegression()
traindatas_x_1 = traindatas_x_1.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
models_1 = lm_s_1.fit(traindatas_x_1, traindatas_y_1)

In [23]:
# Get predictions and observations for step 2
predictionss_1, testdatas_y_1 = predict2(test_games, lm_s_1, selected_features2, choice)

In [24]:
# Get True Positive/True Negative results for step 2
results1_1, results2_1 = test2(predictionss_1, testdatas_y_1, 3)

In [25]:
# TP/TN based on what was predicted for step 2
results1_1

[[0.42358078602620086, 0.3406113537117904, 0.23580786026200873],
 [0.31877729257641924, 0.34934497816593885, 0.3318777292576419],
 [0.2576419213973799, 0.31004366812227074, 0.43231441048034935]]

In [26]:
# TP/TN based on what was observed for step 2
results2_1

[[0.42358078602620086, 0.31877729257641924, 0.2576419213973799],
 [0.3406113537117904, 0.34934497816593885, 0.31004366812227074],
 [0.23580786026200873, 0.3318777292576419, 0.43231441048034935]]

In [27]:
# Only the True Positive Values for step 2
return_tp(results1_1)

(0.42358078602620086, 0.34934497816593885, 0.43231441048034935)

2. Micro Rule of Feature Selection 2

*-For those with dependency/triangle relationships (i.e. A=Disposals/B=Kicks/C=Handballs), if A comes first then B, C excluded. If B or C comes first then A excluded*

In [28]:
# Selects feature according to micro FS_Rule for step-1
selected_features1_2 = feature_selection2(selected_features1, 2, False)
selected_features1_2

['Kicks BTN',
 'Handballs BTN',
 'Marks BTN',
 'Goals BTN',
 'Tackles BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Clearances BTN',
 'Frees For BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Metres Gained BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

In [29]:
# Selects feature according to micro FS_Rule for step-2
selected_features2_2 = feature_selection2(selected_features2, 2, False)
selected_features2_2
#All other operations hereonin same as 1.

['Score Involvements BTN']

In [30]:
traindataf_x_2 = first_lr_data[selected_features1_2]
traindataf_x_2.index = range(0,len(first_lr_data))
traindataf_y_2 = first_lr_data['Brownlow Votes']
traindataf_y_2.index = range(0,len(first_lr_data))

lm_f_2 = linear_model.LinearRegression()
traindataf_x_2 = traindataf_x_2.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
modelf_2 = lm_f_2.fit(traindataf_x_2, traindataf_y_2)

predictionsf_2, testdataf_y_2 = predict1(test_games, lm_f_2, selected_features1_2, choice)

resultf1_2, resultf2_2 = test1(predictionsf_2, testdataf_y_2, 2)

In [31]:
resultf1_2

[[0.9707104057940142, 0.029289594205985727],
 [0.4002911208151383, 0.5997088791848617]]

In [32]:
resultf2_2

[[0.9707104057940142, 0.029289594205985727],
 [0.4002911208151383, 0.5997088791848617]]

In [33]:
return_tp(resultf1_2)

(0.9707104057940142, 0.5997088791848617)

In [34]:
traindatas_x_2 = second_lr_data[selected_features2_2]
traindatas_x_2.index = range(0,len(second_lr_data))
traindatas_y_2 = second_lr_data['Brownlow Votes']
traindatas_y_2.index = range(0,len(second_lr_data))

lm_s_2 = linear_model.LinearRegression()
traindatas_x_2 = traindatas_x_2.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
models_2 = lm_s_2.fit(traindatas_x_2, traindatas_y_2)

predictionss_2, testdatas_y_2 = predict2(test_games, lm_s_2, selected_features2_2, choice)

results1_2, results2_2 = test2(predictionss_2, testdatas_y_2, 3)

In [35]:
results1_2

[[0.43231441048034935, 0.32751091703056767, 0.24017467248908297],
 [0.31877729257641924, 0.38427947598253276, 0.29694323144104806],
 [0.24890829694323144, 0.28820960698689957, 0.462882096069869]]

In [36]:
results2_2

[[0.43231441048034935, 0.31877729257641924, 0.24890829694323144],
 [0.32751091703056767, 0.38427947598253276, 0.28820960698689957],
 [0.24017467248908297, 0.29694323144104806, 0.462882096069869]]

In [37]:
return_tp(results1_2)

(0.43231441048034935, 0.38427947598253276, 0.462882096069869)

3. Micro Rule of Feature Selection 3: 

*All cols that passed FS_val selected but abandon all 'summary' cols such as Disposal/Tackles/Marks*

In [38]:
selected_features1_3 = feature_selection2(selected_features1, 3, False)
selected_features1_3

['Kicks BTN',
 'Handballs BTN',
 'Goals BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Frees For BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Marks Inside 50 BTN',
 'Centre Clearances BTN',
 'Stoppage Clearances BTN',
 'Metres Gained BTN',
 'Uncontested Marks BTN',
 'Marks Outside 50 BTN',
 'Tackles Outside 50 BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

In [39]:
selected_features2_3 = feature_selection2(selected_features2, 3, False)
selected_features2_3

['Behind Assists BTN']

In [40]:
traindataf_x_3 = first_lr_data[selected_features1_3]
traindataf_x_3.index = range(0,len(first_lr_data))
traindataf_y_3 = first_lr_data['Brownlow Votes']
traindataf_y_3.index = range(0,len(first_lr_data))

lm_f_3 = linear_model.LinearRegression()
traindataf_x_3 = traindataf_x_3.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
modelf_3 = lm_f_3.fit(traindataf_x_3, traindataf_y_3)

predictionsf_3, testdataf_y_3 = predict1(test_games, lm_f_3, selected_features1_3, choice)

resultf1_3, resultf2_3 = test1(predictionsf_3, testdataf_y_3, 2)

In [41]:
resultf1_3

[[0.9714559591010757, 0.02854404089892427],
 [0.3901018922852984, 0.6098981077147017]]

In [42]:
resultf2_3

[[0.9714559591010757, 0.02854404089892427],
 [0.3901018922852984, 0.6098981077147017]]

In [43]:
return_tp(resultf1_3)

(0.9714559591010757, 0.6098981077147017)

In [44]:
traindatas_x_3 = second_lr_data[selected_features2_3]
traindatas_x_3.index = range(0,len(second_lr_data))
traindatas_y_3 = second_lr_data['Brownlow Votes']
traindatas_y_3.index = range(0,len(second_lr_data))

lm_s_3 = linear_model.LinearRegression()
traindatas_x_3 = traindatas_x_3.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
models_3 = lm_s_3.fit(traindatas_x_3, traindatas_y_3)

predictionss_3, testdatas_y_3 = predict2(test_games, lm_s_3, selected_features2_3, choice)

results1_3, results2_3 = test2(predictionss_3, testdatas_y_3, 3)

In [45]:
results1_3

[[0.4148471615720524, 0.33624454148471616, 0.24890829694323144],
 [0.3624454148471616, 0.3537117903930131, 0.2838427947598253],
 [0.22270742358078602, 0.31004366812227074, 0.4672489082969432]]

In [46]:
results2_3

[[0.4148471615720524, 0.3624454148471616, 0.22270742358078602],
 [0.33624454148471616, 0.3537117903930131, 0.31004366812227074],
 [0.24890829694323144, 0.2838427947598253, 0.4672489082969432]]

In [47]:
return_tp(results1_3)

(0.4148471615720524, 0.3537117903930131, 0.4672489082969432)

4. Micro Rule of Feature Selection 4: 

*Exclude Disposals, otherwise as per rule 2*

In [48]:
selected_features1_4 = feature_selection2(selected_features1, 4, False)
selected_features1_4

['Kicks BTN',
 'Handballs BTN',
 'Marks BTN',
 'Goals BTN',
 'Tackles BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Clearances BTN',
 'Frees For BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Metres Gained BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

In [49]:
selected_features2_4 = feature_selection2(selected_features1, 4, False)
selected_features2_4

['Kicks BTN',
 'Handballs BTN',
 'Marks BTN',
 'Goals BTN',
 'Tackles BTN',
 'Goal Assists BTN',
 'Inside 50s BTN',
 'Clearances BTN',
 'Frees For BTN',
 'Contested Possessions BTN',
 'Uncontested Possessions BTN',
 'Effective Disposals BTN',
 'Metres Gained BTN',
 'Behind Assists BTN',
 'Ineffective Disposals BTN']

In [50]:
traindataf_x_4 = first_lr_data[selected_features1_4]
traindataf_x_4.index = range(0,len(first_lr_data))
traindataf_y_4 = first_lr_data['Brownlow Votes']
traindataf_y_4.index = range(0,len(first_lr_data))

lm_f_4 = linear_model.LinearRegression()
traindataf_x_4 = traindataf_x_4.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
modelf_4 = lm_f_4.fit(traindataf_x_4, traindataf_y_4)

predictionsf_4, testdataf_y_4 = predict1(test_games, lm_f_4, selected_features1_4, choice)

resultf1_4, resultf2_4 = test1(predictionsf_4, testdataf_y_4, 2)

In [51]:
resultf1_4

[[0.9707104057940142, 0.029289594205985727],
 [0.4002911208151383, 0.5997088791848617]]

In [52]:
resultf2_4

[[0.9707104057940142, 0.029289594205985727],
 [0.4002911208151383, 0.5997088791848617]]

In [53]:
return_tp(resultf1_4)

(0.9707104057940142, 0.5997088791848617)

In [54]:
traindatas_x_4 = second_lr_data[selected_features2_4]
traindatas_x_4.index = range(0,len(second_lr_data))
traindatas_y_4 = second_lr_data['Brownlow Votes']
traindatas_y_4.index = range(0,len(second_lr_data))

lm_s_4 = linear_model.LinearRegression()
traindatas_x_4 = traindatas_x_4.replace((np.inf, -np.inf, np.nan), 0).reset_index(drop=True)
models_4 = lm_s_4.fit(traindatas_x_4, traindatas_y_4)

predictionss_4, testdatas_y_4 = predict2(test_games, lm_s_4, selected_features2_4, choice)

results1_4, results2_4 = test2(predictionss_4, testdatas_y_4, 3)

In [55]:
results1_4

[[0.5327510917030568, 0.3318777292576419, 0.13537117903930132],
 [0.2838427947598253, 0.3930131004366812, 0.3231441048034934],
 [0.18340611353711792, 0.27510917030567683, 0.5414847161572053]]

In [56]:
results2_4

[[0.5327510917030568, 0.2838427947598253, 0.18340611353711792],
 [0.3318777292576419, 0.3930131004366812, 0.27510917030567683],
 [0.13537117903930132, 0.3231441048034934, 0.5414847161572053]]

In [57]:
return_tp(results1_4)

(0.5327510917030568, 0.3930131004366812, 0.5414847161572053)

**3. Summary Observations**

0. Demonstration of functions (completely same as those in libraries)

Uses same functions as PoC3_LR(2)-N-BT.ipynb - see that notebook for details

1. Emperical Experiment

In [58]:
leaderboard1 = wholeseason(final_test_games, lm_f_1, lm_s_1, selected_features1, selected_features2, choice)
leaderboard2 = wholeseason(final_test_games, lm_f_2, lm_s_2, selected_features1_2, selected_features2_2, choice)
leaderboard3 = wholeseason(final_test_games, lm_f_3, lm_s_3, selected_features1_3, selected_features2_3, choice)
leaderboard4 = wholeseason(final_test_games, lm_f_4, lm_s_4, selected_features1_4, selected_features2_4, choice)

In [59]:
leaderboard1[0:15]

[('Clayton Oliver', 31),
 ('Oliver Wines', 30),
 ('Jack Steele', 30),
 ('Sam Walsh', 26),
 ('Darcy Parish', 26),
 ('Marcus Bontempelli', 26),
 ('Jarryd Lyons', 25),
 ('Tom Mitchell', 25),
 ('Jackson Macrae', 24),
 ('Touk Miller', 21),
 ('Christian Petracca', 20),
 ('Cameron Guthrie', 20),
 ('Callum Mills', 19),
 ('Dayne Zorko', 19),
 ('Luke Parker', 19)]

In [60]:
leaderboard2[0:15]

[('Clayton Oliver', 30),
 ('Oliver Wines', 30),
 ('Jack Steele', 30),
 ('Jarryd Lyons', 28),
 ('Sam Walsh', 26),
 ('Darcy Parish', 26),
 ('Marcus Bontempelli', 26),
 ('Jackson Macrae', 25),
 ('Christian Petracca', 25),
 ('Tom Mitchell', 22),
 ('Cameron Guthrie', 21),
 ('Callum Mills', 21),
 ('Dayne Zorko', 20),
 ('Rory Laird', 19),
 ('Touk Miller', 19)]

In [61]:
leaderboard3[0:15]

[('Oliver Wines', 31),
 ('Jack Steele', 31),
 ('Clayton Oliver', 29),
 ('Darcy Parish', 27),
 ('Jarryd Lyons', 25),
 ('Jackson Macrae', 25),
 ('Sam Walsh', 25),
 ('Marcus Bontempelli', 23),
 ('Cameron Guthrie', 22),
 ('Tom Mitchell', 22),
 ('Christian Petracca', 22),
 ('Touk Miller', 22),
 ('Callum Mills', 21),
 ('Rory Laird', 20),
 ('Dayne Zorko', 19)]

In [62]:
leaderboard4[0:15]

[('Jack Steele', 37),
 ('Oliver Wines', 36),
 ('Clayton Oliver', 30),
 ('Darcy Parish', 30),
 ('Jackson Macrae', 29),
 ('Jarryd Lyons', 27),
 ('Tom Mitchell', 27),
 ('Sam Walsh', 26),
 ('Marcus Bontempelli', 25),
 ('Touk Miller', 24),
 ('Rory Laird', 23),
 ('Christian Petracca', 23),
 ('Cameron Guthrie', 21),
 ('Callum Mills', 20),
 ('Taylor Adams', 20)]

2. Predictor's r scores

In [63]:
print(lm_f_1.score(traindataf_x_1, traindataf_y_1))
print(lm_s_1.score(traindatas_x_1, traindatas_y_1))
print(lm_f_2.score(traindataf_x_2, traindataf_y_2))
print(lm_s_2.score(traindatas_x_2, traindatas_y_2))
print(lm_f_3.score(traindataf_x_3, traindataf_y_3))
print(lm_s_3.score(traindatas_x_3, traindatas_y_3))
print(lm_f_4.score(traindataf_x_4, traindataf_y_4))
print(lm_s_4.score(traindatas_x_4, traindatas_y_4))

0.5847730227366119
0.05801737930048224
0.5782357676007112
0.057988964505007234
0.5799540663740815
0.050570156027257
0.5782357676007112
0.1356742075092373


## Note: A few improvements could be made on this notebook: ##

*1. A good way to get real tp1, tp2, tp3 stats may be to use the actual predicted indexes from predict1 in predict2. Current model only tests lm2's ability to rank correctly given the top 3 players, and assumes it has similar capabilities for any three players*