# NHL Playoff Bracket Predictions
### Uses regular season team summary statistics to predict playoff outcomes

In [2]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor, MLPClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn import svm

pd.options.display.max_rows = 999
pd.options.display.max_columns = 999

In [3]:
ls

 Volume in drive T is Data
 Volume Serial Number is E01F-4123

 Directory of T:\Dropbox\Projects\NHL\src

03/04/2018  09:32 AM    <DIR>          .
03/04/2018  09:32 AM    <DIR>          ..
03/04/2018  08:59 AM    <DIR>          .ipynb_checkpoints
02/18/2018  11:37 AM                 0 __init__.py
03/04/2018  09:32 AM           104,178 Bracket_2018.ipynb
02/18/2018  11:38 AM            64,187 Bracket_2018_LSTM.ipynb
03/03/2018  05:43 PM    <DIR>          deepNN
03/03/2018  09:54 PM             4,700 iris.csv
02/11/2018  08:02 PM           568,674 Join.ipynb
02/13/2018  07:54 PM             1,836 lstm.py
02/11/2018  07:22 PM             2,501 NHL_Collection.ipynb
02/12/2018  06:57 PM            67,212 NHL_KNN.ipynb
02/12/2018  07:45 PM             8,973 NHL_Predictions_2017.R
03/03/2018  05:33 PM    <DIR>          notebooks
02/12/2018  07:52 PM             1,046 scrap.R
              10 File(s)        823,307 bytes
               5 Dir(s)  767,594,168,320 bytes free


# Read in the raw data from file

In [4]:
# regular season
reg = pd.read_csv('../Data/2000_2017_regular_season.csv')
reg.head()

Unnamed: 0,year,Rk,Team,AvAge,GP,W,L,OL,PTS,PTS%,GF,GA,SOW,SOL,SRS,SOS,TG/G,EVGF,EVGA,PP,PPO,PP%,PPA,PPOA,PK%,SH,SHA,PIM/G,oPIM/G,S,S%,SA,SV%,PDO
0,2017,1,Washington Capitals*,28.4,82,55,19,8,118,0.72,263.0,182,2,5.0,0.99,0.0,5.43,199,130,57,248.0,22.98,44,272.0,83.82,5,3.0,9.0,8.5,2495.0,10.5,2282.0,0.922,102.5
1,2017,2,Pittsburgh Penguins*,28.7,82,50,21,11,111,0.677,282.0,234,4,5.0,0.59,0.01,6.29,213,170,60,260.0,23.08,52,257.0,79.77,5,7.0,8.4,8.7,2745.0,10.1,2671.0,0.914,101.0
2,2017,3,Chicago Blackhawks*,29.4,82,50,23,9,109,0.665,244.0,213,4,1.0,0.32,-0.06,5.57,197,158,42,233.0,18.03,47,211.0,77.73,1,7.0,6.9,7.8,2508.0,9.6,2574.0,0.918,101.3
3,2017,4,Columbus Blue Jackets*,26.2,82,50,24,8,108,0.659,249.0,195,2,2.0,0.68,0.02,5.41,195,152,42,211.0,19.91,39,223.0,82.51,10,2.0,8.6,8.5,2540.0,9.7,2489.0,0.922,101.2
4,2017,5,Minnesota Wild*,28.3,82,49,25,8,106,0.646,266.0,208,3,2.0,0.63,-0.08,5.78,211,166,47,224.0,20.98,37,217.0,82.95,5,3.0,8.0,8.3,2527.0,10.4,2465.0,0.916,101.5


In [5]:
# playoff stats
playoff = pd.read_csv('../Data/2000_2017_playoffs.csv')
playoff.head()

Unnamed: 0,year,Rk,Team,GP,W,L,T,OW,OL,W-L%,G,GA,DIFF
0,2017,1,Pittsburgh Penguins,25,16,9,0,2,2,0.64,77,57,20
1,2017,2,Nashville Predators,22,14,8,0,2,1,0.636,60,48,12
2,2017,3,Ottawa Senators,19,11,8,0,6,2,0.579,47,50,-3
3,2017,4,Anaheim Ducks,17,10,7,0,4,1,0.588,50,52,-2
4,2017,5,Edmonton Oilers,13,7,6,0,1,3,0.538,36,35,1


# Cleanup
* playoff result is the outcome variable, so append playoff result to regular season dataframe
* will need a foreign key relationship of team/year between the two tables

In [6]:
def drop_pun(str_var):
    '''drop the list of chars from string'''
    ignore='!"#$%&\'()*+,-/:;<=>?@[\\]^_`{|}~'
    str_cln=str_var.translate({ord(x): '' for x in ignore})
    return str_cln

In [7]:
reg.Team = reg.Team.apply(drop_pun)

In [8]:
# rename rank columns to more distinct
playoff.rename(columns={"Rk": "playoff_rank"}, inplace=True)
reg.rename(columns={"Rk": "reg_rank"}, inplace=True)

In [9]:
allData = pd.merge(reg, playoff[['year','Team','playoff_rank']], how = 'left', on = ['year', 'Team'])
allData.head()

Unnamed: 0,year,reg_rank,Team,AvAge,GP,W,L,OL,PTS,PTS%,GF,GA,SOW,SOL,SRS,SOS,TG/G,EVGF,EVGA,PP,PPO,PP%,PPA,PPOA,PK%,SH,SHA,PIM/G,oPIM/G,S,S%,SA,SV%,PDO,playoff_rank
0,2017,1,Washington Capitals,28.4,82,55,19,8,118,0.72,263.0,182,2,5.0,0.99,0.0,5.43,199,130,57,248.0,22.98,44,272.0,83.82,5,3.0,9.0,8.5,2495.0,10.5,2282.0,0.922,102.5,6.0
1,2017,2,Pittsburgh Penguins,28.7,82,50,21,11,111,0.677,282.0,234,4,5.0,0.59,0.01,6.29,213,170,60,260.0,23.08,52,257.0,79.77,5,7.0,8.4,8.7,2745.0,10.1,2671.0,0.914,101.0,1.0
2,2017,3,Chicago Blackhawks,29.4,82,50,23,9,109,0.665,244.0,213,4,1.0,0.32,-0.06,5.57,197,158,42,233.0,18.03,47,211.0,77.73,1,7.0,6.9,7.8,2508.0,9.6,2574.0,0.918,101.3,15.0
3,2017,4,Columbus Blue Jackets,26.2,82,50,24,8,108,0.659,249.0,195,2,2.0,0.68,0.02,5.41,195,152,42,211.0,19.91,39,223.0,82.51,10,2.0,8.6,8.5,2540.0,9.7,2489.0,0.922,101.2,13.0
4,2017,5,Minnesota Wild,28.3,82,49,25,8,106,0.646,266.0,208,3,2.0,0.63,-0.08,5.78,211,166,47,224.0,20.98,37,217.0,82.95,5,3.0,8.0,8.3,2527.0,10.4,2465.0,0.916,101.5,14.0


In [10]:
# drop teams that never made playoffs (playoff_rank will be NaN)
allData.playoff_rank.unique()

array([ 6.,  1., 15., 13., 14.,  4.,  9.,  5.,  8.,  7., 11.,  3., 10.,
       12., nan,  2., 16.])

In [11]:
# drop rows that have nan values
allData.dropna(axis=0, inplace=True)

In [12]:
allData.playoff_rank.unique()

array([ 6.,  1., 15., 13., 14.,  4.,  9.,  5.,  8.,  7., 11.,  3., 10.,
       12.,  2., 16.])

#### Scale data by year


In [13]:
scaled = allData.drop(['Team', 'playoff_rank','GP'],axis=1).groupby('year').apply(lambda x: (x - np.mean(x)) / np.std(x)).drop(['year'],axis=1)
scaled.head()

Unnamed: 0,reg_rank,AvAge,W,L,OL,PTS,PTS%,GF,GA,SOW,SOL,SRS,SOS,TG/G,EVGF,EVGA,PP,PPO,PP%,PPA,PPOA,PK%,SH,SHA,PIM/G,oPIM/G,S,S%,SA,SV%,PDO
0,-1.54422,0.279852,2.30482,-2.040732,-0.349784,2.342913,2.345213,1.143264,-2.097775,-0.782994,0.941161,2.486895,0.658553,-0.365061,0.93548,-2.445596,1.323048,0.442393,1.27124,-0.042705,1.190094,0.948279,-0.575086,-1.375992,-0.019429,-0.527525,-0.170873,1.56,-1.193346,1.620383,2.106337
1,-1.344966,0.585145,0.907959,-1.482583,0.769526,1.289073,1.287176,2.186964,1.508318,0.887393,0.941161,0.943138,0.977851,2.441216,1.770331,0.863456,1.835195,1.346005,1.319393,1.779371,0.537988,-0.971986,-0.575086,1.140108,-0.485735,-0.333493,2.161864,0.92,1.821015,0.0,0.392707
2,-1.145712,1.297496,0.907959,-0.924434,0.023319,0.987976,0.99191,0.099564,0.052011,0.887393,-1.289739,-0.098897,-1.257237,0.091775,0.816216,-0.12926,-1.23769,-0.687122,-1.112335,0.640573,-1.461805,-1.93923,-1.990683,1.140108,-1.651498,-1.206637,-0.049571,0.12,1.069362,0.810191,0.735433
3,-0.946457,-1.958964,0.907959,-0.64536,-0.349784,0.837427,0.844277,0.374221,-1.196252,-0.782994,-0.732014,1.290484,1.297149,-0.430323,0.696951,-0.625618,-1.23769,-2.343744,-0.207058,-1.181502,-0.94012,0.327156,1.19441,-2.005017,-0.3303,-0.527525,0.24902,0.28,0.410697,1.620383,0.621191
4,-0.747203,0.178088,0.628587,-0.366285,-0.349784,0.53633,0.524405,1.308059,-0.294729,0.0522,-0.732014,1.097514,-1.895834,0.777029,1.651067,0.53255,-0.384111,-1.364831,0.308179,-1.637021,-1.200963,0.535777,-0.575086,-1.375992,-0.796605,-0.721557,0.127717,1.4,0.224721,0.405096,0.963917


In [14]:
# stitch scaled data back onto original dataframe
allData=pd.concat([allData[['Team','year','playoff_rank']],scaled],axis=1)

# Train/Test split
* train on < 2016 seasons
* test on 2017 season

In [15]:
x_train = allData[allData.year<2017].drop('playoff_rank', axis=1)
y_train = allData[allData.year<2017].playoff_rank

x_test = allData[allData.year==2017].drop('playoff_rank', axis=1)
y_test = allData[allData.year==2017].playoff_rank

In [16]:
x_train.shape

(144, 33)

In [17]:
x_train.head()

Unnamed: 0,Team,year,reg_rank,AvAge,W,L,OL,PTS,PTS%,GF,GA,SOW,SOL,SRS,SOS,TG/G,EVGF,EVGA,PP,PPO,PP%,PPA,PPOA,PK%,SH,SHA,PIM/G,oPIM/G,S,S%,SA,SV%,PDO
30,Washington Capitals,2016,-1.586155,-0.78811,2.427442,-2.587274,-0.531859,2.630351,2.629573,1.397151,-1.428116,0.398137,-0.872357,2.161109,-0.451652,0.272382,1.485446,-0.682185,0.764093,-0.283781,0.972061,-1.222208,0.096566,1.164298,-1.472919,-0.19298,-0.156232,-0.669376,0.228823,1.291544,-0.562472,0.867722,0.975331
31,Dallas Stars,2016,-1.378135,0.78811,0.980023,-1.108832,-0.126633,1.092339,1.098393,2.438505,2.022679,-1.01746,-0.872357,0.884235,0.03011,2.990805,1.948442,1.609956,1.303453,0.356545,1.081858,-0.19298,-0.277993,0.055215,1.472919,3.23778,-0.494032,-0.669376,1.350796,1.660556,-0.159044,-2.169305,-0.701271
32,St. Louis Blues,2016,-1.170115,-0.630488,0.738787,-0.813143,-0.126633,0.8127,0.801299,-0.546711,-0.681998,1.105935,0.396526,0.015961,0.511872,-0.806358,-0.598037,-0.354736,0.044947,-1.098741,0.785884,-0.707594,1.032963,1.156649,-0.73646,-0.19298,0.722046,-0.371875,-0.125484,-0.553519,0.54437,1.084652,-0.314363
33,Pittsburgh Penguins,2016,-0.962094,0.945732,0.49755,-0.221766,-0.531859,0.393242,0.389937,0.911185,-0.495469,0.398137,0.396526,1.13961,0.03011,0.40183,0.790952,-0.027287,-0.494413,0.298334,-0.70831,-0.879132,0.143386,0.88894,1.841149,-0.19298,-0.561592,-0.570209,2.315298,-0.553519,0.513337,0.867722,0.201515
34,Anaheim Ducks,2016,-0.754074,-1.576221,0.015077,-0.517455,0.683818,0.253423,0.252816,-0.963253,-1.521381,-0.309662,0.396526,-0.035114,-1.896937,-1.58305,-1.524029,-0.682185,0.94388,-0.63305,1.42557,-1.393746,1.688441,1.959778,0.36823,-0.879132,2.073244,1.512293,-0.036907,-0.922531,-1.327952,0.650791,-1.475087


In [18]:
x_train.shape, x_test.shape

((144, 33), (16, 33))

# Regression Models

In [19]:
tree_reg=DecisionTreeRegressor()
tree_reg.fit(X=x_train.drop(['year','Team'], axis=1), y=y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=None, splitter='best')

### Fit Linear Model

In [20]:
reg = linear_model.LinearRegression()

In [21]:
reg.fit(X=x_train.drop(['year','Team'], axis=1), y=y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

### Fit Neural Network

In [22]:
mlp = MLPRegressor(hidden_layer_sizes=(100,100), max_iter=2000)

In [23]:
mlp.fit(X=x_train.drop(['year','Team'], axis=1), y=y_train)

MLPRegressor(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

# Classification Models

In [24]:
lr = LogisticRegression()
lr.fit(x_train.drop(['year','Team'], axis=1), y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

### KNN Classification

In [25]:
knn = KNeighborsClassifier(n_neighbors = 5, algorithm = 'auto')
knn.fit(x_train.drop(['year','Team'], axis=1), y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='uniform')

### SVM Classification

In [26]:
SVM = svm.SVC()
SVM.fit(x_train.drop(['year','Team'], axis=1), y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

### NN Clssification

In [27]:
nn_clf = MLPClassifier(hidden_layer_sizes=(100,100), max_iter=2000)
nn_clf.fit(x_train.drop(['year','Team'], axis=1), y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100, 100), learning_rate='constant',
       learning_rate_init=0.001, max_iter=2000, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [28]:
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train.drop(['year','Team'], axis=1), y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=0,
            splitter='best')

### Deep Learning

In [29]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

In [30]:
seed = 7
np.random.seed(seed)

In [31]:
def encode(Y):
    # encode class values as integers
    encoder = LabelEncoder()
    encoder.fit(Y)
    encoded_Y = encoder.transform(Y)
    # convert integers to dummy variables (i.e. one hot encoded)
    dummy_y = np_utils.to_categorical(encoded_Y)
    return dummy_y

In [32]:
train_dummy_y = encode(y_train)
test_dummy_y = encode(y_test)

In [59]:
def baseline_model():
    # create model
    model = Sequential()
    model.add(Dense(32, input_dim=31, activation='relu'))
    model.add(Dense(16, activation='sigmoid'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [73]:
estimator = KerasClassifier(build_fn=baseline_model, epochs=50, batch_size=4, verbose=2)

In [74]:
estimator.fit(x=x_train.drop(['year','Team'], axis=1).as_matrix().astype(float),
            y=train_dummy_y)

Epoch 1/50
 - 1s - loss: 2.8593 - acc: 0.0625
Epoch 2/50
 - 0s - loss: 2.7780 - acc: 0.0764
Epoch 3/50
 - 0s - loss: 2.7162 - acc: 0.0972
Epoch 4/50
 - 0s - loss: 2.6661 - acc: 0.1042
Epoch 5/50
 - 0s - loss: 2.6214 - acc: 0.1111
Epoch 6/50
 - 0s - loss: 2.5796 - acc: 0.1319
Epoch 7/50
 - 0s - loss: 2.5386 - acc: 0.1806
Epoch 8/50
 - 0s - loss: 2.4993 - acc: 0.1736
Epoch 9/50
 - 0s - loss: 2.4596 - acc: 0.1875
Epoch 10/50
 - 0s - loss: 2.4206 - acc: 0.2222
Epoch 11/50
 - 0s - loss: 2.3812 - acc: 0.2153
Epoch 12/50
 - 0s - loss: 2.3411 - acc: 0.2222
Epoch 13/50
 - 0s - loss: 2.3015 - acc: 0.2292
Epoch 14/50
 - 0s - loss: 2.2608 - acc: 0.2292
Epoch 15/50
 - 0s - loss: 2.2185 - acc: 0.2361
Epoch 16/50
 - 0s - loss: 2.1771 - acc: 0.2500
Epoch 17/50
 - 0s - loss: 2.1326 - acc: 0.2778
Epoch 18/50
 - 0s - loss: 2.0910 - acc: 0.2778
Epoch 19/50
 - 0s - loss: 2.0427 - acc: 0.3194
Epoch 20/50
 - 0s - loss: 1.9947 - acc: 0.3611
Epoch 21/50
 - 0s - loss: 1.9476 - acc: 0.3889
Epoch 22/50
 - 0s - lo

<keras.callbacks.History at 0x220291597f0>

# 2017 Predictions
* LM, NN are the predictions from linear model, and neural network, respectively.

### Convert Regression to ordered class

In [75]:
# convert regression to ordered classification
def reg_to_ord(array):
    temp = array.argsort()
    ranks = np.empty_like(temp)
    ranks[temp] = np.arange(len(array))
    return ranks+1 # plus 1 so index starts at 1

In [76]:
# determine number clusters
def elbow_plot():
    for k in range(1,17):
        knn = KNeighborsClassifier(n_neighbors = k, algorithm = 'auto')
        predicted = knn.fit(knn.fit(x_train.drop(['year','Team'], axis=1), y_train),
                           n_neighbors = k)

In [77]:
predictions_2017 = pd.DataFrame({'Team': x_test.Team,
                                 'Actual': y_test,
                                 'DNN.CLF': estimator.predict(x_test.drop(['year','Team'], axis=1).as_matrix().astype(float)),
                                 'LM' : reg.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'NN' : mlp.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'TREE.REG': tree_reg.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'KNN': knn.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'SVM': SVM.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'NN.CLF': nn_clf.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'TREE.CLF': tree.predict(X=x_test.drop(['year','Team'], axis=1)),
                                 'LR': lr.predict(X=x_test.drop(['year','Team'], axis=1))})
predictions_2017

Unnamed: 0,Actual,DNN.CLF,KNN,LM,LR,NN,NN.CLF,SVM,TREE.CLF,TREE.REG,Team
0,6.0,3,5.0,4.454686,8.0,4.012233,8.0,2.0,5.0,2.0,Washington Capitals
1,1.0,9,2.0,9.915342,10.0,10.875052,10.0,7.0,16.0,12.0,Pittsburgh Penguins
2,15.0,9,15.0,8.11573,10.0,13.052409,15.0,9.0,16.0,3.0,Chicago Blackhawks
3,13.0,7,2.0,5.104469,8.0,4.21823,8.0,8.0,5.0,3.0,Columbus Blue Jackets
4,14.0,4,1.0,5.507042,5.0,6.825256,2.0,7.0,2.0,2.0,Minnesota Wild
5,4.0,8,4.0,7.905437,6.0,4.8509,9.0,16.0,12.0,6.0,Anaheim Ducks
6,9.0,5,4.0,8.099499,9.0,4.723683,9.0,16.0,9.0,16.0,Montreal Canadiens
7,5.0,13,3.0,8.433143,10.0,7.857594,2.0,3.0,16.0,11.0,Edmonton Oilers
8,8.0,7,1.0,8.858501,13.0,11.540844,8.0,8.0,11.0,16.0,New York Rangers
9,7.0,5,3.0,7.710936,6.0,7.308301,3.0,13.0,6.0,13.0,St. Louis Blues


In [72]:
predictions_2017['Mean']=predictions_2017.drop(['Actual','Team'],axis=1).mean(axis=1)
predictions_2017

Unnamed: 0,Actual,DNN.CLF,KNN,LM,LR,NN,NN.CLF,SVM,TREE.CLF,TREE.REG,Team,Mean
0,6.0,3,5.0,4.454686,8.0,4.012233,8.0,2.0,5.0,2.0,Washington Capitals,4.607435
1,1.0,4,2.0,9.915342,10.0,10.875052,10.0,7.0,16.0,12.0,Pittsburgh Penguins,9.087822
2,15.0,4,15.0,8.11573,10.0,13.052409,15.0,9.0,16.0,3.0,Chicago Blackhawks,10.352015
3,13.0,0,2.0,5.104469,8.0,4.21823,8.0,8.0,5.0,3.0,Columbus Blue Jackets,4.813633
4,14.0,6,1.0,5.507042,5.0,6.825256,2.0,7.0,2.0,2.0,Minnesota Wild,4.148033
5,4.0,15,4.0,7.905437,6.0,4.8509,9.0,16.0,12.0,6.0,Anaheim Ducks,8.972926
6,9.0,3,4.0,8.099499,9.0,4.723683,9.0,16.0,9.0,16.0,Montreal Canadiens,8.758131
7,5.0,9,3.0,8.433143,10.0,7.857594,2.0,3.0,16.0,11.0,Edmonton Oilers,7.810082
8,8.0,7,1.0,8.858501,13.0,11.540844,8.0,8.0,11.0,16.0,New York Rangers,9.377705
9,7.0,12,3.0,7.710936,6.0,7.308301,3.0,13.0,6.0,13.0,St. Louis Blues,7.891026


In [66]:
predictions_2017[['LM','NN','Mean']]=predictions_2017[['LM','NN','Mean']].apply(reg_to_ord)
predictions_2017.sort_values('Actual')

Unnamed: 0,Actual,DNN.CLF,KNN,LM,LR,NN,NN.CLF,SVM,TREE.CLF,TREE.REG,Team,Mean
1,1.0,9,2.0,14,10.0,12,10.0,7.0,16.0,12.0,Pittsburgh Penguins,10
16,2.0,15,9.0,13,16.0,10,16.0,16.0,13.0,2.0,Nashville Predators,15
11,3.0,14,16.0,16,12.0,16,16.0,16.0,9.0,15.0,Ottawa Senators,16
5,4.0,11,4.0,7,6.0,5,9.0,16.0,12.0,6.0,Anaheim Ducks,7
7,5.0,9,3.0,10,10.0,8,2.0,3.0,16.0,11.0,Edmonton Oilers,6
0,6.0,6,5.0,1,8.0,1,8.0,2.0,5.0,2.0,Washington Capitals,3
9,7.0,5,3.0,6,6.0,7,3.0,13.0,6.0,13.0,St. Louis Blues,5
8,8.0,10,1.0,12,13.0,13,8.0,8.0,11.0,16.0,New York Rangers,11
6,9.0,8,4.0,8,9.0,3,9.0,16.0,9.0,16.0,Montreal Canadiens,8
12,10.0,10,9.0,11,5.0,14,9.0,9.0,12.0,10.0,Toronto Maple Leafs,9


In [None]:
percentageError=np.abs(predictions_2017['KNN']-predictions_2017.Actual)/predictions_2017.Actual
percentageError

In [None]:
percentageError.mean()

In [None]:
# evaluate individual models
for x in predictions_2017.drop(['Team','Actual'], axis=1).columns:
    print(x, round((np.abs(predictions_2017[x]-predictions_2017.Actual)/predictions_2017.Actual).mean(),2))

# Bracket Simulator

In [None]:
predictions_2017[['Team','Mean']]

In [None]:
bracket=dict({'A1': 'Montreal Canadiens',
              'A1WC':'New York Rangers',
              'A2': 'Ottawa Senators',
              'A3': 'Boston Bruins',
              'M1':'Washington Capitals',
              'M1WC':'Toronto Maple Leafs', 
              'M2':'Pittsburgh Penguins',
              'M3':'Columbus Blue Jackets',
              'C1':'Chicago Blackhawks',
              'C1WC':'Nashville Predators',
              'C2': 'Minnesota Wild',
              'C3':'St. Louis Blues',
              'P1':'Anaheim Ducks',
              'P1WC':'Calgary Flames',
              'P2':'Edmonton Oilers',
              'P3':'San Jose Sharks'})

In [None]:
for i, key in enumerate(bracket.keys()):
    a1wc = 

In [None]:
def bracket(predictions, bracket_dict):
    for key in bracket_dict.key[0:1]:
        predicts
        