# Perceptron to predict if an NFL home team will win

In [78]:
import pandas as pd

# Read the spreads-and-scores CSV into a DataFrame and display it.
spreads_and_scores = pd.read_csv(filepath_or_buffer='spreads_and_scores_clean.csv')
spreads_and_scores

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,team_away,spread_favorite,over_under_line,stadium,weather_temperature,weather_wind_mph,weather_humidity,team_favorite_away,team_favorite_home,total_score,did_home_team_win
0,0,1,0,0,0,-3.0,35.0,0,64.0,8.0,66.0,1,0,24.0,1
1,0,1,0,1,1,-1.0,34.0,1,72.0,9.0,81.0,1,0,29.0,0
2,0,1,0,2,2,-6.0,35.0,2,65.0,5.0,77.0,1,0,18.0,1
3,0,1,0,3,3,-3.0,42.0,3,82.0,10.0,58.0,0,1,41.0,0
4,0,1,0,4,4,-9.0,38.0,4,66.0,11.0,70.0,0,1,61.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10438,41,19,1,26,19,-4.0,48.5,51,35.0,0.0,43.0,0,1,35.0,0
10439,41,19,1,3,26,-2.5,54.0,3,35.0,6.0,54.0,0,1,78.0,1
10440,41,19,1,14,14,-3.0,48.0,48,50.0,11.0,69.0,0,1,57.0,0
10441,41,20,1,3,19,-7.0,54.5,3,41.0,4.0,41.0,0,1,51.0,0


We will use the same features for classification as we used with Naive Bayes, since they proved to give a solid model.

In [79]:
def determine_spread_favorite_lvl(spread):
    """Bucket a point spread into a "favored level".

    Returns 0 when ``spread`` is below 3 (slightly favored), 1 when it lies
    between 3 and 7 inclusive (moderately favored), and 2 for anything else
    (heavily favored). No sign handling is done here; the notebook passes
    the absolute value of the spread.
    """
    if spread < 3:
        return 0
    if spread <= 7:
        return 1
    return 2
# Derive the favored level from the magnitude of the spread. Working directly
# on the one column (abs + map) yields the same values as a row-wise
# DataFrame.apply(axis=1) without constructing a Series for every row.
spreads_and_scores['favored_level'] = (
    spreads_and_scores['spread_favorite'].abs().map(determine_spread_favorite_lvl)
)
spreads_and_scores

Unnamed: 0,schedule_season,schedule_week,schedule_playoff,team_home,team_away,spread_favorite,over_under_line,stadium,weather_temperature,weather_wind_mph,weather_humidity,team_favorite_away,team_favorite_home,total_score,did_home_team_win,favored_level
0,0,1,0,0,0,-3.0,35.0,0,64.0,8.0,66.0,1,0,24.0,1,1
1,0,1,0,1,1,-1.0,34.0,1,72.0,9.0,81.0,1,0,29.0,0,0
2,0,1,0,2,2,-6.0,35.0,2,65.0,5.0,77.0,1,0,18.0,1,1
3,0,1,0,3,3,-3.0,42.0,3,82.0,10.0,58.0,0,1,41.0,0,1
4,0,1,0,4,4,-9.0,38.0,4,66.0,11.0,70.0,0,1,61.0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10438,41,19,1,26,19,-4.0,48.5,51,35.0,0.0,43.0,0,1,35.0,0,1
10439,41,19,1,3,26,-2.5,54.0,3,35.0,6.0,54.0,0,1,78.0,1,0
10440,41,19,1,14,14,-3.0,48.0,48,50.0,11.0,69.0,0,1,57.0,0,1
10441,41,20,1,3,19,-7.0,54.5,3,41.0,4.0,41.0,0,1,51.0,0,1


In [80]:
# Keep only the two columns that serve as model inputs.
features = spreads_and_scores.loc[:, ['team_favorite_home', 'favored_level']]
features.head()

Unnamed: 0,team_favorite_home,favored_level
0,0,1
1,0,0
2,0,1
3,1,1
4,1,2


In [81]:
# Target column the classifier is trained to predict.
labels = spreads_and_scores.loc[:, 'did_home_team_win']
labels.head()

0    1
1    0
2    1
3    0
4    0
Name: did_home_team_win, dtype: int64

In [82]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable
# across runs.
(train_features, test_features,
 train_labels, test_labels) = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=3,
)

In [83]:
from sklearn.linear_model import Perceptron

# Baseline: a Perceptron with scikit-learn's default hyperparameters.
default_ppn = Perceptron()
default_ppn.fit(train_features, train_labels)
# This score is computed on the same split the model was fitted on, so it is
# reported as training accuracy rather than test accuracy.
train_score = default_ppn.score(train_features, train_labels)
print("Accuracy on the training data is {:0.2f}%".format(train_score*100))

Accuracy on the test data is 65.72%


In [84]:
from sklearn.metrics import accuracy_score
# Score the default perceptron on the held-out split.
test_score = accuracy_score(
    y_true=test_labels,
    y_pred=default_ppn.predict(test_features),
)
print("Accuracy on the test data is {:0.2f}%".format(100 * test_score))

Accuracy on the test data is 66.01%


Now we check to see if we can tune the hyperparameters for the Perceptron. 

In [85]:

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from numpy import mean
from numpy import std

# Fresh, unfitted estimator used for cross-validation.
ppn = Perceptron()
# Seed the repeated stratified splitter so the folds, and therefore the
# reported scores and anything else that reuses `cv`, are identical on every
# run. Without a seed each run shuffles the folds differently.
cv = RepeatedStratifiedKFold(n_repeats=3, random_state=3)
scores = cross_val_score(estimator=ppn, X=train_features, y=train_labels, scoring='accuracy', cv=cv, n_jobs=-1)
# Mean accuracy over all folds/repeats, with the standard deviation in parentheses.
print('Mean Accuracy: %.3f (%.3f)' % (mean(scores), std(scores)))

Mean Accuracy: 0.573 (0.084)


In [86]:
from sklearn.model_selection import GridSearchCV

# Candidate learning rates to evaluate for the perceptron.
grid = {'eta0': [0.0001, 0.001, 0.01, 0.1, 1.0]}
# Exhaustive search over the grid, scored by cross-validated accuracy.
search = GridSearchCV(estimator=ppn, param_grid=grid, scoring='accuracy', cv=cv, n_jobs=-1)
# Evaluate every candidate on the training split.
results = search.fit(train_features, train_labels)
# Report the best cross-validated score and the setting that achieved it.
print('Mean Accuracy: %.3f' % results.best_score_)
print('Config: %s' % results.best_params_)


Mean Accuracy: 0.595
Config: {'eta0': 1.0}


In [87]:
# Build the "tuned" model from the hyperparameters the grid search actually
# selected instead of a hard-coded learning rate, so this model always
# matches the search result printed above.
best_ppn = Perceptron(**results.best_params_)
best_ppn.fit(train_features, train_labels)
# This score is computed on the same split the model was fitted on, so it is
# reported as training accuracy rather than test accuracy.
train_score = best_ppn.score(train_features, train_labels)
print("Accuracy on the training data is {:0.2f}%".format(train_score*100))

Accuracy on the test data is 57.06%


In [88]:
# Score the tuned perceptron on the held-out split.
test_score = accuracy_score(
    y_true=test_labels,
    y_pred=best_ppn.predict(test_features),
)
print("Accuracy on the test data is {:0.2f}%".format(100 * test_score))

Accuracy on the test data is 57.78%


It seems that trying to tune the Perceptron's learning rate (`eta0`) does not improve on the default model.