In [201]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn import metrics
import mord

dataFrame = pd.read_csv("NFLDraftDataCleaned.csv")

In [202]:
#Create new dataframes to hold independent and dependent variables
X = dataFrame[['position', 'age', 'height', 'weight', 'collegeAttended', 'rushingAttempts', 'rushingYards',
                           'avgYardsPerRush', 'rushingTouchdowns', 'receptions', 'receivingYards', 'avgYardsPerReception',
                           'receivingTouchdowns', 'fortyYardDash', 'verticalJump', 'benchPress', 'broadJump', 'threeCone',
                           'shuttle']]
# X = dataFrame[['age', 'height', 'weight', 'rushingAttempts', 'rushingYards',
#                            'avgYardsPerRush', 'rushingTouchdowns', 'receptions', 'receivingYards', 'avgYardsPerReception',
#                            'receivingTouchdowns', 'fortyYardDash', 'verticalJump', 'benchPress', 'broadJump', 'threeCone',
#                            'shuttle']]
y = dataFrame[['draftRound']]

In [203]:
#Some of the dependent variables (position and collegeAttended) are stored as categorical data
#This needs to be converted to numerical data using OneHotEncoder
onehot = pd.get_dummies(X['position'])
X = X.drop('position', axis = 1)
X = X.join(onehot)

onehot = pd.get_dummies(X['collegeAttended'])
X = X.drop('collegeAttended', axis = 1)
X = X.join(onehot)

In [204]:
#Split the dataset into some training data and testing data
#2016 draft data starts at row 794 out of 936
X_Train, X_Test, y_Train, y_Test = train_test_split(X, y, test_size = 0.15, shuffle = False)

In [205]:
#Create a linear regression model and fit it to the train data
linearModel = LinearRegression().fit(X_Train, y_Train)

#Predict the draft round for the test set
unformattedPredictions = linearModel.predict(X_Test)

linearPredictions = []

#There are some errors with our predictions, they are giving wildly higher scores
for prediction in unformattedPredictions:
    if(prediction < 1.5):
        linearPredictions.append(1)
    if(prediction >= 1.5 and prediction < 2.5):
        linearPredictions.append(2)
    if(prediction >= 2.5 and prediction < 3.5):
        linearPredictions.append(3)
    if(prediction >= 3.5 and prediction < 4.5):
        linearPredictions.append(4)
    if(prediction >= 4.5 and prediction < 5.5):
        linearPredictions.append(5)
    if(prediction >= 5.5 and prediction < 6.5):
        linearPredictions.append(6)
    if(prediction >= 6.5):
        linearPredictions.append(7)   
        
print("LINEAR REGRESSION SCORES:")
explainedVarianceScore = metrics.explained_variance_score(y_Test, linearPredictions)
print("Explained variance score = ", explainedVarianceScore)

meanAbsoluteError = metrics.mean_absolute_error(y_Test, linearPredictions)
print("Mean absolute error = ", meanAbsoluteError)

rSquared = metrics.r2_score(y_Test, linearPredictions)
print("R Squared = ", rSquared)

f1Score = metrics.f1_score(y_Test, linearPredictions, average="micro")
print("F1 score = ", f1Score)

print()
print('Confusion Matrix:')
print(metrics.confusion_matrix(y_Test, linearPredictions))

LINEAR REGRESSION SCORES:

Explained variance score =  0.020009293140500484
Mean absolute error =  1.49645390070922
R Squared =  -0.12811465412092704
F1 score =  0.23404255319148937

Confusion Matrix:
[[ 4  7  3  1  0  1  0]
 [ 5  4  4  4  0  0  0]
 [ 2  2  6  7  3  0  0]
 [ 1  5  7 12  4  0  1]
 [ 0  3  7  8  5  2  0]
 [ 2  0  4  3  3  1  0]
 [ 0  3  5  6  5  0  1]]


In [206]:
#Create an ordinal logistic regression model and fit it to the training data
ordinalModel = mord.OrdinalRidge()
ordinalModel.fit(X_Train, y_Train)

#Create some predictions based on the ordinal logistic regression model
ordinalPredictions = ordinalModel.predict(X_Test)

print("ORDINAL LOGISTIC REGRESSION SCORES:")
explainedVarianceScore = metrics.explained_variance_score(y_Test, ordinalPredictions)
print("Explained variance score = ", explainedVarianceScore)

meanAbsoluteError = metrics.mean_absolute_error(y_Test, ordinalPredictions)
print("Mean absolute error = ", meanAbsoluteError)

rSquared = metrics.r2_score(y_Test, ordinalPredictions)
print("RSquared = ", rSquared)

f1Score = metrics.f1_score(y_Test, ordinalPredictions, average="micro")
print("F1 score = ", f1Score)

print()
print('Confusion Matrix:')
print(metrics.confusion_matrix(y_Test, ordinalPredictions))

ORDINAL LOGISTIC REGRESSION SCORES:

Explained variance score =  0.06299006795608975
Mean absolute error =  1.4539007092198581
RSquared =  -0.06259801359121808
F1 score =  0.24113475177304963

Confusion Matrix:
[[ 0  0  0  0  0  0  0  0]
 [ 1  1  6  6  1  0  1  0]
 [ 1  2  4  7  3  0  0  0]
 [ 0  2  2  6  8  2  0  0]
 [ 0  0  4  7 15  3  1  0]
 [ 0  0  3  7  9  6  0  0]
 [ 0  1  1  2  6  2  1  0]
 [ 0  0  3  5  5  6  0  1]]


In [207]:
#Create an multinominal logistic regression model and fit it to the training data
logisticModel = LogisticRegression(solver = "newton-cg", multi_class = "multinomial", max_iter = 4000)
logisticModel.fit(X_Train, y_Train)

#Create some predictions based on the ordinal logistic regression model
logisticPredictions = logisticModel.predict(X_Test)

print("MULTINOMIAL LOGISTIC REGRESSION SCORES:")
explainedVarianceScore = metrics.explained_variance_score(y_Test, logisticPredictions)
print("Explained variance score = ", explainedVarianceScore)

meanAbsoluteError = metrics.mean_absolute_error(y_Test, logisticPredictions)
print("Mean absolute error = ", meanAbsoluteError)

rSquared = metrics.r2_score(y_Test, logisticPredictions)
print("RSquared = ", rSquared)

f1Score = metrics.f1_score(y_Test, logisticPredictions, average="micro")
print("F1 score = ", f1Score)

print()
print('Confusion Matrix:')
print(metrics.confusion_matrix(y_Test, logisticPredictions))

  y = column_or_1d(y, warn=True)


MULTINOMIAL LOGISTIC REGRESSION SCORES:

Explained variance score =  -0.26938491026311207
Mean absolute error =  1.8085106382978724
RSquared =  -0.5580676075971425
F1 score =  0.16312056737588654

Confusion Matrix:
[[10  2  2  0  1  0  1]
 [ 8  3  5  1  0  0  0]
 [ 7  4  3  3  2  1  0]
 [ 4  5  9  3  3  5  1]
 [ 6  1  6  7  1  1  3]
 [ 2  1  2  2  3  2  1]
 [ 1  6  4  3  2  3  1]]
