In [1]:
import sys
sys.path.insert(1,'../models')
sys.path.insert(2,'../features')
import create_model
import build_features
import pandas as pd
import numpy as np
import math
import statistics
import csv
from sklearn.feature_selection import SelectKBest, f_regression

In [2]:
# Load the processed passing table and preview its first rows.
processed_passing_csv = "../../data/processed/passing.csv"
data = pd.read_csv(processed_passing_csv)
data.head()

Unnamed: 0.1,Unnamed: 0,id,Player,Tm,Age,Pos,G,GS,QBrec,Cmp,...,Y/G,Rate,QBR,Sk,Yds.1,Sk%,NY/A,ANY/A,Cmp/G,Att/G
0,0,AlleBr00,Brandon Allen,DEN,27,QB,3,3,1-2-0,39,...,171.7,68.3,40.1,9,59,9.7,4.9,4.58,13.0,28.0
1,1,AlleBr00,Brandon Allen,CIN,28,QB,5,5,1-4-0,90,...,185.0,82.0,37.5,7,51,4.7,5.87,5.33,18.0,28.4
2,2,AlleBr00,Brandon Allen,,29,QB,6,1,0-1-0,17,...,24.8,81.6,8.4,4,33,10.5,3.05,4.11,2.8,5.7
3,3,AlleJo02,Josh Allen,BUF,22,QB,12,11,5-6-0,169,...,172.8,67.9,49.8,28,213,8.0,5.35,4.37,14.1,26.7
4,4,AlleJo02,Josh Allen,BUF,23,QB,16,16,10-6-0,271,...,193.1,85.3,49.4,38,237,7.6,5.72,5.71,16.9,28.8


In [3]:
# Collect each distinct player id once (several rows can share an id)
players = data["id"].unique()

In [4]:
# Back-test the passing model: for each passer, fit on every season except
# the most recent one, predict that held-out season, and record the
# percentage error of the predicted yards and touchdowns.
offense_type = "passing"

# Predictor columns. This is the subset printed by the SelectKBest cells in the
# "Feature Selection Testing" section, chosen from the candidates
# GS, Cmp, Att, Cmp%, TD%, Y/A, AY/A, Y/C, Y/G, Rate, QBR, NY/A, ANY/A, Cmp/G, Att/G.
FEATURE_COLUMNS = ["GS", "Cmp", "Att", "TD%", "Y/C", "Y/G", "Cmp/G", "Att/G"]
# Columns the model predicts, in the order they appear in each prediction row.
TARGET_COLUMNS = ["Yds", "TD"]


def _percent_error(predicted, actual):
    """Return the absolute error of the rounded prediction as a percentage of ``actual``.

    When ``actual`` floors to zero the percentage is undefined, so 0 is
    returned as a placeholder (the same value this notebook has always stored).
    """
    denominator = math.floor(actual)
    if denominator == 0:
        # NOTE(review): these placeholder zeros are averaged into the mean
        # errors printed below and pull them down -- confirm that is intended.
        return 0
    return (abs(math.floor(round(predicted) - actual)) / denominator) * 100


yards_error = []
td_error = []
non_rookies = []
yards = []
tds = []
for player in players:
    # Get their career data
    career_data = build_features.get_model_data(offense_type, player)
    if isinstance(career_data, str):
        # A string is returned instead of a table for players that cannot be
        # modelled (presumably rookies, given the list name -- TODO confirm).
        continue

    # At least one season is needed to train on and one more to hold out.
    # NOTE(review): assumes create_model cannot fit an empty table -- confirm.
    numberOfYears = len(career_data.index)
    if numberOfYears < 2:
        continue
    non_rookies.append(player)

    # Split data: every season but the last trains, the last season tests.
    # `training_data` keeps its name because later cells read it.
    training_data = career_data.iloc[:-1]
    testing_data = career_data.iloc[[-1]]  # one-row DataFrame, stays 2-D

    # Prepare training data
    X_train = training_data[FEATURE_COLUMNS]
    y_train = training_data[TARGET_COLUMNS]

    # Prepare testing data; selecting from a one-row frame already yields
    # shape (1, n_columns), so no hard-coded reshape is needed.
    X_test = np.asarray(testing_data[FEATURE_COLUMNS])
    y_test = np.asarray(testing_data[TARGET_COLUMNS])

    mdl = create_model.create_model(X_train, y_train)

    # Make prediction on model; columns follow TARGET_COLUMNS (yards, then TDs)
    y_pred = mdl.predict(X_test)
    yards.append(y_pred[0][0])
    tds.append(y_pred[0][1])

    yards_error.append(_percent_error(y_pred[0][0], y_test[0][0]))
    td_error.append(_percent_error(y_pred[0][1], y_test[0][1]))
    

In [5]:
# Average percentage error of the predicted yards across all back-tested players
mean_yards_error = statistics.mean(yards_error)
print(mean_yards_error)

524.5190521475283


In [6]:
# Average percentage error of the predicted touchdowns across all back-tested players
mean_td_error = statistics.mean(td_error)
print(mean_td_error)

335.8265340083663


In [7]:
# Write each player's predicted yards and touchdowns to a csv file for further processing
filename = "../../data/predictions/passing.csv"
# The with-block closes the file, so every row is flushed to disk;
# newline='' lets the csv module control line endings itself.
with open(filename, 'w', newline='') as f:
    writer = csv.writer(f)
    # One row per player: id, predicted yards, predicted touchdowns
    writer.writerows(zip(non_rookies, yards, tds))

In [9]:
# Write each player's percentage errors to a csv file for further processing
filename = "../../data/predictions/error_passing.csv"
# The with-block closes the file, so every row is flushed to disk;
# newline='' lets the csv module control line endings itself.
with open(filename, 'w', newline='') as f:
    writer = csv.writer(f)
    # One row per player: id, yards error (%), touchdown error (%)
    writer.writerows(zip(non_rookies, yards_error, td_error))

### Feature Selection Testing

In [32]:
# Score every candidate passing statistic against yards and keep the best eight.
# NOTE(review): `training_data` is not defined in this cell; in the visible
# notebook it is only assigned inside the back-test loop, so this fit would use
# whichever player's seasons that loop handled last -- confirm this is intended.
candidate_columns = [
    "GS", "Cmp", "Att", "Cmp%", "TD%", "Y/A", "AY/A", "Y/C",
    "Y/G", "Rate", "QBR", "NY/A", "ANY/A", "Cmp/G", "Att/G",
]
X_train = training_data[candidate_columns]
y_train = training_data["Yds"]
selector = SelectKBest(score_func=f_regression, k=8)
selector.fit(X_train, y_train)

SelectKBest(k=8, score_func=<function f_regression at 0x7fd490efc3a0>)

In [33]:
# Boolean mask over the candidate columns: True marks a column the selector kept
support_mask = selector.get_support()
print(support_mask)

[ True  True  True False  True False False  True  True False False False
 False  True  True]


In [34]:
# Names of the candidate columns the selector kept
selected_columns = X_train.columns[selector.get_support()]
print(selected_columns)

Index(['GS', 'Cmp', 'Att', 'TD%', 'Y/C', 'Y/G', 'Cmp/G', 'Att/G'], dtype='object')


In [35]:
# Score every candidate passing statistic against touchdowns and keep the best eight.
# NOTE(review): `training_data` is not defined in this cell; in the visible
# notebook it is only assigned inside the back-test loop, so this fit would use
# whichever player's seasons that loop handled last -- confirm this is intended.
candidate_columns = [
    "GS", "Cmp", "Att", "Cmp%", "TD%", "Y/A", "AY/A", "Y/C",
    "Y/G", "Rate", "QBR", "NY/A", "ANY/A", "Cmp/G", "Att/G",
]
X_train = training_data[candidate_columns]
y_train = training_data["TD"]
selector = SelectKBest(score_func=f_regression, k=8)
selector.fit(X_train, y_train)

SelectKBest(k=8, score_func=<function f_regression at 0x7fd490efc3a0>)

In [36]:
# Boolean mask over the candidate columns: True marks a column the selector kept
support_mask = selector.get_support()
print(support_mask)

[ True  True  True False  True False False  True  True False False False
 False  True  True]


In [37]:
# Names of the candidate columns the selector kept
selected_columns = X_train.columns[selector.get_support()]
print(selected_columns)

Index(['GS', 'Cmp', 'Att', 'TD%', 'Y/C', 'Y/G', 'Cmp/G', 'Att/G'], dtype='object')
