In [92]:
##Import needed libraries
from math import exp
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [68]:
##Import CSV
df = pd.read_csv ('ffdata.csv')
#Lets get rid of some of the stuff we dont need
#df.drop(['PosRank'])
#First we need to seperate all our data by year, then by position
#We have 10 years of data from 2008 to 2018
#Lets make some training and testing data, lets figure, I like 80% train 20% test for this
yr_train = list(range(2008, 2017))
yr_test = list(range(2017,2019))
x_train = df.loc[df['Year'].isin(yr_train)]
x_test = df.loc[df['Year'].isin(yr_test)]

rb_tr = x_train[x_train['FantPos'] == 'RB']
qb_tr = x_train[x_train['FantPos'] == 'QB']
wr_tr = x_train[x_train['FantPos'] == 'WR']
te_tr = x_train[x_train['FantPos'] == 'TE']

#While we do have to take into account running QB's and high receiving RB's we can generally split the categories
rush_col = ['RushAtt', 'RushYds', 'RushY/A', 'RushTD']
rec_col = ['RecTgt', 'Receptions', 'RecYds', 'RecY/R', 'RecTD']
pass_col = ['PassAtt', 'PassCmp', 'PassYds', 'PassTD', 'PassInt']

rbs = rb_tr.drop(pass_col, axis=1)
wrs = wr_tr.drop(pass_col, axis=1)
tes = te_tr.drop(rush_col+pass_col, axis=1)
qbs = qb_tr.drop(rec_col, axis=1)

rbs.head()

Unnamed: 0,Year,Name,Tm,FantPos,Age,G,GS,RushAtt,RushYds,RushY/A,...,RecTgt,Receptions,RecYds,RecY/R,RecTD,FantPt,PPR,PPG,PPRPG,PosRank
3,2008,DeAngelo Williams,CAR,RB,25.0,16.0,16.0,273.0,1515.0,5.55,...,30.0,22.0,121.0,5.5,2.0,286.0,308.0,17.875,19.25,1.0
6,2008,Michael Turner,ATL,RB,26.0,16.0,16.0,376.0,1699.0,4.52,...,9.0,6.0,41.0,6.83,0.0,276.0,282.0,17.25,17.625,2.0
10,2008,Adrian Peterson,MIN,RB,23.0,16.0,15.0,363.0,1760.0,4.85,...,39.0,21.0,125.0,5.95,0.0,249.0,270.0,15.5625,16.875,3.0
11,2008,Matt Forte,CHI,RB,23.0,16.0,16.0,316.0,1238.0,3.92,...,76.0,63.0,477.0,7.57,4.0,244.0,307.0,15.25,19.1875,4.0
12,2008,Thomas Jones,NYJ,RB,30.0,16.0,14.0,290.0,1312.0,4.52,...,42.0,36.0,207.0,5.75,2.0,242.0,278.0,15.125,17.375,5.0


In [69]:
#so now how do we set up our x_train and y_train, we want to ideally project the next seasons rankings, however players change season to season
#and we don't have the data for retirees, rookies, etc... so instead we can give our program a little "cheat" to account for this by giving it the 
#games started of the year being predicted. 
RBxs = []
WRxs = []
TExs = []
QBxs = []
RBys = []
WRys = []
TEys = []
QBys = []
#These are all of our training x's and y's seperated by position. Each one is a list of a seperate years so we have basically 8 seperate datasets to
#help train on.
count = 0
for pos in [rbs, wrs, tes, qbs]:
    tempx = []
    tempy = []
    #Create our temporary arrays to be replaced by our position arrays defined above
    for year in range(2009,2017):
        #get the data from this year, at the correct position
        players = pos[pos['Year'] == year]

        #get the data from the previous year that will be used as our x
        prev = pos[pos['Year'] == year-1]

        #Only take the players on which we have data from previous years
        players = players.loc[players['Name'].isin(prev['Name'])]

        #Make sure we are only using data that will inform our prediction, not for someone who does not stay around
        prev = prev.loc[prev['Name'].isin(players['Name'])]
        
        #Sort the values by name so we can add to the array
        prev = prev.sort_values(by=['Name'])
        players = players.sort_values(by=['Name'])
        prev = prev.drop(['FantPt'], axis=1)
        
        #make sure we add the year to our list
        tempx.append(prev)
        tempy.append(players['FantPt'])
        

        
    #assign the temp to the correct variable
    if count == 0: 
        RBxs = tempx
        RBys = tempy
    if count == 1: 
        WRxs = tempx
        WRys = tempy
    if count == 2: 
        TExs = tempx
        TEys = tempy
    if count == 0: 
        QBxs = tempx
        QBys = tempy
    count+=1

print(RBxs[0].head())
print(RBys[0].head())


     Year             Name   Tm FantPos   Age     G    GS  RushAtt  RushYds  \
322  2008    Aaron Stecker  NOR      RB  33.0   6.0   0.0      8.0     43.0   
327  2008  Adrian Peterson  CHI      RB  29.0  15.0   0.0     20.0    100.0   
10   2008  Adrian Peterson  MIN      RB  23.0  16.0  15.0    363.0   1760.0   
218  2008   Ahmad Bradshaw  NYG      RB  22.0  15.0   0.0     67.0    355.0   
165  2008      Ahman Green  HOU      RB  31.0   8.0   1.0     74.0    294.0   

     RushY/A  RushTD  RecTgt  Receptions  RecYds  RecY/R  RecTD    PPR  \
322     5.38     0.0    12.0         9.0    52.0    5.78    1.0   25.0   
327     5.00     0.0    10.0         6.0    45.0    7.50    0.0   21.0   
10      4.85    10.0    39.0        21.0   125.0    5.95    0.0  270.0   
218     5.30     1.0     6.0         5.0    42.0    8.40    1.0   57.0   
165     3.97     3.0    14.0        11.0    32.0    2.91    0.0   62.0   

             PPG        PPRPG  PosRank  
322  2.666666667  4.166666667    101.0 

In [100]:
#Turn lists into dataframes
dfRBX1 = pd.DataFrame(RBxs[0])
valuesToDrop = ['Name','FantPos', 'Tm']
dfRBX1F = dfRBX1.drop(valuesToDrop, axis=1)
dfRBX1F = dfRBX1F.fillna(0)
dfRBY1 = pd.DataFrame(RBys[0])
dfRBY1F = dfRBY1.fillna(0)


dfRBX2 = pd.DataFrame(RBxs[1])
dfRBX2F = dfRBX2.drop(valuesToDrop, axis=1)
dfRBX2F = dfRBX2F.fillna(0)
dfRBY2 = pd.DataFrame(RBys[1])
dfRBY2F = dfRBY2.fillna(0)


#Train the model 
reg = LinearRegression().fit(dfRBX1F, dfRBY1F)
reg.score(dfRBX1F, dfRBY1F)

#Test the model
ninePred = reg.predict(dfRBX2F)
#Return accuracy
#r2_score(dfRBY2F, ninePred)

print(len(dfRBY2F))

# dfRBX3 = pd.DataFrame(RBxs[2])
# dfRBX3F = dfRB31.drop(valuesToDrop, axis=1)
# dfRBX3F = dfRBX3F.fillna(0)
# dfRBY3 = pd.DataFrame(RBys[2])
# dfRBY3F = dfRBY1.fillna(0)


# #Train the model 
# reg = LinearRegression().fit(dfRBX1F, dfRBY1F)
# reg.score(dfRBX1F, dfRBY1F)

# #Test the model
# ninePred = reg.predict(dfRBX2F)
print(len(ninePred))
# #Return accuracy
# r2_score(dfRBY2F, ninePred)



112
113


In [None]:
array([[157.75487489],
       [ 19.76999897],
       [130.64254637],
       [  9.81507643],
       [ 22.8995733 ],
       [106.39126728],
       [137.56826518],
       [ 45.22653722],
       [ 74.25915698],
       [ 73.07192594],
       [ 97.68626904],
       [ 17.95367185],
       [ 65.68565628],
       [ 53.39386738],
       [109.15315732],
       [150.06072681],
       [104.58507421],
       [198.87428325],
       [ 20.20103099],
       [ 89.15313664],
       [ 12.94364394],
       [ 76.12908506],
       [ 34.79685533],
       [ 87.30165653],
       [123.67547126],
       [122.08061742],
       [ 98.83150426],
       [ 98.50320753],
       [ 14.93011028],
       [ 35.79953384],
       [114.14214697],
       [161.92521728],
       [ 76.6941152 ],
       [ 40.33552704],
       [ 31.56126822],
       [ -1.86496106],
       [ 18.89469872],
       [  8.79524436],
       [ 16.33611273],
       [134.65672557],
       [125.17942222],
       [ 47.00034053],
       [ 24.94055109],
       [ 75.9620408 ],
       [ 31.24904197],
       [144.39294718],
       [ 25.45600222],
       [153.28196412],
       [131.20009837],
       [100.34742877],
       [124.16296484],
       [ 52.60354386],
       [ 47.93874027],
       [140.60745091],
       [155.73555991],
       [ 22.62181089],
       [ 88.40989584],
       [ 54.90204714],
       [ 85.45126974],
       [116.86652705],
       [ 16.3758266 ],
       [ 23.47280798],
       [132.8909211 ],
       [ 86.56997087],
       [ 54.65600126],
       [ 74.94368668],
       [ 20.6211467 ],
       [ 10.59307831],
       [ 99.06682038],
       [ 99.04545696],
       [140.94971535],
       [174.56863775],
       [ 68.694199  ],
       [ 66.55048975],
       [ 29.37474149],
       [ 78.18360434],
       [ 43.90467755],
       [104.9478429 ],
       [ 98.30888165],
       [ 19.20393552],
       [ 52.17181309],
       [ 59.42388112],
       [  1.01221068],
       [ 12.96106067],
       [ 69.49433496],
       [  3.38896548],
       [ 38.57646255],
       [ 12.90574268],
       [ 49.63862468],
       [ 23.74268193],
       [ 38.9377296 ],
       [132.35095044],
       [ 36.0589733 ],
       [ 71.248119  ],
       [ 69.05874902],
       [136.12658585],
       [183.44462294],
       [135.5216142 ],
       [ 71.19105535],
       [105.95263764],
       [116.8279114 ],
       [ 45.41007261],
       [ 72.6180043 ],
       [139.29331608],
       [180.5530358 ],
       [ 70.13192557],
       [107.50514349],
       [ 52.43278496],
       [105.09474082],
       [-41.75165607],
       [ 42.53003231],
       [ 41.49116497],
       [ 86.14742181]]) 