In [1]:
##Import needed libraries
from math import exp
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
##Import CSV
df = pd.read_csv ('footballdata.csv')
#Lets get rid of some of the stuff we dont need
#df.drop(['PosRank'])
#First we need to seperate all our data by year, then by position
#We have 10 years of data from 2008 to 2018
#Lets make some training and testing data, lets figure, I like 80% train 20% test for this
yr_train = list(range(2008, 2017))
yr_test = list(range(2017,2019))
x_train = df.loc[df['Year'].isin(yr_train)]
x_test = df.loc[df['Year'].isin(yr_test)]

rb_tr = x_train[x_train['FantPos'] == 'RB']
qb_tr = x_train[x_train['FantPos'] == 'QB']
wr_tr = x_train[x_train['FantPos'] == 'WR']
te_tr = x_train[x_train['FantPos'] == 'TE']

#While we do have to take into account running QB's and high receiving RB's we can generally split the categories
rush_col = ['RushAtt', 'RushYds', 'RushY/A', 'RushTD']
rec_col = ['RecTgt', 'Receptions', 'RecYds', 'RecY/R', 'RecTD']
pass_col = ['PassAtt', 'PassCmp', 'PassYds', 'PassTD', 'PassInt']

rbs = rb_tr.drop(pass_col, axis=1)
wrs = wr_tr.drop(pass_col, axis=1)
tes = te_tr.drop(rush_col+pass_col, axis=1)
qbs = qb_tr.drop(rec_col, axis=1)

rbs.head()

Unnamed: 0,Year,Name,Tm,FantPos,Age,Games,GamesStarted,RushAtt,RushYds,RushY/A,...,RecTgt,Receptions,RecYds,RecY/R,RecTD,FantPt,PPR,PPG,PPRPG,PosRank
3,2008,DeAngelo Williams,CAR,RB,25.0,16.0,16.0,273.0,1515.0,5.55,...,30.0,22.0,121.0,5.5,2.0,286.0,308.0,17.875,19.25,1.0
6,2008,Michael Turner,ATL,RB,26.0,16.0,16.0,376.0,1699.0,4.52,...,9.0,6.0,41.0,6.83,0.0,276.0,282.0,17.25,17.625,2.0
10,2008,Adrian Peterson,MIN,RB,23.0,16.0,15.0,363.0,1760.0,4.85,...,39.0,21.0,125.0,5.95,0.0,249.0,270.0,15.5625,16.875,3.0
11,2008,Matt Forte,CHI,RB,23.0,16.0,16.0,316.0,1238.0,3.92,...,76.0,63.0,477.0,7.57,4.0,244.0,307.0,15.25,19.1875,4.0
12,2008,Thomas Jones,NYJ,RB,30.0,16.0,14.0,290.0,1312.0,4.52,...,42.0,36.0,207.0,5.75,2.0,242.0,278.0,15.125,17.375,5.0


In [38]:
#so now how do we set up our x_train and y_train, we want to ideally project the next seasons rankings, however players change season to season
#and we don't have the data for retirees, rookies, etc... so instead we can give our program a little "cheat" to account for this by giving it the 
#games started of the year being predicted. 
RBxs = []
WRxs = []
TExs = []
QBxs = []
RBys = []
WRys = []
TEys = []
QBys = []
#These are all of our training x's and y's seperated by position. Each one is a list of a seperate year 
count = 0
for pos in [rbs, wrs, tes, qbs]:
    tempx = []
    tempy = []
    for year in range(2009,2017):
        players = pos[pos['Year'] == year]
        #print("Players is: ", players.head())
        prev = pos[pos['Year'] == year-1]
        #print("Prev is: ", prev.head())
        players = players.loc[players['Name'].isin(prev['Name'])]
        #print("Now players is: ", players.head())
        prev = prev.loc[prev['Name'].isin(players['Name'])]
        prev['Year'].values[:] = year
        prev.sort_values(by=['Name'])
        players.sort_values(by=['Name'])
        prev = prev.drop(['PosRank'], axis=1)
        tempx.append(prev)
        tempy.append(players['Name'])
    if count == 0: 
        RBxs = tempx
        RBys = tempy
    if count == 1: 
        WRxs = tempx
        WRys = tempy
    if count == 2: 
        TExs = tempx
        TEys = tempy
    if count == 0: 
        QBxs = tempx
        QBys = tempy

print(RBxs[0].head())
print(RBys[0].head())

   Year           Name   Tm FantPos   Age  Games  GamesStarted  PassCmp  \
0  2009     Drew Brees  NOR      QB  29.0   16.0          16.0    413.0   
1  2009  Aaron Rodgers  GNB      QB  25.0   16.0          16.0    341.0   
2  2009  Philip Rivers  SDG      QB  27.0   16.0          16.0    312.0   
4  2009     Jay Cutler  DEN      QB  25.0   16.0          16.0    384.0   
5  2009    Kurt Warner  ARI      QB  37.0   16.0          16.0    401.0   

   PassAtt  PassYds  PassTD  PassInt  RushAtt  RushYds  RushY/A  RushTD  \
0    635.0   5069.0    34.0     17.0     22.0     -1.0    -0.05     0.0   
1    536.0   4038.0    28.0     13.0     56.0    207.0     3.70     4.0   
2    478.0   4009.0    34.0     11.0     31.0     84.0     2.71     0.0   
4    616.0   4526.0    25.0     18.0     57.0    200.0     3.51     2.0   
5    598.0   4583.0    30.0     14.0     18.0     -2.0    -0.11     0.0   

   FantPt    PPR      PPG    PPRPG  
0   305.0  305.0  19.0625  19.0625  
1   296.0  296.0     18.