In [51]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load Data and Remove unused fields
df_2019 = pd.read_csv("./data_v2/yearly/2019.csv")
df_2019 = df_2019.drop(columns=['Unnamed: 0', 'G', 'GS', 'Tgt', 'Yds', 'Yds.1', 'Yds.2', 'Int', 'Att', 'Att.1', 'FumblesLost', 'Cmp', 'Y/R'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingYds,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints
0,Christian McCaffrey,CAR,RB,23,116,1,0,0,2,1387,15,287,1005,4,469.2
1,Lamar Jackson,BAL,QB,22,0,9,3127,36,401,1206,7,176,0,0,415.68
2,Derrick Henry,TEN,RB,25,18,5,0,0,0,1540,16,303,206,2,294.6
3,Aaron Jones,GNB,RB,25,49,3,0,0,0,1084,16,236,474,3,314.8
4,Ezekiel Elliott,DAL,RB,24,54,3,0,0,0,1357,12,301,420,2,311.7


In [52]:
# Get total TDs and remove unnecessary fields
df_2019['TDs'] = df_2019['PassingTD'] + df_2019['RushingTD'] + df_2019['ReceivingTD']
df_2019 = df_2019.drop(columns=['PassingTD', 'RushingTD', 'ReceivingTD'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingYds,PassingAtt,RushingYds,RushingAtt,ReceivingYds,FantasyPoints,TDs
0,Christian McCaffrey,CAR,RB,23,116,1,0,2,1387,287,1005,469.2,19
1,Lamar Jackson,BAL,QB,22,0,9,3127,401,1206,176,0,415.68,43
2,Derrick Henry,TEN,RB,25,18,5,0,0,1540,303,206,294.6,18
3,Aaron Jones,GNB,RB,25,49,3,0,0,1084,236,474,314.8,19
4,Ezekiel Elliott,DAL,RB,24,54,3,0,0,1357,301,420,311.7,14


In [53]:
# Get total YDS and remove unnecessary fields
df_2019['YDs'] = df_2019['PassingYds'] + df_2019['RushingYds'] + df_2019['ReceivingYds']
df_2019 = df_2019.drop(columns=['PassingYds', 'RushingYds', 'ReceivingYds'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingAtt,RushingAtt,FantasyPoints,TDs,YDs
0,Christian McCaffrey,CAR,RB,23,116,1,2,287,469.2,19,2392
1,Lamar Jackson,BAL,QB,22,0,9,401,176,415.68,43,4333
2,Derrick Henry,TEN,RB,25,18,5,0,303,294.6,18,1746
3,Aaron Jones,GNB,RB,25,49,3,0,236,314.8,19,1558
4,Ezekiel Elliott,DAL,RB,24,54,3,0,301,311.7,14,1777


In [54]:
# Get total Touches and remove unnecessary fields
df_2019["Touches"] = df_2019['Rec'] + df_2019['PassingAtt'] + df_2019['RushingAtt']
df_2019 = df_2019.drop(columns=['Rec', 'PassingAtt', 'RushingAtt'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
0,Christian McCaffrey,CAR,RB,23,1,469.2,19,2392,405
1,Lamar Jackson,BAL,QB,22,9,415.68,43,4333,577
2,Derrick Henry,TEN,RB,25,5,294.6,18,1746,321
3,Aaron Jones,GNB,RB,25,3,314.8,19,1558,285
4,Ezekiel Elliott,DAL,RB,24,3,311.7,14,1777,355


In [55]:
# 30% for test, 70% for training
print (0.2*len(df_2019))
print (0.8*len(df_2019))
df_2019.head()

124.0
496.0


Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
0,Christian McCaffrey,CAR,RB,23,1,469.2,19,2392,405
1,Lamar Jackson,BAL,QB,22,9,415.68,43,4333,577
2,Derrick Henry,TEN,RB,25,5,294.6,18,1746,321
3,Aaron Jones,GNB,RB,25,3,314.8,19,1558,285
4,Ezekiel Elliott,DAL,RB,24,3,311.7,14,1777,355


In [61]:
df_train = df_2019[:496]
df_test = df_2019[-124:]

print("Train:")
df_train.head()
print("Test:")
df_test.head()

Train:
Test:


Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
496,Bobo Wilson,TAM,WR,24,3,4.5,0,35,3
497,Zach Zenner,3TM,RB,28,0,3.7,0,17,5
498,Josh Adams,NYJ,RB,23,0,1.2,0,12,8
499,Antony Auclair,TAM,TE,26,0,2.1,0,11,1
500,Tony Brooks-James,PIT,RB,25,0,0.7,0,7,8


In [62]:
# save to csv files
df_train.to_csv('./2019_train.csv')
df_test.to_csv('./2019_test.csv')