In [14]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

# Load Data and Remove unused fields
df_2019 = pd.read_csv("./data_v2/yearly/2019.csv")
df_2019 = df_2019.drop(columns=['Unnamed: 0', 'G', 'GS', 'Tgt', 'Yds', 'Yds.1', 'Yds.2', 'Int', 'Att', 'Att.1', 'FumblesLost', 'Cmp', 'Y/R'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingYds,PassingTD,PassingAtt,RushingYds,RushingTD,RushingAtt,ReceivingYds,ReceivingTD,FantasyPoints
0,Christian McCaffrey,CAR,RB,23,116,1,0,0,2,1387,15,287,1005,4,469.2
1,Lamar Jackson,BAL,QB,22,0,9,3127,36,401,1206,7,176,0,0,415.68
2,Derrick Henry,TEN,RB,25,18,5,0,0,0,1540,16,303,206,2,294.6
3,Aaron Jones,GNB,RB,25,49,3,0,0,0,1084,16,236,474,3,314.8
4,Ezekiel Elliott,DAL,RB,24,54,3,0,0,0,1357,12,301,420,2,311.7


In [15]:
# Get total TDs and remove unnecessary fields
df_2019['TDs'] = df_2019['PassingTD'] + df_2019['RushingTD'] + df_2019['ReceivingTD']
df_2019 = df_2019.drop(columns=['PassingTD', 'RushingTD', 'ReceivingTD'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingYds,PassingAtt,RushingYds,RushingAtt,ReceivingYds,FantasyPoints,TDs
0,Christian McCaffrey,CAR,RB,23,116,1,0,2,1387,287,1005,469.2,19
1,Lamar Jackson,BAL,QB,22,0,9,3127,401,1206,176,0,415.68,43
2,Derrick Henry,TEN,RB,25,18,5,0,0,1540,303,206,294.6,18
3,Aaron Jones,GNB,RB,25,49,3,0,0,1084,236,474,314.8,19
4,Ezekiel Elliott,DAL,RB,24,54,3,0,0,1357,301,420,311.7,14


In [16]:
# Get total YDS and remove unnecessary fields
df_2019['YDs'] = df_2019['PassingYds'] + df_2019['RushingYds'] + df_2019['ReceivingYds']
df_2019 = df_2019.drop(columns=['PassingYds', 'RushingYds', 'ReceivingYds'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Rec,Fumbles,PassingAtt,RushingAtt,FantasyPoints,TDs,YDs
0,Christian McCaffrey,CAR,RB,23,116,1,2,287,469.2,19,2392
1,Lamar Jackson,BAL,QB,22,0,9,401,176,415.68,43,4333
2,Derrick Henry,TEN,RB,25,18,5,0,303,294.6,18,1746
3,Aaron Jones,GNB,RB,25,49,3,0,236,314.8,19,1558
4,Ezekiel Elliott,DAL,RB,24,54,3,0,301,311.7,14,1777


In [17]:
# Get total Touches and remove unnecessary fields
df_2019["Touches"] = df_2019['Rec'] + df_2019['PassingAtt'] + df_2019['RushingAtt']
df_2019 = df_2019.drop(columns=['Rec', 'PassingAtt', 'RushingAtt'])
df_2019.head()

Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
0,Christian McCaffrey,CAR,RB,23,1,469.2,19,2392,405
1,Lamar Jackson,BAL,QB,22,9,415.68,43,4333,577
2,Derrick Henry,TEN,RB,25,5,294.6,18,1746,321
3,Aaron Jones,GNB,RB,25,3,314.8,19,1558,285
4,Ezekiel Elliott,DAL,RB,24,3,311.7,14,1777,355


In [18]:
# let's remove any player without at least 50 touches
df_2019.drop(df_2019[df_2019.Touches < 10].index, inplace=True)
df_2019.sort_values(by='Touches', ascending=False)

Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
29,Jameis Winston,TAM,QB,25,12,297.36,34,5359,685
59,Carson Wentz,PHI,QB,27,16,271.86,28,4282,669
78,Jared Goff,LAR,QB,25,10,247.52,24,4678,659
68,Matt Ryan,ATL,QB,34,9,265.34,27,4613,650
11,Dak Prescott,DAL,QB,26,6,335.78,33,5179,648
...,...,...,...,...,...,...,...,...,...
440,Taiwan Jones,HOU,RB,31,0,5.90,0,49,10
329,Kalif Raymond,TEN,WR,25,1,29.50,1,165,10
401,D'Ernest Johnson,CLE,RB,23,0,15.20,0,92,10
387,Deonte Harris,NOR,WR,22,3,9.50,0,55,10


In [19]:
# 30% for test, 70% for training
print (0.2*len(df_2019))
print (0.8*len(df_2019))
df_2019 = df_2019.sample(frac=1).reset_index(drop=True)
df_2019.head()

77.60000000000001
310.40000000000003


Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
0,Cooper Kupp,LAR,WR,26,3,270.5,10,1165,97
1,Kareem Hunt,CLE,RB,24,1,101.4,3,464,80
2,C.J. Ham,MIN,RB,26,1,37.6,1,166,24
3,Trey Quinn,WAS,WR,24,1,49.8,1,198,26
4,Travis Homer,SEA,RB,21,0,28.0,0,170,29


In [20]:
df_train = df_2019[:153]
df_test = df_2019[-38:]

print("Train:")
df_train.head()
print("Test:")
df_test.head()

Train:
Test:


Unnamed: 0,Player,Tm,Pos,Age,Fumbles,FantasyPoints,TDs,YDs,Touches
350,Dallas Goedert,PHI,TE,24,2,144.7,5,607,58
351,O.J. Howard,TAM,TE,25,1,83.9,1,459,34
352,Jameis Winston,TAM,QB,25,12,297.36,34,5359,685
353,DeAndre Hopkins,HOU,WR,27,0,266.54,8,1189,108
354,Jaylen Samuels,PIT,RB,23,1,104.4,2,515,118


In [21]:
# save to csv files
df_train.to_csv('./2019_train.csv')
df_test.to_csv('./2019_test.csv')