## Feature Engineering for Modeling 

### Steps to feature engineer: 
    1. create candidate features 
        * 4-week prior average: points, yds, touches, touchdowns, fumbles, qbr
        * opp win record, opp avg def pts allowed 
        * isStarter, isHurt 
    2. create incremental refresh schedule
    3. write final data set to Google Drive

In [1]:
#importing various libraries 
import gspread 
#Service client credential from oauth2client
from oauth2client.service_account import ServiceAccountCredentials
# Print nicely
import pprint
# Scopes requested for the service account: the Sheets feeds endpoint and Drive
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
# Build service-account credentials for those scopes from the key file
# ../quickstart/g_sheet_creds.json (path is relative to the notebook's working directory)
creds = ServiceAccountCredentials.from_json_keyfile_name('../quickstart/g_sheet_creds.json',scope)
# Authorize a gspread client with those credentials; read_file() uses this module-level `client`
client = gspread.authorize(creds)
# NOTE(review): hard-coded personal email that is not referenced in the visible cells —
# consider moving it to configuration / an environment variable before sharing the notebook
my_email = 'matthewjchristy66@gmail.com'

In [2]:
#data manipulation 
import pandas as pd 
import numpy as np

In [3]:
def read_file(sheet_name):
    """Load the first worksheet of a Google Sheet into a DataFrame.

    Parameters
    ----------
    sheet_name : str
        Title of the spreadsheet, opened through the module-level gspread
        ``client``.

    Returns
    -------
    pandas.DataFrame
        The first sheet row supplies the column names and every following row
        becomes one DataFrame row. Cell values are kept exactly as returned by
        ``get_all_values()``; no dtype conversion happens here. An empty
        DataFrame is returned when the worksheet has no rows at all.
    """
    rows = client.open(sheet_name).sheet1.get_all_values()
    if not rows:
        # No header row to pop; previously this case raised IndexError.
        return pd.DataFrame()
    # Split header from data explicitly instead of relying on pop() mutating
    # the list while that same list is being passed to the constructor.
    header, *records = rows
    return pd.DataFrame(records, columns=header)

In [4]:
#grabbing data for modeling: one processed step-1 sheet per stat type
passing, rush, rec = (
    read_file(sheet_name=title)
    for title in (
        'passing_processed_step1',
        'rushing_processed_step1',
        'receiving_processed_step1',
    )
)

In [5]:
# Preview the first rows of the passing frame.
# NOTE(review): the 'Unnamed: 0' column in the output looks like a saved index
# from the upstream step — TODO confirm and drop it if so.
passing.head()

Unnamed: 0,Name,Week,Team,Opp,Score,Comp,Att,Yds,TD,Int,Sck,FUM,Rate,300yd_flag,passing_fantasy_pts
0,Andy Dalton,1,CIN,@ SEA,L 20-21,35,51,418,2,0,5,2,106.5,1,24.72
1,Dak Prescott,1,DAL,vs NYG,W 35-17,25,32,405,4,0,0,0,158.3,1,32.2
2,Matthew Stafford,1,DET,@ ARI,T 27-27,27,45,385,3,0,3,1,110.0,1,27.4
3,Case Keenum,1,WAS,@ PHI,L 27-32,30,44,380,3,0,1,0,117.6,1,27.2
4,Patrick Mahomes,1,KC,@ JAX,W 40-26,25,33,378,3,0,0,0,143.2,1,27.12


In [6]:
# Preview the first rows of the rushing frame
rush.head()

Unnamed: 0,Name,Week,Team,Opp,Score,Att,Yds,Avg,TD,FUM,100yd_flag,rush_fantasy_pts
0,Marlon Mack,1,IND,@ LAC,L 24-30,25,174,7.0,1,0,1,23.4
1,Christian McCaffrey,1,CAR,vs LA,L 27-30,19,128,6.7,2,0,1,24.8
2,Saquon Barkley,1,NYG,@ DAL,L 17-35,11,120,10.9,0,0,1,12.0
3,Dalvin Cook,1,MIN,vs ATL,W 28-12,21,111,5.3,2,0,1,23.1
4,Mark Ingram,1,BAL,@ MIA,W 59-10,14,107,7.6,2,0,1,22.7


In [7]:
# Preview the first rows of the receiving frame
rec.head()

Unnamed: 0,Name,Week,Team,Opp,Score,Rec,Yds,Avg,TD,FUM,100yd_flag,rec_fantasy_pts
0,Sammy Watkins,1,KC,@ JAX,W 40-26,9,198,22.0,3,0,1,37.8
1,Michael Gallup,1,DAL,vs NYG,W 35-17,7,158,22.6,0,0,1,15.8
2,John Ross,1,CIN,@ SEA,L 20-21,7,158,22.6,2,0,1,27.8
3,DeSean Jackson,1,PHI,vs WAS,W 32-27,8,154,19.2,2,0,1,27.4
4,Marquise Brown,1,BAL,@ MIA,W 59-10,4,147,36.8,2,0,1,26.7


In [8]:
#creating a moving avg function
def move_avg(df, smooth_col, group_vals, window, min_periods=1, shift=0):
    """Rolling mean of ``smooth_col`` computed separately within each group.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. Rows are consumed in their existing order inside each
        group, so sort by the time column first if the average should follow
        time.
    smooth_col : str or list of str
        Column(s) to average.
    group_vals : str or list of str
        Column(s) handed to ``DataFrame.groupby``.
    window : int
        Number of rows in the rolling window.
    min_periods : int, default 1
        Minimum number of non-null observations required to emit a value.
        The default of 1 keeps the partial windows at the start of a group.
    shift : int, default 0
        Rows to lag each group by before averaging. With 0 the window ends on
        the current row and therefore includes it. With 1 only earlier rows
        contribute (the first row of every group becomes NaN), which is the
        form to use when the current row's value is the prediction target.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        Rolling means aligned to ``df``'s index; a Series for a single column,
        a DataFrame (same column names as ``smooth_col``) for a list.
    """
    def _smooth(values):
        # Only lag when asked, so the default path matches the old behaviour.
        if shift:
            values = values.shift(shift)
        return values.rolling(window, min_periods=min_periods).mean()

    return df.groupby(group_vals)[smooth_col].transform(_smooth)

In [9]:
#light cleaning: cast the stat columns to float so they can be averaged
pf = ['Comp', 'Att', 'Yds', 'TD', 'Int', 'Sck', 'FUM', 'Rate', 'passing_fantasy_pts']
rrf = ['Yds', 'TD', 'FUM']
passing[pf] = passing[pf].astype(float)
# rush and rec share the rrf columns and each add their own fantasy-points column
rush[rrf + ['rush_fantasy_pts']] = rush[rrf + ['rush_fantasy_pts']].astype(float)
rec[rrf + ['rec_fantasy_pts']] = rec[rrf + ['rec_fantasy_pts']].astype(float)

### Passing Feature Eng

In [15]:
# Passing stats to smooth, and the output column names for the 4- and 2-row rolling means
cols = ['Yds', 'TD', 'Int', 'Rate', 'passing_fantasy_pts']
lag4 = [f'lag4_{stat}' for stat in cols]
lag2 = [f'lag2_{stat}' for stat in cols]

In [16]:
# Per-player rolling means of the passing stats over 4-row and 2-row windows.
# NOTE(review): the window ends on the current row, so each "lag" value includes
# that week's own stat (week-1 lag4 equals the week-1 value in the output below).
passing[lag4], passing[lag2] = (
    move_avg(df=passing, smooth_col=cols, group_vals='Name', window=n_rows)
    for n_rows in (4, 2)
)

In [29]:
# Ratio of the 4-row rolling mean to the 2-row rolling mean for each stat
passing['yds_ratio'] = passing['lag4_Yds'] / passing['lag2_Yds']
passing['rate_ratio'] = passing['lag4_Rate'] / passing['lag2_Rate']
passing['fantasy_pts_ratio'] = (
    passing['lag4_passing_fantasy_pts'] / passing['lag2_passing_fantasy_pts']
)

In [31]:
# Spot-check the engineered columns for a single player
passing[passing['Name'] == 'Lamar Jackson']

Unnamed: 0,Name,Week,Team,Opp,Score,Comp,Att,Yds,TD,Int,...,lag4_Rate,lag4_passing_fantasy_pts,lag2_Yds,lag2_TD,lag2_Int,lag2_Rate,lag2_passing_fantasy_pts,yds_ratio,rate_ratio,fantasy_pts_ratio
8,Lamar Jackson,1,BAL,@ MIA,W 59-10,17.0,20.0,324.0,5.0,0.0,...,158.3,32.96,324.0,5.0,0.0,158.3,32.96,1.0,1.0,1.0
49,Lamar Jackson,2,BAL,vs ARI,W 23-17,24.0,37.0,272.0,2.0,0.0,...,131.55,25.92,298.0,3.5,0.0,131.55,25.92,1.0,1.0,1.0
89,Lamar Jackson,3,BAL,@ KC,L 28-33,22.0,43.0,267.0,0.0,0.0,...,111.233333,20.84,269.5,1.0,0.0,87.7,14.78,1.067409,1.268339,1.410014
120,Lamar Jackson,4,BAL,vs CLE,L 25-40,24.0,34.0,247.0,3.0,2.0,...,107.45,21.1,257.0,1.5,1.0,83.35,16.28,1.079767,1.289142,1.296069
172,Lamar Jackson,5,BAL,@ PIT,W 26-23,19.0,28.0,161.0,1.0,3.0,...,81.6,15.47,204.0,2.0,2.5,75.5,16.16,1.160539,1.080795,0.957302
204,Lamar Jackson,6,BAL,vs CIN,W 23-17,21.0,33.0,236.0,0.0,0.0,...,76.625,13.11,198.5,0.5,1.5,69.9,9.94,1.147355,1.096209,1.318913
241,Lamar Jackson,7,BAL,@ SEA,W 30-16,9.0,20.0,143.0,0.0,0.0,...,76.325,11.87,189.5,0.0,0.0,77.15,7.58,1.038259,0.989307,1.565963
310,Lamar Jackson,9,BAL,vs NE,W 37-20,17.0,23.0,163.0,1.0,0.0,...,79.225,9.03,153.0,0.5,0.0,88.55,8.12,1.148693,0.894692,1.112069
333,Lamar Jackson,10,BAL,@ CIN,W 49-13,15.0,17.0,223.0,3.0,0.0,...,105.075,11.65,193.0,2.0,0.0,133.0,15.72,0.990933,0.790038,0.741094
364,Lamar Jackson,11,BAL,vs HOU,W 41-7,17.0,24.0,222.0,4.0,0.0,...,118.65,15.51,222.5,3.5,0.0,148.75,22.9,0.84382,0.797647,0.677293


### Rush Feature Eng 

In [None]:
# Rushing stats to smooth, and the output column names for the rolling means.
# The touchdown column is spelled 'TD' in the rush frame (see rush.head()), so
# the previous 'Td' would raise a KeyError as soon as these columns are selected.
# NOTE(review): '100yd_flag' is not among the columns cast to float in the
# cleaning cell; cast it before taking a rolling mean.
cols = ['Yds', 'TD', 'FUM', '100yd_flag', 'rush_fantasy_pts']
lag4 = ['lag4_' + stat for stat in cols]
lag2 = ['lag2_' + stat for stat in cols]

### Receiving Feature Eng 

In [None]:
# Receiving stats to smooth, and the output column names for the rolling means.
# The touchdown column is spelled 'TD' in the rec frame (see rec.head()), so
# the previous 'Td' would raise a KeyError as soon as these columns are selected.
# NOTE(review): '100yd_flag' is not among the columns cast to float in the
# cleaning cell; cast it before taking a rolling mean.
cols = ['Yds', 'TD', 'FUM', '100yd_flag', 'rec_fantasy_pts']
lag4 = ['lag4_' + stat for stat in cols]
lag2 = ['lag2_' + stat for stat in cols]