# Feature Engineering for Receiving Data 

In [1]:
#importing various libraries 
import gspread 
#Service client credential from oauth2client
from oauth2client.service_account import ServiceAccountCredentials
# Print nicely
import pprint
# OAuth scopes requested for the service account: the Sheets feed and Drive.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
# Build service-account credentials from the local JSON key file for that scope.
creds = ServiceAccountCredentials.from_json_keyfile_name(
    '../quickstart/g_sheet_creds.json', scope
)
# Authorized gspread client; used below to open and to create spreadsheets.
client = gspread.authorize(creds)
# Address that the spreadsheet written at the end of the notebook is shared with.
my_email = 'matthewjchristy66@gmail.com'

In [2]:
#data manipulation 
import pandas as pd 
import numpy as np

In [3]:
def read_file(sheet_name):
    """Load the first worksheet of a Google spreadsheet into a DataFrame.

    The first row returned by the worksheet supplies the column names and
    every remaining row becomes one record. No type conversion is done
    here; callers cast the columns they need.

    Parameters
    ----------
    sheet_name : title of the spreadsheet, opened through the module-level
        ``client``.

    Returns
    -------
    pandas.DataFrame
    """
    worksheet = client.open(sheet_name).sheet1
    rows = worksheet.get_all_values()
    # Split the header off first so the remaining rows are data only.
    header = rows.pop(0)
    return pd.DataFrame(rows, columns=header)

In [4]:
# Weekly receiving data, one row per player per game.
rec = read_file('receiving_processed_step1')

In [5]:
# Cast the stat columns used in the calculations below to float.
rrf = ['Yds', 'TD', 'FUM']
for stat in rrf + ['rec_fantasy_pts']:
    rec[stat] = rec[stat].astype(float)

In [6]:
# Preview the first rows of the receiving data.
rec.head()

Unnamed: 0,Name,Week,Team,Opp,Score,Rec,Yds,Avg,TD,FUM,100yd_flag,rec_fantasy_pts
0,Sammy Watkins,1,KC,@ JAX,W 40-26,9,198.0,22.0,3.0,0.0,1,37.8
1,Michael Gallup,1,DAL,vs NYG,W 35-17,7,158.0,22.6,0.0,0.0,1,15.8
2,John Ross,1,CIN,@ SEA,L 20-21,7,158.0,22.6,2.0,0.0,1,27.8
3,DeSean Jackson,1,PHI,vs WAS,W 32-27,8,154.0,19.2,2.0,0.0,1,27.4
4,Marquise Brown,1,BAL,@ MIA,W 59-10,4,147.0,36.8,2.0,0.0,1,26.7


### Feature Engineering

In [7]:
# Base stats that every engineered feature is derived from.
cols = ['Yds', 'TD', 'rec_fantasy_pts']
# Column names for the 3-game and 2-game moving averages.
lag3 = ['lag3_' + stat for stat in cols]
lag2 = ['lag2_' + stat for stat in cols]
# Column names for the previous game's raw values.
prior_week = ['last_week_' + stat for stat in cols]

#### Getting data for imputation

In [8]:
# 2018 season recap; its per-player figures are the fallback values used when
# imputing missing lag features further down.
recap_numeric = ['Yds/G', 'TD', '2018_ppg']
rec_recap = read_file('2018_receiving_recap')
rec_recap[recap_numeric] = rec_recap[recap_numeric].astype(float)
# Rename to line up with `rec` ("Name") and to keep season totals distinct from
# the weekly stats, then drop the recap's own Team column.
rec_recap = (
    rec_recap
    .rename(columns={"Player": "Name", 'TD': 'total_td', 'Int': 'total_int', 'FUM': 'total_fum'})
    .drop(columns=['Team'])
)

In [9]:
# Keep only players present in both tables; fully identical rows are removed.
joined = rec.merge(rec_recap, how='inner', on='Name').drop_duplicates()

#### Lagging variables for inference

In [10]:
#creating a moving avg function 
def move_avg(df, smooth_col, group_vals, window):
    """Rolling mean of ``smooth_col`` computed separately within each group.

    Parameters
    ----------
    df : DataFrame holding the data.
    smooth_col : column label, or list of labels, to average.
    group_vals : key(s) passed to ``DataFrame.groupby``.
    window : number of consecutive rows in each average.

    Returns
    -------
    Series or DataFrame of rolling means that carries the row labels of
    ``df``, so it can be assigned straight back onto ``df`` and aligns by
    index. The first ``window - 1`` rows of every group are NaN. Rows come
    back ordered by group, not in the original row order.
    """
    # Group with the default index behaviour: the result is then indexed by
    # (group key(s)..., original index). With as_index=False newer pandas
    # returns the keys as columns instead, which breaks the level handling.
    rolled = df.groupby(group_vals)[smooth_col].rolling(window=window).mean()
    # Strip exactly the key levels (one per grouping key) so that only the
    # original row labels remain.
    n_key_levels = rolled.index.nlevels - df.index.nlevels
    out = rolled.reset_index(level=list(range(n_key_levels)), drop=True)
    return out

In [11]:
# Per-player moving averages of the base stats over 3 and 2 consecutive rows.
# NOTE(review): assumes each player's rows are already in week order - confirm.
for lag_cols, n_games in ((lag3, 3), (lag2, 2)):
    joined[lag_cols] = move_avg(df=joined, smooth_col=cols, group_vals='Name', window=n_games)

In [12]:
# Each player's previous-row stats: shift one row down within the player's
# group, so the first row of every player is NaN. The deprecated `axis`
# keyword is omitted; shifting along rows is the default.
joined[prior_week] = joined.groupby('Name')[['Yds', 'TD', 'rec_fantasy_pts']].shift(periods=1)

In [17]:
# Push the moving averages down one row per player so that each row only
# sees averages built from earlier rows, never its own stats. The deprecated
# `axis` keyword is omitted; shifting along rows is the default.
big_list = lag3 + lag2
joined[big_list] = joined.groupby('Name')[big_list].shift(periods=1)

#### Getting Ratios & Imputing

In [20]:
# Fill gaps in the 2-game averages with the player's 2018 recap figures.
# NOTE(review): the /16 presumably turns a season TD total into a per-game
# rate (16 games) - confirm.
lag2_fallbacks = {
    'lag2_Yds': joined['Yds/G'],
    'lag2_TD': joined['total_td'] / 16,
    'lag2_rec_fantasy_pts': joined['2018_ppg'],
}
for column, fallback in lag2_fallbacks.items():
    joined[column] = joined[column].fillna(fallback)

In [21]:
# Fill gaps in the 3-game averages with the player's 2018 recap figures.
# NOTE(review): the /16 presumably turns a season TD total into a per-game
# rate (16 games) - confirm.
lag3_fallbacks = {
    'lag3_Yds': joined['Yds/G'],
    'lag3_TD': joined['total_td'] / 16,
    'lag3_rec_fantasy_pts': joined['2018_ppg'],
}
for column, fallback in lag3_fallbacks.items():
    joined[column] = joined[column].fillna(fallback)

In [22]:
# Fill gaps in the previous-game values with the player's 2018 recap figures.
# NOTE(review): the /16 presumably turns a season TD total into a per-game
# rate (16 games) - confirm.
last_week_fallbacks = {
    'last_week_Yds': joined['Yds/G'],
    'last_week_TD': joined['total_td'] / 16,
    'last_week_rec_fantasy_pts': joined['2018_ppg'],
}
for column, fallback in last_week_fallbacks.items():
    joined[column] = joined[column].fillna(fallback)

In [23]:
# Short-window average relative to the longer-window average.
# NOTE(review): 'yds_ration' looks like a typo for 'yds_ratio'; the name is
# kept because it is written out as a column of the final data set.
# NOTE(review): a 3-game average of 0 makes these ratios inf or NaN.
joined['yds_ration'] = joined['lag2_Yds'].div(joined['lag3_Yds'])
joined['fantasy_pts_ratio'] = joined['lag2_rec_fantasy_pts'].div(joined['lag3_rec_fantasy_pts'])

### Finalizing Data for Modeling

In [24]:
# Columns removed before the data is written out: the 2018 recap fields used
# only for imputation, plus Team, Avg and 100yd_flag.
drop_vars = [
    'Yds/G',
    'total_td',
    '2018_ppg',
    'Team',
    'Avg',
    '100yd_flag',
]

In [25]:
# Model-ready table: everything in `joined` except the columns in drop_vars.
final_data = joined.drop(columns=drop_vars)
print(final_data.shape)

(1656, 21)


### Writing the data 

In [26]:
# %load ../01_data-acq/write_function.py
def _column_letters(col_number):
    """Convert a 1-based column number to spreadsheet letters (1 -> A, 27 -> AA)."""
    import string

    if col_number < 1:
        raise ValueError('column number must be >= 1, got %r' % (col_number,))
    letters = ''
    while col_number > 0:
        # Bijective base 26: there is no zero digit, hence the -1.
        col_number, remainder = divmod(col_number - 1, 26)
        letters = string.ascii_uppercase[remainder] + letters
    return letters


def writer(data, sheet_name, share_email):
    """Create a new Google spreadsheet and write ``data`` to its first worksheet.

    Missing values are written as the text ``'na'`` and every other value is
    written as its string form. Row 1 holds the column labels and the data
    rows follow from row 2. The index of ``data`` is not written.

    Parameters
    ----------
    data : pandas.DataFrame to upload.
    sheet_name : title of the spreadsheet, created through the module-level
        ``client``.
    share_email : account that is given writer access to the new spreadsheet.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``data`` has no columns.
    """
    n_rows, n_cols = data.shape
    if n_cols == 0:
        raise ValueError('data has no columns to write')

    # preparing the data to be written
    data = data.fillna('na').astype(str)

    # creating the spreadsheet and sharing it
    sheet = client.create(sheet_name)
    sheet.share(share_email, perm_type='user', role='writer')

    # A1-notation range covering the header row plus every data row; the
    # column letters work for any column count, not just the first 26.
    end_row = n_rows + 1
    sheet_range = 'A1:' + _column_letters(n_cols) + str(end_row)

    # header labels followed by the values row by row, matching the
    # row-by-row order in which the cell range is filled below
    values = [str(label) for label in data.columns]
    values.extend(data.values.ravel().tolist())

    # getting the target sheet and sizing its grid to the data, so the
    # requested range cannot fall outside the worksheet
    ws = sheet.get_worksheet(0)
    ws.resize(rows=end_row, cols=n_cols)
    cell_list = ws.range(sheet_range)

    # writing the values into the cell objects
    for cell, value in zip(cell_list, values):
        cell.value = value

    # batch updating
    ws.update_cells(cell_list)

In [28]:
# Upload the model-ready table as a new spreadsheet shared with my_email.
writer(
    data=final_data,
    sheet_name='receiving_data_model_ready',
    share_email=my_email,
)