## Modeling Rush Yards Per Game 

https://www.statsmodels.org/devel/mixed_linear.html

https://www.statsmodels.org/devel/gam.html

https://pygam.readthedocs.io/en/latest/notebooks/tour_of_pygam.html

#### Modeling Framework 
    1. Fit 3 models for each type of data 
        * GAM and / or spline model - because we have panel data 
    2. Tune spline parameters 
    3. Train / test split - not cross validate 
        * work on back testing modeling framework 
    4. Output: 
        1. Prediction
        2. A notion of variance or uncertainty for optimization 
    5. Pipeline to injest new data 

In [1]:
# %load ../api_access_snippet.py
#import library
import gspread 
#Service client credential from oauth2client
from oauth2client.service_account import ServiceAccountCredentials
# Print nicely
import pprint
#Create scope
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
#create some credential using that scope and content of startup_funding.json
creds = ServiceAccountCredentials.from_json_keyfile_name('../quickstart/g_sheet_creds.json',scope)
#create gspread authorize using that credential
client = gspread.authorize(creds)
my_email = 'matthewjchristy66@gmail.com'

def read_file(sheet_name):
    out = client.open(sheet_name).sheet1
    out = out.get_all_values()
    out = pd.DataFrame(out, columns = out.pop(0))
    return(out)

import pandas as pd 
import numpy as np

In [2]:
rush = read_file('rushing_data_model_ready')

In [3]:
rush.head()

Unnamed: 0,Name,Week,Opp,Score,Att,Yds,TD,FUM,rush_fantasy_pts,total_fum,...,lag3_TD,lag3_rush_fantasy_pts,lag2_Yds,lag2_TD,lag2_rush_fantasy_pts,last_week_Yds,last_week_TD,last_week_rush_fantasy_pts,yds_ration,fantasy_pts_ratio
0,Marlon Mack,1,@ LAC,L 24-30,25,174.0,1.0,0.0,23.4,1,...,0.5625,10.8825,75.7,0.5625,10.8825,75.7,0.5625,10.8825,1.0,1.0
1,Marlon Mack,2,@ TEN,W 19-17,20,51.0,0.0,0.0,5.1,1,...,0.5625,10.8825,75.7,0.5625,10.8825,174.0,1.0,23.4,1.0,1.0
2,Marlon Mack,3,vs ATL,W 27-24,16,74.0,1.0,0.0,13.4,1,...,0.5625,10.8825,112.5,0.5,14.25,51.0,0.0,5.1,1.486129458388375,1.3094417643004823
3,Marlon Mack,4,vs OAK,L 24-31,11,39.0,0.0,0.0,3.9,1,...,0.6666666666666666,13.966666666666669,62.5,0.5,9.25,74.0,1.0,13.4,0.6270903010033444,0.662291169451074
4,Marlon Mack,5,@ KC,W 19-13,29,132.0,0.0,0.0,13.2,1,...,0.3333333333333333,7.466666666666666,56.5,0.5,8.649999999999999,39.0,0.0,3.9,1.0335365853658538,1.1584821428571428


In [4]:
target = ['rush_fantasy_pts']
last_week = ['last_week_Yds', 'last_week_TD', 'last_week_rush_fantasy_pts']
inputs = ['yds_ration', 'fantasy_pts_ratio', 'lag2_Yds', 'lag2_rush_fantasy_pts', 'lag2_TD']

In [5]:
rush = rush.replace('na', np.NaN);
fix_vars = target + last_week + inputs
rush[fix_vars] = rush[fix_vars].astype(float)
rush.loc[rush.rush_fantasy_pts < 0, 'rush_fantasy_pts'] = 0

In [6]:
rush['log1p_target'] = np.log1p(rush.rush_fantasy_pts + 1)

In [7]:
rush.dtypes

Name                           object
Week                           object
Opp                            object
Score                          object
Att                            object
Yds                            object
TD                             object
FUM                            object
rush_fantasy_pts              float64
total_fum                      object
lag3_Yds                       object
lag3_TD                        object
lag3_rush_fantasy_pts          object
lag2_Yds                      float64
lag2_TD                       float64
lag2_rush_fantasy_pts         float64
last_week_Yds                 float64
last_week_TD                  float64
last_week_rush_fantasy_pts    float64
yds_ration                    float64
fantasy_pts_ratio             float64
log1p_target                  float64
dtype: object

### Fitting GAM

In [15]:
#importing statsmodels packages
import statsmodels.api as sm

from statsmodels.gam.api import GLMGam, BSplines, CyclicCubicSplines

import statsmodels.formula.api as smf

In [25]:
x_list = inputs + last_week
f = 'log1p_target ~ last_week_rush_fantasy_pts + last_week_TD + last_week_Yds + yds_ration + fantasy_pts_ratio + lag2_Yds + lag2_TD + lag2_rush_fantasy_pts'

In [31]:
complexity = 2
num_vars = len(x_list)

df_iter = np.repeat(complexity, num_vars)
degrees = np.repeat(complexity - 1, num_vars)

In [32]:
x_spline = rush[x_list]
bs = BSplines(x_spline, df = df_iter, degree = degrees)
cs = CyclicCubicSplines(x_spline, df = df_iter)

ValueError: 0<=der=2<=k=1 must hold

In [None]:
pass_gam = GLMGam.from_formula(f, data = pass_mdl_data, smoother = cs, family=sm.families.Gaussian())

## Going the Bayesian Route 

https://docs.pymc.io/notebooks/GLM-linear.html

In [8]:
import pymc3 as pm
f = 'log1p_target ~ last_week_rush_fantasy_pts + last_week_TD + last_week_Yds + yds_ration + fantasy_pts_ratio + lag2_Yds + lag2_TD + lag2_rush_fantasy_pts'

In [11]:
with pm.Model() as model:
    # specify glm and pass in data. The resulting linear model, its likelihood and
    # and all its parameters are automatically added to our model.
    pm.glm.GLM.from_formula(f, rush)
    trace = p,sample(1000, cores=3) # draw 3000 posterior samples using NUTS sampling

NameError: name 'sample' is not defined