# Imports

In [1]:
import itertools
import numpy as np
import pandas as pd

In [2]:
import sys
import os
# Put the parent directory on the import path before importing helpers_df
# (presumably helpers_df.py lives one level above this notebook - verify).
sys.path.append(os.path.abspath('..'))
# NOTE(review): normalize_stats is imported but not used in the cells visible here.
from helpers_df import create_lag_df, create_train_and_test_sets, normalize_stats

# Grab Data

In [3]:
# Load the 2012-2022 data set from a local pickle.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
df_rec = pd.read_pickle("./wr-simple-data-2012-2022.pkl")

In [4]:
# Keep only rows whose position is 'WR'; .copy() detaches the result from df_rec.
df_wr = df_rec.query("position == 'WR'").copy()

In [5]:
# Keep only rows with at least 10 games.
# NOTE(review): 10 is a magic number; consider a named constant and a sentence on why it was chosen.
query = "games >= 10"
df_wr_filtered = df_wr.query(query).copy()

# Lag Version (previous-season features)

In [8]:
# create_lag_df comes from helpers_df. Judging by the *_last columns in the
# preview below, it presumably attaches each player's previous-season stats to
# the current-season row - confirm in helpers_df.
# A copy is passed in, so the helper never receives df_wr_filtered itself.
df_lag = create_lag_df(df_wr_filtered.copy())

In [9]:
df_lag.head()

Unnamed: 0,player_id,season,season_type,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,...,rtd_sh_last,rfd_sh_last,rtdfd_sh_last,dom_last,w8dom_last,yptmpa_last,ppr_sh_last,age_last,position_last,player_name_last
0,00-0020337,2013,REG,64,110,745.0,4,0.0,0.0,1219.0,...,0.210526,0.298246,0.289474,0.254741,0.28127,2.395918,0.162871,33.0,WR,Steve Smith
1,00-0020337,2014,REG,79,134,1065.0,6,2.0,1.0,1413.0,...,0.181818,0.268293,0.258065,0.206234,0.220884,1.670404,0.136052,34.0,WR,Steve Smith
2,00-0020494,2013,REG,42,78,452.0,2,1.0,1.0,641.0,...,0.333333,0.157895,0.179487,0.244817,0.191708,1.29638,0.102523,33.0,WR,Santana Moss
3,00-0022044,2013,REG,109,181,1407.0,5,0.0,0.0,2004.0,...,0.181818,0.385366,0.365639,0.288388,0.35233,2.884477,0.204743,31.0,WR,Andre Johnson
4,00-0022044,2014,REG,85,147,936.0,3,3.0,3.0,1358.0,...,0.263158,0.348485,0.341014,0.29976,0.321721,2.222749,0.213362,32.0,WR,Andre Johnson


# Create Test and Train Sets

In [10]:
# Model inputs: three "*_last" stat columns plus "age", which carries no
# "_last" suffix (the projection function further down adds one year to the
# prior-season age to build the same feature).
x_cols = ['receiving_yards_after_catch_last',
       'receiving_air_yards_last', 'tgt_sh_last', "age"]

In [11]:
# Split the lagged frame into feature/target train and test sets; the target
# column is "fantasy_points". The helper prints the split sizes.
X_train, X_test, y_train, y_test = create_train_and_test_sets(
    df_lag,
    x_cols=x_cols,
    inference_col="fantasy_points",
)

```
Length of train set: 584
Length of test set: 146
Length of data set: 730
```


# Normalization

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to the test split so no test-set statistics enter the scaling.
# `scaler` is reused later when projecting individual players.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit Models

In [14]:
# Running best test-set score and the model that achieved it.
# Start at -inf rather than 0: R^2 can be negative, and with a floor of 0 a run
# where every model scores <= 0 would leave best_model as None and break the
# projection cell that calls best_model.predict().
best_score = float("-inf")
best_model = None

## Multiple Linear Regression (MLR)

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
# Fit a linear regression on the scaled training features.
mlr = LinearRegression().fit(X_train_scaled, y_train)
# Keep the raw (unrounded) test score so the best-model comparison uses the
# same precision as the other models; rounding here could flip a close
# comparison against a later model.
score = mlr.score(X_test_scaled, y_test)

In [17]:
# Promote the linear model when it beats the running best test score,
# then show its score as the cell output.
if best_score < score:
    best_score, best_model = score, mlr
score

0.406

## Random Forest Regressor

In [18]:
from sklearn.ensemble import RandomForestRegressor

In [19]:
# max_depth values to sweep: 1 through 100 inclusive
# (the +1 is there because range() excludes its stop value).
start = 1
stop = 100 + 1

In [20]:
# Sweep max_depth over [start, stop) and keep any forest that beats the
# running best score carried over from the earlier cells.
# NOTE(review): the score is computed on the test split, so the depth is
# effectively tuned on test data and best_score is an optimistic estimate;
# consider a validation split or cross-validation for the selection.
for depth in range(start, stop):
    # A fresh estimator per depth, so a stored best_model is never refit later.
    rfr = RandomForestRegressor(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    # Prints only when this depth improves on best_score.
    if score > best_score:
        print(f"New best score: {score}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")


Done :)


## Gradient Boosting Machines

In [21]:
from sklearn.ensemble import GradientBoostingRegressor

In [22]:
gbm = GradientBoostingRegressor(random_state=0)

In [23]:
gbm.fit(X_train_scaled, y_train)

In [24]:
score = gbm.score(X_test_scaled, y_test)

In [25]:
# Promote the gradient-boosting model when it beats the running best test
# score, then show its score as the cell output.
if best_score < score:
    best_score, best_model = score, gbm
score

0.3386271274051912

## Neural Net

In [26]:
from sklearn.neural_network import MLPRegressor

In [30]:
# Multi-layer perceptron with two hidden layers of 10 units each; the seed is
# fixed and the iteration cap raised to 10000.
nn = MLPRegressor(
    hidden_layer_sizes=[10, 10],
    max_iter=10000,
    random_state=1,
)
nn = nn.fit(X_train_scaled, y_train)

In [31]:
score = nn.score(X_test_scaled, y_test)

In [32]:
# Promote the neural net when it beats the running best test score,
# then show its score as the cell output.
if best_score < score:
    best_score, best_model = score, nn
score

0.3821914939870589

# Projecting 2023

In [34]:
# Load the 2023 data used as the "answer" when checking projections.
# NOTE: unpickling can execute arbitrary code, so only load files from a trusted source.
df_2023 = pd.read_pickle("./wr-simple-data-2023.pkl")

In [35]:
def compare_model_to_current_year(df, df_current, player_name, model,
                                  season=2022, fitted_scaler=None):
    """Project one player's fantasy points and compare with the actual value.

    The player's row(s) for ``season`` are taken from ``df`` and reshaped into
    the layout the models were trained on (the three stat columns renamed with
    a ``_last`` suffix, plus ``age`` advanced by one year). They are then
    scaled and passed to ``model``. The first prediction is printed next to
    the player's first ``fantasy_points`` value in ``df_current``.

    Parameters
    ----------
    df : pandas.DataFrame
        Historical data with ``season``, ``player_name``, ``age``,
        ``receiving_yards_after_catch``, ``receiving_air_yards`` and ``tgt_sh``
        columns. It is not modified.
    df_current : pandas.DataFrame
        Data with ``player_name`` and ``fantasy_points`` columns.
    player_name : str
        Exact player name. Matching uses plain equality rather than a
        string-built query, so names containing apostrophes work.
    model : object
        Fitted estimator exposing ``predict``.
    season : int, default 2022
        Season in ``df`` whose stats are used as the "last season" features.
    fitted_scaler : object, optional
        Fitted transformer exposing ``transform``. When omitted, the
        notebook-level ``scaler`` is used.

    Returns
    -------
    tuple
        ``(prediction, answer)``.

    Raises
    ------
    ValueError
        If ``df`` has no row for ``player_name`` in ``season``.
    IndexError
        If ``df_current`` has no row for ``player_name``.
    """
    stat_cols = ['receiving_yards_after_catch', 'receiving_air_yards', 'tgt_sh']
    feature_cols = [f"{col}_last" for col in stat_cols] + ["age"]

    # Boolean masks instead of f-string queries: a quote inside the name
    # (e.g. "Ja'Marr Chase") would otherwise break the query expression.
    prior_mask = (df["season"] == season) & (df["player_name"] == player_name)
    prior_rows = df.loc[prior_mask, stat_cols + ["age"]]
    if prior_rows.empty:
        raise ValueError(f"No {season} row found for player {player_name!r}")

    current_points = df_current.loc[
        df_current["player_name"] == player_name, "fantasy_points"
    ]
    if current_points.empty:
        raise IndexError(f"No current-season row found for player {player_name!r}")

    # Build the feature frame without in-place edits so neither the caller's
    # frame nor a filtered slice of it is mutated.
    features = (
        prior_rows
        .rename(columns={col: f"{col}_last" for col in stat_cols})
        .assign(age=prior_rows["age"] + 1)  # age in the projected season
    )[feature_cols]

    # Fall back to the notebook-level scaler fitted on the training split.
    active_scaler = scaler if fitted_scaler is None else fitted_scaler
    sample = active_scaler.transform(features)

    prediction = model.predict(sample)[0]
    answer = current_points.iloc[0]

    print("```")
    print(f"Prediction value: {prediction}")
    print(f"Answer: {answer}")
    print(f"Difference (p - a): {prediction - answer}")
    print("```")

    return prediction, answer

In [37]:
# Project one player with whichever model scored best on the test split.
# NOTE(review): df_wr (not df_wr_filtered) is passed, so the "games >= 10"
# filter used to build the training data is not applied to the prior-season
# lookup - confirm that is intended.
player_name = "Mike Evans"
model = best_model
compare_model_to_current_year(df_wr, df_2023, player_name, model)

```
Prediction value: 108.46865446825802
Answer: 194.3
Difference (p - a): -85.83134553174199
```


(108.46865446825802, 194.3)