# Imports

In [180]:
import itertools
import numpy as np
import pandas as pd

In [181]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers_df import create_lag_df, create_train_and_test_sets, normalize_stats

# Grab Data

In [182]:
df_rec = pd.read_pickle("./wr-simple-data-2012-2022-qbr.pkl")

In [183]:
df_wr = df_rec.query("position == 'WR'").copy()

# Lag Version

In [184]:
df_lag = create_lag_df(df_wr)

In [185]:
df_lag.head()

Unnamed: 0,player_id,season,season_type,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,...,w8dom_last,yptmpa_last,ppr_sh_last,age_last,position_last,player_name_last,depth_team_last,round_last,pick_last,qbr_last
0,00-0015754,2013,REG,13,21,115.0,0,0.0,0.0,174.0,...,0.13596,1.058366,0.088888,36.0,WR,Brandon Stokley,2.0,4.0,105.0,105.849771
1,00-0020337,2013,REG,64,110,745.0,4,0.0,0.0,1219.0,...,0.28127,2.395918,0.162871,33.0,WR,Steve Smith,1.0,3.0,74.0,86.181271
2,00-0020337,2014,REG,79,134,1065.0,6,2.0,1.0,1413.0,...,0.220884,1.670404,0.136052,34.0,WR,Steve Smith,1.0,3.0,74.0,88.755285
3,00-0020337,2015,REG,46,73,670.0,3,0.0,0.0,719.0,...,0.258193,1.922383,0.151386,35.0,WR,Steve Smith,1.0,3.0,74.0,91.027377
4,00-0020337,2016,REG,70,101,799.0,5,0.0,0.0,873.0,...,0.339718,2.310345,0.210909,36.0,WR,Steve Smith,1.75,3.0,74.0,83.106336


1 => improved

0 => no change

-1 => decline

In [186]:
df_lag_classification = df_lag.query("games >= 10").copy()
for i, row in df_lag_classification.iterrows():
    diff = row["fantasy_points"] - row["fantasy_points_last"]
    if diff > 10:
        df_lag_classification.at[i, "change"] = 1
    elif diff < -10:
        df_lag_classification.at[i, "change"] = -1
    else:
        df_lag_classification.at[i, "change"] = 0

In [187]:
df_lag_classification.dropna(inplace=True)

# Create Test and Train Sets

In [188]:
x_cols = ['receiving_yards_after_catch_last',
       'receiving_air_yards_last', 'tgt_sh_last', "age", "qbr_last",
          "round", "depth_team", "depth_team_last"]

In [218]:
X_train, X_test, y_train, y_test = create_train_and_test_sets(df_lag_classification,
                                                              x_cols=x_cols,
                                                              inference_col = "change",
                                                              test_size=0.2
                                                             )

```
Length of train set: 571
Length of test set: 143
Length of data set: 714
```


# Normalization

In [219]:
from sklearn.preprocessing import StandardScaler

In [220]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit Models

In [221]:
best_score = 0
best_model = None

## MLR

In [222]:
from sklearn.linear_model import LinearRegression

In [223]:
mlr = LinearRegression().fit(X_train_scaled, y_train)
score = round(mlr.score(X_test_scaled, y_test), 3)

In [224]:
if score > best_score:
    best_score = score
    best_model = mlr
score

0.277

## Random Forest Classifier

In [225]:
from sklearn.ensemble import RandomForestClassifier

In [226]:
start = 1
stop = 15 + 1

In [227]:
for depth in range(start, stop):
    rfr = RandomForestClassifier(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    if score > best_score:
        print(f"New best score: {score}  |  depth: {depth}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")

New best score: 0.6083916083916084  |  depth: 1
New best score: 0.6223776223776224  |  depth: 2
New best score: 0.6363636363636364  |  depth: 4

Done :)


## Gradient Boosting Machines

In [228]:
from sklearn.ensemble import GradientBoostingClassifier

In [229]:
gbm = GradientBoostingClassifier(random_state=0)

In [230]:
gbm.fit(X_train_scaled, y_train)

In [231]:
score = gbm.score(X_test_scaled, y_test)

In [232]:
if score > best_score:
    best_score = score
    best_model = gbm
score

0.5944055944055944

## Neural Net

In [233]:
from sklearn.neural_network import MLPClassifier

In [234]:
nn = MLPClassifier(random_state=1,
                    hidden_layer_sizes=[100, 100, 100],
                    max_iter=10000).fit(X_train_scaled, y_train)

In [235]:
score = nn.score(X_test_scaled, y_test)

In [236]:
if score > best_score:
    best_score = score
    best_model = nn
score

0.5384615384615384

### Best Score and Model

In [237]:
best_model

In [238]:
best_score

0.6363636363636364

# Projecting 2023

In [239]:
df_2023 = pd.read_pickle("./wr-simple-data-2023-qbr.pkl")

In [240]:
def compare_model_to_current_year(df, df_current, player_name, model):
    df_last = df.copy().query("season == 2022")
    
    player_row_22 = df_last[['player_name', 'receiving_yards_after_catch',
       'receiving_air_yards', 'tgt_sh', "age", "qbr", "round", "depth_team"]] \
        .query(f"player_name == '{player_name}'")
    
    player_row_22['age'] += 1
    
    rename_cols = {}
    for col in list(player_row_22.columns):
        if col != "age" and col != "round":
            rename_cols[col] = f"{col}_last"
        
    player_row_22.rename(columns=rename_cols, inplace=True)
    player_row_22["depth_team"] = df_current.query(f"player_name == \"{player_name}\"")["depth_team"].iloc[0]
    
    sample = scaler.transform(player_row_22[['receiving_yards_after_catch_last',
       'receiving_air_yards_last', 'tgt_sh_last', "age", "qbr_last", "round", "depth_team", "depth_team_last"]])
    
    previous_fp = df_last.query(f"season == 2022 and player_name == \"{player_name}\"")["fantasy_points"].iloc[0]
    current_fp = df_current.query(f"player_name == \"{player_name}\"")["fantasy_points"].iloc[0]
    
    answer = current_fp - previous_fp
    prediction = model.predict(sample)[0]
    
#     print("```")
#     print(f"Previous fantasy points: {previous_fp}")
#     print(f"Current fantasy points: {current_fp}")
#     print(f"Difference (p - a): {answer}")
#     print(f"Prediction: {prediction}")
#     print("```")
    
    return prediction, answer

In [241]:
correct = []
incorrect = []
model = best_model
for name in df_wr.query("season == 2022 and games >= 10")["player_name"]:
    if name in df_2023["player_name"].values and name != "Ja'Marr Chase":
        player_name = name
        
        if not df_2023.query(f"player_name == '{player_name}' and games >= 8").empty:
            prediction, answer = compare_model_to_current_year(df_wr, df_2023, player_name, model)

            if answer < 0:
                answer = -1
            elif answer == 0:
                answer = 0
            elif answer > 0:
                answer = 1

            if answer == prediction:
                correct.append((name, answer, prediction))
            else:
                incorrect.append((name, answer, prediction))

In [242]:
print(len(correct), len(incorrect))

46 29


In [243]:
df_incorrect = pd.DataFrame(columns=['Name', 'Age', 'Answer', 'Prediction', 'QBR'])
for name, answer, pred in incorrect:
    if ((answer + 1) != pred) or ((answer - 1) != pred):
        info = df_2023.query(f"player_name == '{name}'")
        
        games = info["games"].iloc[0]
        fantasy_points = info["fantasy_points"].iloc[0]
        if games >= 8 and fantasy_points >= 50:
            qbr = round(info["qbr"].iloc[0], 2)
            age = info["age"].iloc[0]
            
            row = (name, age, answer, pred, qbr)
            row_df = pd.DataFrame([row], columns=df_incorrect.columns)
            df_incorrect = pd.concat([df_incorrect, row_df], ignore_index=True)

  df_incorrect = pd.concat([df_incorrect, row_df], ignore_index=True)


In [244]:
df_incorrect.query("Prediction == -1.0")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
0,Keenan Allen,31.0,1,-1.0,93.22
1,Brandin Cooks,29.0,1,-1.0,104.25
2,Mike Evans,30.0,1,-1.0,96.21
3,Nelson Agholor,30.0,1,-1.0,97.24
4,Tyreek Hill,29.0,1,-1.0,105.4
5,Courtland Sutton,27.0,1,-1.0,98.0
7,D.J. Moore,26.0,1,-1.0,84.46
8,DK Metcalf,25.0,1,-1.0,90.41
9,Michael Pittman,25.0,1,-1.0,87.25
11,CeeDee Lamb,24.0,1,-1.0,104.25


In [245]:
df_incorrect.query("Prediction == 1.0")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
6,Michael Gallup,27.0,-1,1.0,104.25
10,K.J. Osborn,26.0,-1,1.0,103.78
12,Josh Palmer,23.0,-1,1.0,93.22
13,Drake London,22.0,-1,1.0,82.22
14,Alec Pierce,23.0,-1,1.0,87.25
15,Jahan Dotson,23.0,-1,1.0,80.51


In [246]:
df_incorrect.query("Prediction == 0.0")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
