# Imports

In [1]:
import itertools
import numpy as np
import pandas as pd

In [2]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers_df import create_lag_df, create_train_and_test_sets, normalize_stats

# Grab Data

In [3]:
df_rec = pd.read_pickle("./wr-simple-data-2012-2022-qbr.pkl")

In [4]:
df_wr = df_rec.query("position == 'WR'").copy()

In [5]:
games = df_wr["games"]
df_wr["receiving_yards_after_catch_per_game"] = df_wr["receiving_yards_after_catch"] / games
df_wr["receiving_air_yards_per_game"] = df_wr["receiving_air_yards"] / games
df_wr["fantasy_points_per_game"] = df_wr["fantasy_points"] / games
df_wr["capital"] = ((df_wr["round"] - 1) * 32) + df_wr["pick"]
df_wr["receiving_yards_per_game"] = df_wr["receiving_yards"] / games
df_wr["targets_per_game"] = df_wr["targets"] / games

# Lag Version

In [6]:
df_lag = create_lag_df(df_wr)

In [7]:
df_lag.head()

Unnamed: 0,player_id,season,season_type,receptions,targets,receiving_yards,receiving_tds,receiving_fumbles,receiving_fumbles_lost,receiving_air_yards,...,depth_team_last,round_last,pick_last,qbr_last,receiving_yards_after_catch_per_game_last,receiving_air_yards_per_game_last,fantasy_points_per_game_last,capital_last,receiving_yards_per_game_last,targets_per_game_last
0,00-0015754,2013,REG,13,21,115.0,0,0.0,0.0,174.0,...,2.0,4.0,105.0,105.849771,10.571429,42.071429,6.028571,201.0,38.857143,4.214286
1,00-0020337,2013,REG,64,110,745.0,4,0.0,0.0,1219.0,...,1.0,3.0,74.0,86.181271,16.75,114.5,8.88125,138.0,73.375,8.625
2,00-0020337,2014,REG,79,134,1065.0,6,2.0,1.0,1413.0,...,1.0,3.0,74.0,88.755285,11.866667,81.266667,6.566667,138.0,49.666667,7.333333
3,00-0020337,2015,REG,46,73,670.0,3,0.0,0.0,719.0,...,1.0,3.0,74.0,91.027377,22.6875,88.3125,8.78125,138.0,66.5625,8.375
4,00-0020337,2016,REG,70,101,799.0,5,0.0,0.0,873.0,...,1.75,3.0,74.0,83.106336,36.571429,102.714286,12.142857,138.0,95.714286,10.428571


1 => improved

0 => no change

-1 => decline

In [8]:
df_lag_classification = df_lag.query("games >= 10").copy()
for i, row in df_lag_classification.iterrows():
    diff = row["fantasy_points_per_game"] - row["fantasy_points_per_game_last"]
#     diff = row["trinity"] - row["trinity_last"]
    if diff > 0:
        df_lag_classification.at[i, "change"] = 1
    else:
        df_lag_classification.at[i, "change"] = -1

In [9]:
df_lag_classification.dropna(inplace=True)

# Create Test and Train Sets

In [10]:
x_cols = ['receiving_yards_after_catch_per_game_last', 'receiving_air_yards_per_game_last',
          'tgt_sh_last', "age", "qbr_last", "capital", "depth_team", "depth_team_last",
         "w8dom_last", "wopr_x_last", "ppr_sh_last", "dom_last",
          "receiving_yards_per_game_last", "targets_per_game_last", "ay_sh_last", "ry_sh_last"]

In [11]:
X_train, X_test, y_train, y_test = create_train_and_test_sets(df_lag_classification, 
        x_cols=x_cols,
        inference_col = ['change'],
        test_size=0.25
)

```
Length of train set: 535
Length of test set: 179
Length of data set: 714
```


# Normalization

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit Models

In [14]:
best_score = 0
best_model = None

## MLR

In [15]:
from sklearn.linear_model import LinearRegression

In [16]:
mlr = LinearRegression().fit(X_train_scaled, y_train)
score = round(mlr.score(X_test_scaled, y_test), 3)

In [17]:
if score > best_score:
    best_score = score
    best_model = mlr
score

0.197

## Random Forest Classifier

In [18]:
from sklearn.ensemble import RandomForestClassifier

In [19]:
start = 1
stop = 15 + 1

In [20]:
for depth in range(start, stop):
    rfr = RandomForestClassifier(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    if score > best_score:
        print(f"New best score: {score}  |  depth: {depth}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")

  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


New best score: 0.6871508379888268  |  depth: 1
New best score: 0.6927374301675978  |  depth: 2
New best score: 0.7150837988826816  |  depth: 3


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)


New best score: 0.7597765363128491  |  depth: 6


  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)
  return fit_method(estimator, *args, **kwargs)



Done :)


## Gradient Boosting Machines

In [21]:
from sklearn.ensemble import GradientBoostingClassifier

In [22]:
gbm = GradientBoostingClassifier(random_state=0)

In [23]:
gbm.fit(X_train_scaled, y_train)

  y = column_or_1d(y, warn=True)


In [24]:
score = gbm.score(X_test_scaled, y_test)

In [25]:
if score > best_score:
    best_score = score
    best_model = gbm
score

0.664804469273743

## Neural Net

In [26]:
from sklearn.neural_network import MLPClassifier

In [27]:
nn = MLPClassifier(random_state=1,
                    hidden_layer_sizes=[1000, 1000, 100],
                    max_iter=10000).fit(X_train_scaled, y_train)

  y = column_or_1d(y, warn=True)


In [28]:
score = nn.score(X_test_scaled, y_test)

In [29]:
if score > best_score:
    best_score = score
    best_model = nn
score

0.6424581005586593

### Best Score and Model

In [30]:
best_model

In [31]:
best_score

0.7597765363128491

# Projecting 2023

In [32]:
df_2023 = pd.read_pickle("./wr-simple-data-2023-qbr.pkl")

In [33]:
games = df_2023["games"]
df_2023["receiving_yards_after_catch_per_game"] = df_2023["receiving_yards_after_catch"] / games
df_2023["receiving_air_yards_per_game"] = df_2023["receiving_air_yards"] / games
df_2023["fantasy_points_per_game"] = df_2023["fantasy_points"] / games
df_2023["capital"] = ((df_2023["round"] - 1) * 32) + df_2023["pick"]
df_2023["receiving_yards_per_game"] = df_2023["receiving_yards"] / games
df_2023["targets_per_game"] = df_2023["targets"] / games

In [34]:
def compare_model_to_current_year(df, df_current, player_name, model):
    df_last = df.copy().query("season == 2022")
    
    player_row_22 = df_last[['player_name', 'receiving_yards_after_catch_per_game',
       'receiving_air_yards_per_game', 'tgt_sh', "age", "qbr", "capital", "depth_team",
        "w8dom", "wopr_x", "ppr_sh", "dom", "receiving_yards_per_game", "targets_per_game",
                            "ay_sh", "ry_sh"]] \
        .query(f"player_name == '{player_name}'")
    
    player_row_22['age'] += 1
    
    rename_cols = {}
    for col in list(player_row_22.columns):
        if col != "age" and col != "round" and col != "capital":
            rename_cols[col] = f"{col}_last"
        
    player_row_22.rename(columns=rename_cols, inplace=True)
    
    col_index = player_row_22.columns.get_loc("capital") + 1
    depth_team_current = df_current.query(f"player_name == \"{player_name}\"")["depth_team"].iloc[0]
    player_row_22.insert(loc=col_index, column="depth_team", value=depth_team_current)

    transform_cols = list(player_row_22.columns)[1:]
    sample = scaler.transform(player_row_22[transform_cols])
    
    previous_fp = df_last.query(f"season == 2022 and player_name == \"{player_name}\"")["fantasy_points"].iloc[0]
    current_fp = df_current.query(f"player_name == \"{player_name}\"")["fantasy_points"].iloc[0]
    
    answer = current_fp - previous_fp
    prediction = model.predict(sample)[0]
    
#     print("```")
#     print(f"Previous fantasy points: {previous_fp}")
#     print(f"Current fantasy points: {current_fp}")
#     print(f"Difference (p - a): {answer}")
#     print(f"Prediction: {prediction}")
#     print("```")
    
    return prediction, answer

In [35]:
correct = []
incorrect = []
model = best_model
for name in df_wr.query("season == 2022 and games >= 10")["player_name"]:
    if name in df_2023["player_name"].values and name != "Ja'Marr Chase":
        player_name = name
        
        if not df_2023.query(f"player_name == '{player_name}' and games >= 8").empty:
            prediction, answer = compare_model_to_current_year(df_wr, df_2023, player_name, model)

            if answer < 0:
                answer = -1
            elif answer == 0:
                answer = 0
            elif answer > 0:
                answer = 1

            if answer == prediction:
                correct.append((name, answer, prediction))
            else:
                incorrect.append((name, answer, prediction))

In [36]:
print(len(correct), len(incorrect), round(len(correct) / (len(incorrect) + len(correct)) * 100, 2))

48 27 64.0


In [37]:
df_correct = pd.DataFrame(columns=['Name', 'Age', 'Answer', 'Prediction', 'QBR'])
for name, answer, pred in correct:
    if ((answer + 1) != pred) or ((answer - 1) != pred):
        info = df_2023.query(f"player_name == '{name}'")
        
        games = info["games"].iloc[0]
        fantasy_points = info["fantasy_points"].iloc[0]
        if games >= 8 and fantasy_points >= 50:
            qbr = round(info["qbr"].iloc[0], 2)
            age = info["age"].iloc[0]
            
            row = (name, age, answer, pred, qbr)
            row_df = pd.DataFrame([row], columns=df_correct.columns)
            df_correct = pd.concat([df_correct, row_df], ignore_index=True)

  df_correct = pd.concat([df_correct, row_df], ignore_index=True)


In [38]:
df_incorrect = pd.DataFrame(columns=['Name', 'Age', 'Answer', 'Prediction', 'QBR'])
for name, answer, pred in incorrect:
    if ((answer + 1) != pred) or ((answer - 1) != pred):
        info = df_2023.query(f"player_name == '{name}'")
        
        games = info["games"].iloc[0]
        fantasy_points = info["fantasy_points"].iloc[0]
        if games >= 8 and fantasy_points >= 50:
            qbr = round(info["qbr"].iloc[0], 2)
            age = info["age"].iloc[0]
            
            row = (name, age, answer, pred, qbr)
            row_df = pd.DataFrame([row], columns=df_incorrect.columns)
            df_incorrect = pd.concat([df_incorrect, row_df], ignore_index=True)

  df_incorrect = pd.concat([df_incorrect, row_df], ignore_index=True)


In [39]:
df_incorrect.query("Prediction == -1.0")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
0,Keenan Allen,31.0,1,-1.0,93.22
1,Brandin Cooks,29.0,1,-1.0,104.25
2,Mike Evans,30.0,1,-1.0,96.21
3,Nelson Agholor,30.0,1,-1.0,97.24
4,Tyreek Hill,29.0,1,-1.0,105.4
6,Josh Reynolds,28.0,1,-1.0,98.28
7,D.J. Chark,26.0,1,-1.0,75.18
8,D.J. Moore,26.0,1,-1.0,84.46
11,Michael Pittman,25.0,1,-1.0,87.25
12,CeeDee Lamb,24.0,1,-1.0,104.25


In [40]:
df_incorrect.query("Prediction == 1.0")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
5,Chris Godwin,27.0,-1,1.0,96.21
9,Diontae Johnson,27.0,-1,1.0,81.43
10,Gabe Davis,24.0,-1,1.0,93.72
15,Josh Palmer,23.0,-1,1.0,93.22
17,Alec Pierce,23.0,-1,1.0,87.25


In [41]:
df_correct.query("Answer == 1")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
7,Courtland Sutton,27.0,1,1.0,98.0
8,Justin Watson,27.0,1,1.0,91.71
12,DK Metcalf,25.0,1,1.0,90.41
16,Deebo Samuel,27.0,1,1.0,112.16
17,Brandon Aiyuk,25.0,1,1.0,112.16
23,Tutu Atwell,23.0,1,1.0,93.01
25,Elijah Moore,23.0,1,1.0,85.11
27,Chris Olave,23.0,1,1.0,93.59
28,Khalil Shakir,23.0,1,1.0,93.72
31,Romeo Doubs,23.0,1,1.0,91.8


In [42]:
df_correct.query("Answer == -1")

Unnamed: 0,Name,Age,Answer,Prediction,QBR
0,Davante Adams,30.0,-1,-1.0,78.1
1,Amari Cooper,29.0,-1,-1.0,85.11
2,Stefon Diggs,29.0,-1,-1.0,93.72
3,Tyler Lockett,30.0,-1,-1.0,90.41
4,Tyler Boyd,28.0,-1,-1.0,91.0
5,Curtis Samuel,27.0,-1,-1.0,80.51
6,Noah Brown,27.0,-1,-1.0,98.75
9,Michael Gallup,27.0,-1,-1.0,104.25
10,Christian Kirk,26.0,-1,-1.0,89.06
11,Darius Slayton,26.0,-1,-1.0,88.9
