# Imports

In [1]:
import plotly.express as px
import pandas as pd
import numpy as np

In [2]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers_df import create_lag_df, create_train_and_test_sets

In [3]:
base_path = "../../interactive/WR/stability/season"

# Grab Data

In [4]:
df_rec = pd.read_pickle("./wr-simple-data-2012-2022.pkl")

In [5]:
df_rec["yac_per_reception"] = df_rec["receiving_yards_after_catch"] / df_rec["receptions"]

In [6]:
df_wr = df_rec.query("position == 'WR'").copy()

# Normalize Data

In [7]:
numeric_columns = df_wr.select_dtypes(include=[np.number]).columns

In [8]:
for col in numeric_columns:
    if "season" not in col:
        df_wr[f"{col}_normalized"] = df_wr[col] / df_wr[col].max()

In [9]:
df_wr_normalized = df_wr.copy()

In [10]:
a = df_wr_normalized['yac_per_reception_normalized'] + df_wr_normalized['ay_sh_normalized'] + \
    df_wr_normalized['tgt_sh_normalized']
df_wr_normalized["trinity"] = a / 3

In [11]:
df_wr_normalized.dropna(inplace=True)

# Lag Version

In [12]:
df_lag = create_lag_df(df_wr_normalized)

In [13]:
df_starter_lag = df_lag.query("games >= 10 and games_last >= 10").copy()

# Prep Data For Models

In [14]:
df_prep = df_starter_lag.copy()

In [15]:
df_prep["new_metric"] = df_prep["receiving_yards_normalized_last"] + \
    df_prep['receiving_yards_after_catch_normalized_last'] + \
    df_prep['ay_sh_normalized_last'] + \
    df_prep['yptmpa_normalized_last']

In [16]:
df_prep["new_metric"] = df_prep["new_metric"] / 4

In [17]:
# x_cols = ['receiving_yards_normalized_last',
#    'receiving_yards_after_catch_normalized_last',
#    'ay_sh_normalized_last',
#    'yptmpa_normalized_last']
# x_cols = ["new_metric"]

In [18]:
df_prep.columns[60:]

Index(['rtd_sh_normalized', 'rfd_sh_normalized', 'rtdfd_sh_normalized',
       'dom_normalized', 'w8dom_normalized', 'yptmpa_normalized',
       'ppr_sh_normalized', 'age_normalized', 'yac_per_reception_normalized',
       'trinity', 'receptions_last', 'targets_last', 'receiving_yards_last',
       'receiving_tds_last', 'receiving_fumbles_last',
       'receiving_fumbles_lost_last', 'receiving_air_yards_last',
       'receiving_yards_after_catch_last', 'receiving_first_downs_last',
       'receiving_epa_last', 'receiving_2pt_conversions_last', 'racr_last',
       'target_share_last', 'air_yards_share_last', 'wopr_x_last',
       'fantasy_points_last', 'fantasy_points_ppr_last', 'games_last',
       'tgt_sh_last', 'ay_sh_last', 'yac_sh_last', 'wopr_y_last', 'ry_sh_last',
       'rtd_sh_last', 'rfd_sh_last', 'rtdfd_sh_last', 'dom_last', 'w8dom_last',
       'yptmpa_last', 'ppr_sh_last', 'age_last', 'position_last',
       'player_name_last', 'yac_per_reception_last',
       'receptions_n

In [19]:
# x_cols = ['receptions_normalized_last',
#        'targets_normalized_last', 'receiving_yards_normalized_last',
#        'receiving_air_yards_normalized_last',
#        'receiving_yards_after_catch_normalized_last',
#        'receiving_first_downs_normalized_last',
#        'racr_normalized_last',
#        'wopr_x_normalized_last',
#        'tgt_sh_normalized_last', 'ay_sh_normalized_last',
#        'yac_sh_normalized_last', 'wopr_y_normalized_last',
#        'ry_sh_normalized_last',
#        'rfd_sh_normalized_last',
#        'dom_normalized_last', 'w8dom_normalized_last',
#        'yptmpa_normalized_last', 'yac_per_reception_normalized_last', "age_last", "age"]

In [20]:
# x_cols = ['receptions_last', 'targets_last', 'receiving_yards_last',
#        'receiving_tds_last', 'receiving_fumbles_last',
#        'receiving_fumbles_lost_last', 'receiving_air_yards_last',
#        'receiving_yards_after_catch_last', 'receiving_first_downs_last',
#        'receiving_epa_last', 'receiving_2pt_conversions_last', 'racr_last',
#        'target_share_last', 'air_yards_share_last', 'wopr_x_last',
#        'fantasy_points_last', 'fantasy_points_ppr_last', 'games_last',
#        'tgt_sh_last', 'ay_sh_last', 'yac_sh_last', 'wopr_y_last', 'ry_sh_last',
#        'rtd_sh_last', 'rfd_sh_last', 'rtdfd_sh_last', 'dom_last', 'w8dom_last',
#        'yptmpa_last', 'ppr_sh_last', 'age_last',
#        'yac_per_reception_last', 'age']

In [140]:
x_cols = ["new_metric"]

In [141]:
# x_cols = ["trinity_last"]

In [142]:
inference_col = "fantasy_points"

In [143]:
X_train, X_test, y_train, y_test = create_train_and_test_sets(df_prep,
                                                              x_cols=x_cols,
                                                              inference_col = inference_col
                                                             )

```
Length of train set: 584
Length of test set: 146
Length of data set: 730
```


# Normalization For Models

In [144]:
from sklearn.preprocessing import StandardScaler

In [145]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Explore Regression Models

## MLR

In [146]:
from sklearn.linear_model import LinearRegression

In [147]:
mlr = LinearRegression().fit(X_train_scaled, y_train)

In [148]:
score = round(mlr.score(X_test_scaled, y_test), 3)

In [149]:
score

0.379

## Evaluation

In [150]:
def change_direction_correct(row):
    if (row['Actual_Change'] >= 0 and row['Predicted_Change'] >= 0) or (row['Actual_Change'] < 0 and row['Predicted_Change'] < 0):
        return True
    else:
        return False

In [151]:
previous_season_points = df_prep.loc[y_test.index, 'fantasy_points_last']
player_names = df_prep.loc[y_test.index, 'player_name']
season = df_prep.loc[y_test.index, 'season']

In [152]:
df_error_graph_mlr = pd.DataFrame({
    'Actual': y_test,
    'Predicted': mlr.predict(X_test_scaled),
    'Previous_Season_Fantasy': previous_season_points,
    'Player_Name': player_names,
    'Current_Season': season
})

In [153]:
df_error_graph_mlr['Actual_Change'] = df_error_graph_mlr['Actual'] - df_error_graph_mlr['Previous_Season_Fantasy']
df_error_graph_mlr['Predicted_Change'] = df_error_graph_mlr['Predicted'] - df_error_graph_mlr['Previous_Season_Fantasy']

In [154]:
df_error_graph_mlr['Direction_Correct'] = df_error_graph_mlr.apply(change_direction_correct, axis=1)

In [155]:
# Creating the scatter plot
fig = px.scatter(df_error_graph_mlr, x='Actual', y='Predicted', color="Direction_Correct",
                color_discrete_map={True: 'green', False: 'red'}, hover_data=['Player_Name', 'Current_Season',
                                                                              'Previous_Season_Fantasy'])

# Adding line of equality (where actual equals predicted)
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=df_error_graph_mlr['Actual'].min(),
    y0=df_error_graph_mlr['Actual'].min(),
    x1=df_error_graph_mlr['Actual'].max(),
    y1=df_error_graph_mlr['Actual'].max()
)

# Updating layout
fig.update_layout(
    title='Actual vs Predicted Fantasy Production (MLR - New Metric)',
    xaxis_title='Actual Fantasy Production',
    yaxis_title='Predicted Fantasy Production',
    showlegend=False
)

fig.write_html("../../interactive/WR/stability/season/new-metric-mlr.html")

# Show the plot
fig.show()

In [156]:
total_irrel = len(df_error_graph_mlr.query("Actual <= 101"))
total_irrel_correct = len(df_error_graph_mlr.query("Actual <= 101 and Direction_Correct == True"))
print(f"Total irrelevant: {total_irrel}")
print(f"Total irrelevant direction correct: {total_irrel_correct}")
print(f"Percent direction correct: {round(total_irrel_correct / total_irrel, 3) * 100}%")

print()
total_rel = len(df_error_graph_mlr.query("Actual >= 101"))
total_rel_correct = len(df_error_graph_mlr.query("Actual >= 101 and Direction_Correct == True"))
print(f"Total relevant: {total_rel}")
print(f"Total relevant direction correct: {total_rel_correct}")
print(f"Percent direction correct: {round(total_rel_correct / total_rel, 3) * 100}%")

print()
total = len(df_error_graph_mlr)
total_dir_correct = len(df_error_graph_mlr.query("Direction_Correct == True"))
print(f"Total: {total}")
print(f"Total direction correct: {total_dir_correct}")
print(f"Percent direction correct: {round(total_dir_correct / total, 3) * 100}%")

Total irrelevant: 82
Total irrelevant direction correct: 56
Percent direction correct: 68.30000000000001%

Total relevant: 64
Total relevant direction correct: 36
Percent direction correct: 56.2%

Total: 146
Total direction correct: 92
Percent direction correct: 63.0%


## Random Forest

In [157]:
from sklearn.ensemble import RandomForestRegressor

In [158]:
start = 1
stop = 15 + 1

In [159]:
best_score = 0
best_model = None

In [160]:
for depth in range(start, stop):
    rfr = RandomForestRegressor(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    if score > best_score:
        print(f"New best score: {score}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")

New best score: 0.3535607804145986
New best score: 0.3862389230220321

Done :)


In [161]:
df_error_graph_rfr = pd.DataFrame({
    'Actual': y_test,
    'Predicted': rfr.predict(X_test_scaled),
    'Previous_Season_Fantasy': previous_season_points,
    'Player_Name': player_names,
    'Current_Season': season
})

In [162]:
df_error_graph_rfr['Actual_Change'] = df_error_graph_rfr['Actual'] - df_error_graph_rfr['Previous_Season_Fantasy']
df_error_graph_rfr['Predicted_Change'] = df_error_graph_rfr['Predicted'] - df_error_graph_rfr['Previous_Season_Fantasy']

In [163]:
# Apply the function
df_error_graph_rfr['Direction_Correct'] = df_error_graph_rfr.apply(change_direction_correct, axis=1)

In [164]:
# Creating the scatter plot
fig = px.scatter(df_error_graph_rfr, x='Actual', y='Predicted', color="Direction_Correct",
                color_discrete_map={True: 'green', False: 'red'}, hover_data=['Player_Name', 'Current_Season',
                                                                              'Previous_Season_Fantasy'])

# Adding line of equality (where actual equals predicted)
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=df_error_graph_rfr['Actual'].min(),
    y0=df_error_graph_rfr['Actual'].min(),
    x1=df_error_graph_rfr['Actual'].max(),
    y1=df_error_graph_rfr['Actual'].max()
)

# Updating layout
fig.update_layout(
    title='Actual vs Predicted Fantasy Production (RFR - New Metric)',
    xaxis_title='Actual Fantasy Production',
    yaxis_title='Predicted Fantasy Production',
    showlegend=False
)

fig.write_html("../../interactive/WR/stability/season/new-metric-rfr.html")

# Show the plot
fig.show()

In [165]:
total_irrel = len(df_error_graph_rfr.query("Actual <= 101"))
total_irrel_correct = len(df_error_graph_rfr.query("Actual <= 101 and Direction_Correct == True"))
print(f"Total irrelevant: {total_irrel}")
print(f"Total irrelevant direction correct: {total_irrel_correct}")
print(f"Percent direction correct: {round(total_irrel_correct / total_irrel, 3) * 100}%")

print()
total_rel = len(df_error_graph_rfr.query("Actual >= 101"))
total_rel_correct = len(df_error_graph_rfr.query("Actual >= 101 and Direction_Correct == True"))
print(f"Total relevant: {total_rel}")
print(f"Total relevant direction correct: {total_rel_correct}")
print(f"Percent direction correct: {round(total_rel_correct / total_rel, 3) * 100}%")

print()
total = len(df_error_graph_rfr)
total_dir_correct = len(df_error_graph_rfr.query("Direction_Correct == True"))
print(f"Total: {total}")
print(f"Total direction correct: {total_dir_correct}")
print(f"Percent direction correct: {round(total_dir_correct / total, 3) * 100}%")

Total irrelevant: 82
Total irrelevant direction correct: 49
Percent direction correct: 59.8%

Total relevant: 64
Total relevant direction correct: 39
Percent direction correct: 60.9%

Total: 146
Total direction correct: 88
Percent direction correct: 60.3%
