# Imports

In [2]:
import plotly.express as px
import pandas as pd
import numpy as np
import math

In [3]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers_df import create_lag_df, create_train_and_test_sets

# Grab Data

### Roster Data

In [4]:
df_roster = pd.read_pickle("../../interactive/df_roster_2012_2022.pkl")

In [5]:
df_roster_seasonal = df_roster.groupby(["position", "player_name", "player_id"]).agg({"age": ["mean"]})

In [6]:
df_roster_seasonal.columns = list(map("_".join, df_roster_seasonal.columns))
df_roster_seasonal.reset_index(inplace=True)

In [7]:
df_roster_seasonal["age"] = df_roster_seasonal["age_mean"].dropna().apply(math.floor)
df_roster_seasonal.drop(columns=["age_mean"], inplace=True)

### Seasonal Data

In [8]:
df_seasonal = pd.read_pickle("../../interactive/df_seasonal_2012_2022.pkl")

### Merged Data

In [9]:
df_merge = df_seasonal.merge(df_roster_seasonal, on="player_id", how="left")

### Running Back Data

In [10]:
df_rbs = df_merge.copy().query("season_type == 'REG' and position == 'RB'").drop_duplicates().dropna()

In [11]:
df_rbs["total_yards"] = df_rbs["rushing_yards"] + df_rbs["receiving_yards"]
df_rbs["total_tds"] = df_rbs["rushing_tds"] + df_rbs["receiving_tds"]
df_rbs["touches"] = df_rbs["targets"] + df_rbs["carries"]
df_rbs["total_first_downs"] = df_rbs["rushing_first_downs"] + df_rbs["receiving_first_downs"]

In [12]:
df_rbs.drop(columns=['completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'special_teams_tds'], inplace=True)

# Normalize Data

In [13]:
numeric_columns = df_rbs.select_dtypes(include=[np.number]).columns
for col in numeric_columns:
    if "season" not in col:
        df_rbs[f"{col}_normalized"] = df_rbs[col] / df_rbs[col].max()

In [14]:
df_rb_normalized = df_rbs.copy()

In [15]:
new_metric = ['rushing_fumbles_lost',
   'target_share',
   'tgt_sh',
   'dom',
   'yptmpa',
   'ppr_sh']
a = np.zeros(len(df_rb_normalized))
for col in new_metric:
    a += df_rb_normalized[col]
    
df_rb_normalized["new_metric"] = a / len(new_metric)

In [16]:
df_rb_normalized.dropna(inplace=True)

# Lag Version

In [100]:
df_lag = create_lag_df(df_rb_normalized)

In [101]:
df_prep = df_lag.query("games >= 10 and games_last >= 10 and \
    total_yards >= 800 and total_yards_last >= 800").copy()

In [102]:
for i, row in df_prep.iterrows():
    fp = row["fantasy_points_ppr"]
    fp_last = row["fantasy_points_ppr_last"]
    change = fp - fp_last

    # increase
    if change > 0:
        change = 1
    
    # no change
    else:
        change = 0
    
    df_prep.at[i, "change"] = change

# Prep Data For Model

In [103]:
x_cols = ["new_metric_last"]

In [104]:
inference_col = "change"

In [105]:
X_train, X_test, y_train, y_test = create_train_and_test_sets(df_prep,
                                                              x_cols=x_cols,
                                                              inference_col = inference_col
                                                             )

```
Length of train set: 141
Length of test set: 36
Length of data set: 177
```


# Normalization For Models

In [106]:
from sklearn.preprocessing import StandardScaler

In [107]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Exploring Models

## Random Forest

In [108]:
from sklearn.ensemble import RandomForestClassifier

In [109]:
start = 1
stop = 15 + 1

In [110]:
best_score = -10
best_model = None

In [111]:
for depth in range(start, stop):
    rfr = RandomForestClassifier(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    if score > best_score:
        print(f"New best score: {score}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")

New best score: 0.6666666666666666

Done :)


In [112]:
df_error_graph_rfr = pd.DataFrame({
    'Actual_Change': y_test,
    'Predicted_Change': rfr.predict(X_test_scaled),
    'Player_Name': player_names,
    'Current_Season': season
})

In [113]:
# Apply the function
df_error_graph_rfr['Direction_Correct'] = df_error_graph_rfr.apply(change_direction_correct, axis=1)

In [114]:
# Creating the scatter plot
fig = px.scatter(df_error_graph_rfr, x='Actual_Change', y='Predicted_Change', color="Direction_Correct",
                color_discrete_map={True: 'green', False: 'red'}, hover_data=['Player_Name', 'Current_Season',
                                                                              ])

# Adding line of equality (where actual equals predicted)
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=df_error_graph_rfr['Actual_Change'].min(),
    y0=df_error_graph_rfr['Actual_Change'].min(),
    x1=df_error_graph_rfr['Actual_Change'].max(),
    y1=df_error_graph_rfr['Actual_Change'].max()
)

# Updating layout
fig.update_layout(
    title='Actual vs Predicted Fantasy Production (RFR - New Metric)',
    xaxis_title='Actual Fantasy Production',
    yaxis_title='Predicted Fantasy Production',
    showlegend=False
)

# fig.write_html("../../interactive/WR/stability/season/new-metric-rfr.html")

# Show the plot
fig.show()

In [115]:
df_error_graph_rfr.query("Actual_Change != Predicted_Change")

Unnamed: 0,Actual_Change,Predicted_Change,Player_Name,Current_Season,Direction_Correct
420,0.0,1.0,C.J. Anderson,2015,False
327,1.0,0.0,Alfred Morris,2014,False
892,1.0,0.0,James Conner,2021,False
846,0.0,1.0,Ezekiel Elliott,2017,False
709,0.0,1.0,Melvin Gordon,2019,False
736,0.0,1.0,Todd Gurley,2020,False
603,0.0,1.0,Devonta Freeman,2016,False
164,1.0,0.0,Rashad Jennings,2015,False
400,0.0,1.0,Lamar Miller,2018,False
851,0.0,1.0,Ezekiel Elliott,2022,False


In [116]:
print()
total = len(df_error_graph_rfr)
total_dir_correct = len(df_error_graph_rfr.query("Direction_Correct == True"))
print(f"Total: {total}")
print(f"Total direction correct: {total_dir_correct}")
print(f"Percent direction correct: {round(total_dir_correct / total, 3) * 100}%")


Total: 36
Total direction correct: 18
Percent direction correct: 50.0%
