# Imports

In [29]:
import plotly.express as px
import pandas as pd
import numpy as np
import math

In [30]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers_df import create_lag_df, create_train_and_test_sets

# Grab Data

### Roster Data

In [31]:
df_roster = pd.read_pickle("../../interactive/df_roster_2012_2022.pkl")

In [32]:
df_roster_seasonal = df_roster.groupby(["position", "player_name", "player_id"]).agg({"age": ["mean"]})

In [33]:
df_roster_seasonal.columns = list(map("_".join, df_roster_seasonal.columns))
df_roster_seasonal.reset_index(inplace=True)

In [34]:
df_roster_seasonal["age"] = df_roster_seasonal["age_mean"].dropna().apply(math.floor)
df_roster_seasonal.drop(columns=["age_mean"], inplace=True)

### Seasonal Data

In [35]:
df_seasonal = pd.read_pickle("../../interactive/df_seasonal_2012_2022.pkl")

### Merged Data

In [36]:
df_merge = df_seasonal.merge(df_roster_seasonal, on="player_id", how="left")

### Running Back Data

In [37]:
df_rbs = df_merge.copy().query("season_type == 'REG' and position == 'RB'").drop_duplicates().dropna()

In [38]:
df_rbs["total_yards"] = df_rbs["rushing_yards"] + df_rbs["receiving_yards"]
df_rbs["total_tds"] = df_rbs["rushing_tds"] + df_rbs["receiving_tds"]
df_rbs["touches"] = df_rbs["targets"] + df_rbs["carries"]
df_rbs["total_first_downs"] = df_rbs["rushing_first_downs"] + df_rbs["receiving_first_downs"]

In [39]:
df_rbs.drop(columns=['completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'special_teams_tds'], inplace=True)

# Normalize Data

In [40]:
numeric_columns = df_rbs.select_dtypes(include=[np.number]).columns
for col in numeric_columns:
    if "season" not in col:
        df_rbs[f"{col}_normalized"] = df_rbs[col] / df_rbs[col].max()

In [41]:
df_rb_normalized = df_rbs.copy()

In [42]:
df_rb_normalized.dropna(inplace=True)

# Lag Version

In [43]:
df_lag = create_lag_df(df_rb_normalized)

In [44]:
new_metric = ['rushing_fumbles_lost',
   'target_share',
   'tgt_sh',
   'dom',
   'yptmpa',
   'ppr_sh']
for m in new_metric:
    print(f"<i>{m}</i>,")

<i>rushing_fumbles_lost</i>,
<i>target_share</i>,
<i>tgt_sh</i>,
<i>dom</i>,
<i>yptmpa</i>,
<i>ppr_sh</i>,


In [45]:
# new_metric = ['games_last', 'wopr_y_last', 'dom_last', 'age_last']
# new_metric = ['carries_last', 'rushing_tds_last', 'rtd_sh_last']
# new_metric = ['wopr_y_last', 'tgt_sh_last', 'receptions_last', 'carries_last']

new_metric = ['rushing_fumbles_lost',
   'target_share',
   'tgt_sh',
   'dom',
   'yptmpa',
   'ppr_sh']

a = np.zeros(len(df_lag))
for col in new_metric:
    a += df_lag[col.replace("_last", "_normalized_last")]
#     a += df_rb_normalized[col]
    
df_lag["new_metric_last"] = a / len(new_metric)

In [46]:
df_lag[["new_metric_last", "fantasy_points_ppr"]].corr()

Unnamed: 0,new_metric_last,fantasy_points_ppr
new_metric_last,1.0,0.857489
fantasy_points_ppr,0.857489,1.0


In [47]:
df_prep = df_lag.query("games >= 10 and games_last >= 10 and \
    total_yards >= 800 and total_yards_last >= 800").copy()

In [48]:
df_prep[["new_metric_last", "fantasy_points_ppr"]].corr()

Unnamed: 0,new_metric_last,fantasy_points_ppr
new_metric_last,1.0,0.623919
fantasy_points_ppr,0.623919,1.0


## Prep Data For Model

In [None]:
x_cols = ["new_metric_last"]

In [None]:
inference_col = "fantasy_points_ppr"

In [None]:
X_train, X_test, y_train, y_test = create_train_and_test_sets(df_lag,
                                                              x_cols=x_cols,
                                                              inference_col = inference_col
                                                             )

# Normalization For Models

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Exploring Models

## MLR

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
mlr = LinearRegression().fit(X_train_scaled, y_train)

In [None]:
score = round(mlr.score(X_test_scaled, y_test), 3)

In [None]:
score

## Evaluation

In [25]:
def change_direction_correct(row):
    if (row['Actual_Change'] >= 0 and row['Predicted_Change'] >= 0) or (row['Actual_Change'] < 0 and row['Predicted_Change'] < 0):
        return True
    else:
        return False

In [26]:
# previous_season_points = df_prep.loc[y_test.index, 'fantasy_points_ppr_last']
# player_names = df_prep.loc[y_test.index, 'player_name']
# season = df_prep.loc[y_test.index, 'season']

previous_season_points = df_lag.loc[y_test.index, 'fantasy_points_ppr_last']
player_names = df_lag.loc[y_test.index, 'player_name']
season = df_lag.loc[y_test.index, 'season']

NameError: name 'y_test' is not defined

In [None]:
df_error_graph_mlr = pd.DataFrame({
    'Actual': y_test,
    'Predicted': mlr.predict(X_test_scaled),
    'Previous_Season_Fantasy': previous_season_points,
    'Player_Name': player_names,
    'Current_Season': season
})

In [27]:
df_error_graph_mlr['Actual_Change'] = df_error_graph_mlr['Actual'] - df_error_graph_mlr['Previous_Season_Fantasy']
df_error_graph_mlr['Predicted_Change'] = df_error_graph_mlr['Predicted'] - df_error_graph_mlr['Previous_Season_Fantasy']
df_error_graph_mlr['Direction_Correct'] = df_error_graph_mlr.apply(change_direction_correct, axis=1)

NameError: name 'df_error_graph_mlr' is not defined

In [28]:
# Creating the scatter plot
fig = px.scatter(df_error_graph_mlr, x='Actual', y='Predicted', color="Direction_Correct",
                color_discrete_map={True: 'green', False: 'red'}, hover_data=['Player_Name', 'Current_Season',
                                                                              'Previous_Season_Fantasy'])

# Adding line of equality (where actual equals predicted)
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=df_error_graph_mlr['Actual'].min(),
    y0=df_error_graph_mlr['Actual'].min(),
    x1=df_error_graph_mlr['Actual'].max(),
    y1=df_error_graph_mlr['Actual'].max()
)

# Updating layout
fig.update_layout(
    title='Actual vs Predicted Fantasy Production (MLR Generalized - New Metric)',
    xaxis_title='Actual Fantasy Production',
    yaxis_title='Predicted Fantasy Production',
    showlegend=False
)

# fig.write_html("../../interactive/RB/fantasy-metrics/new-metric-mlr-generalized.html")

# Show the plot
fig.show()

NameError: name 'df_error_graph_mlr' is not defined

In [None]:
print()
total = len(df_error_graph_mlr)
total_dir_correct = len(df_error_graph_mlr.query("Direction_Correct == True"))
print(f"Total: {total}")
print(f"Total direction correct: {total_dir_correct}")
print(f"Percent direction correct: {round(total_dir_correct / total, 3) * 100}%")

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
start = 1
stop = 15 + 1

In [None]:
best_score = -10
best_model = None

In [None]:
for depth in range(start, stop):
    rfr = RandomForestRegressor(max_depth=depth, random_state=0)
    rfr.fit(X_train_scaled, y_train)
    
    score = rfr.score(X_test_scaled, y_test)
    if score > best_score:
        print(f"New best score: {score}")
        best_score = score
        best_model = rfr
        
print("\nDone :)")

In [None]:
df_error_graph_rfr = pd.DataFrame({
    'Actual': y_test,
    'Predicted': rfr.predict(X_test_scaled),
    'Previous_Season_Fantasy': previous_season_points,
    'Player_Name': player_names,
    'Current_Season': season
})

In [None]:
df_error_graph_rfr['Actual_Change'] = df_error_graph_rfr['Actual'] - df_error_graph_rfr['Previous_Season_Fantasy']
df_error_graph_rfr['Predicted_Change'] = df_error_graph_rfr['Predicted'] - df_error_graph_rfr['Previous_Season_Fantasy']

In [None]:
# Apply the function
df_error_graph_rfr['Direction_Correct'] = df_error_graph_rfr.apply(change_direction_correct, axis=1)

In [None]:
# Creating the scatter plot
fig = px.scatter(df_error_graph_rfr, x='Actual', y='Predicted', color="Direction_Correct",
                color_discrete_map={True: 'green', False: 'red'}, hover_data=['Player_Name', 'Current_Season',
                                                                              'Previous_Season_Fantasy'])

# Adding line of equality (where actual equals predicted)
fig.add_shape(
    type='line',
    line=dict(dash='dash'),
    x0=df_error_graph_rfr['Actual'].min(),
    y0=df_error_graph_rfr['Actual'].min(),
    x1=df_error_graph_rfr['Actual'].max(),
    y1=df_error_graph_rfr['Actual'].max()
)

# Updating layout
fig.update_layout(
    title='Actual vs Predicted Fantasy Production (RFR Generalized - New Metric)',
    xaxis_title='Actual Fantasy Production',
    yaxis_title='Predicted Fantasy Production',
    showlegend=False
)

# fig.write_html("../../interactive/RB/fantasy-metrics/new-metric-rfr-generalized.html")

# Show the plot
fig.show()

In [None]:
print()
total = len(df_error_graph_rfr)
total_dir_correct = len(df_error_graph_rfr.query("Direction_Correct == True"))
print(f"Total: {total}")
print(f"Total direction correct: {total_dir_correct}")
print(f"Percent direction correct: {round(total_dir_correct / total, 3) * 100}%")