# Imports

In [51]:
import plotly.express as px
import pandas as pd
import numpy as np

In [2]:
import sys
import os
# Add the parent directory to the module search path so the helper modules imported
# below can be found (presumably helpers_graph / helpers_df live there — confirm).
sys.path.append(os.path.abspath('..'))
from helpers_graph import plot_correlation_matrix, create_correlation_matrix, plot_scatter
from helpers_df import create_lag_df

# Grab Data

In [3]:
# Receiving stats per player and season (the file name suggests 2012-2022).
# NOTE(review): unpickling can execute arbitrary code; only load pickle files from a trusted source.
df_rec = pd.read_pickle("./wr-simple-data-2012-2022.pkl")

In [87]:
# List the columns available in the receiving-stats frame.
df_rec.columns

Index(['player_id', 'season', 'season_type', 'receptions', 'targets',
       'receiving_yards', 'receiving_tds', 'receiving_fumbles',
       'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr_x', 'fantasy_points', 'fantasy_points_ppr', 'games', 'tgt_sh',
       'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh',
       'dom', 'w8dom', 'yptmpa', 'ppr_sh', 'age', 'position', 'player_name'],
      dtype='object')

In [4]:
# Depth-chart data; the file name suggests per-player means for 2012-2022 (TODO confirm).
df_depth_chart = pd.read_pickle("../../interactive/df_dc_mean_2012_2022.pkl")

In [5]:
# Expose gsis_id under the name player_id, the key df_rec uses, so the frames can be merged on it.
df_depth_chart["player_id"] = df_depth_chart["gsis_id"]

In [12]:
# df_pfr_rec = pd.read_pickle("../../interactive/df_pfr_rec_2018_2022.pkl")

In [13]:
# df_pfr_rec.columns

In [14]:
# Next Gen Stats receiving data (the file name suggests 2016-2022).
df_ngs_rec = pd.read_pickle("../../interactive/df_ngs_rec_2016_2022.pkl")

In [25]:
# Average every NGS receiving metric per (season, player). Each metric is mapped to the
# list ["mean"], so the result carries two-level column labels such as
# ("avg_cushion", "mean") and keeps the group keys in the index.
ngs_group_keys = ["season", "player_gsis_id"]
ngs_metrics = [
    "avg_cushion",
    "avg_separation",
    "avg_intended_air_yards",
    "percent_share_of_intended_air_yards",
    "catch_percentage",
    'avg_yac',
    'avg_expected_yac',
    'avg_yac_above_expectation',
]
df_ngs_needed = (
    df_ngs_rec[ngs_group_keys + ngs_metrics]
    .copy()
    .groupby(ngs_group_keys)
    .agg({metric: ["mean"] for metric in ngs_metrics})
)

In [27]:
# Collapse the two-level (stat, aggfunc) column labels into flat names such as
# "avg_cushion_mean" and move the group keys out of the index into regular columns.
# The MultiIndex guard keeps this cell idempotent: on a second run the labels are already
# plain strings, so "_".join would splice an underscore between every character, and
# reset_index would add a stray "index" column.
if isinstance(df_ngs_needed.columns, pd.MultiIndex):
    df_ngs_needed.columns = ["_".join(levels) for levels in df_ngs_needed.columns]
    df_ngs_needed = df_ngs_needed.reset_index()

In [29]:
# Give the NGS key the same name (player_id) that the other frames are merged on.
df_ngs_needed = df_ngs_needed.rename(columns={"player_gsis_id": "player_id"})

# Creating Merged DF with All Stats

In [30]:
# Attach depth_team_mean to every receiving row; the left join keeps receiving rows
# that have no depth-chart match (their depth_team_mean is NaN).
depth_chart_cols = ["player_id", "season", "depth_team_mean"]
df_merge_one = df_rec.merge(
    df_depth_chart[depth_chart_cols],
    how="left",
    on=["player_id", "season"],
)

In [37]:
# Discard rows with a missing value in ANY column, then keep seasons from 2016 onwards
# (presumably because the NGS file starts in 2016 — confirm).
df_merge_one = df_merge_one.dropna().query("season >= 2016")

In [44]:
# Add the per-season NGS averages; rows without NGS data get NaN in those columns.
df_merge = df_merge_one.merge(df_ngs_needed, how="left", on=["player_id", "season"])

In [46]:
# Keep only fully populated rows (removes rows with a missing value in any column).
df_merge = df_merge.dropna()

In [48]:
# Restrict to rows whose position is 'WR'; copy so later changes do not touch df_merge.
df_wr = df_merge.query("position == 'WR'").copy()

# Create Lag Version of Data

In [49]:
# Build the lagged frame with the project helper. Later cells rely on it providing
# "<stat>_last" columns (e.g. games_last); presumably these hold the previous season's
# values — confirm in helpers_df.
df_lag = create_lag_df(df_wr)

# Prepping Axes for Correlation Matrix

In [50]:
# Print every column whose name does not contain "last".
non_lag_columns = [name for name in df_lag.columns if "last" not in name]
print(non_lag_columns)

['player_id', 'season', 'season_type', 'receptions', 'targets', 'receiving_yards', 'receiving_tds', 'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards', 'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa', 'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share', 'wopr_x', 'fantasy_points', 'fantasy_points_ppr', 'games', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh', 'rtd_sh', 'rfd_sh', 'rtdfd_sh', 'dom', 'w8dom', 'yptmpa', 'ppr_sh', 'age', 'position', 'player_name', 'depth_team_mean', 'avg_cushion_mean', 'avg_separation_mean', 'avg_intended_air_yards_mean', 'percent_share_of_intended_air_yards_mean', 'catch_percentage_mean', 'avg_yac_mean', 'avg_expected_yac_mean', 'avg_yac_above_expectation_mean']


In [63]:
# Work on a copy so filtering for the correlation study leaves df_lag untouched.
df_search = df_lag.copy()

In [64]:
# Row filter for the correlation study, written as a DataFrame.query expression: keep rows
# with at least 10 games and a depth_team_mean of at most 2, in both the current and the
# "_last" columns. Assign an empty string instead (line below) to skip filtering.
# query = ""
query = "games >= 10 and games_last >= 10 and \
    depth_team_mean <= 2 and depth_team_mean_last <= 2"
if query:
    df_search = df_search.query(query)

In [65]:
# Names of the numeric columns; only these can be correlated.
numeric_cols = df_search.select_dtypes(include=[np.number]).columns

In [79]:
# For every numeric stat that is neither a lag column nor a fantasy-points column, compute
#   * its correlation with fantasy_points, and
#   * its correlation with its own "<stat>_last" column.
# The rows are collected in a list and the frame is built once at the end. Growing a
# DataFrame with pd.concat inside the loop is quadratic, and concatenating onto the initially
# empty frame triggers pandas' FutureWarning about empty/all-NA entries.
corr_columns = ["Stat", "Correlation-Fantasy-Points", "Correlation-Self"]
corr_rows = []
# The first three columns are skipped by position (presumably the id/season keys — confirm).
for col in list(df_search.columns)[3:]:
    if "last" in col or "fantasy" in col or col not in numeric_cols:
        continue

    # .corr() on a two-column frame returns a 2x2 matrix; [1, 0] is the off-diagonal value.
    corr_value_fp = df_search[[col, "fantasy_points"]].corr().iloc[1, 0]
    corr_value_self = df_search[[col, f"{col}_last"]].corr().iloc[1, 0]

    corr_rows.append((col, corr_value_fp, corr_value_self))

df_corr = pd.DataFrame(corr_rows, columns=corr_columns)


The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.



In [84]:
# Grouped bars per stat, ordered from highest to lowest self-correlation.
corr_sorted = df_corr.sort_values(by="Correlation-Self", ascending=False)
fig = px.bar(
    corr_sorted,
    x="Stat",
    y=["Correlation-Fantasy-Points", "Correlation-Self"],
    barmode='group',
)

# Titles and axis labels.
all_stats_layout = dict(
    title='Stat Correlation with Itself and Fantasy Points (Advanced Stats)',
    xaxis_title='Stat',
    yaxis_title='Correlation Value',
    legend_title='Correlation Type',
)
fig.update_layout(**all_stats_layout)

# Export a standalone HTML copy, then render inline.
fig.write_html("../../interactive/WR/stability/season/correlation-bar-advanced-stats.html")
fig.show()

In [85]:
# Keep stats whose two correlations are both at least 0.5, ordered by self-correlation.
fp_corr_ok = df_corr["Correlation-Fantasy-Points"] >= 0.5
self_corr_ok = df_corr["Correlation-Self"] >= 0.5
df_high_corrs = df_corr[fp_corr_ok & self_corr_ok].sort_values(by="Correlation-Self", ascending=False)

In [86]:
# Same grouped-bar view, restricted to the rows of df_high_corrs.
# NOTE(review): the title string is identical to the one used for the unfiltered chart —
# confirm that is intended.
fig = px.bar(
    df_high_corrs,
    x="Stat",
    y=["Correlation-Fantasy-Points", "Correlation-Self"],
    barmode='group',
)

focused_layout = dict(
    title='Stat Correlation with Itself and Fantasy Points (Advanced Stats)',
    xaxis_title='Stat',
    yaxis_title='Correlation Value',
    legend_title='Correlation Type',
)
fig.update_layout(**focused_layout)

# Export a standalone HTML copy, then render inline.
fig.write_html("../../interactive/WR/stability/season/correlation-bar-advanced-stats-focused.html")
fig.show()

# Choosing X and Y for Corr Mat

In [None]:
# Stats used as the base axis of the correlation matrix.
x = [
    'receptions',
    'targets',
    'receiving_yards',
    'receiving_tds',
    'fantasy_points',
    'fantasy_points_ppr',
]

In [None]:
# Matching "<stat>_last" column name for every stat in x, in the same order.
y = [f"{col}_last" for col in x]

# Graphing

In [None]:
# Directory prefix used when building output file paths in the cells below.
save_path = "../../interactive/WR/stability/season"

In [None]:
# Independent copy of the lagged frame for the graphing section.
df = df_lag.copy()

In [None]:
# Keep rows with at least 10 games and a depth_team_mean of at most 2, in both the current
# and the "_last" columns. With an empty query string the whole frame is used.
# Either way df_filter is a copy.
query = "games >= 10 and games_last >= 10 and \
    depth_team_mean <= 2 and depth_team_mean_last <= 2"
df_filter = (df.query(query) if query else df).copy()

In [None]:
# Number of rows left after filtering.
len(df_filter)

## Correlation Heat Map

In [None]:
# An output path is only built when a title is set; otherwise an empty path is passed on
# (presumably plot_correlation_matrix skips saving in that case — confirm in helpers_graph).
title = "" # "Basic Receiving Stat Correlations (Young Starters)"
path = ""
if title:
    path = f"{save_path}/{title.replace(' ', '-')}"

# Bind the axes returned by the helper to new names instead of overwriting x and y.
# Rebinding x / y here made the cell depend on its own previous output: a second run
# passed the returned values back in as input.
corr_mat, corr_x, corr_y = create_correlation_matrix(df_filter, x, y)
plot_correlation_matrix(corr_mat, corr_x, corr_y, path=path, title=title)

print(path)

## Scatter Plot

In [None]:
# Columns passed to plot_scatter as hover_data.
hover_data = ["player_name", "season", "age"]

In [None]:
# Stat for the y axis, and its "_last" counterpart for the x axis.
y_value = "receptions"
x_value = f"{y_value}_last"

In [None]:
# Display label for the stat: split on underscores, capitalize each word, join with spaces
# (e.g. "receiving_yards" -> "Receiving Yards").
y_fig_value_list = list(map(str.capitalize, y_value.split("_")))
y_fig_value = " ".join(y_fig_value_list)

In [None]:
# Scatter title, e.g. "Receptions vs. Receptions Last", and an output path derived from it.
# NOTE(review): path_scatter is built from the heat-map cell's `path`. While that is the
# empty string, the result is a relative name starting with "-" rather than a file under
# save_path — confirm before re-enabling the saving call.
fig_title_scatter = f"{y_fig_value} vs. {y_fig_value} Last"
path_scatter = f"{path}-{fig_title_scatter.replace(' ', '-')}"

In [None]:
# Variant that also passes a path and title (currently disabled):
# plot_scatter(df_filter, x_value, y_value, hover_data=hover_data, path=path_scatter, fig_title=fig_title_scatter)
# Active call: scatter of the stat against its "_last" column, with hover data only.
plot_scatter(df_filter, x_value, y_value, hover_data=hover_data)

# Line Graph of Samples

In [None]:
# Per age: count of player_id values and mean depth_team_mean in the filtered sample.
rows_by_age = df_filter.groupby(["age"])
df_samples = rows_by_age.agg({"player_id": "count", "depth_team_mean": "mean"})

In [None]:
# Join each column label with "" (plain string labels come back unchanged), then turn the
# index into a regular column.
df_samples.columns = ["".join(label) for label in df_samples.columns]
df_samples = df_samples.reset_index()

In [None]:
# Human-readable column names for the plots below.
display_names = {
    "player_id": "Samples",
    "depth_team_mean": "Average Depth Chart Position",
    "age": "Age",
}
df_samples = df_samples.rename(columns=display_names)

In [None]:
# Sample count by age; the average depth-chart position is available on hover.
fig = px.line(
    df_samples,
    x='Age',
    y=['Samples'],
    hover_data=["Average Depth Chart Position"],
)

# Titles and axis labels.
samples_layout = dict(
    title='Samples Size vs. Age',
    xaxis_title='Age',
    yaxis_title='Sample Size',
)
fig.update_layout(**samples_layout)

# Export a standalone HTML copy under save_path, then render inline.
fig.write_html(f"{save_path}/samples-v-age.html")
fig.show()

In [None]:
# Average depth-chart position by age; the sample count is available on hover.
fig = px.line(
    df_samples,
    x='Age',
    y=['Average Depth Chart Position'],
    hover_data=["Samples"],
)

# Titles and axis labels.
depth_layout = dict(
    title='Average Depth Chart Position vs. Age',
    xaxis_title='Age',
    yaxis_title='Average Depth Chart Position ',
)
fig.update_layout(**depth_layout)

# Export a standalone HTML copy under save_path, then render inline.
fig.write_html(f"{save_path}/depth-chart-v-age.html")
fig.show()