# Imports

In [1]:
import plotly.graph_objects as go
import plotly.express as px

In [2]:
import itertools
import numpy as np
import pandas as pd

# Read Data

In [3]:
# Load the pickled DataFrame that the rest of the notebook works from.
# NOTE(review): unpickling executes arbitrary code; only load files from a trusted source.
rushing_pickle_path = "./rushing-mlr.pkl"
df_og = pd.read_pickle(rushing_pickle_path)

# Multivariate Linear Regression

In [4]:
# Work on an independent (deep) copy so the loaded frame stays untouched.
df = df_og.copy(deep=True)

In [5]:
# Preview the first five rows of the working frame.
df.head(n=5)

Unnamed: 0,rusher_id,rusher,season,rushing_yards,yards_per_carry,carries,fumbles,rushing_touchdowns,times_tackled_for_loss,total_epa,epa_per_play,successful_plays,games,fantasy_points,fantasy_points_next,age,position,depth_team,round,pick
0,00-0007091,M.Hasselbeck,2012,0.0,0.0,1.0,1.0,0.0,0.0,-1.360733,-1.360733,0.0,8.0,76.48,3.0,36.0,QB,1.861111,6.0,187.0
1,00-0007091,M.Hasselbeck,2014,-5.0,-5.0,1.0,0.0,0.0,1.0,-1.280506,-1.280506,0.0,4.0,16.94,91.1,38.0,QB,2.0,6.0,187.0
3,00-0010346,P.Manning,2012,-2.0,-1.0,2.0,0.0,0.0,1.0,-0.817786,-0.408893,0.0,16.0,310.96,409.98,36.0,QB,1.0,1.0,1.0
4,00-0010346,P.Manning,2013,-9.0,-0.9,10.0,5.0,1.0,2.0,-18.549723,-1.854972,1.0,16.0,409.98,312.68,37.0,QB,1.0,1.0,1.0
5,00-0010346,P.Manning,2014,-13.0,-1.857143,7.0,3.0,0.0,2.0,-12.568753,-1.795536,1.0,16.0,312.68,91.36,38.0,QB,1.0,1.0,1.0


# Custom Graph for YPC Stability

## DF Manipulation for YPC Stability Graphs

In [6]:
# Keep only the rows whose position is 'RB', as an independent copy.
is_running_back = df["position"] == "RB"
df_rbs = df[is_running_back].copy()

In [7]:
# Two independent copies of the RB frame: one for the current season and
# one that will be shifted to act as the previous season.
df_now, df_last = df_rbs.copy(), df_rbs.copy()

In [8]:
# Map every non-key column to a "<name>_last" counterpart.
# The mapping is built from df_rbs (which is never mutated) instead of
# df_last, so re-running this cell after df_last has been renamed cannot
# produce "<name>_last_last" entries. Key columns are excluded by name
# rather than by position so a change in column order cannot rename a key.
lag_key_columns = ("rusher_id", "rusher", "season")
rename_dict = {
    col: f"{col}_last"
    for col in df_rbs.columns
    if col not in lag_key_columns
}

In [9]:
# Apply the "_last" suffixes to the previous-season frame.
df_last = df_last.rename(columns=rename_dict)

In [10]:
# Move each row one season forward so that, when merged on "season", it
# pairs with the same rusher's following season.
# The value is derived from df_rbs (same index, never mutated) instead of
# being incremented in place, so re-running this cell does not shift twice.
df_last["season"] = df_rbs["season"] + 1

In [11]:
# Pair each season row with the shifted previous-season row of the same
# rusher; rows without a match on all three keys are dropped (inner join).
lag_keys = ['rusher_id', 'rusher', 'season']
df_lag = pd.merge(df_now, df_last, how='inner', on=lag_keys)

In [12]:
# Combined carries across the current and previous season.
df_lag["sum_carries"] = df_lag['carries'].add(df_lag['carries_last'])

In [13]:
# Row-wise smaller of the two seasons' games counts.
games_both_seasons = df_lag.loc[:, ['games', 'games_last']]
df_lag["min_games_played"] = games_both_seasons.min(axis="columns")

## Lines Created from DF Lag

In [None]:
# Minimum-games cut-offs to evaluate. Assigned in this cell so it runs on a
# fresh kernel: the loop below needs the name, and the Graphing section
# (which assigns the same range) only runs afterwards.
games_played_thresholds = range(1, 17 + 2)

# For every cut-off, store a frame holding the mean YPC per combined-carries value.
grouped_data = {}
for threshold in games_played_thresholds:
    # Filter based on minimum games played
    df_filtered = df_lag[df_lag['min_games_played'] >= threshold]

    # Group by sum of carries and calculate mean YPC
    group = df_filtered.groupby('sum_carries')['yards_per_carry'].mean().reset_index()

    # Store the grouped data
    grouped_data[threshold] = group

## Graphing

In [None]:
# Path prefix for the HTML export of the YPC-vs-carries figure; a suffix and
# ".html" are appended where the figure is written.
# NOTE(review): `base_path` is reassigned later in the notebook for another
# chart, so running cells out of order can write to the wrong file.
base_path = "../../interactive/RB/stability-rushing/ypc-stability/ypc-v-carries"

In [None]:
# Candidate minimum-games cut-offs: every integer from 1 through 18 inclusive
first_threshold, last_threshold = 1, 17 + 1
games_played_thresholds = range(first_threshold, last_threshold + 1)

In [None]:
# Start from an empty figure; one line is added per plotted threshold
fig = go.Figure()

for threshold in games_played_thresholds:
    # Only these cut-offs are drawn; every other threshold is skipped
    if threshold not in [2, 8, 13, 16]:
        continue

    # Rows where both seasons meet the minimum-games cut-off
    meets_cutoff = df_lag['min_games_played'] >= threshold
    df_filtered = df_lag[meets_cutoff]

    # Mean YPC for each distinct combined-carries value
    ypc_by_carries = df_filtered.groupby('sum_carries')['yards_per_carry']
    group = ypc_by_carries.mean().reset_index()

    # One line per threshold, labelled with the cut-off
    trace = go.Scatter(
        x=group['sum_carries'],
        y=group['yards_per_carry'],
        mode='lines',
        name=f'Min Games: {threshold}',
    )
    fig.add_trace(trace)

In [None]:
# Titles and axis labels for the carries chart
# (a `template=` entry such as 'plotly_dark', 'plotly' or 'plotly_white' can be added here)
layout_options = dict(
    title='YPC vs Sum of Carries for Different Minimum Games Played',
    xaxis_title='Sum of Carries (Year T + Year T-1)',
    yaxis_title='Average Yards Per Carry (YPC)',
    legend_title='Minimum Games Played',
)
fig.update_layout(**layout_options)

# Write an interactive HTML copy, then render the figure inline
fig.write_html(f"{base_path}-2-8-13-16.html")
fig.show()

# YPC by Age and Fantasy Performance of Back

In [14]:
# Preview the first five rows of the lagged frame.
df_lag.head(n=5)

Unnamed: 0,rusher_id,rusher,season,rushing_yards,yards_per_carry,carries,fumbles,rushing_touchdowns,times_tackled_for_loss,total_epa,...,games_last,fantasy_points_last,fantasy_points_next_last,age_last,position_last,depth_team_last,round_last,pick_last,sum_carries,min_games_played
0,00-0022736,S.Jackson,2013,543.0,3.458599,157.0,0.0,6.0,15.0,-21.998011,...,16.0,162.6,115.4,29.0,RB,1.0,1.0,24.0,415.0,12.0
1,00-0022736,S.Jackson,2014,707.0,3.721053,190.0,0.0,6.0,16.0,-16.424412,...,12.0,115.4,121.5,30.0,RB,1.0,1.0,24.0,347.0,12.0
2,00-0023437,R.Brown,2013,157.0,3.488889,45.0,0.0,1.0,2.0,-1.680793,...,14.0,59.1,27.7,30.0,RB,2.166667,1.0,2.0,91.0,12.0
3,00-0023500,F.Gore,2013,1128.0,4.086957,276.0,3.0,9.0,36.0,-32.703735,...,16.0,196.8,174.9,29.0,RB,1.0,3.0,65.0,534.0,16.0
4,00-0023500,F.Gore,2014,1106.0,4.337255,255.0,2.0,4.0,26.0,-24.917688,...,16.0,174.9,147.7,30.0,RB,1.0,3.0,65.0,531.0,16.0


In [15]:
# Path prefix for the HTML export of the YPC-vs-age figure; a suffix and
# ".html" are appended where the figure is written.
# NOTE(review): this rebinds the `base_path` used for the earlier carries
# chart, so re-running that chart's export after this cell writes to this prefix.
base_path = "../../interactive/RB/stability-rushing/ypc-stability/ypc-v-age"

## Raw Version

In [24]:
# Show the first five rows of the lagged frame again before aggregating by age.
df_lag.head(n=5)

Unnamed: 0,rusher_id,rusher,season,rushing_yards,yards_per_carry,carries,fumbles,rushing_touchdowns,times_tackled_for_loss,total_epa,...,games_last,fantasy_points_last,fantasy_points_next_last,age_last,position_last,depth_team_last,round_last,pick_last,sum_carries,min_games_played
0,00-0022736,S.Jackson,2013,543.0,3.458599,157.0,0.0,6.0,15.0,-21.998011,...,16.0,162.6,115.4,29.0,RB,1.0,1.0,24.0,415.0,12.0
1,00-0022736,S.Jackson,2014,707.0,3.721053,190.0,0.0,6.0,16.0,-16.424412,...,12.0,115.4,121.5,30.0,RB,1.0,1.0,24.0,347.0,12.0
2,00-0023437,R.Brown,2013,157.0,3.488889,45.0,0.0,1.0,2.0,-1.680793,...,14.0,59.1,27.7,30.0,RB,2.166667,1.0,2.0,91.0,12.0
3,00-0023500,F.Gore,2013,1128.0,4.086957,276.0,3.0,9.0,36.0,-32.703735,...,16.0,196.8,174.9,29.0,RB,1.0,3.0,65.0,534.0,16.0
4,00-0023500,F.Gore,2014,1106.0,4.337255,255.0,2.0,4.0,26.0,-24.917688,...,16.0,174.9,147.7,30.0,RB,1.0,3.0,65.0,531.0,16.0


In [29]:
def get_ypc_by_age(df, query, sample_size=0):
    """Aggregate rushing metrics per age and rescale two of them to the YPC range.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``age``, ``yards_per_carry``,
        ``fantasy_points``, ``rusher`` and ``carries``.
    query : str or None
        ``DataFrame.query`` expression applied before grouping. Skipped when
        falsy (``None`` or an empty string).
    sample_size : int, default 0
        When truthy, ages with fewer than this many non-null ``rusher``
        entries are removed from the result.

    Returns
    -------
    pandas.DataFrame
        One row per age with the columns ``age``,
        ``average_yards_per_carry``, ``average_fantasy_points``,
        ``sample_size``, ``carries_mean``,
        ``average_fantasy_points_normalized`` and
        ``average_carries_normalized``. Each normalized column is the metric
        divided by its own maximum, multiplied by the maximum average YPC.
    """
    source = df.query(query).copy() if query else df

    # Named aggregation yields the final flat column names directly.
    per_age = (
        source
        .groupby("age")
        .agg(
            average_yards_per_carry=("yards_per_carry", "mean"),
            average_fantasy_points=("fantasy_points", "mean"),
            sample_size=("rusher", "count"),
            carries_mean=("carries", "mean"),
        )
        .reset_index()
    )

    if sample_size:
        has_enough_rows = per_age["sample_size"] >= sample_size
        per_age = per_age.loc[has_enough_rows].copy()

    # Rescale each metric so that its peak coincides with the peak average YPC.
    ypc_peak = per_age["average_yards_per_carry"].max()
    rescaled_columns = (
        ("average_fantasy_points", "average_fantasy_points_normalized"),
        ("carries_mean", "average_carries_normalized"),
    )
    for metric, rescaled in rescaled_columns:
        share_of_peak = per_age[metric] / per_age[metric].max()
        per_age[rescaled] = share_of_peak * ypc_peak

    return per_age

In [30]:
# Per-age summary restricted to rows with at least 10 games and to ages
# backed by at least 10 rows.
df_ypc_by_age = get_ypc_by_age(df_lag, query="games >= 10", sample_size=10)

In [37]:
# Draw average YPC together with the two rescaled series (fantasy points and
# carries) against age; the sample size per age is shown on hover.
fig = px.line(df_ypc_by_age, x='age', y=['average_yards_per_carry', 'average_fantasy_points_normalized', 
                                        'average_carries_normalized'],
             hover_data=["sample_size"])

# Update layout
fig.update_layout(
    title='Performance Metrics vs Age',
    xaxis_title='Age',
    yaxis_title='Performance Metrics',
    # The legend entries are the three plotted metrics, not games played
    legend_title='Metric',
)

# save the figure
fig.write_html(f"{base_path}-10-games-10-samples-carries.html")

# Show the figure
fig.show()

## Normalized Version

In [None]:
# Mean YPC, mean fantasy points and row count for every (age, games) pair.
# The list-valued spec yields two-level column labels such as
# ("yards_per_carry", "mean").
age_games_groups = df_lag.groupby(["age", "games"])
df_ypc_by_age_normalized = age_games_groups.agg({
    "yards_per_carry": ["mean"],
    "fantasy_points": ["mean"],
    "rusher": ["count"],
})

In [None]:
# Collapse each two-level column label into a single "<column>_<stat>" name,
# then turn the group keys back into ordinary columns.
df_ypc_by_age_normalized.columns = [
    "_".join(levels) for levels in df_ypc_by_age_normalized.columns
]
df_ypc_by_age_normalized = df_ypc_by_age_normalized.reset_index()

In [None]:
# Give the aggregated columns readable names.
readable_names = {
    "yards_per_carry_mean": "average_yards_per_carry",
    "fantasy_points_mean": "average_fantasy_points",
    "rusher_count": "sample_size",
}
df_ypc_by_age_normalized = df_ypc_by_age_normalized.rename(columns=readable_names)

In [None]:
# Display only the groups with at least 10 games (nothing is assigned).
df_ypc_by_age_normalized[df_ypc_by_age_normalized["games"] >= 10]

## Bar Graph

In [None]:
# Grouped bars of average YPC per age, coloured by games played; average
# fantasy points and sample size are shown on hover.
# The title previously read "Long-Form Input", a placeholder that did not
# describe this chart.
fig = px.bar(df_ypc_by_age_normalized, x="age", y="average_yards_per_carry",
             color="games", title="Average YPC by Age and Games Played", barmode='group',
             hover_data=["average_fantasy_points", "sample_size"])
fig.show()

## Line Graph

In [None]:
# Order rows by games and then by age. Sorting on "games" alone uses an
# unstable sort by default, so the age order inside each games group was not
# guaranteed; a line chart connects points in row order, which could make
# the per-games lines zig-zag along the age axis.
df_ypc_by_age_normalized = df_ypc_by_age_normalized.sort_values(by=["games", "age"])

In [None]:
# One line per games value: average YPC against age, with average fantasy
# points and sample size available on hover.
line_options = dict(
    x='age',
    y='average_yards_per_carry',
    color='games',
    hover_data=['average_fantasy_points', 'sample_size'],
)
fig = px.line(df_ypc_by_age_normalized, **line_options)

In [None]:
# Titles and axis labels for the YPC-vs-age line chart
layout_options = dict(
    title='YPC vs Age for Different Number of Games Played',
    xaxis_title='Age',
    yaxis_title='Average Yards Per Carry (YPC)',
    legend_title='Games Played',
)
fig.update_layout(**layout_options)

# Render the figure inline
fig.show()