# Imports

In [1]:
import plotly.express as px
import pandas as pd

In [2]:
import sys
import os
sys.path.append(os.path.abspath('..'))
from helpers import plot_correlation_matrix, create_correlation_matrix, filter_df

# Read Data

These data sets (play-by-play, seasonal stats, rosters and depth charts) cover the 2012 - 2022 seasons; the draft pick data goes back to 1980. The data is unfiltered at this point.

In [3]:
# Depth-chart data for 2012-2022 (presumably per-season means, going by the file name).
# NOTE(review): pickle files execute code on load; only read trusted files.
df_depth_chart = pd.read_pickle(
    "../../interactive/df_dc_mean_2012_2022.pkl"
)

In [4]:
# Draft-pick data; the file name indicates it spans 1980-2022.
df_draft_picks = pd.read_pickle(
    "../../interactive/df_draft_picks_1980_2022.pkl"
)

In [5]:
# Roster data for 2012-2022 (used later for player age and position).
df_roster = pd.read_pickle(
    "../../interactive/df_roster_2012_2022.pkl"
)

In [6]:
# Per-player seasonal stats for 2012-2022 (used later for games and fantasy points).
df_seasonal = pd.read_pickle(
    "../../interactive/df_seasonal_2012_2022.pkl"
)

In [7]:
# Play-by-play data for 2012-2022; the source for all rushing aggregates below.
df_pbp = pd.read_pickle(
    "../../interactive/df_pbp_2012_2022.pkl"
)

In [8]:
# Preview the first five plays to sanity-check the load.
df_pbp.head(5)

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense
0,1.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,,,,...,,,,,,,,,,
1,35.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
2,53.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
3,74.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
4,95.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,


# Create Data Frame for Runs

In [9]:
# Keep only regular-season plays whose play type is a run.
df_runs = df_pbp.loc[
    (df_pbp["play_type"] == "run") & (df_pbp["season_type"] == "REG")
]

In [10]:
# Preview the first five run plays.
df_runs.head(5)

Unnamed: 0,play_id,game_id,old_game_id,home_team,away_team,season_type,week,posteam,posteam_type,defteam,...,offense_formation,offense_personnel,defenders_in_box,defense_personnel,number_of_pass_rushers,players_on_play,offense_players,defense_players,n_offense,n_defense
2,53.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
3,74.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
7,165.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
11,256.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,ATL,away,KC,...,,,,,,,,,,
16,359.0,2012_01_ATL_KC,2012090908,KC,ATL,REG,1,KC,home,ATL,...,,,,,,,,,,


# Create DF for Rushing Stats by Season

In [11]:
# Aggregate the run plays into one row per rusher and season.
# Each aggregation is passed as a list, so the result has two-level column
# labels (source column, statistic); these are flattened in a later cell.
df_rushing_yards_by_season = (
    df_runs
    .groupby(by=["rusher_id", "rusher", "season"])
    .agg(
        {
            "rushing_yards": ["sum", "mean"],
            "rush_attempt": ["sum"],
            "fumble": ["sum"],
            "rush_touchdown": ["sum"],
            "tackled_for_loss": ["sum"],
            "epa": ["sum", "mean"],
            "success": ["sum"],
        }
    )
)

In [12]:
# Preview the aggregated rusher-season rows (still with two-level column labels).
df_rushing_yards_by_season.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rushing_yards,rushing_yards,rush_attempt,fumble,rush_touchdown,tackled_for_loss,epa,epa,success
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,sum,mean,sum,sum,sum,sum,sum,mean,sum
rusher_id,rusher,season,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2
00-0007091,M.Hasselbeck,2012,0.0,0.0,1.0,1.0,0.0,0.0,-1.360733,-1.360733,0.0
00-0007091,M.Hasselbeck,2014,-5.0,-5.0,1.0,0.0,0.0,1.0,-1.280506,-1.280506,0.0
00-0007091,M.Hasselbeck,2015,4.0,4.0,1.0,0.0,0.0,0.0,-0.035615,-0.035615,0.0
00-0010346,P.Manning,2012,-2.0,-1.0,2.0,0.0,0.0,1.0,-0.817786,-0.408893,0.0
00-0010346,P.Manning,2013,-9.0,-0.9,10.0,5.0,1.0,2.0,-18.549723,-1.854972,1.0


In [13]:
# Collapse the two-level column labels into "<column>_<statistic>" names
# (e.g. "rushing_yards_sum"), then turn the group keys back into ordinary columns.
# NOTE: not idempotent. Re-running this cell on already-flattened labels would
# join the characters of each name with "_".
df_rushing_yards_by_season.columns = [
    "_".join(label) for label in df_rushing_yards_by_season.columns
]
df_rushing_yards_by_season = df_rushing_yards_by_season.reset_index()

In [14]:
# Drop rusher-season rows with any missing aggregate, then take an independent
# copy that the enrichment step below can add columns to.
df_rushing_yards_by_season = df_rushing_yards_by_season.dropna()
df_rushing_yards_by_season_cleaned = df_rushing_yards_by_season.copy(deep=True)

In [15]:
# Enrich every rusher-season row with context from the other data sets.
#
# A row is filled in only when ALL lookups find at least one match: same-season
# seasonal stats, next-season seasonal stats, roster, depth chart and draft pick.
# Rows that are skipped keep NaN in the new columns and are removed by the
# dropna() in the following cell. When a lookup matches several rows, the first
# match is used.
#
# NOTE(review): this issues five DataFrame.query calls per row, which is slow on
# large frames; keyed merges would be faster but need care to keep the
# first-match semantics.
#
# Iterate over a snapshot so that adding columns below never modifies the frame
# that is being iterated.
for i, row in df_rushing_yards_by_season_cleaned.copy().iterrows():
    r_id = row["rusher_id"]
    season = row["season"]

    seasonal_row = df_seasonal.query(f"player_id == '{r_id}' & season == {season} & season_type == 'REG'")
    roster_row = df_roster.query(f"player_id == '{r_id}' & season == {season}")
    depth_chart_row = df_depth_chart.query(f"gsis_id == '{r_id}' & season == {season}")
    draft_pick_row = df_draft_picks.query(f"gsis_id == '{r_id}'")

    # Next season's stats supply the prediction target (fantasy_points_next).
    seasonal_row_next = df_seasonal.query(f"player_id == '{r_id}' & season == {season + 1} & season_type == 'REG'")

    lookups = (seasonal_row, draft_pick_row, depth_chart_row, roster_row, seasonal_row_next)
    if any(hit.empty for hit in lookups):
        # Incomplete context: leave this row's new columns as NaN.
        continue

    # from seasonal df
    df_rushing_yards_by_season_cleaned.at[i, "games"] = seasonal_row["games"].iloc[0]
    df_rushing_yards_by_season_cleaned.at[i, "fantasy_points"] = seasonal_row["fantasy_points"].iloc[0]
    df_rushing_yards_by_season_cleaned.at[i, "fantasy_points_next"] = seasonal_row_next["fantasy_points"].iloc[0]

    # from roster df
    df_rushing_yards_by_season_cleaned.at[i, "age"] = roster_row["age"].iloc[0]
    df_rushing_yards_by_season_cleaned.at[i, "position"] = roster_row["position"].iloc[0]

    # from depth chart df
    df_rushing_yards_by_season_cleaned.at[i, "depth_team"] = depth_chart_row["depth_team_mean"].iloc[0]

    # from draft pick df
    df_rushing_yards_by_season_cleaned.at[i, "round"] = draft_pick_row["round"].iloc[0]
    df_rushing_yards_by_season_cleaned.at[i, "pick"] = draft_pick_row["pick"].iloc[0]


In [16]:
# Remove rusher-seasons that could not be fully enriched (their new columns are
# still NaN), then preview the result.
df_rushing_yards_by_season_cleaned = df_rushing_yards_by_season_cleaned.dropna()
df_rushing_yards_by_season_cleaned.head(5)

Unnamed: 0,rusher_id,rusher,season,rushing_yards_sum,rushing_yards_mean,rush_attempt_sum,fumble_sum,rush_touchdown_sum,tackled_for_loss_sum,epa_sum,epa_mean,success_sum,games,fantasy_points,fantasy_points_next,age,position,depth_team,round,pick
0,00-0007091,M.Hasselbeck,2012,0.0,0.0,1.0,1.0,0.0,0.0,-1.360733,-1.360733,0.0,8.0,76.48,3.0,36.0,QB,1.861111,6.0,187.0
1,00-0007091,M.Hasselbeck,2014,-5.0,-5.0,1.0,0.0,0.0,1.0,-1.280506,-1.280506,0.0,4.0,16.94,91.1,38.0,QB,2.0,6.0,187.0
3,00-0010346,P.Manning,2012,-2.0,-1.0,2.0,0.0,0.0,1.0,-0.817786,-0.408893,0.0,16.0,310.96,409.98,36.0,QB,1.0,1.0,1.0
4,00-0010346,P.Manning,2013,-9.0,-0.9,10.0,5.0,1.0,2.0,-18.549723,-1.854972,1.0,16.0,409.98,312.68,37.0,QB,1.0,1.0,1.0
5,00-0010346,P.Manning,2014,-13.0,-1.857143,7.0,3.0,0.0,2.0,-12.568753,-1.795536,1.0,16.0,312.68,91.36,38.0,QB,1.0,1.0,1.0


# Data Manipulations

Keep a copy of our unfiltered data so it is untouched if we need to reset our exploration.

In [47]:
# Work on an independent copy so the cleaned frame stays available for a reset.
df = df_rushing_yards_by_season_cleaned.copy(deep=True)

Rename columns to labels that make more sense.

In [48]:
# Give the aggregate columns readable names.
# NOTE: the "*_last" keys match no column produced in this notebook; rename()
# ignores unknown keys by default, so they are currently no-ops.
df = df.rename(
    columns={
        # current-season aggregates
        "rushing_yards_sum": "rushing_yards",
        "rushing_yards_mean": "yards_per_carry",
        "rush_attempt_sum": "carries",
        "fumble_sum": "fumbles",
        "rush_touchdown_sum": "rushing_touchdowns",
        "tackled_for_loss_sum": "times_tackled_for_loss",
        "epa_sum": "total_epa",
        "epa_mean": "epa_per_play",
        "success_sum": "successful_plays",
        # "_last" variants of the same aggregates
        "rushing_yards_sum_last": "rushing_yards_last",
        "rushing_yards_mean_last": "yards_per_carry_last",
        "rush_attempt_sum_last": "carries_last",
        "fumble_sum_last": "fumbles_last",
        "rush_touchdown_sum_last": "rushing_touchdowns_last",
        "tackled_for_loss_sum_last": "times_tackled_for_loss_last",
        "epa_sum_last": "total_epa_last",
        "epa_mean_last": "epa_per_play_last",
        "success_sum_last": "successful_plays_last",
    }
)

# Save Data for Future Use

In [49]:
# Persist the renamed rusher-season frame next to this notebook for later use.
df.to_pickle(
    "./rushing-mlr.pkl"
)