# Imports

In [1]:
import plotly.express as px
import pandas as pd

In [2]:
from helpers import plot_correlation_matrix, create_correlation_matrix

# Read Data

This data set takes into account passing data from 2012 - 2022. This data is unfiltered.

In [3]:
file_path = "../../interactive/QB/stability-passing/df_combined.pkl"

In [4]:
df_combined = pd.read_pickle(file_path)
df_combined.head()

Unnamed: 0,passer_id,passer,season,passing_yards,passing_attempts,yards_per_pass,total_epa,passing_touchdowns,completions,games,...,completion_percentage_mean,completion_percentage_mean_last,expected_completion_percentage_mean,expected_completion_percentage_mean_last,completion_percentage_above_expectation_mean,completion_percentage_above_expectation_mean_last,avg_air_distance_mean,avg_air_distance_mean_last,max_air_distance_mean,max_air_distance_mean_last
11,00-0019596,T.Brady,2018,4355.0,568,7.667253,125.592216,29.0,375.0,16.0,...,65.776652,66.115541,67.763737,64.926853,-1.987085,0.909055,20.552595,21.376685,48.604158,51.114926
12,00-0019596,T.Brady,2019,4057.0,613,6.618271,64.109032,24.0,373.0,16.0,...,60.47486,65.776652,64.023087,67.763737,-3.548227,-1.987085,20.601148,20.552595,46.929331,48.604158
13,00-0019596,T.Brady,2020,4633.0,606,7.645215,154.74057,40.0,401.0,16.0,...,64.754609,60.47486,63.954946,64.023087,0.799663,-3.548227,22.281251,20.601148,52.355251,46.929331
14,00-0019596,T.Brady,2021,5316.0,717,7.414226,178.601776,43.0,485.0,17.0,...,67.314116,64.754609,68.858282,63.954946,-1.544166,0.799663,20.585013,22.281251,50.405201,52.355251
15,00-0019596,T.Brady,2022,4694.0,730,6.430137,85.29232,25.0,490.0,17.0,...,66.485965,67.314116,67.86671,68.858282,-1.380745,-1.544166,20.160353,20.585013,49.659557,50.405201


# Data Manipulations

Keep a copy of our unfiltered data so it is untouched if we need to reset our exploration.

In [5]:
df_temp = df_combined.copy()

In [6]:
filter_query = ""
if filter_query:
    df_temp = df_temp.query(filter_query)

## Per Game Version

In [7]:
df_temp.columns

Index(['passer_id', 'passer', 'season', 'passing_yards', 'passing_attempts',
       'yards_per_pass', 'total_epa', 'passing_touchdowns', 'completions',
       'games', 'fantasy_points', 'age', 'passing_yards_last',
       'passing_attempts_last', 'yards_per_pass_last', 'total_epa_last',
       'passing_touchdowns_last', 'completions_last', 'games_last',
       'fantasy_points_last', 'age_last', 'avg_time_to_throw_mean',
       'avg_time_to_throw_mean_last', 'avg_completed_air_yards_mean',
       'avg_completed_air_yards_mean_last', 'avg_intended_air_yards_mean',
       'avg_intended_air_yards_mean_last', 'avg_air_yards_differential_mean',
       'avg_air_yards_differential_mean_last', 'aggressiveness_mean',
       'aggressiveness_mean_last', 'max_completed_air_distance_mean',
       'max_completed_air_distance_mean_last', 'avg_air_yards_to_sticks_mean',
       'avg_air_yards_to_sticks_mean_last', 'attempts_mean',
       'attempts_mean_last', 'pass_yards_mean', 'pass_yards_mean_last',
 

In [8]:
filter_cols = ["age", "yards_per_pass", "age_last", "yards_per_pass_last", "season", 
               "passer", "passer_id", "games", "games_last"
              ]

per_game_cols = []
for col in list(df_temp.columns):
    if col not in filter_cols and "mean" not in col:
        per_game_cols.append(col)

In [9]:
print(per_game_cols)

['passing_yards', 'passing_attempts', 'total_epa', 'passing_touchdowns', 'completions', 'fantasy_points', 'passing_yards_last', 'passing_attempts_last', 'total_epa_last', 'passing_touchdowns_last', 'completions_last', 'fantasy_points_last']


In [10]:
df_per_game = df_temp
for col in per_game_cols:
    new_col = f"{col}_per_game"
    df_per_game[new_col] = df_temp[col] / df_temp["games"]

In [11]:
df_per_game.head()

Unnamed: 0,passer_id,passer,season,passing_yards,passing_attempts,yards_per_pass,total_epa,passing_touchdowns,completions,games,...,total_epa_per_game,passing_touchdowns_per_game,completions_per_game,fantasy_points_per_game,passing_yards_last_per_game,passing_attempts_last_per_game,total_epa_last_per_game,passing_touchdowns_last_per_game,completions_last_per_game,fantasy_points_last_per_game
11,00-0019596,T.Brady,2018,4355.0,568,7.667253,125.592216,29.0,375.0,16.0,...,7.849514,1.8125,23.4375,17.51875,286.0625,36.3125,12.566085,2.0,24.0625,18.4925
12,00-0019596,T.Brady,2019,4057.0,613,6.618271,64.109032,24.0,373.0,16.0,...,4.006814,1.5,23.3125,16.48,272.1875,35.5,7.849514,1.8125,23.4375,17.51875
13,00-0019596,T.Brady,2020,4633.0,606,7.645215,154.74057,40.0,401.0,16.0,...,9.671286,2.5,25.0625,21.12,253.5625,38.3125,4.006814,1.5,23.3125,16.48
14,00-0019596,T.Brady,2021,5316.0,717,7.414226,178.601776,43.0,485.0,17.0,...,10.505987,2.529412,28.529412,22.043529,272.529412,35.647059,9.102386,2.352941,23.588235,19.877647
15,00-0019596,T.Brady,2022,4694.0,730,6.430137,85.29232,25.0,490.0,17.0,...,5.017195,1.470588,28.823529,15.98,312.705882,42.176471,10.505987,2.529412,28.529412,22.043529


In [12]:
df = df_per_game.copy()

# Generate Graphs

In [13]:
df.head()

Unnamed: 0,passer_id,passer,season,passing_yards,passing_attempts,yards_per_pass,total_epa,passing_touchdowns,completions,games,...,total_epa_per_game,passing_touchdowns_per_game,completions_per_game,fantasy_points_per_game,passing_yards_last_per_game,passing_attempts_last_per_game,total_epa_last_per_game,passing_touchdowns_last_per_game,completions_last_per_game,fantasy_points_last_per_game
11,00-0019596,T.Brady,2018,4355.0,568,7.667253,125.592216,29.0,375.0,16.0,...,7.849514,1.8125,23.4375,17.51875,286.0625,36.3125,12.566085,2.0,24.0625,18.4925
12,00-0019596,T.Brady,2019,4057.0,613,6.618271,64.109032,24.0,373.0,16.0,...,4.006814,1.5,23.3125,16.48,272.1875,35.5,7.849514,1.8125,23.4375,17.51875
13,00-0019596,T.Brady,2020,4633.0,606,7.645215,154.74057,40.0,401.0,16.0,...,9.671286,2.5,25.0625,21.12,253.5625,38.3125,4.006814,1.5,23.3125,16.48
14,00-0019596,T.Brady,2021,5316.0,717,7.414226,178.601776,43.0,485.0,17.0,...,10.505987,2.529412,28.529412,22.043529,272.529412,35.647059,9.102386,2.352941,23.588235,19.877647
15,00-0019596,T.Brady,2022,4694.0,730,6.430137,85.29232,25.0,490.0,17.0,...,5.017195,1.470588,28.823529,15.98,312.705882,42.176471,10.505987,2.529412,28.529412,22.043529


## Correlation Matrix

In [14]:
x = list()
y = list()
for col in list(df.columns)[3:]:
    if "last" in col and ("per_game" in col or "mean" in col):
        y.append(col)
    elif "per_game" in col or "mean" in col:
        x.append(col)

In [32]:
# x = [
#     "aggressiveness_mean", "aggressiveness_mean_last",
#     "fantasy_points_per_game", "fantasy_points_last_per_game"
# ]
x = list(df.columns[3:])

In [33]:
# y = [
#     "aggressiveness_mean", "aggressiveness_mean_last",
#     "fantasy_points_per_game", "fantasy_points_last_per_game"
# ]
y = list(df.columns[3:])

In [35]:
corr_mat, x, y = create_correlation_matrix(df, x, y)
plot_correlation_matrix(corr_mat, x, y, "/per-game/all")

# Filtered DFs

In [90]:
def filter_df(df, query, x, y, title=""):
    # create filtered df
    df_filter = df.copy()
    df_filter[["games", "games_last"]] = df_temp[["games", "games_last"]]
    
    if query:
        df_filter = df_filter.query(query)
    
    # create and show corr_mat
    corr_mat, x, y = create_correlation_matrix(df_filter, x, y)
    plot_correlation_matrix(corr_mat, x, y, title=f"per-game/{title}")
    
    # visualize highest correlation pairing
    series = corr_mat.max().iloc[0:]
    temp_arr = list(series)

    max_series_value = series.max()
    i = temp_arr.index(max_series_value)
    col = list(corr_mat.columns)[i]
    col_prev = col.replace("_per_game", "_last_per_game")
    print(col, col_prev)
    
    fig = px.scatter(df_filter, x=col_prev, y=col,
                 hover_data=["passer", "season", "age"]
                )
    if title:
        path = f"../../interactive/QB/stability-passing/per-game/{title}-scatter-{col}.html"
        fig.write_html(path)
    fig.show()
    
    return df_filter

## 12 Games 12 Games Last

In [None]:
query = "games >= 12 and games_last >= 12"
filter_df(df, query, x, y)

## 12 Games; 12 Game Last  |  150 passing yards per game; 150 passing yards last per game

In [None]:
query = "games >= 12 and games_last >= 12 and passing_yards_per_game >= 150 and passing_yards_last_per_game >= 150"
filter_df(df, query, x, y)

## Young

In [None]:
query = "games >= 12 and games_last >= 12 \
and passing_yards_per_game >= 150 and passing_yards_last_per_game >= 150 \
and age <= 30"
filter_df(df, query, x, y)

## Old

In [None]:
query = "games >= 12 and games_last >= 12 \
and passing_yards_per_game >= 150 and passing_yards_last_per_game >= 150 \
and age_last >= 30"
filter_df(df, query, x, y)

## Exploration

In [None]:
query = "games >= 12 and games_last >= 12 \
and passing_yards_per_game >= 200 and passing_yards_last_per_game >= 200"
filter_df(df, query, x, y)

# Further Exploration

In [177]:
query = "games >= 12 and games_last >= 12 and passing_yards_per_game >= 150 and passing_yards_last_per_game >= 150"
explore_df = filter_df(df, query, x, y)

completions completions


In [92]:
col_dicts = {
    'passing_yards': 'passing_yards_last',
    'passing_attempts': 'passing_attempts_last',
    'yards_per_pass': 'yards_per_pass_last',
    'total_epa': 'total_epa_last',
    'passing_touchdowns': 'passing_touchdowns_last',
    'completions': 'completions_last',
    'games': 'games_last',
    'fantasy_points': 'fantasy_points_last',
    'age': 'age_last',
    'avg_time_to_throw_mean': 'avg_time_to_throw_mean_last',
    'avg_completed_air_yards_mean': 'avg_completed_air_yards_mean_last',
    'avg_intended_air_yards_mean': 'avg_intended_air_yards_mean_last',
    'avg_air_yards_differential_mean': 'avg_air_yards_differential_mean_last',
    'aggressiveness_mean': 'aggressiveness_mean_last',
    'max_completed_air_distance_mean': 'max_completed_air_distance_mean_last',
    'avg_air_yards_to_sticks_mean': 'avg_air_yards_to_sticks_mean_last',
    'attempts_mean': 'attempts_mean_last',
    'pass_yards_mean': 'pass_yards_mean_last',
    'passer_rating_mean': 'passer_rating_mean_last',
    'completion_percentage_mean': 'completion_percentage_mean_last',
    'expected_completion_percentage_mean': 'expected_completion_percentage_mean_last',
    'completion_percentage_above_expectation_mean': 'completion_percentage_above_expectation_mean_last',
    'avg_air_distance_mean': 'avg_air_distance_mean_last',
    'max_air_distance_mean': 'max_air_distance_mean_last'
}

In [132]:
corr_list = list()
compare_col = "fantasy_points"
for col in list(explore_df.columns[3:]):
    corr_value = -100
    corr_value_fantasy = -100
    
    if col != compare_col and "fantasy_points" not in col:
        temp_corr_df = explore_df[[col, compare_col]].corr()    
        corr_value_fantasy = temp_corr_df[col].iloc[1]
        
        if col in col_dicts:
            temp_corr_df = explore_df[[col, col_dicts[col]]].corr()
            corr_value = temp_corr_df[col].iloc[1]
        
        
        corr_list.append((col, corr_value_fantasy, corr_value))

In [133]:
corr_list_sorted = sorted(corr_list, key = lambda x: x[1], reverse=True)

In [134]:
for item in corr_list_sorted:
    if item[1] > 0.4 and item[2] > 0.4:
        print(f"{item[0]}  |  {item[1]}  |  {item[2]}")

passing_yards  |  0.6709649226938393  |  0.5219369792008731
completions  |  0.5260358476397307  |  0.5509124166790051
passing_attempts  |  0.46046787932670324  |  0.5216158035305932


In [210]:
x = [
    "passing_yards",
    "max_air_distance_mean",
    "completions",
    "passing_attempts"
]

In [211]:
y = [
    "fantasy_points_per_game",
    "fantasy_points",
]

In [212]:
query = "age_last >= 30"

In [213]:
corr_mat, x, y = create_correlation_matrix(df.query(query), x, y)
plot_correlation_matrix(corr_mat, x, y, "per-game/old-best-stat-corr-fantasy")

In [217]:
x = [
    "passing_yards",
    "max_air_distance_mean",
    "completions",
    "passing_attempts"
]

In [218]:
y = [
    "passing_yards_last",
    "max_air_distance_mean_last",
    "completions_last",
    "passing_attempts_last"
]

In [219]:
corr_mat, x, y = create_correlation_matrix(df.query(query), x, y)
plot_correlation_matrix(corr_mat, x, y)

# Specific Player Checks

In [173]:
x = [
    "fantasy_points_per_game",
    "fantasy_points",
    "passing_yards",
    "max_air_distance_mean",
    "completions",
    "passing_attempts"
]

In [174]:
y = [
    "fantasy_points_last_per_game",
    "fantasy_points_last",
    "passing_yards_last",
    "max_air_distance_mean_last",
    "completions_last",
    "passing_attempts_last"
]

In [180]:
query = "passer == 'P.Mahomes'"
player_df = filter_df(explore_df, query, x, y)

max_air_distance_mean max_air_distance_mean
