# Imports

In [1]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Grabbing Data

In [2]:
# Load the combined player-stats frame from a local pickle file.
# NOTE(review): the file name suggests seasons 2005-2023 -- confirm the actual coverage.
# NOTE(review): unpickling can execute arbitrary code; only load pickles produced by this project.
df_all = pd.read_pickle("../interactive/df_all_2005_2023.pkl")

In [3]:
# Position code used to filter the player frame and re-checked by the verification asserts.
position = 'QB'

In [5]:
# Keep only rows for the selected position with at least 5 games played.
# `@position` makes pandas read the notebook variable directly instead of
# splicing its text into the expression, so values containing quotes cannot
# break (or alter) the query.
df_review = df_all.query("position == @position and games >= 5")

### Verifying Data

In [7]:
# Sanity checks on the filtered frame before any aggregation.
# An empty frame would make the checks below meaningless, so fail early and clearly.
assert not df_review.empty, f"no rows left for position {position!r}"
# Check every row, not just the first unique value: a stray row from another
# position must fail the check.
assert (df_review["position"] == position).all(), f"found rows whose position is not {position!r}"
# NOTE(review): 17 is a hard-coded expectation for the largest games-played value
# (presumably the 17-game regular season) -- confirm it holds for this data set.
assert df_review["games"].max() == 17, "unexpected maximum games played"

# Grouping By Year

In [11]:
# Peek at the two columns used by the per-season aggregations below.
df_review[["season", "fantasy_points"]].head()

Unnamed: 0,season,fantasy_points
15,2006,38.28
16,2007,10.58
29,2006,71.36
47,2006,46.6
59,2006,100.96


In [8]:
# Show every column available in the filtered frame.
df_review.columns

Index(['player_id', 'season', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr_x', 'special_teams_tds', 'fantasy_points', 'fantasy_points_ppr',
       'games', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh', 'rtd_sh',
       'rfd_sh', 'rtdfd_sh', 'dom', '

# Average and Median of the Top 12 (Tier 1) by Season

In [30]:
# Keep each season's 12 largest fantasy-point totals.
# reset_index() turns the (season, original row label) index into the columns
# "season" and "level_1"; groupby/nlargest never modify df_review, so no
# defensive copy is required.
df_fantasy_by_season = (
    df_review
    .groupby("season")["fantasy_points"]
    .nlargest(12)
    .reset_index()
)

In [31]:
# Discard the leftover original-row-label column ("level_1").
# Reassigning instead of mutating in place, together with errors="ignore",
# keeps this cell safe to re-run: a second execution no longer raises
# KeyError because the column is already gone.
df_fantasy_by_season = df_fantasy_by_season.drop(columns="level_1", errors="ignore")

In [44]:
# Collapse the per-season top-12 totals into one row per season with two
# summary columns named "mean" and "median".
df_avg_fp_by_season = (
    df_fantasy_by_season
    .groupby("season")["fantasy_points"]
    .agg(["mean", "median"])
    .reset_index()
)

In [47]:
# Preview the first rows of the per-season mean/median table.
df_avg_fp_by_season.head()

Unnamed: 0,season,mean,median
0,2006,229.433333,218.72
1,2007,256.491667,249.44
2,2008,250.601667,249.5
3,2009,270.848333,272.07
4,2010,266.528333,263.82


# 12th-Highest Fantasy Points by Season

In [48]:
def get_12th_highest(series):
    """Return the 12th-largest value of ``series``.

    Args:
        series: A pandas Series of sortable values (used here on one
            season's ``fantasy_points``).

    Returns:
        The value ranked 12th when the series is sorted from largest to
        smallest, or ``None`` when the series holds fewer than 12 entries.
    """
    ranked = series.sort_values(ascending=False)
    # Guard: without at least 12 entries there is no 12th-place value.
    if len(ranked) < 12:
        return None
    # Positional index 11 is the 12th entry of the descending order.
    return ranked.iloc[11]

In [49]:
# One row per season holding that season's 12th-best fantasy-point total;
# get_12th_highest returns None for a season with fewer than 12 rows.
# groupby/apply do not modify df_review, so no defensive copy is required.
df_12th_highest_by_season = (
    df_review
    .groupby("season")["fantasy_points"]
    .apply(get_12th_highest)
    .reset_index()
)

In [50]:
# Preview the first rows of the per-season 12th-highest table.
df_12th_highest_by_season.head()

Unnamed: 0,season,fantasy_points
0,2006,190.2
1,2007,205.86
2,2008,204.02
3,2009,227.84
4,2010,229.14


# Combining the Two

In [53]:
# Attach each season's 12th-highest total to its mean/median row.
# how="left" keeps every season present in the mean/median table.
merged_df = df_avg_fp_by_season.merge(
    right=df_12th_highest_by_season,
    how="left",
    on="season",
)

In [58]:
# Replace the raw column names with presentation labels; every column after
# "Season" is later passed to the bar chart as a y-series.
# NOTE(review): "Median QB 1 Points" contains a space that "Average QB1 Points"
# does not -- probably unintentional, but left as-is so nothing keyed on the
# label changes.
merged_df = merged_df.rename(
    columns={
        "season": "Season",
        "mean": "Average QB1 Points",
        "median": "Median QB 1 Points",
        "fantasy_points": "QB12 Points",
    }
)

In [60]:
# Preview the first rows of the combined table with its display labels.
merged_df.head()

Unnamed: 0,Season,Average QB1 Points,Median QB 1 Points,QB12 Points
0,2006,229.433333,218.72,190.2
1,2007,256.491667,249.44,205.86
2,2008,250.601667,249.5,204.02
3,2009,270.848333,272.07,227.84
4,2010,266.528333,263.82,229.14


# Bar Graph

In [73]:
# Grouped bar chart: for each season, one bar per metric column
# (every column of merged_df after the first one, "Season").
fig = px.bar(
    merged_df,
    x="Season",
    y=list(merged_df.columns)[1:],
    barmode="group",
)

# Horizontal reference line at the threshold for a low QB.
# NOTE(review): 285 is hard-coded with no stated source -- confirm or document it.
threshold = 285
fig.add_shape(
    type="line",
    # xref="paper" with x0=0 and x1=1 stretches the line across the full plot width.
    xref="paper",
    x0=0,
    x1=1,
    # yref="y" positions the line at `threshold` in y-axis units.
    yref="y",
    y0=threshold,
    y1=threshold,
    line={"color": "black", "width": 2},
)

fig.update_layout(
    title="Seasonal Average and Median QB1 and QB12",
    xaxis_title="Season",
    yaxis_title="Fantasy Points (Season)",
    legend_title="Quarterback Finishes",
)

# Save a standalone HTML copy of the chart, then render it inline.
fig.write_html("../interactive-2.0/QB/2023-review/threshold-bar-qb1.html")

fig.show()

In [72]:
ls ../

2022 Data.ipynb
AdvancedStats.ipynb
[34mClusterNotebooks[m[m/
[34mFantasyData[m[m/
Graph Data.ipynb
Interactive Clustering Advanced.ipynb
NFLData.ipynb
Plotly For Quarterback Passing Stability.ipynb
Principal Component Analysis.ipynb
README.md
[34mSeasonReviewNotebooks[m[m/
[34mSoSNotebooks[m[m/
[34m__pycache__[m[m/
[34menv[m[m/
[34mimages[m[m/
[34minteractive[m[m/
[34minteractive-2.0[m[m/
main.py
[34mmodels[m[m/
[34mnfldata[m[m/
[34mnfldataexploration[m[m/
[34mpca[m[m/
[34mplotly[m[m/
[34mutils[m[m/


In [71]:
# NOTE(review): scratch arithmetic -- the meaning of 125 and 4 is not recorded; document or remove.
125 / 4

31.25

In [None]:
# 3.38 per game
# 44 sacks

In [74]:
# NOTE(review): scratch arithmetic -- presumably the number of games implied by
# 44 sacks at 3.38 per game (from the notes cell above); confirm or remove.
44/3.38

13.017751479289942

In [75]:
# NOTE(review): scratch arithmetic -- the meaning of 143 and 7 is not recorded; document or remove.
143 / 7

20.428571428571427