# Imports

In [117]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

# Grabbing Data

In [118]:
# Load the combined player data set (the file name suggests seasons 2005-2023).
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files
# that were produced by a trusted source.
pickle_path = "../interactive/df_all_2005_2023.pkl"
df_all = pd.read_pickle(pickle_path)

In [119]:
# Analysis parameters: the season and position that the review below is filtered to.
season: int = 2023
position: str = "QB"

In [120]:
# Keep only rows for the chosen season and position, and only players
# with at least 5 games.
is_selected = (
    (df_all["season"] == season)
    & (df_all["position"] == position)
    & (df_all["games"] >= 5)
)
df_review = df_all[is_selected]

## Verifying Data

In [121]:
# Sanity-check the filtered frame before analysing it.
# An empty frame would make every check below either crash or pass vacuously.
assert not df_review.empty, (
    f"no rows left for season={season}, position={position!r}"
)
# Check every row rather than only the first unique value, so a frame that
# still contains other positions/seasons cannot slip through.
assert df_review["position"].eq(position).all(), "df_review contains other positions"
assert df_review["season"].eq(season).all(), "df_review contains other seasons"
# NOTE(review): 17 is presumably the full schedule length for the selected
# season -- confirm the expected maximum before pointing `season` elsewhere.
assert df_review["games"].max() == 17, (
    f"expected a maximum of 17 games, found {df_review['games'].max()}"
)

# Tiered Scatter Plot

In [122]:
# Column plotted on the y-axis and used to rank the players.
col_to_graph: str = "fantasy_points"

In [123]:
# Rank players from lowest to highest on the plotted column (ascending is the
# default); the explicit copy gives the later cells their own frame to modify.
df_sorted = df_review.sort_values(col_to_graph).copy()

In [124]:
# Set to True to rank and plot the per-game average instead of the season total.
per_game: bool = False

In [125]:
# Optionally switch the plotted metric to its per-game average.
# The suffix guard keeps this cell idempotent: without it, re-running the cell
# would append "_per_game" a second time and divide by `games` again.
if per_game and not col_to_graph.endswith("_per_game"):
    per_game_col = f"{col_to_graph}_per_game"
    # Build a new frame instead of mutating in place, then re-rank on the new column.
    df_sorted = (
        df_sorted
        .assign(**{per_game_col: df_sorted[col_to_graph] / df_sorted["games"]})
        .sort_values(by=per_game_col, ascending=True)
    )
    # Later cells read `col_to_graph`, so point it at the per-game column.
    col_to_graph = per_game_col

In [126]:
# Renumber the rows 0..n-1 in ranked order. `drop` is left at its default,
# so the previous index labels are kept as a new column.
df_sorted = df_sorted.reset_index()

In [127]:
# Rank counted from the bottom of the frame: the last row gets 0,
# the row above it 1, and so on.
last_position = len(df_sorted) - 1
reverse_index = last_position - df_sorted.index

# Bucket every 12 consecutive ranks together: ranks 0-11 -> 0, 12-23 -> 1, ...
group = reverse_index // 12

In [128]:
# Map each group number to a tier label ("Tier 1" for group 0, and so on).
# The mapping is sized from the data so that frames with more than 6 groups
# (more than 72 rows) do not end up with NaN labels; it never has fewer than
# the original 6 entries, and an empty `group` falls back to those 6.
n_tiers = max(6, int(group.max()) + 1) if len(group) else 6
colors = {tier: f"Tier {tier + 1}" for tier in range(n_tiers)}
df_sorted['Color'] = group.map(colors)

In [132]:
# Scatter plot of the top of the ranking, coloured by tier, with cut-off lines
# drawn at the 12th- and 24th-highest rows.
start_index = -30  # plot only the last 30 rows of the ascending-sorted frame
fig = px.scatter(df_sorted[start_index:], x="player_name", y=col_to_graph, color='Color')

# Both vertical cut-off lines share one extent: from the value 10 rows below
# the plotted window up to 10% above the maximum.
v_line_y0 = df_sorted[col_to_graph].iloc[start_index - 10]
v_line_y1 = df_sorted[col_to_graph].max() * 1.1

# Value and player found at the 12th-from-last (qb1) and 24th-from-last (qb2) rows.
qb1_line_h = df_sorted[col_to_graph].iloc[-12]
qb1_line_v = df_sorted["player_name"].iloc[-12]
qb2_line_h = df_sorted[col_to_graph].iloc[-24]
qb2_line_v = df_sorted["player_name"].iloc[-24]

for cutoff_value, cutoff_player in ((qb1_line_h, qb1_line_v), (qb2_line_h, qb2_line_v)):
    # Horizontal line across the full plot width (x in paper coordinates)
    fig.add_shape(type="line",
                  x0=0, x1=1, y0=cutoff_value, y1=cutoff_value,
                  line=dict(color="black", width=2),
                  xref="paper", yref="y")
    # Vertical line at the cut-off player's position on the x-axis
    fig.add_shape(type="line",
                  x0=cutoff_player, x1=cutoff_player, y0=v_line_y0, y1=v_line_y1,
                  line=dict(color="black", width=2),
                  xref="x", yref="y")

fig.update_traces(marker=dict(size=12))

# Derive the labels from the analysis parameters instead of hard-coding them,
# so the figure stays correct when season, position or the plotted column change.
per_game_suffix = "_per_game"
is_per_game = col_to_graph.endswith(per_game_suffix)
metric_col = col_to_graph[:-len(per_game_suffix)] if is_per_game else col_to_graph
metric_label = metric_col.replace("_", " ").title()  # "fantasy_points" -> "Fantasy Points"

fig.update_layout(
    title=f"{season} {position} {metric_label}" + (" Per Game" if is_per_game else ""),
    xaxis_title="Player Names",
    yaxis_title=f"{metric_label} ({'Per Game' if is_per_game else 'Season'})",
    legend_title=f"{position} Tiers",
)

# fig.write_html("../interactive-2.0/QB/2023-review/fantasy-points-season.html")
fig.show()

In [136]:
# Show the plotted column pair for the last 30 rows (the ones drawn in the figure).
df_sorted.loc[:, ["fantasy_points", "player_name"]].tail(30)

Unnamed: 0,fantasy_points,player_name
30,110.52,Taysom Hill
31,119.94,Zach Wilson
32,125.82,Aidan O'Connell
33,143.44,Jake Browning
34,146.36,Kyler Murray
35,147.16,Joe Burrow
36,149.74,Kirk Cousins
37,156.38,Bryce Young
38,180.14,Desmond Ridder
39,196.2,Gardner Minshew


In [143]:
# Gap between the last row (highest value) and the mean of the last 12 rows.
top12_points = df_sorted["fantasy_points"].tail(12)
top12_points.iloc[-1] - top12_points.mean()

86.93333333333328

In [144]:
# Mean fantasy points across the last 12 rows of the ranked frame.
df_sorted["fantasy_points"].tail(12).mean()

307.7066666666667

In [145]:
# Gap between the last row and the last-12 mean, spread over 17 games.
# Computed from the frame instead of pasting the number from an earlier
# output, so it cannot go stale when the data or parameters change.
(df_sorted["fantasy_points"].iloc[-1] - df_sorted["fantasy_points"].iloc[-12:].mean()) / 17

5.113725490196075