# QB Filter For Success

Was your sack rate 8.7% or above?
- sack rate > 8.7% => bust
- sack rate < 8.7% => not sure yet, go to filter 2

If not, was your adjusted yards gained per pass attempt 6.5 or below? (because of the ''if not'' we are now at filter 2)
- AY/A < 6.5 => bust
- AY/A > 6.5 => hit

# Imports

In [1]:
import numpy as np
import pandas as pd
import nfl_data_py as nfl

# Grabbing Data

In [2]:
years = range(1999, 2023 + 1)

In [3]:
df_pbp = pd.read_pickle("./df_pbp_99_23.pkl")

In [4]:
df_roster = pd.read_pickle("./df_roster_99_23.pkl")

In [5]:
df_fantasy = pd.read_pickle("../../DataManagementNotebooks/basic_99_23.pkl")

# Filter Data

In [6]:
df_pass = df_pbp.query("season_type == 'REG'")

In [7]:
df = df_pass \
    .groupby(["season", "passer_player_id"]) \
    .agg({"sack": "sum", "passing_yards": "sum", "interception": "sum", "pass_touchdown": "sum", 
          "complete_pass": "sum", "incomplete_pass": "sum", "pass_attempt": "sum"})

In [8]:
clean_df = df.reset_index() \
    .query("pass_attempt >= 1") \
    .rename(columns={"pass_attempt": "drop_backs", "passer_player_id": "player_id"})

In [9]:
clean_df["pass_attempts"] = clean_df["complete_pass"] + clean_df["incomplete_pass"] + clean_df["interception"]

In [10]:
clean_df = clean_df.query("pass_attempts >= 100")

**Note:** snaps and passing attempts are the same as we have filtered the play by play data to only have passing plays.

# Adding Two Heuristics

1. Sack rate: caluclated as `sacks / snaps`.
2. Adjusted yards gained per pass attempt (or ay_a): `(Passing Yards + 20(Passing Touchdowns) – 45(Interceptions Thrown))/(Passing Attempts)`

Remember, snaps and passing attempts are synonymous for our dataset

In [11]:
clean_df["sack_rate"] = clean_df["sack"] / clean_df["drop_backs"]

In [12]:
num = (clean_df["passing_yards"] + 20 * (clean_df["pass_touchdown"]) - 45 * (clean_df["interception"]))
denom = clean_df["pass_attempts"]
clean_df["ay_a"] = num / denom

In [13]:
clean_df.head()

Unnamed: 0,season,player_id,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,pass_attempts,sack_rate,ay_a
0,1999,00-0000104,19.0,2964.0,12.0,17.0,263.0,166.0,460.0,441.0,0.041304,6.267574
1,1999,00-0000722,33.0,2136.0,8.0,17.0,169.0,143.0,353.0,320.0,0.093484,6.6125
2,1999,00-0000865,36.0,1957.0,7.0,13.0,151.0,111.0,307.0,269.0,0.117264,7.070632
5,1999,00-0001218,50.0,4436.0,15.0,36.0,343.0,211.0,621.0,569.0,0.080515,7.87522
6,1999,00-0001335,30.0,2670.0,12.0,16.0,215.0,162.0,421.0,389.0,0.071259,6.298201


### Rookie DF

In [14]:
min_season_df = clean_df.groupby("player_id")['season'].min().reset_index()
min_season_df.rename(columns={'season': 'min_season'}, inplace=True)

# Merge on 'player_id' to add the 'min_season' to each row of 'clean_df'
temp_df = pd.merge(clean_df, min_season_df, on="player_id")

# Filter rows where 'season' matches 'min_season'
temp_rook_df = temp_df[temp_df['season'] == temp_df['min_season']]

In [15]:
rook_df = temp_rook_df.query("drop_backs > 5")

In [16]:
rook_df.head()

Unnamed: 0,season,player_id,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,pass_attempts,sack_rate,ay_a,min_season
0,1999,00-0000104,19.0,2964.0,12.0,17.0,263.0,166.0,460.0,441.0,0.041304,6.267574,1999
1,1999,00-0000722,33.0,2136.0,8.0,17.0,169.0,143.0,353.0,320.0,0.093484,6.6125,1999
2,1999,00-0000865,36.0,1957.0,7.0,13.0,151.0,111.0,307.0,269.0,0.117264,7.070632,1999
3,1999,00-0001218,50.0,4436.0,15.0,36.0,343.0,211.0,621.0,569.0,0.080515,7.87522,1999
4,1999,00-0001335,30.0,2670.0,12.0,16.0,215.0,162.0,421.0,389.0,0.071259,6.298201,1999


# Magic Time

## Sack Rate - Filter 1

In [17]:
low_sack_rate = rook_df.query("sack_rate < 0.087")

In [18]:
low_sack_rate.head(10)

Unnamed: 0,season,player_id,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,pass_attempts,sack_rate,ay_a,min_season
0,1999,00-0000104,19.0,2964.0,12.0,17.0,263.0,166.0,460.0,441.0,0.041304,6.267574,1999
3,1999,00-0001218,50.0,4436.0,15.0,36.0,343.0,211.0,621.0,569.0,0.080515,7.87522,1999
4,1999,00-0001335,30.0,2670.0,12.0,16.0,215.0,162.0,421.0,389.0,0.071259,6.298201,1999
7,1999,00-0002110,29.0,3060.0,9.0,14.0,259.0,173.0,473.0,441.0,0.061311,6.655329,1999
10,1999,00-0003292,16.0,2318.0,11.0,8.0,190.0,130.0,347.0,331.0,0.04611,5.990937,1999
12,1999,00-0003761,15.0,1475.0,9.0,8.0,124.0,66.0,214.0,199.0,0.070093,6.180904,1999
14,1999,00-0005106,35.0,4091.0,23.0,22.0,341.0,230.0,631.0,594.0,0.055468,5.885522,1999
15,1999,00-0005363,26.0,3171.0,16.0,19.0,264.0,198.0,504.0,478.0,0.051587,5.922594,1999
17,1999,00-0005741,49.0,3840.0,14.0,24.0,304.0,197.0,566.0,515.0,0.086572,7.165049,1999
18,1999,00-0005755,15.0,2544.0,11.0,11.0,225.0,137.0,390.0,373.0,0.038462,6.08311,1999


# Grabbing Filtered Info

In [19]:
df_rost_clean = df_roster.query("position == 'QB'")[["season", "player_name", "player_id", "depth_chart_position", 
                                     "college", "height", "weight"]]

## Those Filtered Out

In [20]:
bust_one = rook_df.query("sack_rate >= 0.087")

In [21]:
bust_one_clean = pd.merge(bust_one, df_rost_clean, on=["season", "player_id"], how="left")

In [22]:
nfl_qbs_rounds_combined = {
    "Tim Couch": 1, "Donovan McNabb": 1, "Akili Smith": 1,
    "Daunte Culpepper": 1, "Cade McNown": 1, "Mike Vick": 1,
    "David Carr": 1, "Chad Hutchinson": "Unknown", "Mike McMahon": 5,
    "Anthony Wright": 0, "Jesse Palmer": 4, "Josh McCown": 3,
    "Craig Krenzel": 5, "Ben Roethlisberger": 1, "Brooks Bollinger": 6,
    "J.P. Losman": 1, "Alex Smith": 1, "Charlie Frye": 3,
    "Andrew Walter": 3, "Luke McCown": 4, "Kellen Clemens": 2,
    "J.T. O'Sullivan": 6, "Keith Null": 6, "Troy Smith": 5,
    "Jimmy Clausen": 2, "Colt McCoy": 3, "Caleb Hanie": 0,
    "Tim Tebow": 1, "Blaine Gabbert": 1, "Christian Ponder": 1,
    "T.J. Yates": 5, "Matt Flynn": 7, "Thaddeus Lewis": 0,
    "Terrelle Pryor": 3, "Mike Glennon": 3, "Geno Smith": 2,
    "Charlie Whitehurst": 3, "Austin Davis": 0, "Teddy Bridgewater": 1,
    "Zach Mettenberger": 6, "Blake Bortles": 1, "A.J. McCarron": 5,
    "Marcus Mariota": 1, "Bryce Petty": 4, "Cody Kessler": 3,
    "Jared Goff": 1, "Jacoby Brissett": 3, "Josh Rosen": 1,
    "Dwayne Haskins": 1, "Taysom Hill": 0, "Justin Fields": 1,
    "Zach Wilson": 1, "Sam Ehlinger": 6, "Jake Browning": 0,
    "Sam Howell": 5, "Bailey Zappe": 6, "Tommy DeVito": 0,
    "Bryce Young": 1, "Will Levis": 2,
}

In [23]:
bust_one_clean["round"] = bust_one_clean["player_name"].map(nfl_qbs_rounds_combined).fillna(-1)

In [24]:
bust_one_clean.query("round == 1")[["player_name", "sack_rate", "ay_a"]].reset_index()

Unnamed: 0,index,player_name,sack_rate,ay_a
0,6,Tim Couch,0.123077,5.445844
1,12,Donovan McNabb,0.114286,3.671296
2,14,Akili Smith,0.110465,3.782895
3,21,Mike Vick,0.156716,6.106194
4,24,David Carr,0.145594,4.722973
5,29,Ben Roethlisberger,0.092025,8.359322
6,31,J.P. Losman,0.098425,4.978166
7,32,Alex Smith,0.149485,2.424242
8,43,Tim Tebow,0.108197,6.269372
9,44,Blaine Gabbert,0.0883,4.743341


In [25]:
len(bust_one_clean.query("round == 1"))

20

In [26]:
df_fail_one = bust_one_clean \
    .copy() \
    .query("round == 1")[["player_id", "player_name", "sack_rate", "ay_a"]] \
    .reset_index()

In [27]:
df_fail_one.head()

Unnamed: 0,index,player_id,player_name,sack_rate,ay_a
0,6,00-0003535,Tim Couch,0.123077,5.445844
1,12,00-0011022,Donovan McNabb,0.114286,3.671296
2,14,00-0015082,Akili Smith,0.110465,3.782895
3,21,00-0020245,Mike Vick,0.156716,6.106194
4,24,00-0020608,David Carr,0.145594,4.722973


Model failures:
- Donovan McNabb 	0.114286 	3.671296
- Mike Vick 	0.156716 	6.106194
- Ben Roethlisberger 	0.092025 	8.359322
- Alex Smith 	0.149485 	2.424242
- Jared Goff 	0.112554 	4.263415

5 out of 20 or 25% chance of still going against the odds. This falls to 4 of 20 (20% chance) if we remove Vick's first season, since he only played two games.

## AY_A - Filter 2

In [28]:
df_success = low_sack_rate.query("ay_a > 6.5")

In [29]:
df_success.head(10)

Unnamed: 0,season,player_id,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,pass_attempts,sack_rate,ay_a,min_season
3,1999,00-0001218,50.0,4436.0,15.0,36.0,343.0,211.0,621.0,569.0,0.080515,7.87522,1999
7,1999,00-0002110,29.0,3060.0,9.0,14.0,259.0,173.0,473.0,441.0,0.061311,6.655329,1999
17,1999,00-0005741,49.0,3840.0,14.0,24.0,304.0,197.0,566.0,515.0,0.086572,7.165049,1999
19,1999,00-0005885,28.0,2816.0,12.0,23.0,191.0,125.0,358.0,328.0,0.078212,8.341463,1999
27,1999,00-0008442,29.0,4005.0,13.0,24.0,316.0,190.0,549.0,519.0,0.052823,7.514451,1999
32,1999,00-0010346,14.0,4135.0,15.0,26.0,331.0,186.0,548.0,532.0,0.025547,7.481203,1999
40,1999,00-0012261,9.0,1382.0,5.0,10.0,116.0,74.0,204.0,195.0,0.044118,6.958974,1999
48,1999,00-0017200,26.0,4044.0,11.0,38.0,297.0,147.0,482.0,455.0,0.053942,9.470329,1999
55,2000,00-0001823,15.0,1514.0,6.0,9.0,113.0,75.0,210.0,194.0,0.071429,7.340206,2000
60,2000,00-0003739,34.0,3937.0,16.0,33.0,297.0,161.0,510.0,474.0,0.066667,8.179325,2000


## Those filtered out by Sacks

In [30]:
df_bust_two = low_sack_rate.query("ay_a <= 6.5")

In [31]:
df_bust_two_clean = pd.merge(df_bust_two, df_rost_clean, on=["season", "player_id"], how="left")

In [32]:
nfl_qbs_rounds_post_1998 = {
    "Drew Brees": 2, "Joey Harrington": 1, "Patrick Ramsey": 1,
    "Kyle Boller": 1, "Byron Leftwich": 1, "Carson Palmer": 1,
    "Eli Manning": 1, "Chris Simms": 3, "Kyle Orton": 4,
    "Ryan Fitzpatrick": 7, "Jason Campbell": 1, "Derek Anderson": 6,
    "Vince Young": 1, "Matt Leinart": 1, "Bruce Gradkowski": 6,
    "Tarvaris Jackson": 2, "Brodie Croyle": 3, "John Beck": 2,
    "Trent Edwards": 3, "JaMarcus Russell": 1, "Joe Flacco": 1,
    "Brady Quinn": 1, "Chad Henne": 2, "Josh Johnson": 5,
    "Matthew Stafford": 1, "Mark Sanchez": 1, "Josh Freeman": 1,
    "Kevin Kolb": 2, "Sam Bradford": 1, "Andy Dalton": 2,
    "Jake Locker": 1, "Ryan Lindley": 6, "Nick Foles": 3,
    "Andrew Luck": 1, "Brandon Weeden": 1, "Ryan Tannehill": 1,
    "Kirk Cousins": 4, "E.J. Manuel": 1, "Derek Carr": 2,
    "Ryan Mallett": 3, "Johnny Manziel": 1, "Matt Barkley": 4,
    "Carson Wentz": 1, "Mitchell Trubisky": 1, "DeShone Kizer": 2,
    "C.J. Beathard": 3, "Josh Allen": 1, "Sam Darnold": 1,
    "Mason Rudolph": 3, "Daniel Jones": 1, "Tua Tagovailoa": 1,
    "Jake Luton": 6, "Mike White": 5, "Davis Mills": 3,
    "Trevor Lawrence": 1, "Kenny Pickett": 1, "Joshua Dobbs": 4,
    "Jeff Garcia": 0, "Jon Kitna": 0, "Erik Kramer": 0, "Ray Lucas": 0,
    "Shane Matthews": 0, "Doug Pederson": 0, "Mike Tomczak": 0,
    "Jay Fiedler": 0, "Jamie Martin": 0, "Todd Bouman": 0,
    "Jake Delhomme": 0, "Doug Johnson": 0, "Tim Hasselbeck": 0,
    "Kurt Kittner": 5, "Matthew McGloin": 0, "Kellen Moore": 0,
    "Cleo Lemon": 0, "Matt Moore": 0, "Tyler Thigpen": 7,
    "Tyler Palko": 0, "Curtis Painter": 6, "Kyle Allen": 0,
    "David Blough": 0, "Devlin Hodges": 0, "Brandon Allen": 0,
    "Taylor Heinicke": 0, "Tyler Huntley": 0, "Phillip Walker": 0,
    "Cooper Rush": 0, "Tyson Bagent": 0, "Aidan O'Connell": 0,
    "Dorian Thompson-Robinson": 0
}

In [33]:
df_bust_two_clean["round"] = df_bust_two_clean["player_name"].map(nfl_qbs_rounds_post_1998).fillna(-1)

In [34]:
df_bust_two_clean.query("round == 1")[["player_name", "sack_rate", "ay_a"]].reset_index()

Unnamed: 0,index,player_name,sack_rate,ay_a
0,38,Joey Harrington,0.018307,4.228438
1,39,Patrick Ramsey,0.072874,5.986784
2,44,Kyle Boller,0.069106,4.441964
3,45,Byron Leftwich,0.043478,5.691388
4,46,Carson Palmer,0.054705,5.664352
5,48,Eli Manning,0.061611,3.847716
6,55,Jason Campbell,0.037209,5.927536
7,57,Vince Young,0.068063,5.207865
8,58,Matt Leinart,0.050378,5.909575
9,68,JaMarcus Russell,0.075377,6.3297


In [35]:
df_fail_two = df_bust_two_clean \
    .copy() \
    .query("round == 1")[["player_id", "player_name", "sack_rate", "ay_a"]] \
    .reset_index()

In [36]:
df_fail_two.head()

Unnamed: 0,index,player_id,player_name,sack_rate,ay_a
0,38,00-0021141,Joey Harrington,0.018307,4.228438
1,39,00-0021379,Patrick Ramsey,0.072874,5.986784
2,44,00-0022164,Kyle Boller,0.069106,4.441964
3,45,00-0022177,Byron Leftwich,0.043478,5.691388
4,46,00-0021429,Carson Palmer,0.054705,5.664352


Potential outliers:
- Carson Palmer 	0.054705 	5.664352
- Eli Manning 	0.061611 	3.847716
- Joe Flacco 	0.071429 	6.334112
- Matthew Stafford 	0.057214 	4.315650
- Andrew Luck 	0.061285 	6.417863
- Ryan Tannehill 	0.067179 	6.092975
- Carson Wentz 	0.051322 	5.719934
- Josh Allen 	0.080460 	5.341693
- Daniel Jones 	0.076000 	6.464052
- Tua Tagovailoa 	0.063898 	6.237931
- Trevor Lawrence 	0.050078 	5.176080

11 players out of 30. Success rate for filtering out busts is still 63% effective.

# Merge in Roster DF

In [37]:
merged_df = pd.merge(df_success, df_rost_clean, on=["season", "player_id"], how="left")

In [38]:
merged_df.dropna(subset="player_name", inplace=True)

## Successful QBs

In [39]:
nfl_qbs_rounds = {
    "Daunte Culpepper": 1,
    "Chad Pennington": 1,
    "Marc Bulger": 6,
    "David Garrard": 4,
    "Tony Romo": 0,  # Representing undrafted with 0
    "Philip Rivers": 1,
    "Aaron Rodgers": 1,
    "Jay Cutler": 1,
    "Matt Schaub": 3,
    "Matt Ryan": 1,
    "Cam Newton": 1,
    "Colin Kaepernick": 2,
    "Russell Wilson": 3,
    "Robert Griffin III": 1,
    "Case Keenum": 0,  # Representing undrafted with 0
    "Jimmy Garoppolo": 2,
    "Jameis Winston": 1,
    "Dak Prescott": 4,
    "Patrick Mahomes": 1,
    "Deshaun Watson": 1,
    "Lamar Jackson": 1,
    "Baker Mayfield": 1,
    "Kyler Murray": 1,
    "Gardner Minshew": 6,
    "Drew Lock": 2,
    "Justin Herbert": 1,
    "Jalen Hurts": 2,
    "Joe Burrow": 1,
    "Mac Jones": 1,
    "Brock Purdy": 7,
    "Desmond Ridder": 3,
    "Jordan Love": 1,
    "C.J. Stroud": 1,
}

In [40]:
merged_df["round"] = merged_df["player_name"].map(nfl_qbs_rounds).fillna(-1)

In [41]:
final_df = merged_df.query("round >= 0")

In [42]:
final_df["completion_percentage"] = final_df["complete_pass"] / final_df["pass_attempts"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df["completion_percentage"] = final_df["complete_pass"] / final_df["pass_attempts"]


In [43]:
final_df["bmi"] = final_df["weight"] / np.sqrt(final_df["height"])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  final_df["bmi"] = final_df["weight"] / np.sqrt(final_df["height"])


In [44]:
round_one = final_df.query("round == 1")[["player_id", "player_name", 'sack', 'passing_yards', 'interception',
       'pass_touchdown', 'complete_pass', 'incomplete_pass', 'drop_backs',
       'pass_attempts', 'sack_rate', 'ay_a', 'height', 'weight', "bmi", "completion_percentage"]]

In [45]:
round_one

Unnamed: 0,player_id,player_name,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,pass_attempts,sack_rate,ay_a,height,weight,bmi,completion_percentage
9,00-0003739,Daunte Culpepper,34.0,3937.0,16.0,33.0,297.0,161.0,510.0,474.0,0.066667,8.179325,76.0,266.0,30.512293,0.626582
12,00-0019559,Chad Pennington,22.0,3120.0,6.0,22.0,275.0,118.0,422.0,399.0,0.052133,8.245614,75.0,225.0,25.980762,0.689223
18,00-0022942,Philip Rivers,28.0,3387.0,9.0,22.0,284.0,167.0,489.0,460.0,0.05726,7.43913,77.0,228.0,25.983011,0.617391
19,00-0024226,Jay Cutler,13.0,1001.0,5.0,9.0,81.0,51.0,150.0,137.0,0.086667,6.978102,75.0,233.0,26.904523,0.591241
24,00-0023459,Aaron Rodgers,34.0,4038.0,13.0,28.0,341.0,182.0,572.0,536.0,0.059441,7.48694,74.0,220.0,25.574481,0.636194
26,00-0026143,Matt Ryan,17.0,3440.0,11.0,16.0,265.0,158.0,452.0,434.0,0.037611,7.523041,76.0,220.0,25.235731,0.610599
27,00-0027939,Cam Newton,35.0,4051.0,17.0,21.0,310.0,190.0,555.0,517.0,0.063063,7.168279,77.0,248.0,28.262223,0.599613
35,00-0031503,Jameis Winston,27.0,4042.0,15.0,22.0,312.0,208.0,565.0,535.0,0.047788,7.115888,76.0,231.0,26.497517,0.583178
39,00-0033537,Deshaun Watson,19.0,1699.0,8.0,19.0,126.0,70.0,226.0,204.0,0.084071,8.426471,74.0,215.0,24.993242,0.617647
41,00-0033873,Patrick Mahomes,26.0,5097.0,12.0,50.0,383.0,185.0,607.0,580.0,0.042834,9.581035,75.0,230.0,26.558112,0.660345


In [46]:
df_success = round_one.copy()

# Add In Fantasy Numbers

In [47]:
def add_fantasy(df):
    # Initialize new columns in the df DataFrame with NaN values
    df['min_rank'] = np.nan
    df['avg_rank'] = np.nan
    df['min_tier'] = np.nan
    df['avg_tier'] = np.nan
    
    # Iterate through each row of the df DataFrame
    for i, row in df.iterrows():
        p_id = row["player_id"]

        # Query player information from df_fantasy based on player_id
        player = df_fantasy.query(f"player_id == '{p_id}'")

        # Calculate minimum and average rank and tier for the player
        min_rank = player["rank"].min()
        avg_rank = player["rank"].mean()
        min_tier = player["tier"].min()
        avg_tier = player["tier"].mean()

        # Assign calculated values back to df at the current index
        df.loc[i, 'min_rank'] = min_rank
        df.loc[i, 'avg_rank'] = avg_rank
        df.loc[i, 'min_tier'] = min_tier
        df.loc[i, 'avg_tier'] = avg_tier
        
    # Return the modified DataFrame
    return df

# Full Picture Success

In [66]:
df_success_fantasy = add_fantasy(df_success.copy())
df_success_fantasy.reset_index()

Unnamed: 0,index,player_id,player_name,sack,passing_yards,interception,pass_touchdown,complete_pass,incomplete_pass,drop_backs,...,sack_rate,ay_a,height,weight,bmi,completion_percentage,min_rank,avg_rank,min_tier,avg_tier
0,9,00-0003739,Daunte Culpepper,34.0,3937.0,16.0,33.0,297.0,161.0,510.0,...,0.066667,8.179325,76.0,266.0,30.512293,0.626582,1.0,24.454545,1.0,2.727273
1,12,00-0019559,Chad Pennington,22.0,3120.0,6.0,22.0,275.0,118.0,422.0,...,0.052133,8.245614,75.0,225.0,25.980762,0.689223,9.0,35.636364,1.0,3.454545
2,18,00-0022942,Philip Rivers,28.0,3387.0,9.0,22.0,284.0,167.0,489.0,...,0.05726,7.43913,77.0,228.0,25.983011,0.617391,3.0,17.764706,1.0,1.882353
3,19,00-0024226,Jay Cutler,13.0,1001.0,5.0,9.0,81.0,51.0,150.0,...,0.086667,6.978102,75.0,233.0,26.904523,0.591241,4.0,20.833333,1.0,2.166667
4,24,00-0023459,Aaron Rodgers,34.0,4038.0,13.0,28.0,341.0,182.0,572.0,...,0.059441,7.48694,74.0,220.0,25.574481,0.636194,1.0,20.263158,1.0,2.315789
5,26,00-0026143,Matt Ryan,17.0,3440.0,11.0,16.0,265.0,158.0,452.0,...,0.037611,7.523041,76.0,220.0,25.235731,0.610599,2.0,12.4,1.0,1.533333
6,27,00-0027939,Cam Newton,35.0,4051.0,17.0,21.0,310.0,190.0,555.0,...,0.063063,7.168279,77.0,248.0,28.262223,0.599613,1.0,14.636364,1.0,1.909091
7,35,00-0031503,Jameis Winston,27.0,4042.0,15.0,22.0,312.0,208.0,565.0,...,0.047788,7.115888,76.0,231.0,26.497517,0.583178,5.0,31.555556,1.0,3.111111
8,39,00-0033537,Deshaun Watson,19.0,1699.0,8.0,19.0,126.0,70.0,226.0,...,0.084071,8.426471,74.0,215.0,24.993242,0.617647,4.0,18.666667,1.0,2.166667
9,41,00-0033873,Patrick Mahomes,26.0,5097.0,12.0,50.0,383.0,185.0,607.0,...,0.042834,9.581035,75.0,230.0,26.558112,0.660345,1.0,10.857143,1.0,1.571429


In [65]:
df_success_fantasy.query("min_rank <= 12")[["player_name", "min_rank", 
                                                                "min_tier", "avg_rank", "avg_tier"]] \
    .reset_index()

Unnamed: 0,index,player_name,min_rank,min_tier,avg_rank,avg_tier
0,9,Daunte Culpepper,1.0,1.0,24.454545,2.727273
1,12,Chad Pennington,9.0,1.0,35.636364,3.454545
2,18,Philip Rivers,3.0,1.0,17.764706,1.882353
3,19,Jay Cutler,4.0,1.0,20.833333,2.166667
4,24,Aaron Rodgers,1.0,1.0,20.263158,2.315789
5,26,Matt Ryan,2.0,1.0,12.4,1.533333
6,27,Cam Newton,1.0,1.0,14.636364,1.909091
7,35,Jameis Winston,5.0,1.0,31.555556,3.111111
8,39,Deshaun Watson,4.0,1.0,18.666667,2.166667
9,41,Patrick Mahomes,1.0,1.0,10.857143,1.571429


# Bust One

In [49]:
fail_one_fantasy = add_fantasy(df_fail_one.copy())
fail_one_fantasy

Unnamed: 0,index,player_id,player_name,sack_rate,ay_a,min_rank,avg_rank,min_tier,avg_tier
0,6,00-0003535,Tim Couch,0.123077,5.445844,18.0,26.0,2.0,2.4
1,12,00-0011022,Donovan McNabb,0.114286,3.671296,3.0,14.384615,1.0,1.615385
2,14,00-0015082,Akili Smith,0.110465,3.782895,39.0,52.75,4.0,5.0
3,21,00-0020245,Mike Vick,0.156716,6.106194,1.0,24.0,1.0,2.461538
4,24,00-0020608,David Carr,0.145594,4.722973,14.0,39.0,2.0,3.7
5,29,00-0022924,Ben Roethlisberger,0.092025,8.359322,3.0,15.944444,1.0,1.833333
6,31,00-0022912,J.P. Losman,0.098425,4.978166,15.0,48.857143,2.0,4.571429
7,32,00-0023436,Alex Smith,0.149485,2.424242,4.0,25.071429,1.0,2.642857
8,43,00-0027876,Tim Tebow,0.108197,6.269372,18.0,32.0,2.0,3.333333
9,44,00-0027948,Blaine Gabbert,0.0883,4.743341,28.0,48.083333,3.0,4.416667


In [58]:
df_success_fail_one = fail_one_fantasy.query("min_rank <= 12")[["player_name", "min_rank", 
                                                                "min_tier", "avg_rank", "avg_tier"]] \
    .reset_index()
df_success_fail_one

Unnamed: 0,index,player_name,min_rank,min_tier,avg_rank,avg_tier
0,1,Donovan McNabb,3.0,1.0,14.384615,1.615385
1,3,Mike Vick,1.0,1.0,24.0,2.461538
2,5,Ben Roethlisberger,3.0,1.0,15.944444,1.833333
3,7,Alex Smith,4.0,1.0,25.071429,2.642857
4,12,Blake Bortles,4.0,1.0,24.166667,2.5
5,14,Jared Goff,7.0,1.0,16.125,1.75
6,17,Justin Fields,6.0,1.0,18.333333,2.0


### Success Rate from First Group Filtered

In [115]:
len(df_success_fail_one) / len(fail_one_fantasy)

0.35

### Failure Rate from First Group Filtered

In [112]:
(len(fail_one_fantasy) - len(df_success_fail_one)) / len(fail_one_fantasy)

0.65

# Bust Two

In [51]:
df_fail_two_fantasy = add_fantasy(df_fail_two.copy())
df_fail_two_fantasy

Unnamed: 0,index,player_id,player_name,sack_rate,ay_a,min_rank,avg_rank,min_tier,avg_tier
0,38,00-0021141,Joey Harrington,0.018307,4.228438,16.0,23.166667,2.0,2.5
1,39,00-0021379,Patrick Ramsey,0.072874,5.986784,22.0,49.428571,2.0,4.428571
2,44,00-0022164,Kyle Boller,0.069106,4.441964,25.0,42.5,3.0,4.125
3,45,00-0022177,Byron Leftwich,0.043478,5.691388,15.0,39.222222,2.0,3.666667
4,46,00-0021429,Carson Palmer,0.054705,5.664352,1.0,18.214286,1.0,2.071429
5,48,00-0022803,Eli Manning,0.061611,3.847716,5.0,16.6875,1.0,1.75
6,55,00-0023460,Jason Campbell,0.037209,5.927536,13.0,30.666667,2.0,3.0
7,57,00-0024218,Vince Young,0.068063,5.207865,9.0,29.666667,1.0,2.833333
8,58,00-0024225,Matt Leinart,0.050378,5.909575,23.0,49.5,2.0,4.5
9,68,00-0025388,JaMarcus Russell,0.075377,6.3297,26.0,44.0,3.0,4.333333


In [71]:
df_success_fail_two = df_fail_two_fantasy.query("min_rank <= 12")[["player_name", "min_rank", 
                                                                   "min_tier", "avg_rank", "avg_tier"]] \
    .reset_index()
df_success_fail_two

Unnamed: 0,index,player_name,min_rank,min_tier,avg_rank,avg_tier
0,4,Carson Palmer,1.0,1.0,18.214286,2.071429
1,5,Eli Manning,5.0,1.0,16.6875,1.75
2,7,Vince Young,9.0,1.0,29.666667,2.833333
3,10,Joe Flacco,10.0,1.0,25.8125,2.625
4,12,Matthew Stafford,5.0,1.0,16.2,1.866667
5,13,Mark Sanchez,10.0,1.0,36.625,3.625
6,14,Josh Freeman,7.0,1.0,31.142857,3.142857
7,17,Andrew Luck,2.0,1.0,8.833333,1.333333
8,19,Ryan Tannehill,7.0,1.0,20.545455,2.181818
9,22,Carson Wentz,5.0,1.0,22.5,2.25


### Success Rate from Second Group Filtered

In [129]:
(len(df_success_fail_two)) / len(df_fail_two_fantasy)

0.4666666666666667

### Failure Rate from First Group Filtered

In [64]:
(len(df_fail_two_fantasy) - len(df_success_fail_two)) / len(df_fail_two_fantasy)

0.5333333333333333

# Further Exploration

In [72]:
df_success_fantasy.columns

Index(['player_id', 'player_name', 'sack', 'passing_yards', 'interception',
       'pass_touchdown', 'complete_pass', 'incomplete_pass', 'drop_backs',
       'pass_attempts', 'sack_rate', 'ay_a', 'height', 'weight', 'bmi',
       'completion_percentage', 'min_rank', 'avg_rank', 'min_tier',
       'avg_tier'],
      dtype='object')

In [83]:
df_success_fantasy.player_name.unique()

array(['Daunte Culpepper', 'Chad Pennington', 'Philip Rivers',
       'Jay Cutler', 'Aaron Rodgers', 'Matt Ryan', 'Cam Newton',
       'Jameis Winston', 'Deshaun Watson', 'Patrick Mahomes',
       'Lamar Jackson', 'Baker Mayfield', 'Kyler Murray',
       'Justin Herbert', 'Joe Burrow', 'Mac Jones', 'Jordan Love',
       'C.J. Stroud'], dtype=object)

In [74]:
import plotly.express as px

In [76]:
df_success_fantasy["color"] = np.where(df_success_fantasy["player_name"] == "Mac Jones", "red", "blue")

In [84]:
fig = px.bar(df_success_fantasy, x='player_name', y='sack', color="color")
# fig.show()

In [85]:
for col in df_success_fantasy.columns[3:]:
    fig = px.bar(df_success_fantasy, x='player_name', y=col, color="color")
#     fig.show()

# For HTML Conversion

In [95]:
def html_ready_df(df):
    df["Sack Rate Percentage"] = df["sack_rate"] * 100
    df["Adjusted Air Yards"] = df["ay_a"]
    df["Player Name"] = df["player_name"]
    df["Best Finish"] = df["min_rank"]    
    
    return df[["Player Name", "Sack Rate Percentage", "Adjusted Air Yards", "Best Finish"]]

In [96]:
success_html = html_ready_df(df_success_fantasy.copy())

In [99]:
def highlight_mac_jones(row):
    # Check if the player_name in the row is 'Mac Jones'
    if row['Player Name'] == 'Mac Jones':
        # Return a list of CSS properties 'background-color: yellow' for each cell in the row
        return ['background-color: yellow']*len(row)
    else:
        # Return a list of empty strings (no styling) for each cell in the row
        return ['']*len(row)

# Assuming df is your DataFrame
# Apply the highlighting function row-wise (axis=1)
styled_df = success_html.style.apply(highlight_mac_jones, axis=1)

# Display the styled DataFrame
styled_df

Unnamed: 0,Player Name,Sack Rate Percentage,Adjusted Air Yards,Best Finish
9,Daunte Culpepper,6.666667,8.179325,1.0
12,Chad Pennington,5.21327,8.245614,9.0
18,Philip Rivers,5.725971,7.43913,3.0
19,Jay Cutler,8.666667,6.978102,4.0
24,Aaron Rodgers,5.944056,7.48694,1.0
26,Matt Ryan,3.761062,7.523041,2.0
27,Cam Newton,6.306306,7.168279,1.0
35,Jameis Winston,4.778761,7.115888,5.0
39,Deshaun Watson,8.40708,8.426471,4.0
41,Patrick Mahomes,4.283361,9.581035,1.0


In [108]:
save_path = f"../../../interactive-2.0/QB/draft-capital"

In [109]:
with open(f'{save_path}/passing-qbs.html', 'w') as file:
    file.write(styled_df.to_html())

# Prepping the Fail

### First Threshold Fail

In [123]:
def highlight_qb_one_seasons(row):
    # Check if the player_name in the row is 'Mac Jones'
    if row['Best Finish'] <= 12:
        # Return a list of CSS properties 'background-color: yellow' for each cell in the row
        return ['background-color: yellow']*len(row)
    else:
        # Return a list of empty strings (no styling) for each cell in the row
        return ['']*len(row)

In [120]:
fail_one_html = html_ready_df(fail_one_fantasy)

In [122]:
styled_df_fail_one = fail_one_html.style.apply(highlight_qb_one_seasons, axis=1)
styled_df_fail_one

Unnamed: 0,Player Name,Sack Rate Percentage,Adjusted Air Yards,Best Finish
0,Tim Couch,12.307693,5.445844,18.0
1,Donovan McNabb,11.428572,3.671296,3.0
2,Akili Smith,11.046512,3.782895,39.0
3,Mike Vick,15.671642,6.106194,1.0
4,David Carr,14.559386,4.722973,14.0
5,Ben Roethlisberger,9.202455,8.359322,3.0
6,J.P. Losman,9.84252,4.978166,15.0
7,Alex Smith,14.948453,2.424242,4.0
8,Tim Tebow,10.819672,6.269372,18.0
9,Blaine Gabbert,8.830022,4.743341,28.0


In [127]:
with open(f'{save_path}/fail-one-qbs.html', 'w') as file:
    file.write(styled_df_fail_one.to_html())

### Second Threshold Fail

In [124]:
fail_two_html = html_ready_df(df_fail_two_fantasy)

In [126]:
styled_df_fail_two = fail_two_html.style.apply(highlight_qb_one_seasons, axis=1)
styled_df_fail_two

Unnamed: 0,Player Name,Sack Rate Percentage,Adjusted Air Yards,Best Finish
0,Joey Harrington,1.830664,4.228438,16.0
1,Patrick Ramsey,7.287449,5.986784,22.0
2,Kyle Boller,6.910569,4.441964,25.0
3,Byron Leftwich,4.347826,5.691388,15.0
4,Carson Palmer,5.470459,5.664352,1.0
5,Eli Manning,6.161137,3.847716,5.0
6,Jason Campbell,3.72093,5.927536,13.0
7,Vince Young,6.806283,5.207865,9.0
8,Matt Leinart,5.037784,5.909575,23.0
9,JaMarcus Russell,7.537688,6.3297,26.0


In [128]:
with open(f'{save_path}/fail-two-qbs.html', 'w') as file:
    file.write(styled_df_fail_two.to_html())