# Imports

In [1]:
import numpy as np
import pandas as pd
from utils.scatter_plot import create_scatter_plot, prep_df_for_scatter_plot

# Grabbing Data

In [2]:
# Pickled frame of player stats (file name suggests seasons 2005-2023 — confirm).
# pd.read_pickle executes arbitrary pickle payloads; only load files you produced.
all_seasons_pickle = "../../FantasyData/data-frames/df_all_2005_2023.pkl"
df_all = pd.read_pickle(all_seasons_pickle)

In [3]:
# Review parameters: the season and position the rest of the notebook filters on.
SEASON: int = 2023
POSITION: str = "WR"

In [4]:
# Keep only players at the reviewed position/season with a minimum sample of games.
MIN_GAMES = 5

# `@NAME` lets query() read the Python variables directly (no string interpolation
# or manual quoting). The explicit .copy() makes df_review an independent frame so
# that adding derived columns later does not raise SettingWithCopyWarning.
df_review = df_all.query(
    "season == @SEASON and position == @POSITION and games >= @MIN_GAMES"
).copy()

### Verifying Data

In [5]:
# Sanity checks on the filtered frame. Every row is checked (not just the first
# distinct value), and an empty frame fails with a clear message instead of an
# IndexError.
assert not df_review.empty, "df_review is empty - check SEASON / POSITION / games filter"
assert (df_review["position"] == POSITION).all(), "df_review contains other positions"
assert (df_review["season"] == SEASON).all(), "df_review contains other seasons"
# NOTE(review): 17 presumably is the number of regular-season games in SEASON;
# this would need adjusting for seasons with a different schedule length.
assert df_review["games"].max() == 17, "unexpected maximum games played"

In [7]:
# Pairwise correlation between target volume and PPR fantasy points.
corr_cols = ["targets", "fantasy_points_ppr"]
df_review[corr_cols].corr()

Unnamed: 0,targets,fantasy_points_ppr
targets,1.0,0.962021
fantasy_points_ppr,0.962021,1.0


In [14]:
# Top 36 players by PPR points; reset_index() keeps the original row label as
# an "index" column next to the new 0-based rank.
(
    df_review[["player_name", "fantasy_points_ppr", "targets"]]
    .sort_values(by="fantasy_points_ppr", ascending=False)
    .reset_index()
    .head(36)
)

Unnamed: 0,index,player_name,fantasy_points_ppr,targets
0,11767,CeeDee Lamb,405.2,181
1,9688,Tyreek Hill,376.4,171
2,12039,Amon-Ra St. Brown,330.9,164
3,12353,Puka Nacua,298.5,160
4,11418,A.J. Brown,289.6,158
5,10909,D.J. Moore,286.5,136
6,8591,Mike Evans,282.5,136
7,7674,Keenan Allen,278.86,150
8,8730,Stefon Diggs,273.8,160
9,8539,Davante Adams,265.4,175


In [10]:
# Players with at least 100 targets, shown with their (non-PPR) fantasy points.
high_volume = df_review["targets"] >= 100
df_review.loc[high_volume, ["player_name", "fantasy_points", "targets"]]

Unnamed: 0,player_name,fantasy_points,targets
7481,Adam Thielen,128.0,137
7674,Keenan Allen,170.86,150
7978,DeAndre Hopkins,148.6,137
8539,Davante Adams,162.4,175
8591,Mike Evans,203.5,136
8663,Amari Cooper,155.0,128
8730,Stefon Diggs,166.8,160
9197,Tyler Lockett,123.4,122
9688,Tyreek Hill,257.4,171
10324,Chris Godwin,126.2,130


# Line Graph

Reviewing column options

In [15]:
# Compare the similarly named share columns on one well-known player to see
# which of them are on a usable scale.
share_cols = ["tgt_sh", "target_share", "wopr_x", "wopr_y", "dom", "w8dom"]
df_review.loc[df_review["player_name"] == "Tyreek Hill", share_cols]

Unnamed: 0,tgt_sh,target_share,wopr_x,wopr_y,dom,w8dom
9688,0.316667,5.282512,12.851239,0.825308,0.425368,0.411624


In [7]:
# List every available column so a stat can be picked for graphing.
df_review.columns

Index(['player_id', 'season', 'season_type', 'completions', 'attempts',
       'passing_yards', 'passing_tds', 'interceptions', 'sacks', 'sack_yards',
       'sack_fumbles', 'sack_fumbles_lost', 'passing_air_yards',
       'passing_yards_after_catch', 'passing_first_downs', 'passing_epa',
       'passing_2pt_conversions', 'pacr', 'dakota', 'carries', 'rushing_yards',
       'rushing_tds', 'rushing_fumbles', 'rushing_fumbles_lost',
       'rushing_first_downs', 'rushing_epa', 'rushing_2pt_conversions',
       'receptions', 'targets', 'receiving_yards', 'receiving_tds',
       'receiving_fumbles', 'receiving_fumbles_lost', 'receiving_air_yards',
       'receiving_yards_after_catch', 'receiving_first_downs', 'receiving_epa',
       'receiving_2pt_conversions', 'racr', 'target_share', 'air_yards_share',
       'wopr_x', 'special_teams_tds', 'fantasy_points', 'fantasy_points_ppr',
       'games', 'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh', 'rtd_sh',
       'rfd_sh', 'rtdfd_sh', 'dom', '

In [None]:
# Candidate columns to graph. Despite the original variable name, every entry is
# a receiving or share metric, so the list is exposed as `receiving_stats`.
receiving_stats = [
    'receiving_air_yards',
    'receiving_epa',
    'racr', 'target_share', 'air_yards_share',
    'wopr_x',
    'tgt_sh', 'ay_sh', 'yac_sh', 'wopr_y', 'ry_sh',
    'dom', 'w8dom', 'yptmpa', 'ppr_sh',
]

# Backward-compatible alias for any cell still referring to the old name.
rushing_stats = receiving_stats

In [54]:
# Share of air yards converted into actual receiving yards.
# .assign returns a new frame instead of writing into what may be a slice of
# df_all, which avoids SettingWithCopyWarning and keeps the cell re-runnable.
# NOTE(review): rows with zero receiving_air_yards yield inf/NaN here, and this
# looks equivalent to the existing `racr` column — confirm.
df_review = df_review.assign(
    receiving_air_yards_conversion=lambda frame: (
        frame["receiving_yards"] / frame["receiving_air_yards"]
    )
)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Selecting column to graph the players by

In [55]:
# Column that players are ranked and plotted by.
col_to_graph: str = "receiving_air_yards_conversion"

Selecting appendix and division column. For example, if we wanted `x per game`, we would set the `appendix` to "per_game" and the `div_col` to "games". Alternatively, if we wanted `x per carry`, we would set the `appendix` to "per_carry" and the `div_col` to "carries".

In [56]:
# Suffix for the derived column name, and the column the stat is divided by.
appendix: str = "per_game"
div_col: str = "games"

In [57]:
# Ascending sort on the chosen stat; sort_values already returns a new frame.
df_sorted = df_review.sort_values(by=col_to_graph)
# Independent copy that will receive the per-unit column.
df_sorted_per_game = df_sorted.copy()

### Per Game Version of Stat

In [58]:
# Add "<stat>_<appendix>" (stat divided by div_col) and re-sort ascending by it.
per_unit_col = f"{col_to_graph}_{appendix}"
df_sorted_per_game = (
    df_sorted_per_game
    .assign(**{per_unit_col: df_sorted[col_to_graph] / df_sorted[div_col]})
    .sort_values(by=per_unit_col)
)

### Coloring Player Tiers

In [59]:
# season total version
df_sorted = prep_df_for_scatter_plot(df_sorted)

# per game version
df_sorted_per_game = prep_df_for_scatter_plot(df_sorted_per_game)

# Scatter Plot

In [60]:
# Output location for the interactive chart. The review folder follows SEASON
# (instead of a hard-coded year) so changing the season cannot write into the
# wrong folder.
save_path = f"../../interactive-2.0/{POSITION}/{SEASON}-review/scatter_{col_to_graph}.html"
# save_path = None  # uncomment to pass no output path to create_scatter_plot

print(save_path)

../../interactive-2.0/WR/2023-review/scatter_receiving_air_yards_conversion.html


In [61]:
# Season-total scatter plot.
# NOTE(review): custom_appendix is passed here as well even though this frame
# has no "<stat>_<appendix>" column — confirm this is intended.
create_scatter_plot(
    df_sorted,
    stat=col_to_graph,
    custom_appendix=appendix,
    save_path=save_path,
    start_index=-48,
    three_tiers=True,
)

In [62]:
# Per-game scatter plot. It gets its own output file: reusing `save_path` here
# would presumably overwrite the season-total chart saved to that same path
# (create_scatter_plot's handling of save_path is not visible here — confirm).
if save_path is None:
    save_path_per_game = None
else:
    # Insert "_<appendix>" before the file extension, e.g. "..._per_game.html".
    stem, dot, extension = save_path.rpartition(".")
    save_path_per_game = f"{stem}_{appendix}{dot}{extension}"

create_scatter_plot(df_sorted_per_game, stat=col_to_graph, custom_appendix=appendix,
                    save_path=save_path_per_game, start_index=-48, three_tiers=True)