In [1]:
import sys
sys.path.append('../')
import pandas as pd
import numpy as np
import joblib
import ast
from tqdm import tqdm
tqdm.pandas()

In [2]:
filepath='../data/all_blocks_0926.csv'
models = joblib.load('../data/models/all_models.jblb')
data = joblib.load('../data/processed/data_1003.jblb')['df']


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
models['fv_model']['features']

['thrower_x',
 'thrower_y',
 'possession_num',
 'possession_throw',
 'game_quarter',
 'quarter_point',
 'score_diff',
 'times']

In [4]:
fv_before_throw_df = models['fv_model']['scaler'].transform(data[models['fv_model']['features']])
fv_before_throw = models['fv_model']['model'].predict_proba(fv_before_throw_df)[:,1]
receiver_features = [x.replace('thrower', 'receiver') for x in models['fv_model']['features']]
fv_after_throw_df = data[receiver_features]
fv_after_throw_df = fv_after_throw_df.rename(columns={'receiver_x': 'thrower_x', 'receiver_y': 'thrower_y'})



fv_opponent_after_throw_df = fv_after_throw_df.copy()
fv_opponent_after_throw_df.loc[:, 'thrower_x'] = -fv_opponent_after_throw_df.loc[:, 'thrower_x']
fv_opponent_after_throw_df.loc[:, 'thrower_y'] = (120 - fv_opponent_after_throw_df.loc[:, 'thrower_y']).clip(lower=20, upper=100)
fv_opponent_after_throw_df.loc[:, 'possession_num'] += 1
fv_opponent_after_throw_df.loc[:, 'possession_throw'] = 1
fv_opponent_after_throw_df.loc[:, 'score_diff'] = -fv_opponent_after_throw_df.loc[:, 'score_diff']

fv_after_throw_df = models['fv_model']['scaler'].transform(fv_after_throw_df)
fv_after_throw = models['fv_model']['model'].predict_proba(fv_after_throw_df)[:,1]

fv_opponent_after_throw_df = models['fv_model']['scaler'].transform(fv_opponent_after_throw_df)
fv_opponent_after_throw = models['fv_model']['model'].predict_proba(fv_opponent_after_throw_df)[:,1]

data['ec'] = np.where(data['turnover'] == 1, 
                      fv_opponent_after_throw - fv_before_throw,  # Use opponent FV if turnover
                      fv_after_throw - fv_before_throw)  # Otherwise use regular FV

In [6]:
data['current_line'] = data['current_line'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
df_exploded = data.explode('current_line')


results = []
def get_unique_count(player):
    unique_combos = df_exploded[df_exploded['current_line'] == player].drop_duplicates(
        subset=['gameID', 'quarter_point', 'home_team_score', 'away_team_score', 'possession_num']
    )
    return len(unique_combos)
unique_players = df_exploded['thrower'].unique()
for player in tqdm(unique_players):
    count = get_unique_count(player)
    results.append({'thrower': player, 'unique_count': count})
final_df = pd.DataFrame(results)

100%|██████████| 1560/1560 [02:01<00:00, 12.80it/s]


In [42]:
thrower_counts = data['thrower'].value_counts()
valid_throwers = thrower_counts[thrower_counts > 200].index
filtered_data = data[data['thrower'].isin(valid_throwers)]
result = filtered_data.groupby(['thrower', 'year']).agg({'ec': 'sum'}).sort_values('ec')
result

Unnamed: 0_level_0,Unnamed: 1_level_0,ec
thrower,year,Unnamed: 2_level_1
ocable,2024,-1.250191
bgfroerer,2024,-1.067261
gmartin,2022,-1.005335
cgall,2022,-0.837158
ajohnson,2022,-0.740304
...,...,...
pjanas,2023,16.978798
jcubitt,2022,17.167433
pjanas,2021,17.547783
emagsig,2024,20.535458


In [43]:
foo = pd.merge(result.reset_index(), final_df, how='left', on='thrower')
foo['ec_per_possession'] = foo.ec / foo.unique_count
foo[foo.year == 2024].sort_values('ec')

Unnamed: 0,thrower,year,ec,unique_count,ec_per_possession
0,ocable,2024,-1.250191,445,-0.002809
1,bgfroerer,2024,-1.067261,286,-0.003732
7,adavis4,2024,-0.524924,579,-0.000907
12,mdehlin,2024,-0.369724,165,-0.002241
13,jkelly,2024,-0.354355,535,-0.000662
...,...,...,...,...,...
1251,dbarram,2024,14.081258,298,0.047253
1254,pjanas,2024,14.478800,1261,0.011482
1257,jfelton,2024,14.521395,292,0.049731
1263,jfloyd,2024,16.209694,379,0.042770


In [40]:
foo.ec.describe()

count    1270.000000
mean        0.020337
std         0.013814
min        -0.079580
25%         0.013878
50%         0.020146
75%         0.026648
max         0.218817
Name: ec, dtype: float64

In [41]:
foo[foo.thrower == 'jkerr']

Unnamed: 0,thrower,year,ec,unique_count,ec_per_possession
578,jkerr,2023,0.019107,1200,1.6e-05
899,jkerr,2021,0.025064,1200,2.1e-05
932,jkerr,2022,0.026045,1200,2.2e-05
1039,jkerr,2024,0.029087,1200,2.4e-05
