In [6]:
import pandas as pd
import pymc as pm
import arviz as az

In [7]:
data = pd.read_csv('data_2024Grades.csv')

In [47]:
grades[['decScore', '95th_pBat_speed', 'smash_factor', 'conScore']].mean()

decScore           -0.451991
95th_pBat_speed    79.841703
smash_factor        0.398202
conScore           -0.162849
dtype: float64

In [None]:
    league_priors = {
        'decScore': {'mean': -0.45, 'std': 0.05},
        '95th_pBat_speed': {'mean': 79.0, 'std': 1.25},
        'smash_factor': {'mean': .40, 'std': 0.09},
        'conScore': {'mean': -0.15, 'std': 0.075}
    }


In [4]:
def calculate_bayesian_grades(data: pd.DataFrame, league_priors: dict) -> pd.DataFrame:
    """
    Calculate Bayesian estimates for player grades using league-wide priors
    """
    bayesian_grades = pd.DataFrame()
    
    for metric in ['decScore', '95th_pBat_speed', 'smash_factor', 'conScore']:
        # Prepare data by grouping by batter and calculating mean
        player_data = data.groupby('batter')[metric].mean().dropna()
        players = player_data.index.values
        values = player_data.values
        
        # Get league-wide prior parameters
        prior_mean = league_priors[metric]['mean']
        prior_std = league_priors[metric]['std']
        
        with pm.Model() as model:
            # Hierarchical model setup
            mu = pm.Normal('mu', mu=prior_mean, sigma=prior_std)
            sigma = pm.HalfNormal('sigma', sigma=prior_std)
            
            # Player-specific effects
            player_effects = pm.Normal('player_effects', 
                                     mu=mu, 
                                     sigma=sigma, 
                                     shape=len(players))
            
            # Likelihood
            y = pm.Normal('y', mu=player_effects, 
                         sigma=sigma, 
                         observed=values)
            
            # Inference
            trace = pm.sample(2000, tune=1000, return_inferencedata=True)

            
        # Extract posterior means and credible intervals
        summary = az.summary(trace, var_names=['player_effects'])
        
        # Create temporary DataFrame with player IDs
        temp_df = pd.DataFrame({
            f'{metric}_bayes': summary['mean'].values,
            f'{metric}_ci_lower': summary['hdi_3%'].values,
            f'{metric}_ci_upper': summary['hdi_97%'].values
        }, index=players)
        
        if bayesian_grades.empty:
            bayesian_grades = temp_df
        else:
            bayesian_grades = bayesian_grades.join(temp_df)
    
    return bayesian_grades

In [9]:
baes = calculate_bayesian_grades(data, league_priors)

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, sigma, player_effects]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, sigma, player_effects]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, sigma, player_effects]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.
Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [mu, sigma, player_effects]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 3 seconds.


In [10]:
baes

Unnamed: 0,decScore_bayes,decScore_ci_lower,decScore_ci_upper,95th_pBat_speed_bayes,95th_pBat_speed_ci_lower,95th_pBat_speed_ci_upper,smash_factor_bayes,smash_factor_ci_lower,smash_factor_ci_upper,conScore_bayes,conScore_ci_lower,conScore_ci_upper
444482,-0.464,-0.515,-0.413,79.992,78.820,81.113,0.397,0.314,0.480,-0.148,-0.218,-0.074
453568,-0.454,-0.504,-0.403,79.865,78.650,81.036,0.413,0.326,0.495,-0.127,-0.200,-0.056
455117,-0.477,-0.527,-0.427,80.017,78.943,81.198,0.373,0.290,0.456,-0.223,-0.293,-0.151
456781,-0.484,-0.536,-0.436,79.899,78.792,81.096,0.403,0.322,0.490,-0.149,-0.222,-0.076
457705,-0.451,-0.501,-0.401,79.940,78.735,81.055,0.394,0.309,0.479,-0.193,-0.265,-0.123
...,...,...,...,...,...,...,...,...,...,...,...,...
702616,-0.431,-0.483,-0.382,79.868,78.653,81.016,0.381,0.296,0.458,-0.190,-0.258,-0.117
805373,-0.489,-0.540,-0.439,79.565,78.399,80.718,0.393,0.310,0.476,-0.125,-0.196,-0.053
805779,-0.449,-0.499,-0.400,79.791,78.601,80.966,0.468,0.386,0.557,-0.099,-0.168,-0.025
807799,-0.432,-0.480,-0.380,79.900,78.679,81.101,0.455,0.367,0.536,-0.106,-0.179,-0.036


In [40]:
grades = pd.read_csv('pre_grades.csv')
baes = pd.read_csv('pre_baes.csv')

In [None]:
grades = grades.set_index('batter')

Unnamed: 0,batter,decScore,pBat_speed,smash_factor,95th_pBat_speed,std_pBat_speed,conScore,xRV,std_EV,EV95,...,sprint_speed,decGrade,powGrade,altPowGrade,conGrade,SFGrade,speedGrade,95thPowGrade,EV95Grade,stdEVGrade
0,669288,0.062361,80.846150,1.115559,80.846150,,0.000000,-0.102429,,100.300,...,4.47377,219.814634,226.007556,,75.425715,181.360394,50.402215,75.301574,32.709284,
1,675915,0.031181,79.325948,1.048585,79.325948,,0.000000,-0.064484,,87.300,...,4.47377,209.517394,195.777354,,75.425715,168.853673,50.402215,25.088591,-16.596401,
2,622491,0.000000,,,,,,-0.062361,,,...,4.47377,199.220155,,,,,50.402215,,,
3,622110,-0.204596,66.984248,0.547307,74.240873,,-0.153846,0.039722,,104.620,...,4.47377,131.653259,-49.645374,,48.408136,75.244671,50.402215,-142.873837,49.093942,
4,672701,-0.317743,69.616146,0.500329,77.922685,,0.000000,0.002576,,98.875,...,4.47377,94.286938,2.691631,,75.425715,66.471956,50.402215,-21.261855,27.304622,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,676551,-0.610139,75.177969,0.069997,80.275490,,-0.375000,-0.031093,,104.110,...,4.47377,-2.275473,113.292090,,9.570365,-13.888738,50.402215,56.452393,47.159642,
647,595284,-0.613108,70.487729,0.286147,79.349336,,-0.600000,-0.044538,,100.860,...,4.47377,-3.256162,20.023614,,-29.942846,26.475353,50.402215,25.861099,34.833221,
648,660758,-0.613307,75.488236,0.399161,80.518722,0.314215,0.000000,0.002255,0.141421,99.300,...,4.47377,-3.321824,119.461951,13.420085,75.425715,47.579646,50.402215,64.486470,28.916539,-16.259692
649,679881,-0.633091,71.243932,0.262368,78.930696,0.694580,-0.230769,-0.043400,2.899138,97.730,...,4.47377,-9.855436,35.061201,67.142527,34.899346,22.034910,50.402215,12.033216,22.961929,68.088630


In [41]:
grades.join(baes, how='left')

Unnamed: 0,batter,decScore,pBat_speed,smash_factor,95th_pBat_speed,std_pBat_speed,conScore,xRV,std_EV,EV95,...,decScore_ci_upper,95th_pBat_speed_bayes,95th_pBat_speed_ci_lower,95th_pBat_speed_ci_upper,smash_factor_bayes,smash_factor_ci_lower,smash_factor_ci_upper,conScore_bayes,conScore_ci_lower,conScore_ci_upper
0,669288,0.062361,80.846150,1.115559,80.846150,,0.000000,-0.102429,,100.300,...,-0.417,79.999,78.806,81.157,0.398,0.315,0.481,-0.148,-0.218,-0.072
1,675915,0.031181,79.325948,1.048585,79.325948,,0.000000,-0.064484,,87.300,...,-0.406,79.855,78.682,81.009,0.413,0.335,0.498,-0.127,-0.201,-0.055
2,622491,0.000000,,,,,,-0.062361,,,...,-0.426,80.033,78.879,81.195,0.373,0.289,0.452,-0.224,-0.298,-0.152
3,622110,-0.204596,66.984248,0.547307,74.240873,,-0.153846,0.039722,,104.620,...,-0.435,79.911,78.734,81.063,0.403,0.318,0.483,-0.149,-0.219,-0.075
4,672701,-0.317743,69.616146,0.500329,77.922685,,0.000000,0.002576,,98.875,...,-0.401,79.955,78.889,81.176,0.395,0.315,0.478,-0.193,-0.266,-0.120
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
646,676551,-0.610139,75.177969,0.069997,80.275490,,-0.375000,-0.031093,,104.110,...,-0.378,79.864,78.674,81.066,0.380,0.296,0.465,-0.190,-0.261,-0.115
647,595284,-0.613108,70.487729,0.286147,79.349336,,-0.600000,-0.044538,,100.860,...,-0.441,79.555,78.398,80.761,0.393,0.307,0.474,-0.125,-0.194,-0.053
648,660758,-0.613307,75.488236,0.399161,80.518722,0.314215,0.000000,0.002255,0.141421,99.300,...,-0.398,79.782,78.506,80.910,0.467,0.385,0.553,-0.099,-0.166,-0.027
649,679881,-0.633091,71.243932,0.262368,78.930696,0.694580,-0.230769,-0.043400,2.899138,97.730,...,-0.381,79.900,78.721,81.028,0.455,0.374,0.537,-0.106,-0.179,-0.033


In [34]:
grades

Unnamed: 0_level_0,Name,decScore,pBat_speed,smash_factor,95th_pBat_speed,std_pBat_speed,conScore,xRV,std_EV,EV95,...,SFGrade,speedGrade,95thPowGrade,EV95Grade,stdEVGrade,decGrade_bayes,95thPowGrade_bayes,SFGrade_bayes,conGrade_bayes,IDfg
batter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
444482,Daniel Johnson,0.062361,80.846150,1.115559,80.846150,,0.000000,-0.102429,,100.300,...,181.360394,50.402215,75.301574,32.709284,,,,,,19643
444482,David Bañuelos,0.031181,79.325948,1.048585,79.325948,,0.000000,-0.064484,,87.300,...,168.853673,50.402215,25.088591,-16.596401,,,,,,-1
444482,Luis Castillo,0.000000,,,,,,-0.062361,,,...,,50.402215,,,,,,,,15689
444482,Matt Duffy,-0.204596,66.984248,0.547307,74.240873,,-0.153846,0.039722,,104.620,...,75.244671,50.402215,-142.873837,49.093942,,,,,,13836
444482,José Devers,-0.317743,69.616146,0.500329,77.922685,,0.000000,0.002576,,98.875,...,66.471956,50.402215,-21.261855,27.304622,,,,,,20540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
444482,Brewer Hicklen,-0.610139,75.177969,0.069997,80.275490,,-0.375000,-0.031093,,104.110,...,-13.888738,50.402215,56.452393,47.159642,,,,,,20450
444482,Andrew Knapp,-0.613108,70.487729,0.286147,79.349336,,-0.600000,-0.044538,,100.860,...,26.475353,50.402215,25.861099,34.833221,,,,,,14942
444482,Sandro Fabian,-0.613307,75.488236,0.399161,80.518722,0.314215,0.000000,0.002255,0.141421,99.300,...,47.579646,50.402215,64.486470,28.916539,-16.259692,,,,,-1
444482,J. P. Martínez,-0.633091,71.243932,0.262368,78.930696,0.694580,-0.230769,-0.043400,2.899138,97.730,...,22.034910,50.402215,12.033216,22.961929,68.088630,,,,,24451


In [31]:
grades = grades.drop(columns=['decScore_ci_lower', 'decScore_ci_upper', '95th_pBat_speed_ci_lower',
       '95th_pBat_speed_ci_upper', 'smash_factor_ci_lower',
       'smash_factor_ci_upper', 'conScore_ci_lower', 'conScore_ci_upper'])

KeyError: "['decScore_ci_lower', 'decScore_ci_upper', '95th_pBat_speed_ci_lower', '95th_pBat_speed_ci_upper', 'smash_factor_ci_lower', 'smash_factor_ci_upper', 'conScore_ci_lower', 'conScore_ci_upper'] not found in axis"