In [1]:
import pandas as pd

# 📁 Load raw Statcast data and velocity grades
df_raw = pd.read_csv("../data/raw/statcast_2025.csv")
grades = pd.read_csv("../data/processed/pitcher_velocity_grades.csv")

# 🎯 Filter to 2025 BOS only
df_raw['season'] = pd.to_datetime(df_raw['game_date']).dt.year
df_bos_2025 = df_raw[(df_raw['season'] == 2025) & (df_raw['home_team'] == 'BOS')]

# 🧼 Drop missing
df_bos_2025 = df_bos_2025.dropna(subset=['pitch_type', 'release_speed', 'pitcher', 'player_name'])

# 🎯 Step 1: BOS pitchers' avg velocity by pitch type
bos_pitcher_vel = df_bos_2025.groupby(['pitcher', 'player_name', 'pitch_type'])['release_speed'].mean().reset_index()
bos_pitcher_vel.rename(columns={'release_speed': 'avg_speed'}, inplace=True)

# 🎯 Step 2: League avg velocity per pitch type
league_2025 = df_raw[df_raw['season'] == 2025]
league_avg = league_2025.groupby('pitch_type')['release_speed'].mean().reset_index()
league_avg.rename(columns={'release_speed': 'league_avg_speed'}, inplace=True)

# 🔗 Step 3: Merge and compare
comparison = bos_pitcher_vel.merge(league_avg, on='pitch_type', how='left')
comparison['velocity_diff'] = comparison['avg_speed'] - comparison['league_avg_speed']

# 🎓 Step 4: Grade each pitch
def grade_pitch(diff):
    if diff > 2:
        return 'A'
    elif diff > 1:
        return 'B'
    elif diff >= -1:
        return 'C'
    elif diff >= -2:
        return 'D'
    else:
        return 'F'

comparison['grade'] = comparison['velocity_diff'].apply(grade_pitch)

# 💾 Save result
comparison.to_csv("../data/processed/bos_pitcher_vs_league_2025.csv", index=False)
print("✅ Saved: bos_pitcher_vs_league_2025.csv")

# 🔍 Preview
comparison.sort_values(['pitcher', 'pitch_type']).head(10)


✅ Saved: bos_pitcher_vs_league_2025.csv


Unnamed: 0,pitcher,player_name,pitch_type,avg_speed,league_avg_speed,velocity_diff,grade
0,455119,"Martin, Chris",FC,91.6,89.492059,2.107941,A
1,455119,"Martin, Chris",FF,95.16,94.249791,0.910209,C
2,455119,"Martin, Chris",FS,87.7,86.495732,1.204268,B
3,455119,"Martin, Chris",SI,93.633333,93.612109,0.021225,C
4,458677,"Wilson, Justin",FC,91.185714,89.492059,1.693655,B
5,458677,"Wilson, Justin",FF,94.291026,94.249791,0.041234,C
6,458677,"Wilson, Justin",FS,83.6125,86.495732,-2.883232,F
7,458677,"Wilson, Justin",SL,87.22963,85.780459,1.44917,B
8,476594,"Stock, Robert",CH,79.366667,85.753104,-6.386437,F
9,476594,"Stock, Robert",FC,88.408333,89.492059,-1.083726,D
