# 2025 League Averages

Import Previous Data

In [26]:
import pandas as pd
import numpy as np
import pybaseball as pyb
from datetime import datetime, timedelta

# Load the previously scraped pitch-level data and parse game_date into
# datetimes so the most recent stored day can be located afterwards.
statcast = pd.read_csv('statcast_2025.csv').assign(
    game_date=lambda frame: pd.to_datetime(frame['game_date'])
)

Find Last Day that was Scraped and Scrape New Data

In [27]:
# Most recent game date already stored. Series.max() skips NaT and returns a
# pandas Timestamp on every pandas version, whereas sort_values().unique()[0]
# sorts the whole column and, on pandas < 2, yields a NumPy datetime64 that
# has no .strftime().
last_day = statcast['game_date'].max()
next_day = (last_day + timedelta(days=1)).strftime('%Y-%m-%d')

# Request every pitch from the day after the last stored date through today.
# NOTE(review): scraping always resumes at last_day + 1, so if this runs while
# today's games are still in progress the remainder of today may never be
# fetched -- confirm the notebook is only run after the day's games finish.
new_data = pyb.statcast(start_dt=next_day, end_dt=datetime.today().strftime('%Y-%m-%d'))

This is a large query, it may take a moment to complete


  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
  data_copy[column] = data_copy[column].apply(pd.to_datetime, errors='ignore', format=date_format)
100%|██████████| 3/3 [00:05<00:00,  1.88s/it]
  final_data = pd.concat(dataframe_list, axis=0).convert_dtypes(convert_string=False)


In [28]:
# Display the first date that was requested from the scraper.
next_day

'2025-05-02'

Clean the New Data and Add a Batting/Pitching Team Column

In [29]:
# Keep only rows whose game_type is 'R' (presumably regular season -- confirm).
# The explicit .copy() makes new_data an independent frame, so the column
# assignments below never write to a slice of the scraped frame
# (avoids pandas' SettingWithCopyWarning).
new_data = new_data[new_data['game_type'] == 'R'].copy()

# When inning_topbot is 'Top' the home team is pitching and the away team is
# batting; for every other value the roles are swapped.
top_half = new_data['inning_topbot'] == 'Top'
new_data['pitching_team'] = np.where(top_half, new_data['home_team'], new_data['away_team'])
new_data['batting_team'] = np.where(top_half, new_data['away_team'], new_data['home_team'])

Concatenate the New Data into Statcast

In [30]:
# Append the newly scraped rows to the stored data and renumber the index.
# Re-running this cell appends new_data again, duplicating its rows.
statcast = pd.concat([statcast, new_data], ignore_index=True)

  statcast = pd.concat([statcast, new_data], ignore_index=True)


Write to CSV

In [31]:
# Overwrite the stored CSV with the combined data (row index is not written).
statcast.to_csv('statcast_2025.csv', index=False)

## Compile League Average Pitch Movement by Handedness

In [32]:
# Mean vertical (pfx_z) and horizontal (pfx_x) movement for every
# pitch type / throwing hand combination.
movement_cols = ['pfx_z', 'pfx_x']
pitch_movement = (
    statcast.groupby(['pitch_type', 'p_throws'])[movement_cols]
    .mean()
    .reset_index()
)

# Scale both movement columns by 12 (presumably feet -> inches; confirm units).
pitch_movement[movement_cols] = pitch_movement[movement_cols] * 12

In [33]:
# Save the movement averages by pitch type and handedness (row index is not written).
pitch_movement.to_csv('statcast_2025_pitch_movement.csv', index=False)

## Compile League Averages

In [34]:
# Pitch descriptions that count as a swing, and the subset that count as a whiff.
swing_desc = [
    'hit_into_play', 'swinging_strike', 'foul', 'swinging_strike_blocked',
    'foul_tip', 'foul_bunt', 'missed_bunt', 'bunt_foul_tip',
]
whiff_desc = ['swinging_strike', 'swinging_strike_blocked', 'missed_bunt']

# Zone codes flagged as in_zone (1-9) and out_zone (11-14).
in_zone_desc = [1, 2, 3, 4, 5, 6, 7, 8, 9]
out_zone_desc = list(range(11, 15))

# Add one boolean flag column per category to every pitch.
description = statcast['description']
zone = statcast['zone']
statcast['swing'] = description.isin(swing_desc)
statcast['whiff'] = description.isin(whiff_desc)
statcast['in_zone'] = zone.isin(in_zone_desc)
statcast['out_zone'] = zone.isin(out_zone_desc)

# A chase is a swing at a pitch flagged out_zone.
statcast['chase'] = statcast['out_zone'] & statcast['swing']

In [35]:
# One named aggregation per output column: (source column, reducer).
pitch_type_agg = {
    'pitch': ('release_speed', 'count'),
    'release_speed': ('release_speed', 'mean'),
    'pfx_z': ('pfx_z', 'mean'),
    'pfx_x': ('pfx_x', 'mean'),
    'release_spin_rate': ('release_spin_rate', 'mean'),
    'release_pos_x': ('release_pos_x', 'mean'),
    'release_pos_z': ('release_pos_z', 'mean'),
    'release_extension': ('release_extension', 'mean'),
    'delta_run_exp': ('delta_run_exp', 'sum'),
    'swing': ('swing', 'sum'),
    'whiff': ('whiff', 'sum'),
    'in_zone': ('in_zone', 'sum'),
    'out_zone': ('out_zone', 'sum'),
    'chase': ('chase', 'sum'),
    'xwoba': ('estimated_woba_using_speedangle', 'mean'),
}
statcast_group = statcast.groupby('pitch_type').agg(**pitch_type_agg).reset_index()

# xwOBAcon: the same estimated-wOBA column, averaged over type == 'X' rows only.
in_play = statcast.loc[statcast['type'] == 'X']
xwobacon_group = (
    in_play.groupby('pitch_type')['estimated_woba_using_speedangle']
    .mean()
    .rename('xwobacon')
    .reset_index()
)

# Left merge keeps pitch types without any type == 'X' rows (xwobacon is NaN there).
statcast_group = statcast_group.merge(xwobacon_group, on='pitch_type', how='left')

# Scale movement by 12 (presumably feet -> inches; confirm units).
scaled_cols = ['pfx_x', 'pfx_z']
statcast_group[scaled_cols] = statcast_group[scaled_cols] * 12

# Derived rates. 'pitch' counts rows with a non-null release_speed.
pitches = statcast_group['pitch']
statcast_group['pitch_usage'] = pitches / pitches.sum()
statcast_group['whiff_rate'] = statcast_group['whiff'].div(statcast_group['swing'])
statcast_group['in_zone_rate'] = statcast_group['in_zone'].div(pitches)
statcast_group['chase_rate'] = statcast_group['chase'].div(statcast_group['out_zone'])
# The sign of delta_run_exp is flipped before scaling to a per-100-pitch value.
statcast_group['delta_run_exp_per_100'] = (-statcast_group['delta_run_exp']).div(pitches).mul(100)

In [36]:
# Estimated wOBA averaged over type == 'X' rows only, across all pitch types.
in_play_rows = statcast['type'] == 'X'
xwobacon_all = statcast.loc[in_play_rows, 'estimated_woba_using_speedangle'].mean()

# The same metrics as the per-pitch-type table, computed over every pitch.
overall_values = {
    'pitch_type': 'All',
    'pitch': statcast['release_speed'].count(),
    'release_speed': statcast['release_speed'].mean(),
    'pfx_z': statcast['pfx_z'].mean() * 12,
    'pfx_x': statcast['pfx_x'].mean() * 12,
    'release_spin_rate': statcast['release_spin_rate'].mean(),
    'release_pos_x': statcast['release_pos_x'].mean(),
    'release_pos_z': statcast['release_pos_z'].mean(),
    'release_extension': statcast['release_extension'].mean(),
    'delta_run_exp': statcast['delta_run_exp'].sum(),
    'swing': statcast['swing'].sum(),
    'whiff': statcast['whiff'].sum(),
    'in_zone': statcast['in_zone'].sum(),
    'out_zone': statcast['out_zone'].sum(),
    'chase': statcast['chase'].sum(),
    'xwoba': statcast['estimated_woba_using_speedangle'].mean(),
    'xwobacon': xwobacon_all,
}
# Wrap each scalar in a one-element list to build a single-row frame.
all_row = pd.DataFrame({column: [value] for column, value in overall_values.items()})

# Derived rates, using the same formulas as the per-pitch-type rows.
all_row = all_row.assign(
    pitch_usage=1.0,
    whiff_rate=lambda row: row['whiff'] / row['swing'],
    in_zone_rate=lambda row: row['in_zone'] / row['pitch'],
    chase_rate=lambda row: row['chase'] / row['out_zone'],
    delta_run_exp_per_100=lambda row: -row['delta_run_exp'] / row['pitch'] * 100,
)

# Append the 'All' row beneath the per-pitch-type rows.
# Re-running this cell appends another 'All' row.
statcast_group = pd.concat([statcast_group, all_row], ignore_index=True)

In [37]:
# Save the per-pitch-type table, including the 'All' row (row index is not written).
statcast_group.to_csv('statcast_2025_grouped.csv', index=False)

# AAA Data

Import Previous Data

In [38]:
import statcast_minors as sm

# Load the previously scraped AAA data and parse game_date into datetimes
# so the most recent stored day can be located afterwards.
statcast = pd.read_csv('statcast_2025_aaa.csv').assign(
    game_date=lambda frame: pd.to_datetime(frame['game_date'])
)

Find Last Day that was Scraped and Scrape New Data

In [39]:
# Most recent game date already stored. Series.max() skips NaT and returns a
# pandas Timestamp on every pandas version, whereas sort_values().unique()[0]
# sorts the whole column and, on pandas < 2, yields a NumPy datetime64 that
# has no .strftime().
last_day = statcast['game_date'].max()
next_day = (last_day + timedelta(days=1)).strftime('%Y-%m-%d')
print(next_day)

# Request minor-league pitches from the day after the last stored date
# through today, then drop the scraper's 'index' column.
new_data = sm.statcast_minor_leagues(start_dt=next_day, end_dt=datetime.today().strftime('%Y-%m-%d'))
new_data = new_data.drop('index', axis=1)

2025-05-04


Clean the New Data and Add a Batting/Pitching Team Column

In [40]:
# Team abbreviations used to select AAA games.
aaa_teams = ['ABQ', 'BUF', 'CLT', 'COL', 'DUR', 'ELP', 'GWN', 'IND', 'IOW', 'JAX', 'LV', 'LHV',
             'LOU', 'MEM', 'NAS', 'NOR', 'OKC', 'OMA', 'RNO', 'ROC', 'RR', 'SAC', 'SL', 'SWB',
             'STP', 'SUG', 'SYR', 'TAC', 'TOL', 'WOR']

# Keep only games whose home team is in the AAA list. The explicit .copy()
# makes new_data an independent frame, so the column assignments below never
# write to a slice of the scraped frame (avoids SettingWithCopyWarning).
new_data = new_data[new_data['home_team'].isin(aaa_teams)].copy()

# When inning_topbot is 'Top' the home team is pitching and the away team is
# batting; for every other value the roles are swapped.
top_half = new_data['inning_topbot'] == 'Top'
new_data['pitching_team'] = np.where(top_half, new_data['home_team'], new_data['away_team'])
new_data['batting_team'] = np.where(top_half, new_data['away_team'], new_data['home_team'])

Concatenate the New Data into statcast

In [41]:
# Append the newly scraped AAA rows to the stored data and renumber the index.
statcast = pd.concat([statcast, new_data], ignore_index=True)

# Cast the pitcher and batter columns to int, one column at a time.
for id_column in ('pitcher', 'batter'):
    statcast[id_column] = statcast[id_column].astype(int)

  statcast = pd.concat([statcast, new_data], ignore_index=True)


Write to CSV

In [42]:
# Overwrite the stored AAA CSV with the combined data (row index is not written).
statcast.to_csv('statcast_2025_aaa.csv', index=False)

## Compile League Average Pitch Movement by Handedness

In [43]:
# Mean vertical (pfx_z) and horizontal (pfx_x) movement for every
# pitch type / throwing hand combination in the AAA data.
movement_cols = ['pfx_z', 'pfx_x']
pitch_movement = (
    statcast.groupby(['pitch_type', 'p_throws'])[movement_cols]
    .mean()
    .reset_index()
)

# Scale both movement columns by 12 (presumably feet -> inches; confirm units).
pitch_movement[movement_cols] = pitch_movement[movement_cols] * 12

In [44]:
# Save the AAA movement averages by pitch type and handedness (row index is not written).
pitch_movement.to_csv('statcast_2025_pitch_movement_aaa.csv', index=False)

## Compile League Averages

In [45]:
# Pitch descriptions that count as a swing, and the subset that count as a whiff.
swing_desc = [
    'hit_into_play', 'swinging_strike', 'foul', 'swinging_strike_blocked',
    'foul_tip', 'foul_bunt', 'missed_bunt', 'bunt_foul_tip',
]
whiff_desc = ['swinging_strike', 'swinging_strike_blocked', 'missed_bunt']

# Zone codes flagged as in_zone (1-9) and out_zone (11-14).
in_zone_desc = [1, 2, 3, 4, 5, 6, 7, 8, 9]
out_zone_desc = list(range(11, 15))

# Add one boolean flag column per category to every pitch.
description = statcast['description']
zone = statcast['zone']
statcast['swing'] = description.isin(swing_desc)
statcast['whiff'] = description.isin(whiff_desc)
statcast['in_zone'] = zone.isin(in_zone_desc)
statcast['out_zone'] = zone.isin(out_zone_desc)

# A chase is a swing at a pitch flagged out_zone.
statcast['chase'] = statcast['out_zone'] & statcast['swing']

In [46]:
# One named aggregation per output column: (source column, reducer).
pitch_type_agg = {
    'pitch': ('release_speed', 'count'),
    'release_speed': ('release_speed', 'mean'),
    'pfx_z': ('pfx_z', 'mean'),
    'pfx_x': ('pfx_x', 'mean'),
    'release_spin_rate': ('release_spin_rate', 'mean'),
    'release_pos_x': ('release_pos_x', 'mean'),
    'release_pos_z': ('release_pos_z', 'mean'),
    'release_extension': ('release_extension', 'mean'),
    'delta_run_exp': ('delta_run_exp', 'sum'),
    'swing': ('swing', 'sum'),
    'whiff': ('whiff', 'sum'),
    'in_zone': ('in_zone', 'sum'),
    'out_zone': ('out_zone', 'sum'),
    'chase': ('chase', 'sum'),
    'xwoba': ('estimated_woba_using_speedangle', 'mean'),
}
statcast_group = statcast.groupby('pitch_type').agg(**pitch_type_agg).reset_index()

# xwOBAcon: the same estimated-wOBA column, averaged over type == 'X' rows only.
in_play = statcast.loc[statcast['type'] == 'X']
xwobacon_group = (
    in_play.groupby('pitch_type')['estimated_woba_using_speedangle']
    .mean()
    .rename('xwobacon')
    .reset_index()
)

# Left merge keeps pitch types without any type == 'X' rows (xwobacon is NaN there).
statcast_group = statcast_group.merge(xwobacon_group, on='pitch_type', how='left')

# Scale movement by 12 (presumably feet -> inches; confirm units).
scaled_cols = ['pfx_x', 'pfx_z']
statcast_group[scaled_cols] = statcast_group[scaled_cols] * 12

# Derived rates. 'pitch' counts rows with a non-null release_speed.
pitches = statcast_group['pitch']
statcast_group['pitch_usage'] = pitches / pitches.sum()
statcast_group['whiff_rate'] = statcast_group['whiff'].div(statcast_group['swing'])
statcast_group['in_zone_rate'] = statcast_group['in_zone'].div(pitches)
statcast_group['chase_rate'] = statcast_group['chase'].div(statcast_group['out_zone'])
# The sign of delta_run_exp is flipped before scaling to a per-100-pitch value.
statcast_group['delta_run_exp_per_100'] = (-statcast_group['delta_run_exp']).div(pitches).mul(100)

In [47]:
# Estimated wOBA averaged over type == 'X' rows only, across all pitch types.
in_play_rows = statcast['type'] == 'X'
xwobacon_all = statcast.loc[in_play_rows, 'estimated_woba_using_speedangle'].mean()

# The same metrics as the per-pitch-type table, computed over every pitch.
overall_values = {
    'pitch_type': 'All',
    'pitch': statcast['release_speed'].count(),
    'release_speed': statcast['release_speed'].mean(),
    'pfx_z': statcast['pfx_z'].mean() * 12,
    'pfx_x': statcast['pfx_x'].mean() * 12,
    'release_spin_rate': statcast['release_spin_rate'].mean(),
    'release_pos_x': statcast['release_pos_x'].mean(),
    'release_pos_z': statcast['release_pos_z'].mean(),
    'release_extension': statcast['release_extension'].mean(),
    'delta_run_exp': statcast['delta_run_exp'].sum(),
    'swing': statcast['swing'].sum(),
    'whiff': statcast['whiff'].sum(),
    'in_zone': statcast['in_zone'].sum(),
    'out_zone': statcast['out_zone'].sum(),
    'chase': statcast['chase'].sum(),
    'xwoba': statcast['estimated_woba_using_speedangle'].mean(),
    'xwobacon': xwobacon_all,
}
# Wrap each scalar in a one-element list to build a single-row frame.
all_row = pd.DataFrame({column: [value] for column, value in overall_values.items()})

# Derived rates, using the same formulas as the per-pitch-type rows.
all_row = all_row.assign(
    pitch_usage=1.0,
    whiff_rate=lambda row: row['whiff'] / row['swing'],
    in_zone_rate=lambda row: row['in_zone'] / row['pitch'],
    chase_rate=lambda row: row['chase'] / row['out_zone'],
    delta_run_exp_per_100=lambda row: -row['delta_run_exp'] / row['pitch'] * 100,
)

# Append the 'All' row beneath the per-pitch-type rows.
# Re-running this cell appends another 'All' row.
statcast_group = pd.concat([statcast_group, all_row], ignore_index=True)

In [48]:
# Save the AAA per-pitch-type table, including the 'All' row (row index is not written).
statcast_group.to_csv('statcast_2025_grouped_aaa.csv', index=False)

# FSL Data

Import Previous Data

In [49]:
import statcast_minors as sm

# Load the previously scraped FSL data and parse game_date into datetimes
# so the most recent stored day can be located afterwards.
statcast = pd.read_csv('statcast_2025_fsl.csv').assign(
    game_date=lambda frame: pd.to_datetime(frame['game_date'])
)

Find Last Day that was Scraped and Scrape New Data

In [50]:
# Most recent game date already stored. Series.max() skips NaT and returns a
# pandas Timestamp on every pandas version, whereas sort_values().unique()[0]
# sorts the whole column and, on pandas < 2, yields a NumPy datetime64 that
# has no .strftime().
last_day = statcast['game_date'].max()
next_day = (last_day + timedelta(days=1)).strftime('%Y-%m-%d')
print(next_day)

# Request minor-league pitches from the day after the last stored date
# through today, then drop the scraper's 'index' column.
new_data = sm.statcast_minor_leagues(start_dt=next_day, end_dt=datetime.today().strftime('%Y-%m-%d'))
new_data = new_data.drop('index', axis=1)

2025-05-04


Clean the New Data and Add a Batting/Pitching Team Column

In [51]:
# Florida State League (FSL) teams
fsl_teams = ['BRD', 'CLR', 'DBT', 'DUN', 'FTM', 'JUP', 'LAK', 'PMB', 'SLU', 'TAM']

# Keep only games whose home team is in the FSL list.
# Fix: this previously filtered on aaa_teams, which kept AAA-hosted games,
# dropped the FSL ones, and raised NameError when the AAA cell had not run.
# The explicit .copy() makes new_data an independent frame, so the column
# assignments below never write to a slice of the scraped frame.
new_data = new_data[new_data['home_team'].isin(fsl_teams)].copy()

# When inning_topbot is 'Top' the home team is pitching and the away team is
# batting; for every other value the roles are swapped.
top_half = new_data['inning_topbot'] == 'Top'
new_data['pitching_team'] = np.where(top_half, new_data['home_team'], new_data['away_team'])
new_data['batting_team'] = np.where(top_half, new_data['away_team'], new_data['home_team'])

Concatenate the New Data into statcast

In [52]:
# Append the newly scraped FSL rows to the stored data and renumber the index.
statcast = pd.concat([statcast, new_data], ignore_index=True)

# Cast the pitcher and batter columns to int, one column at a time.
for id_column in ('pitcher', 'batter'):
    statcast[id_column] = statcast[id_column].astype(int)

  statcast = pd.concat([statcast, new_data], ignore_index=True)


Write to CSV

In [53]:
# Overwrite the stored FSL CSV with the combined data (row index is not written).
statcast.to_csv('statcast_2025_fsl.csv', index=False)

## Compile League Average Pitch Movement by Handedness

In [54]:
# Mean vertical (pfx_z) and horizontal (pfx_x) movement for every
# pitch type / throwing hand combination in the FSL data.
movement_cols = ['pfx_z', 'pfx_x']
pitch_movement = (
    statcast.groupby(['pitch_type', 'p_throws'])[movement_cols]
    .mean()
    .reset_index()
)

# Scale both movement columns by 12 (presumably feet -> inches; confirm units).
pitch_movement[movement_cols] = pitch_movement[movement_cols] * 12

In [55]:
# Save the FSL movement averages by pitch type and handedness (row index is not written).
pitch_movement.to_csv('statcast_2025_pitch_movement_fsl.csv', index=False)

## Compile League Averages

In [56]:
# Pitch descriptions that count as a swing, and the subset that count as a whiff.
swing_desc = [
    'hit_into_play', 'swinging_strike', 'foul', 'swinging_strike_blocked',
    'foul_tip', 'foul_bunt', 'missed_bunt', 'bunt_foul_tip',
]
whiff_desc = ['swinging_strike', 'swinging_strike_blocked', 'missed_bunt']

# Zone codes flagged as in_zone (1-9) and out_zone (11-14).
in_zone_desc = [1, 2, 3, 4, 5, 6, 7, 8, 9]
out_zone_desc = list(range(11, 15))

# Add one boolean flag column per category to every pitch.
description = statcast['description']
zone = statcast['zone']
statcast['swing'] = description.isin(swing_desc)
statcast['whiff'] = description.isin(whiff_desc)
statcast['in_zone'] = zone.isin(in_zone_desc)
statcast['out_zone'] = zone.isin(out_zone_desc)

# A chase is a swing at a pitch flagged out_zone.
statcast['chase'] = statcast['out_zone'] & statcast['swing']

In [57]:
# One named aggregation per output column: (source column, reducer).
pitch_type_agg = {
    'pitch': ('release_speed', 'count'),
    'release_speed': ('release_speed', 'mean'),
    'pfx_z': ('pfx_z', 'mean'),
    'pfx_x': ('pfx_x', 'mean'),
    'release_spin_rate': ('release_spin_rate', 'mean'),
    'release_pos_x': ('release_pos_x', 'mean'),
    'release_pos_z': ('release_pos_z', 'mean'),
    'release_extension': ('release_extension', 'mean'),
    'delta_run_exp': ('delta_run_exp', 'sum'),
    'swing': ('swing', 'sum'),
    'whiff': ('whiff', 'sum'),
    'in_zone': ('in_zone', 'sum'),
    'out_zone': ('out_zone', 'sum'),
    'chase': ('chase', 'sum'),
    'xwoba': ('estimated_woba_using_speedangle', 'mean'),
}
statcast_group = statcast.groupby('pitch_type').agg(**pitch_type_agg).reset_index()

# xwOBAcon: the same estimated-wOBA column, averaged over type == 'X' rows only.
in_play = statcast.loc[statcast['type'] == 'X']
xwobacon_group = (
    in_play.groupby('pitch_type')['estimated_woba_using_speedangle']
    .mean()
    .rename('xwobacon')
    .reset_index()
)

# Left merge keeps pitch types without any type == 'X' rows (xwobacon is NaN there).
statcast_group = statcast_group.merge(xwobacon_group, on='pitch_type', how='left')

# Scale movement by 12 (presumably feet -> inches; confirm units).
scaled_cols = ['pfx_x', 'pfx_z']
statcast_group[scaled_cols] = statcast_group[scaled_cols] * 12

# Derived rates. 'pitch' counts rows with a non-null release_speed.
pitches = statcast_group['pitch']
statcast_group['pitch_usage'] = pitches / pitches.sum()
statcast_group['whiff_rate'] = statcast_group['whiff'].div(statcast_group['swing'])
statcast_group['in_zone_rate'] = statcast_group['in_zone'].div(pitches)
statcast_group['chase_rate'] = statcast_group['chase'].div(statcast_group['out_zone'])
# The sign of delta_run_exp is flipped before scaling to a per-100-pitch value.
statcast_group['delta_run_exp_per_100'] = (-statcast_group['delta_run_exp']).div(pitches).mul(100)

In [58]:
# Estimated wOBA averaged over type == 'X' rows only, across all pitch types.
in_play_rows = statcast['type'] == 'X'
xwobacon_all = statcast.loc[in_play_rows, 'estimated_woba_using_speedangle'].mean()

# The same metrics as the per-pitch-type table, computed over every pitch.
overall_values = {
    'pitch_type': 'All',
    'pitch': statcast['release_speed'].count(),
    'release_speed': statcast['release_speed'].mean(),
    'pfx_z': statcast['pfx_z'].mean() * 12,
    'pfx_x': statcast['pfx_x'].mean() * 12,
    'release_spin_rate': statcast['release_spin_rate'].mean(),
    'release_pos_x': statcast['release_pos_x'].mean(),
    'release_pos_z': statcast['release_pos_z'].mean(),
    'release_extension': statcast['release_extension'].mean(),
    'delta_run_exp': statcast['delta_run_exp'].sum(),
    'swing': statcast['swing'].sum(),
    'whiff': statcast['whiff'].sum(),
    'in_zone': statcast['in_zone'].sum(),
    'out_zone': statcast['out_zone'].sum(),
    'chase': statcast['chase'].sum(),
    'xwoba': statcast['estimated_woba_using_speedangle'].mean(),
    'xwobacon': xwobacon_all,
}
# Wrap each scalar in a one-element list to build a single-row frame.
all_row = pd.DataFrame({column: [value] for column, value in overall_values.items()})

# Derived rates, using the same formulas as the per-pitch-type rows.
all_row = all_row.assign(
    pitch_usage=1.0,
    whiff_rate=lambda row: row['whiff'] / row['swing'],
    in_zone_rate=lambda row: row['in_zone'] / row['pitch'],
    chase_rate=lambda row: row['chase'] / row['out_zone'],
    delta_run_exp_per_100=lambda row: -row['delta_run_exp'] / row['pitch'] * 100,
)

# Append the 'All' row beneath the per-pitch-type rows.
# Re-running this cell appends another 'All' row.
statcast_group = pd.concat([statcast_group, all_row], ignore_index=True)

In [59]:
# Save the FSL per-pitch-type table, including the 'All' row (row index is not written).
statcast_group.to_csv('statcast_2025_grouped_fsl.csv', index=False)