In [None]:
# Input the player name and set the date range.
# FIRST_NAME / LAST_NAME feed the player-ID lookup, the chart title and the
# cache filename further down.

FIRST_NAME = "Kevin"
LAST_NAME  = "Gausman"

# Date window for the Statcast query, given as "YYYY-MM-DD" strings.
# NOTE(review): these look like approximate season bounds rather than the
# exact Opening Day / final-game dates - confirm whether spring-training or
# postseason pitches are meant to fall inside the window.
START_DATE = "2025-03-01" # first day of 2025 season
END_DATE   = "2025-10-01" # final day of 2025 season

# If set to True, Statcast data is re-pulled even when a cached CSV exists.
# True would be used to get new data in real time during a game;
# leave it False to reuse the cached file.

FORCE_REFRESH = False

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from pybaseball import statcast_pitcher, playerid_lookup

# Cache folder so that player data only has to be pulled once.
# CSV files are saved into the data_cache folder (a relative path, so it is
# resolved against the current working directory).

CACHE_DIR = "data_cache"
os.makedirs(CACHE_DIR, exist_ok=True)  # no error if the folder already exists

def load_or_cache_csv(filename, fetch_func, force_refresh=False):
    """
    Load cached CSV if it exists; otherwise fetch data and cache it.

    Parameters
    ----------
    filename : str or path-like
        Location of the cache file.
    fetch_func : callable
        Zero-argument callable returning a pandas DataFrame. It is only
        invoked when the cache file is missing or ``force_refresh`` is true.
    force_refresh : bool, default False
        When true, ignore any existing cache file and overwrite it.

    Returns
    -------
    pandas.DataFrame
        On a cache hit, the frame parsed by ``pd.read_csv``; otherwise the
        frame returned by ``fetch_func``. Because the two paths build the
        frame differently, column dtypes may differ between them.
    """
    if os.path.exists(filename) and not force_refresh:
        print(f"Loading cached data: {filename}")
        return pd.read_csv(filename)

    print(f"Fetching Statcast data → caching to {filename}")

    # Create the target folder up front so a slow fetch is never thrown away
    # by a failed write. A bare filename has no parent folder to create.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df = fetch_func()
    df.to_csv(filename, index=False)
    return df


In [None]:
# data pulled if necessary
# Statcast ID for player found

def lookup_statcast_id(first_name, last_name):
    """
    Look up MLBAM (Statcast) ID from player name.

    Parameters
    ----------
    first_name : str
        Player's first name.
    last_name : str
        Player's last name.

    Returns
    -------
    int
        The ``key_mlbam`` value of the first row returned by
        ``playerid_lookup``.

    Raises
    ------
    ValueError
        If the lookup returns no rows.
    """
    lookup = playerid_lookup(last_name, first_name)

    if lookup.empty:
        raise ValueError("Player not found. Check spelling.")

    if len(lookup) > 1:
        # More than one player can share a name. The first row is still used,
        # but the candidates are listed so a wrong pick is visible instead of
        # silently producing a chart for a different player.
        print(
            f"Warning: {len(lookup)} players match "
            f"{first_name} {last_name}; using the first match. Candidates:"
        )
        print(
            lookup[["name_first", "name_last", "key_mlbam"]]
            .to_string(index=False)
        )

    match = lookup.iloc[0]
    player_id = int(match["key_mlbam"])
    full_name = f"{match['name_first']} {match['name_last']}"

    print(f"Statcast ID found for {full_name}: {player_id}")
    return player_id

# Resolve the player, then load (or fetch and cache) their Statcast data.
PITCHER_ID = lookup_statcast_id(FIRST_NAME, LAST_NAME)
PITCHER_NAME = f"{FIRST_NAME} {LAST_NAME}"

# The MLBAM ID is part of the cache filename so that two pitchers who share
# a last name never load each other's cached data. Files cached under the
# older "<last name>_<start>_<end>.csv" naming are simply not picked up and
# the data is fetched again once.
pitcher_csv = os.path.join(
    CACHE_DIR,
    f"{LAST_NAME}_{PITCHER_ID}_{START_DATE}_{END_DATE}.csv",
)

# The lambda defers the Statcast request so it only runs on a cache miss
# or when FORCE_REFRESH is True.
pitcher_df = load_or_cache_csv(
    pitcher_csv,
    lambda: statcast_pitcher(START_DATE, END_DATE, PITCHER_ID),
    force_refresh=FORCE_REFRESH
)


In [None]:
# colors are purely cosmetic
# can be customized for any team or personal preference 
# will not affect the underlying analysis.

# Maps pitch_type codes to the hex color used for that pitch's markers.
pitch_colors = {
    'FF': "#E8291C",        # Four-seamer
    'FS': "#1D8BF1",        # Splitter
    'SL': "#134A8E",        # Slider
    'SI': "#10BDDB",        # Sinker
    'CU': '#FF8C00',        # Curveball
    'CH': "#10C018"         # Changeup
}

# Fallback marker color for any pitch_type code missing from pitch_colors.
DEFAULT_PITCH_COLOR = "#4D4D4D"
# Text color of the chart title.
TITLE_COLOR = "#070000"


In [None]:
# calculate percentage of each pitch type
# data included in legend

def putaway_percentages(df):
    """
    Return each pitch type's share of the rows in ``df`` as a percentage.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``pitch_type`` column.

    Returns
    -------
    pandas.Series
        Indexed by pitch type, values rounded to one decimal place.
    """
    pitch_counts = df['pitch_type'].value_counts()
    return pitch_counts.div(pitch_counts.sum()).mul(100).round(1)


In [None]:
# visualization

def plot_putaway_chart(df, name="Pitcher"):
    """
    Scatter-plot the plate locations of strikeout pitches, by pitch type.

    Rows are kept when ``events`` is 'strikeout' or 'strikeout_double_play'
    and ``plate_x``, ``plate_z`` and ``pitch_type`` are all present. Each
    pitch type gets its own color and a legend entry showing its share of
    the plotted pitches. When no rows qualify, a message is printed and
    nothing is drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Pitch-level data containing the columns named above.
    name : str, default "Pitcher"
        Label placed at the start of the chart title.
    """
    is_strikeout = df['events'].isin(['strikeout', 'strikeout_double_play'])
    strikeout_pitches = df.loc[is_strikeout].dropna(
        subset=['plate_x', 'plate_z', 'pitch_type']
    )

    if strikeout_pitches.empty:
        print("No put-away pitches found.")
        return

    share_by_pitch = putaway_percentages(strikeout_pitches)

    fig, ax = plt.subplots(figsize=(7.5, 7.5))
    fig.patch.set_facecolor("white")

    # one scatter layer per pitch type so each gets its own legend entry
    for pitch_code, pitches in strikeout_pitches.groupby('pitch_type'):
        marker_color = pitch_colors.get(pitch_code, DEFAULT_PITCH_COLOR)
        ax.scatter(
            pitches['plate_x'],
            pitches['plate_z'],
            s=45,
            alpha=0.85,
            color=marker_color,
            label=f"{pitch_code} ({share_by_pitch[pitch_code]}%)",
            edgecolors="white",
            linewidths=0.4
        )

    # strike zone outline: bottom, top, left and right edges, in that order
    zone_edges = (
        ([-0.83, 0.83], [1.5, 1.5]),
        ([-0.83, 0.83], [3.5, 3.5]),
        ([-0.83, -0.83], [1.5, 3.5]),
        ([0.83, 0.83], [1.5, 3.5]),
    )
    for edge_x, edge_y in zone_edges:
        ax.plot(edge_x, edge_y, color="black", lw=1)

    ax.set_xlim(-2, 2)
    ax.set_ylim(0, 5)

    chart_title = (
        f"{name} — Put-Away Pitch Locations\n"
        "Strikeouts Only | % by Pitch Type"
    )
    ax.set_title(chart_title, fontsize=14, weight="bold", color=TITLE_COLOR)

    # labels on X and Y axis
    ax.set_xlabel("Horizontal Location (ft)")
    ax.set_ylabel("Vertical Location (ft)")

    # legend title, font size, no frame
    ax.legend(title="Pitch Type (Put-Away %)", frameon=False, fontsize=9)

    plt.tight_layout()
    plt.show()

# draw the chart for the configured pitcher
plot_putaway_chart(pitcher_df, name=PITCHER_NAME)