In [1]:
# Player Similarity Program
import pandas as pd
import numpy as np
import polars as pl
from api_scraper import MLB_Scrape

# Configure pandas so displayed frames are not truncated.
# Each option is set to None, exactly as pd.set_option(<name>, None) would do.
pd.options.display.max_columns = None  # show every column
pd.options.display.max_rows = None  # show every row
pd.options.display.width = None  # no fixed display width

In [16]:
# At-bat flags per play event: 1 means the event is counted as an at-bat, 0 means it is not.
# Walks, hit-by-pitches, sacrifices, catcher interference and runner-only events
# (steals, caught stealing, wild pitches, runner outs) are flagged 0.
# 'Bunt Groundout' and 'Bunt Pop Out' are flagged 1 like 'Bunt Lineout': a bunt that
# results in an out without a sacrifice being credited is an at-bat (sacrifices have
# their own 'Sac Bunt' key).
ab_flag_dict = {'Walk':0, 'Field Error': 1, 'Caught Stealing Home': 0, 'Strikeout Double Play': 1,
                'Sac Fly Double Play': 0,   'Hit By Pitch': 0, 'Runner Out': 0, 'Triple Play': 1, 
                'Field Out': 1, 'Flyout': 1, 'Bunt Lineout': 1, 'Catcher Interference': 0, 'Fielders Choice': 1,
                'Sac Bunt': 0, 'Pop Out': 1, 'Caught Stealing 2B': 0, 'Fielders Choice Out': 1, 'Stolen Base 2B': 0,
                'Sac Fly': 0, 'Groundout': 1, 'Home Run': 1, 'Caught Stealing 3B': 0, 'Intent Walk': 0, 'Double Play': 1,
                'Grounded Into DP': 1, 'Forceout': 1, 'Lineout': 1, 'Single': 1, 'Triple': 1, 'Wild Pitch': 0, 'Double': 1,
                'Strikeout': 1, 'Bunt Groundout': 1, 'Bunt Pop Out': 1}

# Names of the events flagged as at-bats, in dictionary order
ab_events = [event for event, flag in ab_flag_dict.items() if flag == 1]

# Plate-appearance flags per play event: 1 means the event is counted as a
# plate appearance, 0 means it is not. Key order is kept as originally written.
pa_flag_dict = {
    'Walk': 1,
    'Field Error': 1,
    'Caught Stealing Home': 0,
    'Strikeout Double Play': 1,
    'Sac Fly Double Play': 1,
    'Hit By Pitch': 1,
    'Runner Out': 0,
    'Triple Play': 1,
    'Field Out': 1,
    'Flyout': 1,
    'Bunt Lineout': 1,
    'Catcher Interference': 1,
    'Fielders Choice': 1,
    'Sac Bunt': 1,
    'Pop Out': 1,
    'Caught Stealing 2B': 0,
    'Fielders Choice Out': 1,
    'Stolen Base 2B': 0,
    'Sac Fly': 1,
    'Groundout': 1,
    'Home Run': 1,
    'Caught Stealing 3B': 0,
    'Intent Walk': 1,
    'Double Play': 1,
    'Grounded Into DP': 1,
    'Forceout': 1,
    'Lineout': 1,
    'Single': 1,
    'Triple': 1,
    'Wild Pitch': 0,
    'Double': 1,
    'Strikeout': 1,
    'Bunt Groundout': 1,
    'Bunt Pop Out': 1,
}

# Names of the events flagged as plate appearances, in dictionary order
pa_events = [name for name in pa_flag_dict if pa_flag_dict[name] == 1]

# Hit flags per play event: only 'Single', 'Double', 'Triple' and 'Home Run'
# are flagged 1. Key order is kept as originally written.
hit_flag_dict = {
    'Walk': 0,
    'Field Error': 0,
    'Caught Stealing Home': 0,
    'Strikeout Double Play': 0,
    'Sac Fly Double Play': 0,
    'Hit By Pitch': 0,
    'Runner Out': 0,
    'Triple Play': 0,
    'Field Out': 0,
    'Flyout': 0,
    'Bunt Lineout': 0,
    'Catcher Interference': 0,
    'Fielders Choice': 0,
    'Sac Bunt': 0,
    'Pop Out': 0,
    'Caught Stealing 2B': 0,
    'Fielders Choice Out': 0,
    'Stolen Base 2B': 0,
    'Sac Fly': 0,
    'Groundout': 0,
    'Home Run': 1,
    'Caught Stealing 3B': 0,
    'Intent Walk': 0,
    'Double Play': 0,
    'Grounded Into DP': 0,
    'Forceout': 0,
    'Lineout': 0,
    'Single': 1,
    'Triple': 1,
    'Wild Pitch': 0,
    'Double': 1,
    'Strikeout': 0,
    'Bunt Groundout': 0,
    'Bunt Pop Out': 0,
}

# Names of the events flagged as hits, in dictionary order
hit_events = [name for name in hit_flag_dict if hit_flag_dict[name] == 1]

In [55]:
# Read the two CSV inputs (paths are relative to the working directory) into polars frames
data = pl.read_csv(source="2025_data.csv")
batters = pl.read_csv(source="batter_id.csv")

In [57]:
# Batter whose rows are analysed in the cells below
player_name = "Wyatt Langford"

# All rows of `data` whose batter_name equals the selected player
player_data = data.filter(pl.col("batter_name") == player_name)

# Subsets of the player's rows whose event is an at-bat, a plate appearance, or a hit
player_ab_data, player_pa_data, player_hit_data = (
    player_data.filter(pl.col("event").is_in(event_names))
    for event_names in (ab_events, pa_events, hit_events)
)

In [58]:
# Row counts of the at-bat and hit subsets
num_abs, num_hits = player_ab_data.height, player_hit_data.height

# Batting average = hits / at-bats, rounded to three decimals (0 when there are no at-bats)
if num_abs > 0:
    batting_average = round(num_hits / num_abs, 3)
else:
    batting_average = round(0, 3)

batting_average

0.241

In [59]:
# Pitch-description names grouped into three pitch families; every name maps to 1.
fastball_dict = dict.fromkeys(('Four-Seam Fastball', 'Sinker', 'Cutter'), 1)

offspeed_dict = dict.fromkeys(('Splitter', 'Changeup', 'Forkball', 'Screwball'), 1)

breaking_dict = dict.fromkeys(
    ('Curveball', 'Knuckle Ball', 'Knuckle Curve', 'Slider', 'Sweeper', 'Slurve', 'Slow Curve'),
    1,
)

In [83]:
# Helper function for counting fastball events
def count_fastball_events(events):
    """Count the rows of the global `player_data` that are fastballs with a given event.

    Args:
        events: Collection of event names matched against the "event" column.

    Returns:
        int: Number of rows whose "pitch_description" is a key of `fastball_dict`
        and whose "event" is one of `events`.
    """
    # Hand is_in an explicit list of pitch names; passing the dict itself
    # relies on polars implicitly iterating the dict's keys.
    fastball_names = list(fastball_dict)
    return player_data.filter(
        pl.col("pitch_description").is_in(fastball_names) &
        pl.col("event").is_in(events)
    ).height

# ABs and PA for fastballs
fastball_num_abs = count_fastball_events(ab_events)
fastball_num_pa = count_fastball_events(pa_events)

# Hits for fastballs
fastball_num_hits = count_fastball_events(hit_events)

# Batting Average = H / AB (0 when there are no at-bats)
fastball_batting_average = round(
    fastball_num_hits / fastball_num_abs if fastball_num_abs > 0 else 0,
    3
)

# Slugging Percentage = total bases / AB (0 when there are no at-bats)
fastball_total_bases = (
    1 * count_fastball_events(["Single"]) +
    2 * count_fastball_events(["Double"]) +
    3 * count_fastball_events(["Triple"]) +
    4 * count_fastball_events(["Home Run"])
)
fastball_slug = round(
    fastball_total_bases / fastball_num_abs if fastball_num_abs > 0 else 0,
    3
)

# On-Base Percentage = (H + BB + HBP) / (AB + BB + HBP + SF)
# - Intentional walks are walks, so "Intent Walk" belongs in the numerator
#   (it is already part of pa_events and therefore of the denominator).
# - Sac bunts and catcher interference are plate appearances but are not part
#   of the OBP denominator, so they are removed from it.
fastball_times_on_base = fastball_num_hits + count_fastball_events(
    ["Walk", "Intent Walk", "Hit By Pitch"]
)
fastball_obp_denom = count_fastball_events(
    [event for event in pa_events if event not in ("Sac Bunt", "Catcher Interference")]
)
fastball_obp = round(
    fastball_times_on_base / fastball_obp_denom if fastball_obp_denom > 0 else 0,
    3
)

print(f"Fastball AVG: {fastball_batting_average}")
print(f"Fastball OBP: {fastball_obp}")
print(f"Fastball SLG: {fastball_slug}")
# Round the sum so float addition artifacts (e.g. 0.8420000000000001) are never printed
print(f"Fastball OPS: {round(fastball_slug + fastball_obp, 3)}")


Fastball AVG: 0.248
Fastball OBP: 0.359
Fastball SLG: 0.483
Fastball OPS: 0.842


In [85]:
# Helper function to calculate pitch group stats
def calculate_pitch_group_stats(pitch_dict, player_df=None):
    """Compute AVG, OBP, SLG and OPS for one pitch group.

    Args:
        pitch_dict: Pitch-description names that make up the group. Only the
            keys are used when a dict is passed; any iterable of names works.
        player_df: Optional polars DataFrame with "pitch_description" and
            "event" columns. Defaults to the global `player_data`.

    Returns:
        dict: {"AVG", "OBP", "SLG", "OPS"}, each rounded to three decimals.
        A rate is 0 when its denominator is 0.
    """
    source_df = player_data if player_df is None else player_df

    # Restrict to the pitch group once instead of re-filtering the full frame
    # for every count. An explicit list avoids relying on polars iterating a dict.
    group_df = source_df.filter(pl.col("pitch_description").is_in(list(pitch_dict)))

    def count_events(events):
        """Number of rows in the pitch group whose event is in `events`."""
        return group_df.filter(pl.col("event").is_in(events)).height

    # ABs and hits
    num_ab = count_events(ab_events)
    num_hits = count_events(hit_events)

    # OBP = (H + BB + HBP) / (AB + BB + HBP + SF): sac bunts and catcher
    # interference are plate appearances but are not part of the denominator.
    obp_denom = count_events(
        [e for e in pa_events if e not in ("Sac Bunt", "Catcher Interference")]
    )
    # Intentional walks are walks, so they count as times on base as well.
    times_on_base = num_hits + count_events(["Walk", "Intent Walk", "Hit By Pitch"])

    total_bases = (
        1 * count_events(["Single"]) + 2 * count_events(["Double"]) +
        3 * count_events(["Triple"]) + 4 * count_events(["Home Run"])
    )

    # Batting Average
    avg = round(num_hits / num_ab if num_ab > 0 else 0, 3)

    # On-Base Percentage
    obp = round(times_on_base / obp_denom if obp_denom > 0 else 0, 3)

    # Slugging Percentage
    slg = round(total_bases / num_ab if num_ab > 0 else 0, 3)

    ops = round(obp + slg, 3)

    return {"AVG": avg, "OBP": obp, "SLG": slg, "OPS": ops}

# Compute the stat line for each pitch group (fastball, breaking, offspeed — in that order)
fastball_stats, breaking_stats, offspeed_stats = (
    calculate_pitch_group_stats(pitch_group)
    for pitch_group in (fastball_dict, breaking_dict, offspeed_dict)
)

# Print one labelled line per pitch group
for heading, group_stats in (
    ("Fastball Stats:", fastball_stats),
    ("Breaking Ball Stats:", breaking_stats),
    ("Offspeed Stats:", offspeed_stats),
):
    print(heading, group_stats)


Fastball Stats: {'AVG': 0.248, 'OBP': 0.359, 'SLG': 0.483, 'OPS': 0.842}
Breaking Ball Stats: {'AVG': 0.252, 'OBP': 0.321, 'SLG': 0.366, 'OPS': 0.687}
Offspeed Stats: {'AVG': 0.178, 'OBP': 0.286, 'SLG': 0.288, 'OPS': 0.574}


In [61]:
# At-bat and hit rows that came on a breaking ball.
# is_in gets an explicit list of pitch names; passing the dict itself relies on
# polars implicitly iterating the dict's keys.
breaking_ab_data = player_ab_data.filter(pl.col("pitch_description").is_in(list(breaking_dict)))
breaking_hit_data = player_hit_data.filter(pl.col("pitch_description").is_in(list(breaking_dict)))

breaking_num_abs = breaking_ab_data.height
breaking_num_hits = breaking_hit_data.height

# Batting average vs. breaking balls, rounded to three decimals (0 when there are no at-bats)
breaking_batting_average = round(
    breaking_num_hits / breaking_num_abs if breaking_num_abs > 0 else 0,
    3
)

breaking_batting_average

0.252

In [62]:
# At-bat and hit rows that came on an offspeed pitch.
# is_in gets an explicit list of pitch names; passing the dict itself relies on
# polars implicitly iterating the dict's keys.
offspeed_ab_data = player_ab_data.filter(pl.col("pitch_description").is_in(list(offspeed_dict)))
offspeed_hit_data = player_hit_data.filter(pl.col("pitch_description").is_in(list(offspeed_dict)))

offspeed_num_abs = offspeed_ab_data.height
offspeed_num_hits = offspeed_hit_data.height

# Batting average vs. offspeed pitches, rounded to three decimals (0 when there are no at-bats)
offspeed_batting_average = round(
    offspeed_num_hits / offspeed_num_abs if offspeed_num_abs > 0 else 0,
    3
)

offspeed_batting_average

0.178

In [70]:
# Title line, then one labelled batting average per line; each print ends with an extra "\n" argument
print(f"{player_name}'s 2025 Season Pitch Split Batting Average\n")
for heading, average in (
    ("Overall Batting Average:", batting_average),
    ("Fastball Batting Average:", fastball_batting_average),
    ("Breaking Ball Batting Average:", breaking_batting_average),
    ("Offspeed Batting Average:", offspeed_batting_average),
):
    print(heading, average, "\n")

Wyatt Langford's 2025 Season Pitch Split Batting Average

Overall Batting Average: 0.241 

Fastball Batting Average: 0.248 

Breaking Ball Batting Average: 0.252 

Offspeed Batting Average: 0.178 

