In [None]:
import polars as pl
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Build the plotting frame in one lazy pipeline so `on_base` is bound exactly
# once (re-running the cell always starts again from the parquet file).

# Outcome label: rows whose event_type is "sac_fly" or "sac_fly_double_play"
# become "Successful"; every other row becomes "Unsuccessful".
outcome_label = (
    pl.when(pl.col("event_type").is_in(["sac_fly", "sac_fly_double_play"]))
    .then(pl.lit("Successful"))
    .otherwise(pl.lit('Unsuccessful'))
    .alias("is_successful")
)

on_base = (
    pl.scan_parquet("../data/throw_home_runner_on_third_wide_sprint_arm.parquet")
    .with_columns(outcome_label)
    # Keep only the columns the plots below read.
    .select(["arm_strength", "sprint_speed_runner", "is_successful"])
    .collect()
    # seaborn is handed a pandas DataFrame.
    .to_pandas()
)

def plot_outcome_violin(data, column, title, ylabel,
                        ref_value, ref_label, note, note_x, note_y):
    """Show a violin plot of ``column`` grouped by the ``is_successful`` label.

    A dashed red horizontal reference line is drawn at ``ref_value`` and a red
    text note is placed at (``note_x``, ``note_y``) in data coordinates.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``column`` and an ``is_successful`` column.
    column : str
        Name of the numeric column plotted on the y-axis.
    title : str
        Figure title.
    ylabel : str
        Y-axis label (include the unit).
    ref_value : float
        Y position of the dashed reference line.
    ref_label : str
        Label attached to the reference line (used if a legend is added).
    note : str
        Text annotation describing the reference line.
    note_x, note_y : float
        Position of the annotation; x is in category positions
        (0 = first violin, 1 = second), y is in the units of ``column``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``; nothing is returned so a
        trailing call does not echo an object repr in the cell output.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.violinplot(x="is_successful", y=column, data=data, ax=ax)

    # Reference line plus its explanatory note
    ax.axhline(y=ref_value, color='red', linestyle='--', label=ref_label)
    ax.text(note_x, note_y, note, color='red', fontsize=14, ha='center')

    # Formatting
    ax.set_title(title, fontsize=16, pad=15)
    ax.tick_params(axis='both', labelsize=12)
    ax.set_xlabel("Sac Fly Outcome", fontsize=14)
    ax.set_ylabel(ylabel, fontsize=14)

    fig.tight_layout()
    plt.show()


# Sprint speed by outcome, with a reference line at 28 ft/sec
plot_outcome_violin(
    on_base,
    column='sprint_speed_runner',
    title="Distribution of Sprint Speed by Sac Fly Outcome",
    ylabel="Sprint Speed (ft/sec)",
    ref_value=28,
    ref_label='28 ft/sec',
    note='Avg MLB Sprint Speed\n(MLB Statcast)',
    note_x=0.65,
    note_y=28.5,
)

# Arm strength by outcome, with a reference line at 85 mph
# (the unit matches the y-axis label; the line label previously said ft/sec)
plot_outcome_violin(
    on_base,
    column='arm_strength',
    title="Distribution of Arm Strength by Sac Fly Outcome",
    ylabel="Arm Strength (mph)",
    ref_value=85,
    ref_label='85 mph',
    note='Avg MLB Arm Strength\n(MLB Statcast)',
    note_x=0.5,
    note_y=95,
)