In [1]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from constants import DATA_DIR
import polars as pl
import os

In [2]:
# Load the pitcher-outing table from the project's data directory.
pitchers_outings_path = os.path.join(DATA_DIR, "pitchers_outings.parquet")
pitchers_outings: pl.DataFrame = pl.read_parquet(pitchers_outings_path)

In [3]:
# Visualize the distribution of delta_run_exp (scaled per PER_PITCHES pitches)
# for each team: one probability-normalized histogram per team, arranged on a
# grid of subplots.
# NOTE(review): outings are grouped by `home_team`; presumably that also
# captures visiting pitchers who threw in that team's home games — confirm
# this is the intended grouping for a "per team" view.

PER_PITCHES = 100
NUM_COLS = 6  # subplots per grid row; the row count is derived from the team count

# Column holding the per-outing value being plotted (hoisted: loop-invariant).
VALUE_COL = f"delta_run_exp_per_{PER_PITCHES}_pitches"

# Sort the unique team values so subplot order is reproducible across runs
# (`unique` alone does not guarantee ordering), and drop nulls so a missing
# team value never produces an empty "None" subplot.
TEAMS: list[str] = (
    pitchers_outings.get_column("home_team").drop_nulls().unique().sort().to_list()
)

# Ceiling division: always enough rows to hold every team, without allocating
# a surplus row. `max(1, ...)` keeps make_subplots valid even with no teams.
num_cols = NUM_COLS
num_rows = max(1, -(-len(TEAMS) // num_cols))

fig = make_subplots(
    rows=num_rows,
    cols=num_cols,
    subplot_titles=TEAMS,
)

for i, team in enumerate(TEAMS):
    team_data = (
        pitchers_outings.filter(pl.col("home_team") == team)
        .get_column(VALUE_COL)
        .to_numpy()
    )

    # Create histogram for the team
    histogram = go.Histogram(
        x=team_data,
        name=team,
        histnorm="probability",
    )

    # Map the running index to a 1-based (row, col) grid position, row-major.
    row_idx, col_idx = divmod(i, num_cols)
    row = row_idx + 1
    col = col_idx + 1

    # Add histogram to the subplot
    fig.add_trace(histogram, row=row, col=col)

    # Label this subplot's axes (y-axis title intentionally left blank)
    fig.update_yaxes(title_text="", row=row, col=col)
    fig.update_xaxes(title_text="delta run exp", row=row, col=col)

fig.update_layout(
    title=f"Distribution of delta_run_exp per {PER_PITCHES} pitches for Each Team (2015-2023, minimum 30 pitches thrown)",
    height=1000,  # Adjust height if needed
    showlegend=False,
)

fig.show()