In [1]:
import pandas as pd

# Load the CSV
df = pd.read_csv("kcl_xba_data.csv")
# Define what counts as an AB (at-bat)
# Exclude: Walks, HitByPitch, Sacrifice, CatcherInterference, IntentionalWalk, etc.
# Include: Out, Error, FieldersChoice, Single, Double, Triple, HomeRun, etc.

# You may need to adjust these based on your dataset's PlayResult or KorBB values
not_ab = [
    "Walk", "IntentionalWalk", "HitByPitch", "SacrificeBunt", "SacrificeFly", "CatcherInterference"
]
# If you have a column for PlayResult or KorBB, use it to filter
ab_mask = ~df["PlayResult"].isin(not_ab)

# Group by batter and calculate xBA/AB
result = (
    df[ab_mask]
    .groupby("Batter")
    .agg(
        AB=("PlayResult", "count"),
        xBA_sum=("xBA", "sum")
    )
    .assign(xBA_per_AB=lambda x: x["xBA_sum"] / x["AB"])
    .reset_index()
    .sort_values("xBA_per_AB", ascending=False)
)

result["xBA_percentile"] = result["xBA_per_AB"].rank(pct=True)

# Sort by xBA_per_AB descending
result = result.sort_values("xBA_per_AB", ascending=False)

# Show the result
print(result[["Batter", "xBA_per_AB", "xBA_percentile"]])

             Batter  xBA_per_AB  xBA_percentile
84      Zach Spidle    0.557895        1.000000
82        Zach Bava    0.526316        0.988235
8    Brady Veselack    0.486068        0.976471
28     Donte Frantz    0.473684        0.964706
45   Justin Trusner    0.454887        0.952941
..              ...         ...             ...
14  Brayden Elliott    0.000000        0.041176
10     Brandon Ward    0.000000        0.041176
26      Connor Hale    0.000000        0.041176
27    Deegan Aeilts    0.000000        0.041176
11    Brandown Ward         NaN             NaN

[86 rows x 3 columns]


In [2]:
result.to_csv("kcl_xba.csv", index=False)