In [4]:
import pandas as pd

# Read the source data file into a DataFrame.
source_csv = "kcl_xba_data.csv"
df = pd.read_csv(source_csv)
# An at-bat (AB) here is any row whose PlayResult is recorded and is not one of
# the non-AB outcomes listed below (walks, hit-by-pitch, sacrifices, catcher's
# interference). Every other recorded PlayResult counts as an AB.
# NOTE(review): the labels must match the spellings used in this dataset's
# PlayResult column; an outcome spelled differently there (for example a plain
# "Sacrifice") would not be excluded. Confirm against the data.
not_ab = [
    "Walk", "IntentionalWalk", "HitByPitch", "SacrificeBunt", "SacrificeFly", "CatcherInterference"
]

# Rows with a missing PlayResult are dropped together with the non-AB outcomes.
# isin() reports False for missing values, so negating it alone would keep those
# rows: their xBA would be summed while count() leaves them out of AB, and a
# batter with only such rows would end up with a zero AB denominator.
ab_mask = df["PlayResult"].notna() & ~df["PlayResult"].isin(not_ab)

# Per batter: number of at-bats, summed xBA, xBA per at-bat, and the percentile
# rank of that ratio across batters (1.0 = highest; tied batters share the
# average rank).
result = (
    df[ab_mask]
    .groupby("Batter")
    .agg(
        AB=("PlayResult", "count"),
        xBA_sum=("xBA", "sum"),
    )
    .assign(
        xBA_per_AB=lambda x: x["xBA_sum"] / x["AB"],
        xBA_percentile=lambda x: x["xBA_per_AB"].rank(pct=True),
    )
    .reset_index()
    # A single stable sort, best hitters first; the stable algorithm makes the
    # order of batters with equal xBA_per_AB deterministic across runs.
    .sort_values("xBA_per_AB", ascending=False, kind="stable")
)

# Show the ranking (AB and xBA_sum stay in `result` but are not displayed).
print(result[["Batter", "xBA_per_AB", "xBA_percentile"]])

               Batter  xBA_per_AB  xBA_percentile
3     Braden Deverman    0.777778        1.000000
17         Colin Karr    0.555556        0.984615
52     Sammy Driscoll    0.518519        0.969231
31     Justin Trusner    0.500000        0.953846
28     Jake Zimmerman    0.422222        0.938462
..                ...         ...             ...
46       Nolan Hunter    0.018519        0.076923
21        Gabe Peters    0.000000        0.038462
4         Braden Mehn    0.000000        0.038462
1   Andrew Schweigert    0.000000        0.038462
45       Nolan Bowles    0.000000        0.038462

[65 rows x 3 columns]


In [5]:
# Write the per-batter summary to disk, omitting the DataFrame index column.
output_csv = "kcl_xba.csv"
result.to_csv(output_csv, index=False)