In [4]:
import pandas as pd

# Read the xBA export, then keep only the rows whose BatterTeam is
# "Normal cornbelters".
df = pd.read_csv("cornbelters_xba.csv")
df = df.loc[df["BatterTeam"].eq("Normal cornbelters")]
# At-bat definition: a row counts as an at-bat unless its PlayResult is one of
# the outcomes listed in not_ab (walks, hit-by-pitch, sacrifices, catcher
# interference). Every other PlayResult value is kept.
#
# NOTE(review): the labels below must match the spellings used in this
# dataset's PlayResult column -- adjust the list if the export uses different
# values (for example, if walks are recorded in a separate KorBB column).
not_ab = [
    "Walk",
    "IntentionalWalk",
    "HitByPitch",
    "SacrificeBunt",
    "SacrificeFly",
    "CatcherInterference",
]
# Boolean mask over df rows: True where PlayResult is NOT in not_ab.
ab_mask = ~df["PlayResult"].isin(not_ab)

# Per-batter summary over the at-bat rows:
#   AB         -- number of non-null PlayResult entries for the batter
#   xBA_sum    -- sum of the batter's xBA values
#   xBA_per_AB -- xBA_sum divided by AB
# The summary is returned with Batter as a regular column, ordered from the
# highest xBA_per_AB to the lowest.
result = (
    df.loc[ab_mask]
    .groupby("Batter")
    .agg(
        AB=pd.NamedAgg(column="PlayResult", aggfunc="count"),
        xBA_sum=pd.NamedAgg(column="xBA", aggfunc="sum"),
    )
    .assign(xBA_per_AB=lambda totals: totals["xBA_sum"].div(totals["AB"]))
    .reset_index()
    .sort_values(by="xBA_per_AB", ascending=False)
)

# Add each batter's percentile rank of xBA_per_AB (rank(pct=True): the highest
# value gets 1.0), then order the table by xBA_per_AB, best first.
result = result.assign(
    xBA_percentile=result["xBA_per_AB"].rank(pct=True)
).sort_values(by="xBA_per_AB", ascending=False)

# Print the leaderboard columns.
print(result.loc[:, ["Batter", "xBA_per_AB", "xBA_percentile", "AB"]])

             Batter  xBA_per_AB  xBA_percentile  AB
11   Tyler Thompson    0.409357        1.000000  19
2      Cole Freeman    0.326797        0.923077  17
10     Shea Zbrozek    0.320988        0.846154  18
4     Jackson Smith    0.312757        0.769231  27
12   Zach Zychowski    0.296296        0.692308  18
3        Jack Novak    0.248677        0.615385  21
8   Nolan Mccrossin    0.246914        0.538462  18
0       Caleb Royer    0.241830        0.461538  17
7     Michael Kuska    0.212121        0.384615  11
5    Jackson Stanek    0.188034        0.307692  13
1    Charlie Graham    0.161616        0.230769  11
9      Ryan Colucci    0.059829        0.153846  13
6     Kannan Kleine    0.018519        0.076923   6


In [None]:
# Save the per-batter summary to its own file. Writing it to
# "cornbelters_xba.csv" would overwrite the raw input that the loading cell
# reads with pd.read_csv; the raw rows would be lost and a re-run would fail
# because the summary has no BatterTeam / PlayResult / xBA columns.
result.to_csv("cornbelters_xba_by_batter.csv", index=False)