In [None]:
import pandas as pd
import numpy as np
import pickle
# Load the Yakkertech CSV export that the rest of this cell annotates.
data_path = 'Data/cornbelters6-21.csv'
df = pd.read_csv(data_path)

# Value assigned to each listed 'PlayResult'; any result not listed here
# (or missing) ends up with a RunValue of 0.
Weights = {
    "Walk": .695,
    "HitByPitch": .727,
    "Single": .891,
    "Double": 1.269,
    "Triple": 1.609,
    "HomeRun": 2.078,
}
df['RunValue'] = df['PlayResult'].map(Weights).fillna(0)

# Pre-trained models used by the predict_* helpers further down.
# NOTE: pickle.load can execute arbitrary code while unpickling, so these
# files must come from a trusted source.
with open("../Stuff+/stuff_plus_model.pkl", "rb") as stuff_plus_file:
    stuff_plus_model = pickle.load(stuff_plus_file)

with open("../xBA_CornBelters/xba_model.pkl", "rb") as xba_file:
    xba_model = pickle.load(xba_file)

with open("xwoba.pkl", "rb") as xwoba_file:
    xwoba_model = pickle.load(xwoba_file)
# NOTE(review): a "Rename HitType to TaggedHitType" step was announced here but
# no code performs it; the batted-ball flags below read df['TaggedHitType']
# directly, so the CSV must already provide that column.

# Define standard strike zone boundaries
vert_strike_min, vert_strike_max = 1.5, 3.5  # Standard vertical strike zone
horz_strike_min, horz_strike_max = -1, 1  # Standard horizontal strike zone

# Mirror the horizontal plate location exactly once. This cell later writes the
# frame back to data_path, so a file that already carries the derived
# 'Horz Strike' column has been processed (and mirrored) before; negating it
# again on a re-run would silently restore the original sign in the saved file.
if 'Horz Strike' not in df.columns:
    df['PlateLocSide'] = df['PlateLocSide'] * -1

# Build every condition as a real boolean mask and cast to 0/1 only when
# storing it. Combining the stored 0/1 integer columns with `~` would be a
# bitwise NOT (~0 == -1, ~1 == -2), not a logical negation.
pitch_call = df['PitchCall']
in_vert_zone = df['PlateLocHeight'].between(vert_strike_min, vert_strike_max)
in_horz_zone = df['PlateLocSide'].between(horz_strike_min, horz_strike_max)
in_zone = in_vert_zone & in_horz_zone
is_in_play = pitch_call == 'InPlay'
is_whiff = pitch_call == 'StrikeSwinging'
swung = pitch_call.isin(['StrikeSwinging', 'FoulBall', 'InPlay'])
was_taken = pitch_call.isin(['StrikeCalled', 'BallCalled'])
low_launch = (df['Angle'] < 10) & is_in_play  # NaN angles compare False

# Add strike zone columns
df['Vert Strike'] = in_vert_zone.astype(int)
df['Horz Strike'] = in_horz_zone.astype(int)

# Add pitch outcome columns
df['Strike?'] = (pitch_call == 'StrikeCalled').astype(int)
df['Foul?'] = (pitch_call == 'FoulBall').astype(int)
df['In Play?'] = is_in_play.astype(int)
df['Swing Strike?'] = is_whiff.astype(int)
df['Swing?'] = swung.astype(int)
df['Ball Called?'] = (pitch_call == 'BallCalled').astype(int)
df['First Pitch'] = (df['PitchofPA'] == 1).astype(int)

# Add strike zone logic columns
df['In Strike Zone?'] = in_zone.astype(int)
df['Chase?'] = (swung & ~in_zone).astype(int)
df['In-zone take'] = (was_taken & in_zone).astype(int)
df['In-zone whiff'] = (is_whiff & in_zone).astype(int)
df['In-zone/swing'] = (swung & in_zone).astype(int)

# Add batted ball columns: a tagged hit type counts on its own, otherwise the
# launch angle of a ball in play decides.
df['LA<10'] = low_launch.astype(int)
df['Ground Ball?'] = ((df['TaggedHitType'] == 'GroundBall') | low_launch).astype(int)
df['Fly Ball?'] = ((df['TaggedHitType'] == 'FlyBall') | ((df['Angle'] > 25) & is_in_play)).astype(int)
df['EV>90'] = ((df['ExitSpeed'] > 90) & is_in_play).astype(int)
df['EV>100'] = ((df['ExitSpeed'] > 100) & is_in_play).astype(int)
df['EV>105'] = ((df['ExitSpeed'] > 105) & is_in_play).astype(int)

def predict_stuff_plus(event, mean_playresult=.5297397769516728):
    """Return the Stuff+ value for one pitch, or NaN when it cannot be computed.

    Parameters
    ----------
    event : mapping-like
        One pitch, e.g. a DataFrame row as passed by ``df.apply(..., axis=1)``.
        Must support ``in`` and ``[]`` lookups by feature name.
    mean_playresult : float, optional
        Raw model output that should map to a Stuff+ of 100. Defaults to the
        value that was previously hard-coded in this function.

    Returns
    -------
    float
        ``stuff_plus_model`` prediction scaled by ``100 / mean_playresult``,
        or ``np.nan`` if any required feature is absent or NaN.
    """
    # Order matters: the values are handed to the model positionally.
    features = ('RelSpeed', 'SpinRate', 'RelHeight', 'RelSide', 'Extension',
                'InducedVertBreak', 'HorzBreak', 'VertApprAngle', 'ZoneSpeed')

    inputs = []
    for name in features:
        if name not in event or pd.isna(event[name]):
            return np.nan  # Not enough data to predict
        inputs.append(event[name])

    raw_prediction = stuff_plus_model.predict([inputs])[0]
    return raw_prediction * (100 / mean_playresult)

def predict_xba(event):
    """Return the expected batting average contribution of one pitch.

    * Ball in play with both exit speed and launch angle recorded: the
      ``xba_model`` prediction for ``[ExitSpeed, Angle]``.
    * ``KorBB == "Walk"``: NaN (walks are left as NA).
    * Everything else (strikeouts, balls in play without batted-ball data,
      and any other pitch): 0.
    """
    has_batted_ball_data = not (pd.isna(event['ExitSpeed']) or pd.isna(event['Angle']))
    if has_batted_ball_data and event['PitchCall'] == 'InPlay':
        batted_ball = [event['ExitSpeed'], event['Angle']]
        return xba_model.predict([batted_ball])[0]

    if event.get('KorBB', None) == "Walk":
        return np.nan  # Leave Walk as NA

    # Strikeouts, untracked balls in play and all remaining pitches count as 0.
    return 0

def event_predict(event):
    """Return the xwOBA value of one pitch, or NaN when none applies.

    Parameters
    ----------
    event : mapping-like
        One pitch, e.g. a DataFrame row as passed by ``df.apply(..., axis=1)``.
        Reads 'ExitSpeed', 'Angle', 'PitchCall', optionally 'KorBB', and
        'RunValue' for balls in play without batted-ball data.

    Returns
    -------
    float or int
        * ball in play with exit speed and launch angle: ``xwoba_model``
          prediction for ``[ExitSpeed, Angle]``;
        * strikeout: 0;
        * walk: .695;
        * ball in play without batted-ball data: the row's 'RunValue';
        * any other pitch: ``np.nan`` (returned explicitly rather than
          falling off the end with ``None``).
    """
    has_batted_ball_data = not (pd.isna(event['ExitSpeed']) or pd.isna(event['Angle']))
    in_play = event['PitchCall'] == "InPlay"

    if in_play and has_batted_ball_data:
        return xwoba_model.predict([[event['ExitSpeed'], event['Angle']]])[0]

    # .get keeps this consistent with predict_xba and tolerates a missing column.
    outcome = event.get('KorBB', None)
    if outcome == "Strikeout":
        return 0
    if outcome == "Walk":
        return .695  # same value as the "Walk" entry of Weights in this file

    if in_play:
        # No usable batted-ball measurement: fall back to the observed result.
        return event['RunValue']

    # Pitch did not end in a scored outcome; leave it out of xwOBA averages.
    return np.nan

# Score every pitch row by row with the three model helpers defined above.
df['Stuff+'] = df.apply(predict_stuff_plus, axis=1)
df['xBA'] = df.apply(predict_xba, axis=1)
df['xWOBA'] = df.apply(event_predict, axis=1)

# Add Good Swing Decision: swinging at a pitch in the zone, or taking a pitch
# outside it. 'Swing?' and 'In Strike Zone?' are stored as 0/1 integers, so
# they are cast to bool before negating; `~` on the integer columns is a
# bitwise NOT (~0 == -1, ~1 == -2) and made this column come out as -1/-2
# instead of 1/0.
swung = df['Swing?'].astype(bool)
in_zone = df['In Strike Zone?'].astype(bool)
df['Good Swing Decision'] = ((swung & in_zone) | (~swung & ~in_zone)).astype(int)

# Save the modified dataframe.
# NOTE(review): this overwrites the input CSV in place, so re-running the cell
# re-processes data that already contains the derived columns.
df.to_csv(data_path, index=False)

print(f'File processed and saved as {data_path} with new columns.')



File processed and saved as Data/cornbelters6-21.csv with new columns.


In [1]:
import glob
import pandas as pd
# Collect every CSV in ../CornBeltersData. glob returns matches in arbitrary
# order, so sort them to make the row order of the combined file reproducible.
kcl_files = sorted(glob.glob("../CornBeltersData/*.csv"))
all_files = kcl_files

# Fail with a clear message instead of pd.concat's "No objects to concatenate".
if not all_files:
    raise FileNotFoundError("No CSV files found matching ../CornBeltersData/*.csv")

# Read and concatenate all CSVs into one DataFrame
xba_data_list = [pd.read_csv(csv_path) for csv_path in all_files]
df = pd.concat(xba_data_list, ignore_index=True)
df.to_csv('Data/2025.csv', index=False)