In [None]:
# add_xSLG.py: Add an xSLG column to the pitch-level dataset (../Data/2025.csv) for all batted-ball hit types and list batters by average xSLG

# Import libraries
import pandas as pd
import numpy as np
import pickle
import os

# File paths
data_path = '../Data/2025.csv'
output_path = '../Data/2025wSLG.csv'
summary_path = '../Data/batter_SLG.csv'

# Load the pickled model and scaler for each hit type.
# `models` and `scalers` are both keyed by the hit-type label, so a row's
# TaggedHitType value can be used directly to look up its predictor.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load .pkl artifacts that come from a trusted source.
hit_types = ['GroundBall', 'Popup', 'LineDrive', 'FlyBall']
models = {}
scalers = {}
for hit_type in hit_types:
    model_path = f'xSLG/xgb_model_{hit_type}.pkl'
    scaler_path = f'xSLG/scaler_{hit_type}.pkl'
    try:
        with open(model_path, 'rb') as f:
            models[hit_type] = pickle.load(f)
        with open(scaler_path, 'rb') as f:
            scalers[hit_type] = pickle.load(f)
    except FileNotFoundError as e:
        print(f"Error: {e}. Ensure xSLG.py has been run to generate {model_path} and {scaler_path}.")
        # Raise SystemExit directly instead of calling exit(): exit() is a
        # helper injected by the `site` module for interactive use and is not
        # guaranteed to be available when running as a program.
        raise SystemExit(1)

# Load dataset
try:
    df = pd.read_csv(data_path, low_memory=False)
except FileNotFoundError:
    print(f"Error: {data_path} not found.")
    raise SystemExit(1)

# Inspect dataset
print("Dataset columns:", df.columns.tolist())
print("Dataset shape:", df.shape)

# Model input features
features = ['ExitSpeed', 'Angle']

# Check for missing columns BEFORE indexing any of them; otherwise a missing
# column surfaces as a bare KeyError and this message is never reached.
# TaggedHitType is required as well because it selects the model per row.
missing_cols = [col for col in features + ['TaggedHitType'] if col not in df.columns]
if missing_cols:
    print(f"Error: Missing columns {missing_cols} in dataset.")
    raise SystemExit(1)

# Diagnostic only: show the distinct labels of the categorical columns.
# PlayResult is not used by the prediction, so its absence is not fatal.
for col in ('TaggedHitType', 'PlayResult'):
    if col in df.columns:
        print(f"Unique {col} values:", df[col].unique())

# Ensure features are numeric; unparseable entries become NaN
for col in features:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Function to predict xSLG for a row
def predict_xSLG(row, models, scalers, features):
    """Predict xSLG for a single row using the model matching its hit type.

    Args:
        row: Row (e.g. a pandas Series) holding a 'TaggedHitType' entry and
            one entry for every name in ``features``.
        models: Mapping of hit type -> object exposing ``predict``.
        scalers: Mapping of hit type -> object exposing ``transform``.
        features: Ordered list of feature names to read from ``row``.

    Returns:
        The first element of the model's prediction, or NaN when the row's
        hit type has no model or any feature value is missing.
    """
    label = row['TaggedHitType']
    if label not in models:
        return np.nan

    feature_values = row[features]
    if pd.isna(feature_values).any():
        return np.nan

    # Scaler and model both receive a single-sample 2-D array: (1, n_features).
    sample = feature_values.values.reshape(1, -1)
    prediction = models[label].predict(scalers[label].transform(sample))
    return prediction[0]


# Filter batted ball events (boolean mask over all rows of df)
batted_balls = df['TaggedHitType'].isin(hit_types)

# Predict xSLG for batted balls
print("Predicting xSLG for batted balls...")
# Start with an all-NaN column so rows that are not batted balls stay missing.
df['xSLG'] = np.nan
# Only call apply when there is something to predict: on an empty frame,
# apply(axis=1) returns a DataFrame and the column assignment would fail.
if batted_balls.any():
    # Pass only the columns predict_xSLG reads, so each per-row Series is
    # built from a few values instead of every column in the dataset.
    df.loc[batted_balls, 'xSLG'] = df.loc[batted_balls, ['TaggedHitType'] + features].apply(
        lambda row: predict_xSLG(row, models, scalers, features), axis=1
    )

# Move xSLG to the end
cols = [col for col in df.columns if col != 'xSLG'] + ['xSLG']
df = df[cols]

# Check xSLG results
print("xSLG column added. Summary:")
print(df['xSLG'].describe())
print("Missing xSLG values:", df['xSLG'].isna().sum())

# Save updated dataset; create the target directory first, because to_csv
# raises OSError when asked to write into a directory that does not exist.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_path, index=False)
print(f"Updated dataset saved to {output_path}")

# Rows that received an xSLG prediction; reused for both the mean and the counts
scored = df[df['xSLG'].notna()]

# Compute average xSLG by Batter, highest first
xSLG_summary = scored.groupby('Batter')['xSLG'].mean().reset_index()
xSLG_summary = xSLG_summary.sort_values('xSLG', ascending=False)
xSLG_summary.columns = ['Batter', 'Average_xSLG']

# Keep only batters with at least `min_pa` scored rows.
# NOTE: despite the name, this threshold counts rows that have an xSLG value
# (batted balls with a prediction), not plate appearances.
min_pa = 10
pa_counts = scored['Batter'].value_counts()
xSLG_summary = xSLG_summary[xSLG_summary['Batter'].isin(pa_counts[pa_counts >= min_pa].index)]

# Save xSLG summary; create the target directory first so to_csv cannot fail
# on a non-existent directory.
summary_dir = os.path.dirname(summary_path)
if summary_dir:
    os.makedirs(summary_dir, exist_ok=True)
xSLG_summary.to_csv(summary_path, index=False)
print(f"Batter xSLG summary saved to {summary_path}")

# Display all batters; to_string() renders every row, whereas printing the
# DataFrame directly truncates long tables to pandas' display row limit.
print("\nAll Batters by Average xSLG:")
print(xSLG_summary.to_string())

Dataset columns: ['PitchNo', 'Date', 'Time', 'PAofInning', 'PitchofPA', 'Pitcher', 'PitcherId', 'PitcherThrows', 'PitcherTeam', 'Batter', 'BatterId', 'BatterSide', 'BatterTeam', 'PitcherSet', 'Inning', 'Top/Bottom', 'Outs', 'Balls', 'Strikes', 'TaggedPitchType', 'AutoPitchType', 'PitchCall', 'KorBB', 'TaggedHitType', 'PlayResult', 'OutsOnPlay', 'RunsScored', 'Notes', 'RelSpeed', 'VertRelAngle', 'HorzRelAngle', 'SpinRate', 'SpinAxis', 'Tilt', 'RelHeight', 'RelSide', 'Extension', 'VertBreak', 'InducedVertBreak', 'HorzBreak', 'PlateLocHeight', 'PlateLocSide', 'ZoneSpeed', 'VertApprAngle', 'HorzApprAngle', 'ZoneTime', 'ExitSpeed', 'Angle', 'Direction', 'HitSpinRate', 'PositionAt110X', 'PositionAt110Y', 'PositionAt110Z', 'Distance', 'LastTrackedDistance', 'Bearing', 'HangTime', 'TaggedPitchType2', 'GeneralPitchType', 'Vert 1 Strike?', 'Vert 2 Strike?', 'Vert Strike', 'Horz 1 Strike?', 'Horz 2 Strike?', 'Horz Strike', 'Strike?', 'Foul?', 'in Play?', 'Swing Strike?', 'Swing?', 'Chase?', 'in-z



xSLG column added. Summary:
count    1748.000000
mean        0.583185
std         0.919476
min        -0.777304
25%         0.001724
50%         0.063133
75%         0.982894
max         5.547213
Name: xSLG, dtype: float64
Missing xSLG values: 9122


OSError: Cannot save file into a non-existent directory: 'Data'