In [None]:
import pandas as pd
import numpy as np
import pickle
# Read the Yakkertech CSV file
data_path = 'Data/report2.csv'
df = pd.read_csv(data_path)

# SECURITY NOTE: pickle.load can execute arbitrary code embedded in the file.
# Only load a model pickle that comes from a trusted source.
with open("../Stuff+/stuff_plus_model.pkl", "rb") as f:
    stuff_plus_model = pickle.load(f)

# The processed frame is written back over data_path further down, so a re-run
# of this cell reads a file that has already been transformed. The derived
# 'In Strike Zone?' column is only ever created here, so its presence marks an
# already-processed file and lets one-shot transforms be skipped.
already_processed = 'In Strike Zone?' in df.columns

# Rename HitType to TaggedHitType
df = df.rename(columns={'HitType': 'TaggedHitType'})

# Define pitch type mappings: raw tag -> (TaggedPitchType2, GeneralPitchType)
pitch_type_map = {
    'FourSeamFastball': ('Fastball', 'Fastball'),
    'Sinker': ('Sinker', 'Fastball'),
    'Cutter': ('Cutter', 'Fastball'),
    'Slider': ('Slider', 'Breaking'),
    'Curveball': ('Curveball', 'Breaking'),
    'Changeup': ('Changeup', 'Offspeed'),
    'Splitter': ('Splitter', 'Offspeed'),
    'Knuckleball': ('Knuckleball', 'Offspeed')
}

# Add TaggedPitchType2 and GeneralPitchType; unmapped (or missing) tags become 'Other'
df['TaggedPitchType2'] = df['TaggedPitchType'].map(lambda x: pitch_type_map.get(x, ('Other', 'Other'))[0])
df['GeneralPitchType'] = df['TaggedPitchType'].map(lambda x: pitch_type_map.get(x, ('Other', 'Other'))[1])

# Define standard strike zone boundaries
vert_strike_min, vert_strike_max = 1.5, 3.5  # Standard vertical strike zone
horz_strike_min, horz_strike_max = -1, 1  # Standard horizontal strike zone

# Mirror the horizontal plate location exactly once. Without the guard, every
# re-run would flip the sign again and persist the flipped values to disk.
if not already_processed:
    df['PlateLocSide'] = df['PlateLocSide'] * -1

# Boolean masks shared by the derived columns below. Keeping them boolean means
# `~` is a logical negation instead of a bitwise NOT on 0/1 integers.
pitch_call = df['PitchCall']
in_vert = df['PlateLocHeight'].between(vert_strike_min, vert_strike_max)
in_horz = df['PlateLocSide'].between(horz_strike_min, horz_strike_max)
in_zone = in_vert & in_horz
swing = pitch_call.isin(['StrikeSwinging', 'FoulBall', 'InPlay'])
in_play = pitch_call == 'InPlay'

# Add strike zone columns
df['Vert Strike'] = in_vert.astype(int)
df['Horz Strike'] = in_horz.astype(int)

# Add pitch outcome columns
df['Strike?'] = (pitch_call == 'StrikeCalled').astype(int)
df['Foul?'] = (pitch_call == 'FoulBall').astype(int)
df['In Play?'] = in_play.astype(int)
df['Swing Strike?'] = (pitch_call == 'StrikeSwinging').astype(int)
df['Swing?'] = swing.astype(int)
df['Ball Called?'] = (pitch_call == 'BallCalled').astype(int)
df['First Pitch'] = (df['PitchofPA'] == 1).astype(int)

# Add strike zone logic columns
df['In Strike Zone?'] = in_zone.astype(int)
df['Chase?'] = (swing & ~in_zone).astype(int)
df['In-zone take'] = (pitch_call.isin(['StrikeCalled', 'BallCalled']) & in_zone).astype(int)
df['In-zone whiff'] = ((pitch_call == 'StrikeSwinging') & in_zone).astype(int)
df['In-zone/swing'] = (swing & in_zone).astype(int)

# Add batted ball columns (comparisons against NaN are False, so pitches
# without a measured angle / exit speed get 0)
low_launch = df['Angle'] < 10
df['LA<10'] = (low_launch & in_play).astype(int)
df['Ground Ball?'] = ((df['TaggedHitType'] == 'GroundBall') | (low_launch & in_play)).astype(int)
df['Fly Ball?'] = ((df['TaggedHitType'] == 'FlyBall') | ((df['Angle'] > 25) & in_play)).astype(int)
df['EV>90'] = ((df['ExitSpeed'] > 90) & in_play).astype(int)
df['EV>100'] = ((df['ExitSpeed'] > 100) & in_play).astype(int)
df['EV>105'] = ((df['ExitSpeed'] > 105) & in_play).astype(int)

def predict_stuff_plus(event):
    """Return the scaled Stuff+ prediction for a single pitch.

    Parameters
    ----------
    event : mapping-like
        One pitch record supporting ``in`` and ``[]`` lookups by feature name
        (for example a DataFrame row handed over by ``df.apply(..., axis=1)``).

    Returns
    -------
    float
        The model's raw prediction multiplied by ``100 / 0.5843009753831863``,
        or ``np.nan`` when any required feature is absent or missing (NaN).
    """
    required = ['RelSpeed', 'SpinRate', 'RelHeight', 'RelSide', 'Extension', 'InducedVertBreak', 'HorzBreak']

    # Bail out as soon as one required input is absent or NaN.
    for name in required:
        if name not in event or pd.isna(event[name]):
            return np.nan  # Not enough data to predict

    # The model is queried with a single-row batch, features in the order above.
    feature_row = [event[name] for name in required]
    raw_score = stuff_plus_model.predict([feature_row])[0]

    # Rescale so that a raw prediction equal to this constant maps to 100.
    # NOTE(review): presumably the mean of the model's training target -- confirm.
    mean_playresult = 0.5843009753831863
    return raw_score * (100 / mean_playresult)
# Score every pitch with the Stuff+ model (NaN where required features are missing)
df['Stuff+'] = df.apply(predict_stuff_plus, axis=1)

# Add Good Swing Decision: 1 when the batter swung at a pitch in the zone or
# took a pitch outside it, 0 otherwise.
# 'Swing?' and 'In Strike Zone?' are stored as 0/1 integers, and `~` on an
# integer column is a bitwise NOT (~0 == -1, ~1 == -2), which produced -1/-2
# instead of 1/0. Convert to booleans before applying the logic.
swung = df['Swing?'].astype(bool)
in_zone = df['In Strike Zone?'].astype(bool)
df['Good Swing Decision'] = ((swung & in_zone) | (~swung & ~in_zone)).astype(int)

# Save the modified dataframe back over the input CSV
df.to_csv(data_path, index=False)

print(f'File processed and saved as {data_path} with new columns.')

FileNotFoundError: [Errno 2] No such file or directory: 'Data/week2.csv'

In [26]:
import pandas as pd

# Load the cleaned pitch data and discard its existing PitchNo column
df = pd.read_csv("Data/clean.csv").drop(columns='PitchNo')

# Recreate PitchNo as a sequential column starting from 1
df = df.assign(PitchNo=range(1, len(df) + 1))

# Move PitchNo to the first column, keeping the other columns in their order
cols = ['PitchNo'] + [name for name in df.columns if name != 'PitchNo']
df = df[cols]

# Save the renumbered DataFrame
df.to_csv("Data/clean1.csv", index=False)