In [1]:
import pandas as pd
import numpy as np

# Read the Yakkertech CSV file
df = pd.read_csv("Data/sloths22.csv")

# Rename HitType to TaggedHitType
df = df.rename(columns={'HitType': 'TaggedHitType'})

# Define pitch type mappings for TaggedPitchType2 and GeneralPitchType
pitch_type_map = {
    'FourSeamFastball': ('Fastball', 'Fastball'),
    'Sinker': ('Sinker', 'Fastball'),
    'Cutter': ('Cutter', 'Fastball'),
    'Slider': ('Slider', 'Breaking'),
    'Curveball': ('Curveball', 'Breaking'),
    'Changeup': ('Changeup', 'Offspeed'),
    'Splitter': ('Splitter', 'Offspeed'),
    'Knuckleball': ('Knuckleball', 'Offspeed')
}
#Add Arm Angle
df['VerticalArmAngle'] = np.arctan2(df['RelHeight'], df['Extension']) * (180 / np.pi)


# Add TaggedPitchType2 and GeneralPitchType
df['TaggedPitchType2'] = df['TaggedPitchType'].map(lambda x: pitch_type_map.get(x, ('Other', 'Other'))[0])
df['GeneralPitchType'] = df['TaggedPitchType'].map(lambda x: pitch_type_map.get(x, ('Other', 'Other'))[1])

# Define standard strike zone boundaries
vert_strike_min, vert_strike_max = 1.5, 3.5  # Standard vertical strike zone
horz_strike_min, horz_strike_max = -1.66, 0  # Standard horizontal strike zone

# Add strike zone columns
df['Vert Strike'] = (df['PlateLocHeight'].between(vert_strike_min, vert_strike_max)).astype(int)
df['Horz Strike'] = (df['PlateLocSide'].between(horz_strike_min, horz_strike_max)).astype(int)

# Add pitch outcome columns
df['Strike?'] = (df['PitchCall'] == 'StrikeCalled').astype(int)
df['Foul?'] = (df['PitchCall'] == 'FoulBall').astype(int)
df['In Play?'] = (df['PitchCall'] == 'InPlay').astype(int)
df['Swing Strike?'] = (df['PitchCall'] == 'StrikeSwinging').astype(int)
df['Swing?'] = (df['PitchCall'].isin(['StrikeSwinging', 'FoulBall', 'InPlay'])).astype(int)
df['Ball Called?'] = (df['PitchCall'] == 'BallCalled').astype(int)
df['First Pitch'] = (df['PitchofPA'] == 1).astype(int)

# Add strike zone logic columns
df['In Strike Zone?'] = (df['Vert Strike'] & df['Horz Strike']).astype(int)
df['Chase?'] = (df['Swing?'] & ~df['In Strike Zone?']).astype(int)
df['In-zone take'] = ((df['PitchCall'].isin(['StrikeCalled', 'BallCalled'])) & df['In Strike Zone?']).astype(int)
df['In-zone whiff'] = ((df['PitchCall'] == 'StrikeSwinging') & df['In Strike Zone?']).astype(int)
df['In-zone/swing'] = (df['Swing?'] & df['In Strike Zone?']).astype(int)

# Add batted ball columns
df['LA<10'] = ((df['Angle'] < 10) & (df['PitchCall'] == 'InPlay')).astype(int)
df['Ground Ball?'] = ((df['TaggedHitType'] == 'GroundBall') | ((df['Angle'] < 10) & (df['PitchCall'] == 'InPlay'))).astype(int)
df['Fly Ball?'] = ((df['TaggedHitType'] == 'FlyBall') | ((df['Angle'] > 25) & (df['PitchCall'] == 'InPlay'))).astype(int)
df['EV>90'] = ((df['ExitSpeed'] > 90) & (df['PitchCall'] == 'InPlay')).astype(int)
df['EV>100'] = ((df['ExitSpeed'] > 100) & (df['PitchCall'] == 'InPlay')).astype(int)
df['EV>105'] = ((df['ExitSpeed'] > 105) & (df['PitchCall'] == 'InPlay')).astype(int)

# Add Good Swing Decision
df['Good Swing Decision'] = (df['In-zone/swing'] | (~df['Swing?'] & ~df['In Strike Zone?'])).astype(int)

# Save the modified dataframe to a new CSV
df.to_csv('Data/sloths22.csv', index=False)

print("File processed and saved as 'modified_yakkertech_file.csv' with new columns.")

File processed and saved as 'modified_yakkertech_file.csv' with new columns.


In [None]:
import pandas as pd

# List of Excel file paths
files = ['05_27_2025 7_30_10 PM-KCL GroundSloths 2025@KCL Merchants 2025.csv', '05_27_2025 6_05_46 PM-KCL BobCats 2025@KCL BlueCaps 2025.csv', '05_27_2025 6_05_46 PM-KCL BobCats 2025@KCL BlueCaps 2025.csv']

# Read and concatenate all files
merged_df = pd.concat([pd.read_csv(file) for file in files], ignore_index=True)

# Save to a new Excel file
merged_df.to_csv('merged_output.csv', index=False)

In [3]:
df = pd.read_csv('modified_yakkertech_file.csv')

In [6]:
df[df['TaggedPitchType'] =='Changeup']

Unnamed: 0,PitchNo,Date,Time,PAofInning,PitchofPA,Pitcher,PitcherId,PitcherThrows,PitcherTeam,Batter,...,In-zone take,In-zone whiff,In-zone/swing,LA<10,Ground Ball?,Fly Ball?,EV>90,EV>100,EV>105,Good Swing Decision
189,190,05/27/2025,9:06:48 PM,2,1,Caleb Ochs,,Right,Kcl merchants 2025,Brandon Schultz,...,0,0,0,0,0,0,0,0,0,-1
224,225,05/27/2025,9:21:38 PM,5,4,Caleb Ochs,,Right,Kcl merchants 2025,Ty Cribbett,...,0,0,0,0,0,0,0,0,0,-2
232,3,05/27/2025,6:08:16 PM,1,3,Payton Knoll,,Left,Kcl bluecaps 2025,Colin Karr,...,0,0,0,0,0,0,0,0,0,-2
235,6,05/27/2025,6:08:55 PM,2,3,Payton Knoll,,Left,Kcl bluecaps 2025,Brady Veselack,...,0,0,0,0,0,0,0,0,0,-2
266,37,05/27/2025,6:23:02 PM,1,4,Ethan Robbins,,Left,Kcl bluecaps 2025,Kaileb Hackman,...,0,0,0,0,0,0,0,0,0,-1
334,3,05/27/2025,6:08:16 PM,1,3,Payton Knoll,,Left,Kcl bluecaps 2025,Colin Karr,...,0,0,0,0,0,0,0,0,0,-2
337,6,05/27/2025,6:08:55 PM,2,3,Payton Knoll,,Left,Kcl bluecaps 2025,Brady Veselack,...,0,0,0,0,0,0,0,0,0,-2
368,37,05/27/2025,6:23:02 PM,1,4,Ethan Robbins,,Left,Kcl bluecaps 2025,Kaileb Hackman,...,0,0,0,0,0,0,0,0,0,-1
