In [5]:
import pandas as pd

In [6]:
def process_all_csv(files):
    # Concatenate all CSV files into one DataFrame
    combined_df = pd.concat([pd.read_csv(file) for file in files])

    # Filter to include only pitchers from 'UNO_MAV'
    uno_mav_df = combined_df[combined_df['PitcherTeam'] == 'UNO_MAV']

    # Group by 'Pitcher' and 'TaggedPitchType' and aggregate 'PitchCall'
    grouped_df = uno_mav_df.groupby(['Pitcher', 'TaggedPitchType'])['PitchCall'].apply(list).reset_index(name='PitchCall')

    # Calculate total pitches
    total_pitches = uno_mav_df.groupby(['Pitcher', 'TaggedPitchType']).size().reset_index(name='Times Thrown')

    # Merge the total pitches data with the grouped DataFrame
    final_df = pd.merge(grouped_df, total_pitches, on=['Pitcher', 'TaggedPitchType'], how='left')

    # Define a function to calculate the strike percentage
    def calculate_strike_percentage(pitch_calls):
        counts = pd.Series(pitch_calls).value_counts()
        total_pitches = len(pitch_calls)
        strike_types = ['InPlay', 'FoulBall', 'FoulBallNotFieldable', 'StrikeCalled', 'StrikeSwinging']
        strike_count = sum(counts.get(pitch_type, 0) for pitch_type in strike_types)
        return (strike_count / total_pitches) * 100 if total_pitches else 0

    # Apply the function to get the 'Strike%' column
    final_df['Strike%'] = final_df['PitchCall'].apply(calculate_strike_percentage)

    # Calculate 'Whiff Rate'
    final_df['Whiff Rate'] = final_df['PitchCall'].apply(lambda x: 
                             (x.count('StrikeSwinging') / 
                             (x.count('InPlay') + x.count('FoulBall') + x.count('StrikeSwinging')) * 100 
                              if (x.count('InPlay') + x.count('FoulBall') + x.count('StrikeSwinging')) > 0 else 0))

    # Handle NaN and round percentages
    final_df['Strike%'] = final_df['Strike%'].fillna(0).round(0).astype(int).astype(str) + '%'
    final_df['Whiff Rate'] = final_df['Whiff Rate'].fillna(0).round(0).astype(int).astype(str) + '%'
    
    # Rename the columns as needed
    final_df = final_df.rename(columns={
        'TaggedPitchType': 'Pitch Type'
    })

    # Select only the required columns for the final output
    final_df = final_df[['Pitcher', 'Pitch Type', 'Times Thrown', 'Strike%', 'Whiff Rate']]

    # Export the DataFrame to a CSV file
    final_df.to_csv('Pitch_Control_Analysis.csv', index=False)

    return final_df
   

In [7]:
# List of CSV files
game_data = [
    '20240306-TalAndersonField-1_unverified.csv',
    '20240315-TalAndersonField-1_unverified.csv',
    '20240315-TalAndersonField-Private-4_unverified.csv',
    '20240316-TalAndersonField-1_unverified.csv',
    '20240316-TalAndersonField-2_unverified.csv',
    '20240328-TalAndersonField-1_unverified.csv',
    '20240329-TalAndersonField-1_unverified.csv',
    '20240330-TalAndersonField-1_unverified.csv', 
    '20240409-TalAndersonField-Private-1_unverified.csv',
    '20240409-TalAndersonField-1_unverified.csv',
    '20240410-TalAndersonField-Private-1_unverified.csv',
    '20240410-TalAndersonField-Private-2_unverified.csv',
    '20240412-TalAndersonField-1_unverified.csv',
    '20240413-TalAndersonField-1_unverified.csv',
    '20240414-TalAndersonField-1_unverified.csv',
    '20240417-TalAndersonField-Private-1_unverified.csv'
]


In [8]:
# Process all CSV files and get the final percentage DataFrame
final_df = process_all_csv(game_data)

final_df.head(50)

Unnamed: 0,Pitcher,Pitch Type,Times Thrown,Strike%,Whiff Rate
0,"Bell, Charlie",ChangeUp,45,76%,62%
1,"Bell, Charlie",Fastball,70,57%,41%
2,"Bell, Charlie",FourSeamFastBall,35,57%,57%
3,"Bell, Charlie",Slider,34,76%,30%
4,"Byhre, Chris",ChangeUp,18,33%,75%
5,"Byhre, Chris",Fastball,37,57%,29%
6,"Byhre, Chris",FourSeamFastBall,14,43%,50%
7,"Byhre, Chris",Slider,1,100%,0%
8,"Byhre, Chris",TwoSeamFastBall,1,0%,0%
9,"Curtis, Brayden",ChangeUp,9,67%,0%
