In [9]:
import pandas as pd

In [10]:
def process_all_csv(files):
    """Summarise per-pitcher at-bat statistics for 'UNO_MAV' pitchers.

    Each file is read with ``pd.read_csv`` and must provide the columns
    ``PitcherTeam``, ``Pitcher``, ``Batter``, ``PitchofPA`` and ``PitchCall``.

    Parameters
    ----------
    files : iterable of str or path-like
        CSV files to aggregate. An empty iterable yields an empty summary.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Pitcher`` and sorted by ``Batters`` (descending), with:

        * ``Batters`` - at-bats credited to the pitcher who threw the last
          pitch of the at-bat.
        * ``1PK%``    - first pitches (``PitchofPA == 1``) that were strikes,
          as a percentage of ``Batters``.
        * ``>3P%``    - at-bats that ended on pitch three or earlier, as a
          percentage of ``Batters``. The column key is kept as-is for
          compatibility even though the condition is ``PitchofPA <= 3``.

        Missing percentages are reported as 0 and values are rounded to two
        decimals.
    """
    # Pitch calls that count as a strike for the 1PK% numerator.
    strike_calls = ["StrikeCalled", "StrikeSwinging", "InPlay",
                    "FoulBallNotFieldable", "FoulBall"]

    files = list(files)
    if not files:
        # pd.concat raises on an empty list; return an empty summary instead.
        return pd.DataFrame(columns=["Batters", "1PK%", ">3P%"],
                            index=pd.Index([], name="Pitcher"))

    # Key every row by the position of its source file so that "next pitch"
    # look-ups below can never cross a file boundary.
    games = [pd.read_csv(path) for path in files]
    all_pitches = pd.concat(games, keys=range(len(games)))
    pitches = all_pitches[all_pitches["PitcherTeam"] == "UNO_MAV"]

    # A pitch ends its at-bat when, within the same file, the next pitch is
    # thrown to a different batter (or there is no next pitch), or when the
    # pitch counter fails to increase -- i.e. the same batter has started a
    # new plate appearance. NaN comparisons are False, so rows without a
    # PitchofPA value fall back to the batter-change rule alone.
    per_file = pitches.groupby(level=0)
    next_batter = per_file["Batter"].shift(-1)
    next_pitch_no = per_file["PitchofPA"].shift(-1)
    ends_at_bat = (pitches["Batter"] != next_batter) | (next_pitch_no <= pitches["PitchofPA"])
    last_pitches = pitches[ends_at_bat]

    # At-bats per pitcher, and how many of them were over within three pitches.
    batters_faced = last_pitches.groupby("Pitcher").size()
    quick_at_bats = last_pitches[last_pitches["PitchofPA"] <= 3].groupby("Pitcher").size()

    # First-pitch strikes per pitcher.
    first_pitches = pitches[pitches["PitchofPA"] == 1]
    first_pitch_strikes = (
        first_pitches[first_pitches["PitchCall"].isin(strike_calls)]
        .groupby("Pitcher")
        .size()
    )

    summary = pd.DataFrame({
        "Batters": batters_faced,
        "1PK%": first_pitch_strikes / batters_faced * 100,
        ">3P%": quick_at_bats / batters_faced * 100,
    }).sort_values(by="Batters", ascending=False)

    # Pitchers absent from a numerator come out as NaN after alignment; show 0.
    return summary.fillna(0).round(2)

In [11]:
# Game CSV files to aggregate, in the order they will be read.
# The paths are relative, so they resolve against the kernel's working directory.
game_data = [
    "20240306-TalAndersonField-1_unverified.csv",
    "20240315-TalAndersonField-1_unverified.csv",
    "20240315-TalAndersonField-Private-4_unverified.csv",
    "20240316-TalAndersonField-1_unverified.csv",
    "20240316-TalAndersonField-2_unverified.csv",
    "20240328-TalAndersonField-1_unverified.csv",
    "20240329-TalAndersonField-1_unverified.csv",
    "20240330-TalAndersonField-1_unverified.csv",
    "20240409-TalAndersonField-Private-1_unverified.csv",
    "20240409-TalAndersonField-1_unverified.csv",
    "20240410-TalAndersonField-Private-1_unverified.csv",
    "20240410-TalAndersonField-Private-2_unverified.csv",
    "20240412-TalAndersonField-1_unverified.csv",
    "20240413-TalAndersonField-1_unverified.csv",
    "20240414-TalAndersonField-1_unverified.csv",
    "20240417-TalAndersonField-Private-1_unverified.csv",
]


In [12]:
# Build the per-pitcher summary from every listed game file, then display it
final_df = process_all_csv(files=game_data)
final_df


Unnamed: 0_level_0,Batters,1PK%,>3P%
Pitcher,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
"Riedel, Caleb",50,60.0,44.0
"Bell, Charlie",49,61.22,42.86
"Louthan, Ethan",36,63.89,36.11
"Hackmann, Joe",26,65.38,46.15
"Curtis, Brayden",22,72.73,36.36
"Gainer, Luke",19,63.16,15.79
"Scanlon, Nick",16,43.75,37.5
"Byhre, Chris",15,66.67,20.0
"Kreiling, Harrison",15,73.33,53.33
"Ingram, Gage",9,77.78,55.56
