In [1]:
import pandas as pd

In [2]:
# Import game data
from game_data import game_data

In [3]:
def process_all_csv(files, team='UNO_MAV', output_path='Pitcher_Count_Analysis.csv'):
    """Summarise first-pitch-strike and quick-at-bat rates per pitcher.

    Reads every CSV in ``files``, keeps the pitches thrown by ``team``, and
    builds one row per pitcher with:

    * ``Batters`` - number of at-bats the pitcher finished,
    * ``1PK%``    - first-pitch strikes as a percentage of ``Batters``,
    * ``>3P%``    - percentage of at-bats that ended in three pitches or
      fewer (the column name is kept as-is for compatibility with existing
      consumers of the CSV, even though the value is a "<= 3" share).

    The CSVs must provide the columns ``PitcherTeam``, ``Pitcher``,
    ``Batter``, ``PitchofPA`` and ``PitchCall``.

    Parameters
    ----------
    files : iterable of path-like
        CSV files to read. Rows are assumed to be in pitch order within a
        file.
    team : str, default 'UNO_MAV'
        Value of ``PitcherTeam`` to keep.
    output_path : path-like or None, default 'Pitcher_Count_Analysis.csv'
        Where the summary is written as CSV. Pass ``None`` to skip writing.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pitcher``, ``Batters``, ``1PK%``, ``>3P%``, sorted by
        pitcher name, percentages rounded to two decimals. Only pitchers
        who finished at least one at-bat are listed.

    Raises
    ------
    ValueError
        If ``files`` is empty.
    """
    first_pitch_strike_calls = [
        'StrikeCalled', 'StrikeSwinging', 'InPlay', 'FoulBallNotFieldable', 'FoulBall'
    ]

    files = list(files)
    if not files:
        raise ValueError("process_all_csv needs at least one CSV file to read")

    # Tag each row with the position of the file it came from so that at-bat
    # boundaries can be detected per file instead of across the concatenation.
    frames = [
        pd.read_csv(file).assign(_source_file=position)
        for position, file in enumerate(files)
    ]
    # ignore_index avoids duplicate row labels coming from the individual files.
    combined_df = pd.concat(frames, ignore_index=True)

    # Keep only the pitches thrown by the requested team.
    team_df = combined_df[combined_df['PitcherTeam'] == team].copy()

    # A pitch ends an at-bat when the next team pitch is to a different batter,
    # or when it is the last team pitch in its file. Without the file check the
    # same batter ending one file and starting the next would be merged into a
    # single at-bat (presumably each file is one game - confirm with the data).
    batter_changes = team_df['Batter'] != team_df['Batter'].shift(-1)
    file_changes = team_df['_source_file'] != team_df['_source_file'].shift(-1)
    team_df['is_last_pitch_of_ab'] = batter_changes | file_changes

    last_pitches_of_abs = team_df[team_df['is_last_pitch_of_ab']]

    # Denominator for both percentages: at-bats finished by each pitcher.
    total_at_bats_by_pitcher = last_pitches_of_abs.groupby('Pitcher').size()
    pitchers = total_at_bats_by_pitcher.index

    # At-bats that were over within three pitches.
    quick_at_bats_by_pitcher = (
        last_pitches_of_abs[last_pitches_of_abs['PitchofPA'] <= 3]
        .groupby('Pitcher')
        .size()
    )

    # First pitches of a plate appearance that went for a strike.
    first_pitches_df = team_df[team_df['PitchofPA'] == 1]
    strike_first_pitches_by_pitcher = (
        first_pitches_df[first_pitches_df['PitchCall'].isin(first_pitch_strike_calls)]
        .groupby('Pitcher')
        .size()
    )

    # Align both numerators on the denominator's index before dividing. A
    # pitcher can own a first-pitch strike without finishing any at-bat (e.g.
    # a pitching change mid at-bat); dividing unaligned Series would then
    # yield more rows than there are pitchers and break the table assembly.
    percentage_3_pitches_or_less = (
        quick_at_bats_by_pitcher.reindex(pitchers, fill_value=0)
        / total_at_bats_by_pitcher * 100
    )
    percentage_1pk = (
        strike_first_pitches_by_pitcher.reindex(pitchers, fill_value=0)
        / total_at_bats_by_pitcher * 100
    )

    # Assemble the summary with 'Pitcher' as a regular column.
    combined_percentage_df = pd.DataFrame({
        'Pitcher': pitchers,
        'Batters': total_at_bats_by_pitcher.to_numpy(),
        '1PK%': percentage_1pk.to_numpy(),
        '>3P%': percentage_3_pitches_or_less.to_numpy(),
    }).sort_values(by='Pitcher', ascending=True)

    # Clean up the DataFrame (NaN handling, rounding).
    combined_percentage_df = combined_percentage_df.fillna(0).round(2)

    # Export the summary unless the caller opted out.
    if output_path is not None:
        combined_percentage_df.to_csv(output_path, index=False)

    return combined_percentage_df

In [4]:
# Build the per-pitcher summary from every game CSV listed in game_data,
# then leave it as the cell's last expression so the notebook renders it.
final_df = process_all_csv(files=game_data)
final_df


  combined_df = pd.concat([pd.read_csv(file) for file in files])


Unnamed: 0,Pitcher,Batters,1PK%,>3P%
0,"Bell, Charlie",121,62.81,44.63
1,"Byhre, Chris",15,66.67,20.0
2,"Curtis, Brayden",27,62.96,33.33
3,"Dreher, Matt",20,35.0,25.0
4,"Gainer, Luke",62,61.29,46.77
5,"Hackmann, Joe",69,62.32,39.13
6,"Ingram, Gage",13,69.23,61.54
7,"Kreiling, Harrison",91,63.74,42.86
8,"Louthan, Ethan",61,62.3,40.98
9,"Navin, Carter",12,50.0,41.67
