In [1]:
import pandas as pd

In [2]:
# Import game data
from game_data import game_data

In [3]:
def process_all_csv(files):
    # Concatenate all CSV files into one DataFrame
    combined_df = pd.concat([pd.read_csv(file) for file in files])

    # Filter the DataFrame
    uno_mav_df = combined_df[(combined_df['PitcherTeam'] == 'UNO_MAV') & (combined_df['TaggedPitchType'] != 'Other')]
    uno_mav_df = uno_mav_df.dropna(subset=['RelSpeed', 'SpinRate'])

    # Calculate means for UNO_MAV pitchers
    grouped_uno_mav = uno_mav_df.groupby(['Pitcher', 'TaggedPitchType']).agg(
        Average_Velocity=('RelSpeed', 'mean'),
        Average_SpinRate=('SpinRate', 'mean')
    ).reset_index()

    # Calculate global averages and round them
    global_averages = combined_df.groupby('TaggedPitchType').agg(
        Global_Avg_Velocity=('RelSpeed', 'mean'),
        Global_Avg_SpinRate=('SpinRate', 'mean')
    ).reset_index().round(1)

    # Rename the columns for the merge
    grouped_uno_mav.rename(columns={'TaggedPitchType': 'Pitch'}, inplace=True)
    global_averages.rename(columns={'TaggedPitchType': 'Pitch', 'Global_Avg_Velocity': 'Avg Velo', 'Global_Avg_SpinRate': 'Avg Spin'}, inplace=True)

    # Ensure renaming was successful by printing column names (Debugging step)
    print(grouped_uno_mav)
    print(global_averages)

    # Merge dataframes for UNO_MAV calculations
    merged_data_uno_mav = grouped_uno_mav.merge(global_averages, on='Pitch', how='left')
    merged_data_uno_mav['Velo Score'] = ((merged_data_uno_mav['Average_Velocity'] - merged_data_uno_mav['Avg Velo']) / merged_data_uno_mav['Avg Velo'] * 100).round(2).astype(str) + '%'
    merged_data_uno_mav['Spin Score'] = ((merged_data_uno_mav['Average_SpinRate'] - merged_data_uno_mav['Avg Spin']) / merged_data_uno_mav['Avg Spin'] * 100).round(2).astype(str) + '%'

    # Finalize DataFrame
    final_df = merged_data_uno_mav.drop(columns=['Avg Velo', 'Avg Spin'])
    final_df.rename(columns={
        'Average_Velocity': 'Average Velo',
        'Average_SpinRate': 'Average Spin'
    }, inplace=True)

    # Round and sort
    final_df['Average Velo'] = final_df['Average Velo'].round(1)
    final_df['Average Spin'] = final_df['Average Spin'].round(1)
    final_df = final_df.sort_values(by='Pitcher')

    # Suppress repeating names
    final_df['Pitcher'] = final_df['Pitcher'].where(final_df['Pitcher'] != final_df['Pitcher'].shift(), '')

    # Export to CSV
    final_df.to_csv('Pitch_Velo_Spin_Analysis.csv', index=False)
    
    return final_df


In [4]:
# Process all CSV files and get the final percentage DataFrame
final_df = process_all_csv(game_data)

final_df.head(50)

          Pitcher             Pitch  Average_Velocity  Average_SpinRate
0   Bell, Charlie          ChangeUp         78.056819       1602.995936
1   Bell, Charlie          Fastball         85.437268       2109.107647
2   Bell, Charlie  FourSeamFastBall         85.538749       2112.696791
3   Bell, Charlie            Slider         78.451914       1965.649890
4    Byhre, Chris          ChangeUp         81.053986       1663.578546
..            ...               ...               ...               ...
60    Scott, Reed         Curveball         80.186970       2101.848282
61    Scott, Reed          Fastball         88.215839       2114.167657
62    Scott, Reed  FourSeamFastBall         87.834765       2125.345838
63    Scott, Reed            Slider         80.027738       2144.408771
64    Scott, Reed   TwoSeamFastBall         87.270425       1998.246673

[65 rows x 4 columns]
               Pitch  Avg Velo  Avg Spin
0           ChangeUp      79.8    1677.3
1          Curveball      74.0 

  combined_df = pd.concat([pd.read_csv(file) for file in files])


Unnamed: 0,Pitcher,Pitch,Average Velo,Average Spin,Velo Score,Spin Score
0,"Bell, Charlie",ChangeUp,78.1,1603.0,-2.18%,-4.43%
1,,Fastball,85.4,2109.1,-3.24%,-1.17%
2,,FourSeamFastBall,85.5,2112.7,-2.24%,-0.99%
3,,Slider,78.5,1965.6,-0.82%,-15.9%
4,"Byhre, Chris",ChangeUp,81.1,1663.6,1.57%,-0.82%
5,,Fastball,89.1,2036.5,0.92%,-4.57%
6,,FourSeamFastBall,88.8,2146.5,1.53%,0.59%
7,,Slider,74.1,2271.9,-6.37%,-2.8%
8,,TwoSeamFastBall,88.8,2009.2,0.75%,-2.29%
9,"Curtis, Brayden",ChangeUp,79.6,1804.0,-0.23%,7.55%
