In [None]:
import pandas as pd

# Read the pitch-level CSV and immediately discard any row that lacks a
# 'plate_x' or 'plate_z' value, since the strike-zone check reads both columns
data = pd.read_csv('mlb_data.csv').dropna(subset=['plate_x', 'plate_z'])

# Define the strike zone
def is_in_strike_zone(row, *, x_min=-0.83, x_max=0.83, z_min=1.5, z_max=3.6):
    """Return True when a pitch location lies inside the strike zone.

    The zone is an axis-aligned rectangle; both edges of each range are
    inclusive. The defaults reproduce the fixed zone this script has always
    used, so ``data.apply(is_in_strike_zone, axis=1)`` behaves as before.

    Args:
        row: Mapping-like object (for example a DataFrame row) exposing
            'plate_x' and 'plate_z' values.
        x_min: Smallest 'plate_x' still counted as inside the zone.
        x_max: Largest 'plate_x' still counted as inside the zone.
        z_min: Smallest 'plate_z' still counted as inside the zone.
        z_max: Largest 'plate_z' still counted as inside the zone.

    Returns:
        A plain ``bool``: True if both coordinates fall within their
        ranges, False otherwise (NaN coordinates compare as outside).
    """
    # NOTE(review): the bounds are assumed to be in the same units as the
    # 'plate_x' / 'plate_z' columns (presumably feet) -- confirm with the data.
    horizontally_inside = x_min <= row['plate_x'] <= x_max
    vertically_inside = z_min <= row['plate_z'] <= z_max
    # bool() normalises NumPy booleans to the built-in True / False that the
    # original if/else returned.
    return bool(horizontally_inside and vertically_inside)

# Evaluate the strike-zone check once per row and store the result as a
# new 'InStrikeZone' column
in_zone_flags = data.apply(is_in_strike_zone, axis=1)
data['InStrikeZone'] = in_zone_flags

# Restrict the analysis to pitches whose description is a called ball or a
# called strike
umpire_call_mask = data['description'].isin(['ball', 'called_strike'])
called_pitches = data[umpire_call_mask]

# Summarise call accuracy over the called pitches: overall, for pitches
# called balls, and for pitches called strikes.


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


# Build each boolean mask once instead of re-filtering the frame per metric
in_zone = called_pitches['InStrikeZone']
is_ball = called_pitches['description'] == 'ball'
is_strike = called_pitches['description'] == 'called_strike'

total_called_pitches = len(called_pitches)

# Correct calls: a ball located outside the zone, or a called strike inside it.
# int() keeps these as plain Python ints, matching what len() produced before.
correct_balls = int((~in_zone & is_ball).sum())
correct_strikes = int((in_zone & is_strike).sum())
total_correct_calls = correct_balls + correct_strikes

# Total balls and strikes
total_balls_called = int(is_ball.sum())
total_strikes_called = int(is_strike.sum())

# Accuracy calculations. A category with no pitches yields NaN (printed as
# "nan%") instead of aborting the script with ZeroDivisionError.
total_accuracy = _safe_ratio(total_correct_calls, total_called_pitches)
ball_accuracy = _safe_ratio(correct_balls, total_balls_called)
strike_accuracy = _safe_ratio(correct_strikes, total_strikes_called)

print(f'Total Accuracy: {total_accuracy*100:.2f}%')
print(f'Called Ball Accuracy: {ball_accuracy*100:.2f}%')
print(f'Called Strike Accuracy: {strike_accuracy*100:.2f}%')