# Python Library Imports

In [66]:
import numpy as np
import plotly.express as px
import pandas as pd
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

# Local Imports

In [22]:
from utils.baseball_data_loader import (
    PITCH_TYPES,
    CODE_TYPES,
    load_baseball_df,
    get_pitch_locations,
    scatter_trace,
    generate_sampled_pitch_view)

# Echo both label collections imported from the loader module: a heading
# line for each, followed by its entries printed one per line.
for heading, labels in (
        ("\nTypes Of Pitches Recorded...\n", PITCH_TYPES),
        ("\nTypes Of At Bat Results Recorded...\n", CODE_TYPES)):
    print(heading)
    print("\n".join(map(str, labels)))


Types Of Pitches Recorded...

Four Seam Fastball
Slider
Two Seam Fastball
Changeup
Sinker
Curveball
Cutter
Knuckle Curve
Splitter
Knuckleball
Eephus
Pitch Out
Pitch Out
Screwball
Unidentified
Fastball
Intentional Ball

Types Of At Bat Results Recorded...

Ball
Ball In Dirt
Swinging Strike
Called Strike
Foul
Foul Tip
Foul Bunt
Intentional Ball
Blocked
Missed Bunt
Pitch Out
Swinging Pitch Out
Foul Pitch Out
In Play Out(s)
In Play No Out
In Play Runs


# Load and Describe The Dataset

In [23]:
# Build the working DataFrame with the project loader, pointing it at the
# relative "./data" directory (resolved against the notebook's working dir).
df = load_baseball_df("./data")

In [24]:
# Preview the first rows of the loaded frame to check its columns.
df.head()

Unnamed: 0,px,pz,start_speed,end_speed,spin_rate,spin_dir,break_angle,break_length,break_y,ax,...,umpire_1B,umpire_2B,umpire_3B,umpire_HP,venue_name,weather,wind,delay,batters_name,pitcher_name
0,0.416,2.963,92.9,84.1,2305.052,159.235,-25.0,3.2,23.7,7.665,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
1,-0.191,2.347,92.8,84.1,2689.935,151.402,-40.7,3.4,23.7,12.043,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
2,-0.518,3.284,94.1,85.2,2647.972,145.125,-43.7,3.7,23.7,14.368,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
3,-0.641,1.221,91.0,84.0,1289.59,169.751,-1.3,5.0,23.8,2.104,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
4,-1.821,2.083,75.4,69.6,1374.569,280.671,18.4,12.0,23.8,-10.28,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester


## Constants For Plotting

In [25]:
# Batter to analyse: used to filter df on `batters_name` and in the printed
# summary headings.
NAME = "Freddie Freeman"

In [27]:
# Restrict the full frame to the rows whose `batters_name` equals NAME.
batter_df = df.loc[df['batters_name'].eq(NAME)]

In [29]:
# Preview the first rows of the batter-specific subset.
batter_df.head()

Unnamed: 0,px,pz,start_speed,end_speed,spin_rate,spin_dir,break_angle,break_length,break_y,ax,...,umpire_1B,umpire_2B,umpire_3B,umpire_HP,venue_name,weather,wind,delay,batters_name,pitcher_name
574,-1.185,3.632,91.5,84.2,1883.882,244.463,29.9,6.5,23.8,-15.713,...,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16,Freddie Freeman,Henderson Alvarez
575,-0.399,3.182,81.1,73.4,1788.102,254.031,23.5,9.0,23.7,-14.062,...,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16,Freddie Freeman,Henderson Alvarez
576,-0.93,2.859,91.1,83.8,1807.484,245.16,27.9,6.6,23.8,-15.092,...,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16,Freddie Freeman,Henderson Alvarez
639,0.396,0.939,85.8,79.6,1003.269,286.212,10.8,9.3,23.8,-8.327,...,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16,Freddie Freeman,Henderson Alvarez
640,-1.413,2.64,92.5,84.4,1817.218,213.458,23.5,4.7,23.7,-9.352,...,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16,Freddie Freeman,Henderson Alvarez


In [33]:
# Report the number of rows selected for NAME.
# NOTE(review): each row carries per-pitch fields (start_speed, code, ...),
# so this count looks like pitches seen rather than at bats - confirm and
# relabel the message if so.
print("Total At Bats By {}: {}".format(NAME, len(batter_df)))

Total At Bats By Freddie Freeman: 9340


In [36]:
# Headline first, then the fraction of batter_df rows recorded under each
# `code` value. value_counts() leaves out missing codes while the divisor
# counts every row, so the fractions may total slightly less than 1.
print("{} At Plate Distribution.".format(NAME))
batter_df['code'].value_counts().div(len(batter_df))

Freddie Freeman At Plate Distribution.


Ball                0.348501
Foul                0.212741
Swinging Strike     0.118522
In Play Out(s)      0.098501
Called Strike       0.085653
In Play No Out      0.042612
In Play Runs        0.027837
Ball In Dirt        0.022805
Foul Tip            0.010064
Intentional Ball    0.008030
Blocked             0.005996
Foul Bunt           0.000535
Name: code, dtype: float64

In [38]:
# Headline first, then the fraction of batter_df rows recorded under each
# `event` value (missing events are skipped by value_counts()).
# NOTE(review): if batter_df holds one row per pitch, every event is weighted
# by the number of pitches in its plate appearance - confirm before reading
# these shares as per-at-bat rates.
print("{} Final Result Distribution.".format(NAME))
batter_df['event'].value_counts().div(len(batter_df))

Freddie Freeman Final Result Distribution.


Strikeout           0.264882
Walk                0.145075
Single              0.127837
Groundout           0.126017
Flyout              0.094647
Lineout             0.058672
Double              0.051071
Home Run            0.035760
Pop Out             0.021199
Intent Walk         0.020664
Grounded Into DP    0.011777
Hit By Pitch        0.010493
Forceout            0.008030
Field Error         0.006210
Sac Fly             0.005567
Triple              0.003854
Double Play         0.003747
Strikeout - DP      0.002784
Runner Out          0.001071
Fielders Choice     0.000428
Triple Play         0.000214
Name: event, dtype: float64

## Ball and Hit Locations

In [60]:
# Keep only the rows whose `code` is exactly 'Ball', then display how many
# such rows there are.
balls = batter_df.loc[batter_df['code'].eq('Ball')]
balls.shape[0]

3255

In [56]:
# Keep only the rows whose `code` is exactly 'In Play No Out'.
hits = batter_df.loc[batter_df['code'].eq('In Play No Out')]

In [58]:
# 2-D density heatmap of the `balls` rows, binned on the 'px' and 'pz' columns.
# The bare name `px` is plotly.express; the string 'px' is a DataFrame column.
px.density_heatmap(balls, x='px', y='pz')

In [67]:
def generate_sampled_batter_view(df, name, sample_rates,
                                 x_col='x', y_col='y', seed=None):
    """Build a 2-D histogram figure with one trace per sampling rate.

    For every entry of ``sample_rates`` a random subset of the rows of ``df``
    (drawn without replacement) is binned into a ``go.Histogram2d`` trace.
    Only the first trace starts out visible; the returned slider description
    contains one step per rate that shows the matching trace and hides the
    others.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows. Must contain the columns named by ``x_col`` and ``y_col``.
    name : str
        Trace name assigned to every histogram.
    sample_rates : iterable of float
        Fraction of ``df`` to sample for each trace. ``int(rate * len(df))``
        rows are drawn, so a value that asks for more rows than ``df`` holds
        (or a negative count) makes NumPy raise ``ValueError``.
    x_col, y_col : str, optional
        Columns plotted on the x and y axes. The defaults ``'x'`` / ``'y'``
        keep the original behavior; the notebook's density heatmap uses
        ``'px'`` / ``'pz'``, which can be passed here to plot the same fields.
    seed : int or None, optional
        When given, sampling uses a dedicated ``RandomState(seed)`` so the
        figure is reproducible. ``None`` (default) draws from NumPy's global
        random state, exactly as before.

    Returns
    -------
    tuple
        ``(figure, slider_descriptions)``. The sliders are returned rather
        than applied so the caller can invoke
        ``figure.update_layout(sliders=slider_descriptions)``.
    """
    sample_rates = list(sample_rates)
    n_rows = len(df)
    # Unseeded calls keep using the global NumPy state for backward compatibility.
    sampler = np.random if seed is None else np.random.RandomState(seed)

    figure = go.Figure()
    for position, rate in enumerate(sample_rates):
        indices = sampler.choice(n_rows, int(rate * n_rows), replace=False)
        figure.add_trace(go.Histogram2d(
            x=df[x_col].iloc[indices], y=df[y_col].iloc[indices],
            # Make the first data set visible on initial render. Setting it
            # here avoids indexing figure.data[0], which fails when
            # sample_rates is empty.
            visible=(position == 0), name=name))

    steps = []
    for position, rate in enumerate(sample_rates):
        # Exactly one trace (the one for this step) is shown per slider stop.
        visibility = [other == position for other in range(len(sample_rates))]
        steps.append(dict(
            method='update',
            # Without a label the slider shows plotly's default step text
            # instead of the rate itself.
            label=str(rate),
            args=[
                dict(visible=visibility),
                dict(title='Sample Rate {}'.format(rate))
            ]))

    if sample_rates:
        # Keep the initial title in sync with the initially active step.
        figure.update_layout(title='Sample Rate {}'.format(sample_rates[0]))

    slider_descriptions = [
        dict(
            # Index of the step matching the initially visible trace. The
            # previous value (10) was out of range for a ten-step slider.
            active=0,
            currentvalue={'prefix': 'Sample Rate: '},
            pad={'t': 50},
            steps=steps
        )
    ]

    # Return a tuple so that the caller can invoke
    # figure.update_layout(sliders=slider_descriptions).
    return figure, slider_descriptions

In [68]:
# Build the sampled view of the `balls` rows at ten evenly spaced sampling
# fractions, 0.1 through 1.0 (one slider step per fraction).
fig, desc = generate_sampled_batter_view(
    df=balls,
    name='Balls',
    sample_rates=[tenth / 10 for tenth in range(1, 11)])

In [70]:
# Attach the slider description returned alongside the figure; the function
# hands it back separately instead of applying it itself.
fig.update_layout(sliders=desc)