# Python Library Imports

In [1]:
import os
import numpy as np
import pandas as pd
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go

# Raw Data Loading

In [2]:
data_root_dir = "./baseball_data"
print(os.listdir(data_root_dir))

['games.csv', 'pitches.csv', 'player_names.csv', 'atbats.csv']


In [3]:
game_data_path         = os.path.join(data_root_dir, "games.csv")
player_name_data_path  = os.path.join(data_root_dir, "player_names.csv")
pitches_data_path      = os.path.join(data_root_dir, "pitches.csv")
at_bats_data_path      = os.path.join(data_root_dir, "atbats.csv")

assert(os.path.exists(game_data_path)),        "Failed To Find: %s" % game_data_path
assert(os.path.exists(player_name_data_path)), "Failed To Find: %s" % player_name_data_path
assert(os.path.exists(pitches_data_path)),     "Failed To Find: %s" % pitches_data_path
assert(os.path.exists(at_bats_data_path)),     "Failed To Find: %s" % at_bats_data_path

## Load and Preview Game Data

In [4]:
games = pd.read_csv(game_data_path)
games.head()

Unnamed: 0,attendance,away_final_score,away_team,date,elapsed_time,g_id,home_final_score,home_team,start_time,umpire_1B,umpire_2B,umpire_3B,umpire_HP,venue_name,weather,wind,delay
0,35055,3,sln,2015-04-05,184,201500001,0,chn,7:17 PM,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0
1,45909,1,ana,2015-04-06,153,201500002,4,sea,1:12 PM,Ron Kulpa,Brian Knight,Vic Carapazza,Larry Vanover,Safeco Field,"54 degrees, cloudy","1 mph, Varies",0
2,36969,2,atl,2015-04-06,156,201500003,1,mia,4:22 PM,Laz Diaz,Chris Guccione,Cory Blaser,Jeff Nelson,Marlins Park,"80 degrees, partly cloudy","16 mph, In from CF",16
3,31042,6,bal,2015-04-06,181,201500004,2,tba,3:12 PM,Ed Hickox,Paul Nauert,Mike Estabrook,Dana DeMuth,Tropicana Field,"72 degrees, dome","0 mph, None",0
4,45549,8,bos,2015-04-06,181,201500005,0,phi,3:08 PM,Phil Cuzzi,Tony Randazzo,Will Little,Gerry Davis,Citizens Bank Park,"71 degrees, partly cloudy","11 mph, Out to RF",0


In [5]:
print("Games Data Types: \n\n{}".format(games.dtypes))

Games Data Types: 

attendance           int64
away_final_score     int64
away_team           object
date                object
elapsed_time         int64
g_id                 int64
home_final_score     int64
home_team           object
start_time          object
umpire_1B           object
umpire_2B           object
umpire_3B           object
umpire_HP           object
venue_name          object
weather             object
wind                object
delay                int64
dtype: object


## Load and Preview Player Data

In [6]:
players = pd.read_csv(player_name_data_path)
players.rename(columns={'id':'batter_id'}, inplace=True)
players.head()

Unnamed: 0,batter_id,first_name,last_name
0,452657,Jon,Lester
1,425794,Adam,Wainwright
2,457435,Phil,Coke
3,435400,Jason,Motte
4,519166,Neil,Ramirez


In [7]:
print("Players Data Types: \n\n{}".format(players.dtypes))

Players Data Types: 

batter_id      int64
first_name    object
last_name     object
dtype: object


## Load and Preview Pitching Data

In [8]:
pitches = pd.read_csv(pitches_data_path)
pitches['ab_id'] = pitches['ab_id'].astype(int)
pitches.head()

Unnamed: 0,px,pz,start_speed,end_speed,spin_rate,spin_dir,break_angle,break_length,break_y,ax,...,event_num,b_score,ab_id,b_count,s_count,outs,pitch_num,on_1b,on_2b,on_3b
0,0.416,2.963,92.9,84.1,2305.052,159.235,-25.0,3.2,23.7,7.665,...,3,0.0,2015000001,0.0,0.0,0.0,1.0,0.0,0.0,0.0
1,-0.191,2.347,92.8,84.1,2689.935,151.402,-40.7,3.4,23.7,12.043,...,4,0.0,2015000001,0.0,1.0,0.0,2.0,0.0,0.0,0.0
2,-0.518,3.284,94.1,85.2,2647.972,145.125,-43.7,3.7,23.7,14.368,...,5,0.0,2015000001,0.0,2.0,0.0,3.0,0.0,0.0,0.0
3,-0.641,1.221,91.0,84.0,1289.59,169.751,-1.3,5.0,23.8,2.104,...,6,0.0,2015000001,0.0,2.0,0.0,4.0,0.0,0.0,0.0
4,-1.821,2.083,75.4,69.6,1374.569,280.671,18.4,12.0,23.8,-10.28,...,7,0.0,2015000001,1.0,2.0,0.0,5.0,0.0,0.0,0.0


In [9]:
print("Pitches Data Types:\n\n{}".format(pitches.dtypes))

Pitches Data Types:

px                 float64
pz                 float64
start_speed        float64
end_speed          float64
spin_rate          float64
spin_dir           float64
break_angle        float64
break_length       float64
break_y            float64
ax                 float64
ay                 float64
az                 float64
sz_bot             float64
sz_top             float64
type_confidence    float64
vx0                float64
vy0                float64
vz0                float64
x                  float64
x0                 float64
y                  float64
y0                 float64
z0                 float64
pfx_x              float64
pfx_z              float64
nasty              float64
zone               float64
code                object
type                object
pitch_type          object
event_num            int64
b_score            float64
ab_id                int64
b_count            float64
s_count            float64
outs               float64
pitch_n

## Load and Preview Batting Data

In [10]:
batting = pd.read_csv(at_bats_data_path)
batting.head()

Unnamed: 0,ab_id,batter_id,event,g_id,inning,o,p_score,p_throws,pitcher_id,stand,top
0,2015000001,572761,Groundout,201500001,1,1,0,L,452657,L,True
1,2015000002,518792,Double,201500001,1,1,0,L,452657,L,True
2,2015000003,407812,Single,201500001,1,1,0,L,452657,R,True
3,2015000004,425509,Strikeout,201500001,1,2,0,L,452657,R,True
4,2015000005,571431,Strikeout,201500001,1,3,0,L,452657,L,True


In [11]:
print("Batting Data Types:\n\n{}".format(batting.dtypes))

Batting Data Types:

ab_id          int64
batter_id      int64
event         object
g_id           int64
inning         int64
o              int64
p_score        int64
p_throws      object
pitcher_id     int64
stand         object
top             bool
dtype: object


# Data Preparation

## Brief Data Analysis (For Sanity)

In [12]:
print("Total Number Of Pitches: {:>10}".format(len(pitches)))
print("Total Number Of At Bats: {:>10}".format(len(np.unique(pitches['ab_id']))))

Total Number Of Pitches:    2867154
Total Number Of At Bats:     740241


In [13]:
print("Total Number of At Bats: {:>10}".format(len(batting)))
assert(len(batting) == len(np.unique(batting['ab_id']))), "Batting Events Are Not Uniquely Identified"

Total Number of At Bats:     740389


In [14]:
print("Number of no pitch pick-off play or no pitch balks for entire season: {}".format(
    abs(len(np.unique(pitches['ab_id'])) - len(batting))))

Number of no pitch pick-off play or no pitch balks for entire season: 148


## Merging Data Sources

## Join Batting Events Onto Pitches (Number Of Pitches > Number At Bats)

In [15]:
pitching_batting_df = pd.merge(pitches, batting, how='left', left_on='ab_id', right_on='ab_id')

In [16]:
num_pitches = len(pitching_batting_df)
num_at_bats = len(np.unique(pitching_batting_df['ab_id']))
num_games   = len(np.unique(pitching_batting_df['g_id']))

print("Total Number of At Bats With At Least One Pitch: {}".format(num_at_bats))
print("Total Number Of Pitches: {}".format(num_pitches))
print("Average Number of Pitches Per Bat: {}".format(num_pitches / num_at_bats))
print("Total Number Of Played Games: {}".format(num_games))

Total Number of At Bats With At Least One Pitch: 740241
Total Number Of Pitches: 2867154
Average Number of Pitches Per Bat: 3.8732710022816894
Total Number Of Played Games: 9718


## Join Games Onto Pitches and Batting Events (Games Composed of the Prior)

### Sanity Check the Number of Games Played

In [17]:
print("Total Number Of Acutual Played Games: {}".format(len(games)))
assert(len(games) == len(np.unique(games['g_id']))), "Not All Games Have A Unique ID" 
print("From {} To {}".format(np.min(games['date']), np.max(games['date'])))

Total Number Of Acutual Played Games: 9718
From 2015-04-05 To 2018-10-01


In [18]:
games_per_season = int(162 / 2) * 30
print("Each Season In Theory Has {} Games Or "
      "For 4 Seasons This Is A Theoretical Total Of {} Games".format(games_per_season, 4 * games_per_season))

Each Season In Theory Has 2430 Games Or For 4 Seasons This Is A Theoretical Total Of 9720 Games


### Perform The Merge

In [19]:
pitching_batting_game_df = pd.merge(pitching_batting_df, games, how='left', left_on='g_id', right_on='g_id')

In [20]:
number_of_batters = len(np.unique(players['batter_id']))
batters_at_bat = len(np.unique(batting['batter_id']))
print("Number Of Players With A Batter Id: {}".format(number_of_batters))
print("Number Of Players Who Were Actually Batting: {}".format(batters_at_bat))

Number Of Players With A Batter Id: 2218
Number Of Players Who Were Actually Batting: 1688


In [21]:
pitching_batting_games_players_df = pd.merge(
    pitching_batting_game_df,
    players,
    how='left',
    left_on='batter_id',
    right_on='batter_id')

In [22]:
assert(batters_at_bat == len(np.unique(pitching_batting_games_players_df['batter_id']))), "Mismatch of Batter Ids"

### Clean Up Data

In [23]:
df = pitching_batting_games_players_df
print("Number of Attributes: {}".format(len(df.dtypes)))
print("Number Of Instances: {}".format(len(df)))
print("Current List Of Attributes:\n{}".format(df.columns))

Number of Attributes: 68
Number Of Instances: 2867154
Current List Of Attributes:
Index(['px', 'pz', 'start_speed', 'end_speed', 'spin_rate', 'spin_dir',
       'break_angle', 'break_length', 'break_y', 'ax', 'ay', 'az', 'sz_bot',
       'sz_top', 'type_confidence', 'vx0', 'vy0', 'vz0', 'x', 'x0', 'y', 'y0',
       'z0', 'pfx_x', 'pfx_z', 'nasty', 'zone', 'code', 'type', 'pitch_type',
       'event_num', 'b_score', 'ab_id', 'b_count', 's_count', 'outs',
       'pitch_num', 'on_1b', 'on_2b', 'on_3b', 'batter_id', 'event', 'g_id',
       'inning', 'o', 'p_score', 'p_throws', 'pitcher_id', 'stand', 'top',
       'attendance', 'away_final_score', 'away_team', 'date', 'elapsed_time',
       'home_final_score', 'home_team', 'start_time', 'umpire_1B', 'umpire_2B',
       'umpire_3B', 'umpire_HP', 'venue_name', 'weather', 'wind', 'delay',
       'first_name', 'last_name'],
      dtype='object')


#### Join First And Last Names into Full Batter Name

In [24]:
df['batters_name'] = df[['first_name','last_name']].apply(lambda e: " ".join(e), axis=1)

In [25]:
df.drop(['first_name', 'last_name'], axis=1, inplace=True)

In [26]:
df.columns

Index(['px', 'pz', 'start_speed', 'end_speed', 'spin_rate', 'spin_dir',
       'break_angle', 'break_length', 'break_y', 'ax', 'ay', 'az', 'sz_bot',
       'sz_top', 'type_confidence', 'vx0', 'vy0', 'vz0', 'x', 'x0', 'y', 'y0',
       'z0', 'pfx_x', 'pfx_z', 'nasty', 'zone', 'code', 'type', 'pitch_type',
       'event_num', 'b_score', 'ab_id', 'b_count', 's_count', 'outs',
       'pitch_num', 'on_1b', 'on_2b', 'on_3b', 'batter_id', 'event', 'g_id',
       'inning', 'o', 'p_score', 'p_throws', 'pitcher_id', 'stand', 'top',
       'attendance', 'away_final_score', 'away_team', 'date', 'elapsed_time',
       'home_final_score', 'home_team', 'start_time', 'umpire_1B', 'umpire_2B',
       'umpire_3B', 'umpire_HP', 'venue_name', 'weather', 'wind', 'delay',
       'batters_name'],
      dtype='object')

#### Update The Data with Pitcher Name

In [27]:
players.rename(columns={'batter_id': 'pitcher_id'}, inplace=True)
players.columns

Index(['pitcher_id', 'first_name', 'last_name'], dtype='object')

In [28]:
df = pd.merge(df, players, how='left', left_on='pitcher_id', right_on='pitcher_id')
df['pitcher_name'] = df[['first_name', 'last_name']].apply(lambda e: " ".join(e), axis=1)
df.drop(['first_name', 'last_name'], axis=1, inplace=True)
df.columns

Index(['px', 'pz', 'start_speed', 'end_speed', 'spin_rate', 'spin_dir',
       'break_angle', 'break_length', 'break_y', 'ax', 'ay', 'az', 'sz_bot',
       'sz_top', 'type_confidence', 'vx0', 'vy0', 'vz0', 'x', 'x0', 'y', 'y0',
       'z0', 'pfx_x', 'pfx_z', 'nasty', 'zone', 'code', 'type', 'pitch_type',
       'event_num', 'b_score', 'ab_id', 'b_count', 's_count', 'outs',
       'pitch_num', 'on_1b', 'on_2b', 'on_3b', 'batter_id', 'event', 'g_id',
       'inning', 'o', 'p_score', 'p_throws', 'pitcher_id', 'stand', 'top',
       'attendance', 'away_final_score', 'away_team', 'date', 'elapsed_time',
       'home_final_score', 'home_team', 'start_time', 'umpire_1B', 'umpire_2B',
       'umpire_3B', 'umpire_HP', 'venue_name', 'weather', 'wind', 'delay',
       'batters_name', 'pitcher_name'],
      dtype='object')

In [29]:
df.head()

Unnamed: 0,px,pz,start_speed,end_speed,spin_rate,spin_dir,break_angle,break_length,break_y,ax,...,umpire_1B,umpire_2B,umpire_3B,umpire_HP,venue_name,weather,wind,delay,batters_name,pitcher_name
0,0.416,2.963,92.9,84.1,2305.052,159.235,-25.0,3.2,23.7,7.665,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
1,-0.191,2.347,92.8,84.1,2689.935,151.402,-40.7,3.4,23.7,12.043,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
2,-0.518,3.284,94.1,85.2,2647.972,145.125,-43.7,3.7,23.7,14.368,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
3,-0.641,1.221,91.0,84.0,1289.59,169.751,-1.3,5.0,23.8,2.104,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester
4,-1.821,2.083,75.4,69.6,1374.569,280.671,18.4,12.0,23.8,-10.28,...,Mark Wegner,Marty Foster,Mike Muchlinski,Mike Winters,Wrigley Field,"44 degrees, clear","7 mph, In from CF",0,Matt Carpenter,Jon Lester


#### Remap Pitching Codes To Their Full Names

In [30]:
pitch_type_map = dict({
    'FF': 'Four Seam Fastball',
    'SL': 'Slider',
    'FT': 'Two seam fastball',
    'CH': 'Changeup',
    'SI': 'Sinker',
    'CU': 'Curveball',
    'FC': 'Cutter',
    'KC': 'Knuckle Curve',
    'FS': 'Splitter',
    'KN': 'Knuckleball',
    'EP': 'Eephus',
    'FO': 'Pitch Out',
    'PO': 'Pitch Out',
    'SC': 'Screwball',
    'UN': 'Unidentified',
    'FA': 'Fastball',
    'IN': 'Intentional Ball'
})

In [31]:
df['pitch_type'] = df['pitch_type'].map(pitch_type_map)

#### Remap Result Codes To Their Full Names

In [32]:
code_type_map = dict({
    'B':  'Ball',
    '*B': 'Ball in dirt',
    'S':  'Swinging Strike',
    'C':  'Called Strike',
    'F':  'Foul',
    'T':  'Foul Tip',
    'L':  'Foul Bunt',
    'I':  'Intentional Ball',
    'W':  'Blocked',
    'M':  'Missed Bunt',
    'P':  'Pitch Out',
    'Q':  'Swinging Pitch Out',
    'R':  'Foul Pitch Out',
    'X':  'In play out(s)',
    'D':  'In play no out',
    'E':  'In play runs'
})

In [33]:
df['code'] = df['code'].map(code_type_map)

## Player Performance

In [36]:
player_name = "Max Scherzer"

In [37]:
player = df[df['pitcher_name'] == player_name]

In [38]:
print("Total Pitches Thrown By {}: {}".format(player_name, len(player)))

Total Pitches Thrown By Max Scherzer: 13534


In [39]:
player['pitch_type'].value_counts() / len(player) * 100

Four Seam Fastball    52.586080
Slider                19.499039
Changeup              13.713610
Curveball              8.053790
Cutter                 5.083493
Two seam fastball      0.657603
Unidentified           0.007389
Name: pitch_type, dtype: float64

In [54]:
player['event'].value_counts()

Strikeout              5232
Groundout              1752
Flyout                 1289
Single                 1202
Walk                   1115
Pop Out                 836
Lineout                 675
Double                  452
Home Run                348
Hit By Pitch            131
Grounded Into DP        115
Forceout                 81
Triple                   58
Sac Bunt                 55
Field Error              51
Intent Walk              40
Sac Fly                  26
Bunt Groundout           17
Bunt Pop Out             12
Fielders Choice Out      12
Runner Out               12
Double Play              12
Strikeout - DP           10
Fielders Choice           1
Name: event, dtype: int64

In [51]:
player.code.value_counts() / len(player) * 100

Ball                29.200532
Foul                19.779814
Called Strike       15.945027
Swinging Strike     14.866263
In play out(s)      10.595537
In play no out       3.236294
In play runs         1.462982
Foul Tip             1.344761
Ball in dirt         1.285651
Blocked              0.591104
Foul Bunt            0.421162
Missed Bunt          0.073888
Intentional Ball     0.073888
Name: code, dtype: float64

## Full Data

In [88]:
print("X-Position Mean: {} +/- {}".format(player.px.mean(), player.px.std()))
print("X-Position Range: [{}, {}]".format(player.px.min(),  player.px.max()))
print("Z-Position Mean: {} +/- {}".format(player.pz.mean(), player.pz.std()))
print("Z-Position Range: [{}, {}]".format(player.pz.min(),  player.pz.max()))

X-Position Mean: -0.017691280769793923 +/- 0.8724981196240903
X-Position Range: [-3.54192503574252, 3.29]
Z-Position Mean: 2.360284049903265 +/- 0.8701328320057345
Z-Position Range: [-1.45, 5.95532530048124]


In [89]:
trace0 = go.Scatter(
    
    x = player.px[player.code == 'Ball'],
    y = player.pz[player.code == 'Ball'],

    name="Balls",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(22, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(22, 0, 0, 0.8)')
    )
)

trace1 = go.Scatter(
    
    x = player.px[player.code == 'Called Strike'],
    y = player.pz[player.code == 'Called Strike'],

    name="Strikes",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(152, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(152, 0, 0, .8)')
    )
)

In [90]:
data = [trace0, trace1]
layout = dict(
    autosize=False,
    width=500,
    height=500,
    title = 'Max Scherzer Strike Distribution',
    plot_bgcolor='rgba(50,205,50)',
    yaxis=dict(zeroline=False),
    xaxis=dict(zeroline=False)
)
fig = dict(data=data, layout=layout)

In [91]:
py.iplot(fig, filename=player_name)

In [106]:
called_ball   = player[['px', 'pz']][player.code == 'Ball']
called_strike =  player[['px', 'pz']][player.code == 'Called Strike']
print("Number of Called Balls: {}".format(len(called_ball)))
print("Number of Called Strikes (Batter Caught Looking): {}".format(len(called_strike)))

Number of Called Balls: 3952
Number of Called Strikes (Batter Caught Looking): 2158


## 50% Sample

In [175]:
ball_sample_index0   = np.random.choice(len(called_ball),   int((1/2) * len(called_ball)),   replace=False)
strike_sample_index0 = np.random.choice(len(called_strike), int((1/2) * len(called_strike)), replace=False)

In [176]:
reduced_trace_ball_0 = go.Scatter(
    
    x = called_ball.px.iloc[ball_sample_index0],
    y = called_ball.pz.iloc[ball_sample_index0],

    name="Balls",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(22, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(22, 0, 0, 0.8)')
    )
)

reduced_trace_strike_0 = go.Scatter(
    
    x = called_strike.px.iloc[strike_sample_index0],
    y = called_strike.pz.iloc[strike_sample_index0],

    name="Strikes",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(152, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(152, 0, 0, .8)')
    )
)

In [177]:
reduced_data0 = [reduced_trace_ball_0, reduced_trace_strike_0]
layout = dict(
    autosize=False,
    width=500,
    height=500,
    title = 'Max Scherzer Strike Distribution (Reduced)',
    plot_bgcolor='rgba(50,205,50)',
    yaxis=dict(zeroline=False),
    xaxis=dict(zeroline=False)
)
reduced_fig0 = dict(data=reduced_data0, layout=layout)

In [178]:
py.iplot(reduced_fig0, filename=player_name)

## 25% Sample

In [179]:
ball_sample_index1   = np.random.choice(len(called_ball),   int((1/4) * len(called_ball)),   replace=False)
strike_sample_index1 = np.random.choice(len(called_strike), int((1/4) * len(called_strike)), replace=False)

In [180]:
reduced_trace_ball_1 = go.Scatter(
    
    x = called_ball.px.iloc[ball_sample_index1],
    y = called_ball.pz.iloc[ball_sample_index1],

    name="Balls",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(22, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(22, 0, 0, 0.8)')
    )
)

reduced_trace_strike_1 = go.Scatter(
    
    x = called_strike.px.iloc[strike_sample_index1],
    y = called_strike.pz.iloc[strike_sample_index1],

    name="Strikes",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(152, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(152, 0, 0, .8)')
    )
)

In [181]:
reduced_data1 = [reduced_trace_ball_1, reduced_trace_strike_1]
layout = dict(
    autosize=False,
    width=500,
    height=500,
    title = 'Max Scherzer Strike Distribution (Reduced)',
    plot_bgcolor='rgba(50,205,50)',
    yaxis=dict(zeroline=False),
    xaxis=dict(zeroline=False)
)
reduced_fig1 = dict(data=reduced_data1, layout=layout)

In [182]:
py.iplot(reduced_fig1, filename=player_name)

## 10% Sample

In [183]:
ball_sample_index2   = np.random.choice(len(called_ball),   int((1/10) * len(called_ball)),   replace=False)
strike_sample_index2 = np.random.choice(len(called_strike), int((1/10) * len(called_strike)), replace=False)

In [184]:
reduced_trace_ball_2 = go.Scatter(
    
    x = called_ball.px.iloc[ball_sample_index2],
    y = called_ball.pz.iloc[ball_sample_index2],

    name="Balls",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(22, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(22, 0, 0, 0.8)')
    )
)

reduced_trace_strike_2 = go.Scatter(
    
    x = called_strike.px.iloc[strike_sample_index2],
    y = called_strike.pz.iloc[strike_sample_index2],

    name="Strikes",
    mode='markers',
    marker=dict(
        size=1,
        color='rgba(152, 0, 0, 0.8)',
        line=dict(width=2, color='rgba(152, 0, 0, .8)')
    )
)

In [185]:
reduced_data2 = [reduced_trace_ball_2, reduced_trace_strike_2]
layout = dict(
    autosize=False,
    width=500,
    height=500,
    title = 'Max Scherzer Strike Distribution (Reduced)',
    plot_bgcolor='rgba(50,205,50)',
    yaxis=dict(zeroline=False),
    xaxis=dict(zeroline=False)
)
reduced_fig2 = dict(data=reduced_data2, layout=layout)

In [186]:
py.iplot(reduced_fig2, filename=player_name)