In [62]:
!pip install tables
import pandas as pd
import numpy as np
import torch
from pickle import dump, load
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.utils import random
from sklearn.model_selection import train_test_split
import scipy.optimize
from scipy.optimize import Bounds
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')

[0m

Init NN

In [2]:
class FFNetPytorch(nn.Module):
  def __init__(self, input_size, output_size, hl1=100, hl2=25):
    super(FFNetPytorch, self).__init__()
    '''
    Define the layers of the neural network. One hidden layer and output layer.
    The activation function used in between the two layers is sigmoid.
    '''
    self.layer1 = nn.Linear(input_size, hl1, bias = True)
    self.layer2 = nn.Linear(hl1, output_size, bias = True)

    self.to(torch.float32)
    

  def forward(self, x):
    '''
    :param x: input to the model (N, NUM_FEATURES)

    :return:
      output: logits of the last layer of the model 
    '''
    x = torch.relu(self.layer1(x))
    x = self.layer2(x)

    return x

Import Data

In [3]:
#BECAUSE NO PXPY
import pandas as pd
INPUT_COLUMNS = ["start_speed", "spin_rate", "spin_dir", "zone", "pitch_type"]
CONTEXT_COLUMNS = ["b_score", "p_score", "b_count", "s_count", "outs", "pitch_num", \
    "on_1b", "on_2b", "on_3b", "inning", "p_throws", "stand", "top"]
OUTPUT_COLUMNS = ["at_bat_score"]

combined_data = pd.read_hdf("source_files/model_data.hdf5")
inputs = combined_data.iloc[:, 0:-1].copy()
outputs = combined_data.iloc[:, -1].copy()

inputs_t = torch.tensor(combined_data.iloc[:, 0:-1].values)
outputs_t = torch.tensor(combined_data.iloc[:, -1].values)

model = torch.load("scripts/batting/models/HL1_64.pt")
model.eval()
torch.no_grad()

<torch.autograd.grad_mode.no_grad at 0x7f0b08d2e490>

Constraints for each type of pitch. Takes 25% and 75% Percentile of speed,
spin, and spin dir

In [6]:
convert_name = {"CH": "Changeup",
                "CU": "Curveball",
                "FC": "Cutter",
                "FF": "Four-Seam Fastball",
                "FS": "Splitter",
                "FT": "Two-Seam Fastball",
                "KC": "Knuckle Curve",
                "KN": "Knuckleball",
                "SC": "Screwball",
                "SI": "Sinker",
                "SL": "Slider"}

#Initialize constraints for pitches
pitch_constraints = {}

PITCH_TYPES = ['CH', 'CU', 'FC', 'FF', 'FS', 'FT', 'KC', 'KN', 'SI', 'SL']
ZONES = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]

for pitch_type in PITCH_TYPES:
    constraints = {}
    pitches = inputs[inputs[pitch_type] == 1]

    speed_bounds = pitches["start_speed"].quantile([0.25, 0.75])
    constraints["min_speed"], constraints["max_speed"] = speed_bounds.iloc[0], speed_bounds.iloc[1]
    
    srate_bounds = pitches["spin_rate"].quantile([0.25, 0.75])
    constraints["min_srate"], constraints["max_srate"] = srate_bounds.iloc[0], srate_bounds.iloc[1]

    sdir_bounds = pitches["spin_dir"].quantile([0.25, 0.75])
    constraints["min_sdir"], constraints["max_sdir"] = sdir_bounds.iloc[0], sdir_bounds.iloc[1]

    pitch_constraints[pitch_type] = constraints
pitch_constraints

{'CH': {'min_speed': 81.9,
  'max_speed': 86.2,
  'min_srate': 1462.13875,
  'max_srate': 2029.496,
  'min_sdir': 131.76725,
  'max_sdir': 246.45475000000002},
 'CU': {'min_speed': 75.6,
  'max_speed': 80.7,
  'min_srate': 898.019,
  'max_srate': 1624.0994999999998,
  'min_sdir': 36.213499999999996,
  'max_sdir': 267.8525},
 'FC': {'min_speed': 86.3,
  'max_speed': 90.2,
  'min_srate': 798.078,
  'max_srate': 1446.274,
  'min_sdir': 152.951,
  'max_sdir': 184.973},
 'FF': {'min_speed': 91.1,
  'max_speed': 94.7,
  'min_srate': 1924.725,
  'max_srate': 2428.60425,
  'min_sdir': 172.955,
  'max_sdir': 212.127},
 'FS': {'min_speed': 82.7,
  'max_speed': 86.6,
  'min_srate': 1110.6335,
  'max_srate': 1774.5615,
  'min_sdir': 219.2955,
  'max_sdir': 256.347},
 'FT': {'min_speed': 90.5,
  'max_speed': 94.0,
  'min_srate': 1927.97975,
  'max_srate': 2422.06625,
  'min_sdir': 144.094,
  'max_sdir': 238.014},
 'KC': {'min_speed': 78.4,
  'max_speed': 82.6,
  'min_srate': 970.733,
  'max_srate':

Objective function for minimizer

In [5]:
def objective(x, *args):
    model, context, pitch_type, scaler = args[0], args[1], args[2], args[3]
    
    pitches = [pitch_type==val for val in PITCH_TYPES]
    pitches = pd.Series(pitches, index=PITCH_TYPES, dtype=float)

    inputs = pd.Series([x[0], x[1], x[2], x[3], x[4]], index=["start_speed", "spin_rate", "spin_dir", "px", "pz"], dtype=float)

    pitch = pd.concat([inputs, pitches, context])

    pitch = scaler.transform(np.array(pitch.values).reshape(1, -1))
    
    return -model(torch.tensor(pitch, dtype=torch.float32).cpu()).item()

Configure context and determine best pitch for given situation

In [123]:
#Initialize Context
context = {
    "b_score": 0,
    "p_score": 0,
    "b_count": 3,
    "s_count": 0,
    "outs": 0,
    "pitch_num": 4,
    "on_1b": 0,
    "on_2b": 0,
    "on_3b": 0,
    "inning": 1,
    "p_isrighty": 1,
    "b_isrighty": 0,
    "is_top_inning": 1
}
indexes = [key for key, val in context.items()]
values = [val for key, val in context.items()]
context_df = pd.Series(values, index=indexes, dtype=float)

max_score_overall = np.inf
max_pitch_overall = None

pitch_scores = []
max_score_pitch = np.inf
max_pitch_cur = None

model.cpu()

scaler = load(open('scaler.pkl', 'rb'))

#Find optimum pitch
for pitch_type in PITCH_TYPES:
    bounds = Bounds([pitch_constraints[pitch_type]["min_speed"], pitch_constraints[pitch_type]["min_srate"], pitch_constraints[pitch_type]["min_sdir"], -3.0, 0.0], \
            [pitch_constraints[pitch_type]["max_speed"], pitch_constraints[pitch_type]["max_srate"], pitch_constraints[pitch_type]["max_sdir"], 3.0, 5.0])
    
    #print(pitch_type, zone)
    args = (model, context_df, pitch_type, scaler)

    x0 = [pitch_constraints[pitch_type]["max_speed"], pitch_constraints[pitch_type]["max_srate"], pitch_constraints[pitch_type]["max_sdir"], 0.1, 0.5]

    # optim = scipy.optimize.minimize(objective, x0, args=args,\
    #         method='Nelder-Mead', bounds=bounds, tol = 1e-4, options={"maxiter": 1000})
    # optim = scipy.optimize.shgo(objective, bounds, args=args,\
    #        n = 100, iters = 1, options={"maxtime": 60})

    optim = scipy.optimize.basinhopping(objective, x0, niter = 100, minimizer_kwargs = {"args": args})

    if optim.fun < max_score_overall:
        max_score_overall = optim.fun
        max_pitch_overall = {"Pitch Type": pitch_type,
                    "Speed": optim.x[0], 
                    "Spin Rate": optim.x[1],
                    "Spin Dir": optim.x[2],
                    "px": optim.x[3],
                    "pz": optim.x[4],
                    "Score": -optim.fun}
    
    pitch_scores.append({"Pitch Type": pitch_type,
                    "Speed": optim.x[0], 
                    "Spin Rate": optim.x[1],
                    "Spin Dir": optim.x[2],
                    "px": optim.x[3],
                    "pz": optim.x[4],
                    "Score": -optim.fun})

        
pitch_scores = sorted(pitch_scores, key = lambda d: d['Score'], reverse = True)

for val in pitch_scores:
    print(f"Pitch Type: {convert_name[val['Pitch Type']].ljust(18)}, px: {val['px']:.4f}, pz: {val['pz']:.4f}, Speed: {val['Speed']:.2f}, Spin Rate: {val['Spin Rate']:.2f}, Spin Dir: {val['Spin Dir']:.2f}, Score: {val['Score']:.3f}")




Pitch Type: Knuckle Curve     , px: 0.5434, pz: 2.1239, Speed: 83.89, Spin Rate: 1628.76, Spin Dir: 109.72, Score: 0.189
Pitch Type: Curveball         , px: 0.3222, pz: 2.3126, Speed: 81.61, Spin Rate: 1626.65, Spin Dir: 270.95, Score: 0.160
Pitch Type: Cutter            , px: 0.7378, pz: 2.6778, Speed: 91.79, Spin Rate: 1445.59, Spin Dir: 180.89, Score: 0.054
Pitch Type: Splitter          , px: -0.1143, pz: 1.9877, Speed: 80.59, Spin Rate: 1774.63, Spin Dir: 256.39, Score: -0.003
Pitch Type: Sinker            , px: -0.2156, pz: 2.7679, Speed: 97.25, Spin Rate: 2370.40, Spin Dir: 236.15, Score: -0.014
Pitch Type: Slider            , px: 0.5240, pz: 2.1921, Speed: 82.51, Spin Rate: 1044.28, Spin Dir: 192.30, Score: -0.043
Pitch Type: Four-Seam Fastball, px: -0.1190, pz: 2.8719, Speed: 95.12, Spin Rate: 2427.91, Spin Dir: 212.20, Score: -0.080
Pitch Type: Changeup          , px: -0.4976, pz: 2.2792, Speed: 84.27, Spin Rate: 2026.62, Spin Dir: 247.28, Score: -0.110
Pitch Type: Two-Seam Fa

Grid Search

In [158]:
%%timeit -n 5
context = {
    "b_score": 0,
    "p_score": 0,
    "b_count": 3,
    "s_count": 0,
    "outs": 0,
    "pitch_num": 4,
    "on_1b": 0,
    "on_2b": 0,
    "on_3b": 0,
    "inning": 1,
    "p_isrighty": 1,
    "b_isrighty": 0,
    "is_top_inning": 1
}
indexes = [key for key, val in context.items()]
values = [val for key, val in context.items()]
context_df = pd.Series(values, index=indexes, dtype=float)

num_search = 200

px_l, px_u = -2, 2
pz_l, pz_u = 0, 6

pxs = np.linspace(px_l, px_u, num_search)
pzs = np.linspace(pz_l, pz_u, num_search)

pxs, pzs = np.meshgrid(pxs, pzs)

model.to("cuda")
torch.no_grad()
model.eval()
scaler = load(open('scaler.pkl', 'rb'))

for pitch in PITCH_TYPES:
    pitches = [pitch_type==val for val in PITCH_TYPES]
    pitches = pd.Series(pitches, index=PITCH_TYPES, dtype=float)
    p_d = pitch_constraints[pitch]
    start_speed, spin_rate, spin_dir = p_d["max_speed"], p_d["max_srate"], p_d["max_sdir"]
    inputs = np.tile(np.array([start_speed, spin_rate, spin_dir]), (num_search**2,1))
    inputs = pd.DataFrame(inputs, columns=["start_speed", "spin_rate", "spin_dir"])
    inputs["px"] = pxs.ravel()
    inputs["pz"] = pzs.ravel()

    pitch_m = np.concatenate([inputs.values, np.tile(pitches.values, (num_search**2, 1)), np.tile(context_df.values, (num_search**2, 1))], axis = 1)
    pitch_m = scaler.transform(pitch_m, copy=False)
    # print(pitch_m.shape)
    results = model(torch.tensor(pitch_m, dtype=torch.float32).cuda()).cpu().detach().numpy()
    results = results.reshape(num_search, num_search)
    
    # plt.figure()
    # plt.pcolor(pxs, pzs, results)
    # cs = plt.contour(pxs, pzs, results, 15, colors="black")
    # plt.clabel(cs)

    print(pitch, results.max())
    # print(results[results.argmax()//num_search, results.argmax()%num_search])
    

    

CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1205574
CU 0.19016244
FC -0.08441022
FF 0.10297495
FS 0.12678152
FT 0.09535453
KC 0.16585183
KN 0.106478125
SI 0.07823497
SL -0.06901872
CH 0.1

In [149]:
scaler.get_params()

{'copy': True, 'with_mean': True, 'with_std': True}

In [None]:
%timeit -n 5 np.concatenate([inputs.values, np.tile(pitches.values, (num_search**2, 1)), np.tile(context_df.values, (num_search**2, 1))], axis = 1)


In [19]:
!ls

README.md  scaler.pkl  scripts	source_files


In [None]:
combined