In [4]:
!pip install tables
import pandas as pd
import numpy as np
import torch
from pickle import dump, load
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.utils import random
from sklearn.model_selection import train_test_split
import scipy.optimize
from scipy.optimize import Bounds

import warnings
warnings.filterwarnings('ignore')

Collecting tables
  Downloading tables-3.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting blosc2~=2.0.0
  Downloading blosc2-2.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.9/3.9 MB[0m [31m43.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting py-cpuinfo
  Downloading py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting numexpr>=2.6.2
  Downloading numexpr-2.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.7/380.7 kB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: py-cpuinfo, numexpr, blosc2, tables
Successfully installed blosc2-2.0.0 numexpr-2.8.4 py-cpuinfo-9.0.0 tables-3.8.0
[0m

Init NN

In [2]:
class FFNetPytorch(nn.Module):
  def __init__(self, input_size, output_size, hl1=100, hl2=25):
    super(FFNetPytorch, self).__init__()
    '''
    Define the layers of the neural network. One hidden layer and output layer.
    The activation function used in between the two layers is sigmoid.
    '''
    self.layer1 = nn.Linear(input_size, hl1, bias = True)
    self.layer2 = nn.Linear(hl1, output_size, bias=True)

    self.to(torch.float32)
    

  def forward(self, x):
    '''
    :param x: input to the model (N, NUM_FEATURES)

    :return:
      output: logits of the last layer of the model 
    '''
    x = torch.relu(self.layer1(x))
    x = self.layer2(x)

    return x

Import Data

In [5]:
#BECAUSE NO PXPY
import pandas as pd
INPUT_COLUMNS = ["start_speed", "spin_rate", "spin_dir", "zone", "pitch_type"]
CONTEXT_COLUMNS = ["b_score", "p_score", "b_count", "s_count", "outs", "pitch_num", \
    "on_1b", "on_2b", "on_3b", "inning", "p_throws", "stand", "top"]
OUTPUT_COLUMNS = ["at_bat_score"]

combined_data = pd.read_hdf("source_files/model_data.hdf5")
inputs = combined_data.iloc[:, 0:-1].copy()
outputs = combined_data.iloc[:, -1].copy()

inputs_t = torch.tensor(combined_data.iloc[:, 0:-1].values)
outputs_t = torch.tensor(combined_data.iloc[:, -1].values)

model = torch.load("scripts/batting/models/HL1_64.pt")
model.eval()
torch.no_grad()

<torch.autograd.grad_mode.no_grad at 0x7f11200b4520>

Constraints for each type of pitch. Takes 25% and 75% Percentile of speed,
spin, and spin dir

In [6]:
convert_name = {"CH": "Changeup",
                "CU": "Curveball",
                "FC": "Cutter",
                "FF": "Four-Seam Fastball",
                "FS": "Splitter",
                "FT": "Two-Seam Fastball",
                "KC": "Knuckle Curve",
                "KN": "Knuckleball",
                "SC": "Screwball",
                "SI": "Sinker",
                "SL": "Slider"}

#Initialize constraints for pitches
pitch_constraints = {}

PITCH_TYPES = ['CH', 'CU', 'FC', 'FF', 'FS', 'FT', 'KC', 'KN', 'SI', 'SL']
ZONES = [1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]

for pitch_type in PITCH_TYPES:
    constraints = {}
    pitches = inputs[inputs[pitch_type] == 1]

    speed_bounds = pitches["start_speed"].quantile([0.25, 0.75])
    constraints["min_speed"], constraints["max_speed"] = speed_bounds.iloc[0], speed_bounds.iloc[1]
    
    srate_bounds = pitches["spin_rate"].quantile([0.25, 0.75])
    constraints["min_srate"], constraints["max_srate"] = srate_bounds.iloc[0], srate_bounds.iloc[1]

    sdir_bounds = pitches["spin_dir"].quantile([0.25, 0.75])
    constraints["min_sdir"], constraints["max_sdir"] = sdir_bounds.iloc[0], sdir_bounds.iloc[1]

    pitch_constraints[pitch_type] = constraints

Objective function for minimizer

In [7]:
def objective(x, *args):
    model, context, pitch_type, scaler = args[0], args[1], args[2], args[3]
    
    pitches = [pitch_type==val for val in PITCH_TYPES]
    pitches = pd.Series(pitches, index=PITCH_TYPES, dtype=float)

    inputs = pd.Series([x[0], x[1], x[2], x[3], x[4]], index=["start_speed", "spin_rate", "spin_dir", "px", "pz"], dtype=float)

    pitch = pd.concat([inputs, pitches, context])

    pitch = scaler.transform(np.array(pitch.values).reshape(1, -1))
    
    return -model(torch.tensor(pitch, dtype=torch.float32).cpu()).item()

Configure context and determine best pitch for given situation

In [21]:
#Initialize Context
context = {
    "b_score": 0,
    "p_score": 0,
    "b_count": 3,
    "s_count": 0,
    "outs": 0,
    "pitch_num": 4,
    "on_1b": 0,
    "on_2b": 0,
    "on_3b": 0,
    "inning": 1,
    "p_isrighty": 1,
    "b_isrighty": 0,
    "is_top_inning": 1
}
indexes = [key for key, val in context.items()]
values = [val for key, val in context.items()]
context_df = pd.Series(values, index=indexes, dtype=float)

max_score_overall = np.inf
max_pitch_overall = None

pitch_scores = []
max_score_pitch = np.inf
max_pitch_cur = None

model.cpu()
scaler = load(open('scaler.pkl', 'rb'))

#Find optimum pitch
for pitch_type in PITCH_TYPES:
    bounds = Bounds([pitch_constraints[pitch_type]["min_speed"], pitch_constraints[pitch_type]["min_srate"], pitch_constraints[pitch_type]["min_sdir"], -3.0, 0.0], \
            [pitch_constraints[pitch_type]["max_speed"], pitch_constraints[pitch_type]["max_srate"], pitch_constraints[pitch_type]["max_sdir"], 3.0, 5.0])
    
    #print(pitch_type, zone)
    args = (model, context_df, pitch_type, scaler)

    x0 = [pitch_constraints[pitch_type]["max_speed"], pitch_constraints[pitch_type]["max_srate"], pitch_constraints[pitch_type]["max_sdir"], 0.1, 0.5]

    # optim = scipy.optimize.minimize(objective, x0, args=args,\
    #         method='Nelder-Mead', bounds=bounds, tol = 1e-4, options={"maxiter": 1000})
    # optim = scipy.optimize.shgo(objective, bounds, args=args,\
    #        n = 100, iters = 1, options={"maxtime": 60})

    optim = scipy.optimize.basinhopping(objective, x0, niter = 25, minimizer_kwargs = {"args": args})

    if optim.fun < max_score_overall:
        max_score_overall = optim.fun
        max_pitch_overall = {"Pitch Type": pitch_type,
                    "Speed": optim.x[0], 
                    "Spin Rate": optim.x[1],
                    "Spin Dir": optim.x[2],
                    "px": optim.x[3],
                    "pz": optim.x[4],
                    "Score": -optim.fun}
    
    pitch_scores.append({"Pitch Type": pitch_type,
                    "Speed": optim.x[0], 
                    "Spin Rate": optim.x[1],
                    "Spin Dir": optim.x[2],
                    "px": optim.x[3],
                    "pz": optim.x[4],
                    "Score": -optim.fun})

        
pitch_scores = sorted(pitch_scores, key = lambda d: d['Score'], reverse = True)

for val in pitch_scores:
    print(f"Pitch Type: {convert_name[val['Pitch Type']].ljust(18)}, px: {val['px']:.4f}, pz: {val['pz']:.4f}, Speed: {val['Speed']:.2f}, Spin Rate: {val['Spin Rate']:.2f}, Spin Dir: {val['Spin Dir']:.2f}, Score: {val['Score']:.3f}")



Pitch Type: Curveball         , px: 0.0459, pz: 2.0463, Speed: 81.50, Spin Rate: 1625.28, Spin Dir: 267.34, Score: 0.143
Pitch Type: Cutter            , px: 0.6272, pz: 2.5024, Speed: 90.15, Spin Rate: 1445.17, Spin Dir: 185.03, Score: 0.074
Pitch Type: Knuckle Curve     , px: 0.3175, pz: 1.6945, Speed: 81.72, Spin Rate: 1628.76, Spin Dir: 113.74, Score: 0.022
Pitch Type: Splitter          , px: -0.4116, pz: 1.7036, Speed: 86.14, Spin Rate: 1774.74, Spin Dir: 256.50, Score: -0.007
Pitch Type: Changeup          , px: -0.3410, pz: 2.2053, Speed: 86.44, Spin Rate: 2029.25, Spin Dir: 245.77, Score: -0.123
Pitch Type: Four-Seam Fastball, px: 0.4606, pz: 2.5793, Speed: 95.29, Spin Rate: 2428.36, Spin Dir: 211.29, Score: -0.123
Pitch Type: Two-Seam Fastball , px: -0.4923, pz: 2.0240, Speed: 94.42, Spin Rate: 2418.53, Spin Dir: 241.51, Score: -0.175
Pitch Type: Sinker            , px: -0.5583, pz: 2.7942, Speed: 94.92, Spin Rate: 2369.62, Spin Dir: 244.67, Score: -0.197
Pitch Type: Slider     

In [65]:
args = (model, context_df, "FF", scaler)
x = [94.47, 2428.34, 211.83, -0.1, 0.6653]
print(-objective(x, args[0], args[1], args[2], args[3]))

0.7539949417114258


In [19]:
!ls

README.md  scaler.pkl  scripts	source_files


In [None]:
combined