In [2]:
import pandas as pd 
import numpy as np 
import datetime
import sqlite3 
import pybaseball as pyb #Pybaseball is where we will be pulling data from

In [7]:
# First date of the query window: Opening Day of the 2023 regular season
start_dt_od = '2023-03-30'

# Last date of the query window: the day this notebook is run
today = datetime.date.today()
# The statcast function takes dates as 'YYYY-MM-DD' strings, which is exactly
# the ISO format of a date object
today_str = today.isoformat()


In [None]:
# Download every pitch thrown between Opening Day and today (all teams) using
# pybaseball's statcast function; progress is printed and requests run in parallel
data = pyb.statcast(
    start_dt=start_dt_od,
    end_dt=today_str,
    team=None,
    verbose=True,
    parallel=True,
)

#Using an arbitrary set of dates to test that the functions work and that data can be written to the database - this line is only included to test the code out

In [None]:
#data = pyb.statcast(start_dt = '2022-05-17', end_dt = '2022-05-18', team = None, verbose = True, parallel = True) 

#This is a function to add some additional things I want in the pitch-by-pitch data

In [19]:
def add_to_savant(df):
    """
    Add derived pitch-level columns to a Statcast pitch-by-pitch DataFrame.

    The input frame is not modified; a new frame is returned, so calling this
    function repeatedly on the same input always gives the same result.

    Parameters:
        df (pd.DataFrame): Statcast data containing at least the columns
            'pfx_x', 'pfx_z', 'launch_speed', 'launch_angle', 'vy0', 'vz0',
            'ay', 'az', 'balls' and 'strikes'.

    Returns:
        pd.DataFrame: a copy of ``df`` with
            - 'pfx_x' / 'pfx_z' converted from feet to inches ('pfx_x' is
              also sign-flipped),
            - 'hard_hit': 1 when launch_speed is 95 mph or higher, else 0,
            - 'sweet_spot': 1 when launch_angle is between 8 and 32 degrees
              (inclusive), else 0,
            - 'VAA': vertical approach angle in degrees, rounded to 2 places,
            - 'count': the 'balls-strikes' string, e.g. '3-1',
            - 'count_type': 'hitter', 'pitcher' or 'even' ('' for any count
              not listed below).

        Missing launch_speed / launch_angle values are left as missing; the
        corresponding 'hard_hit' / 'sweet_spot' flags are 0.
    """
    # Work on a copy so the caller's frame is untouched; otherwise re-running
    # the cell would multiply pfx_x / pfx_z by 12 a second time.
    df = df.copy()

    # Movement is reported in feet; convert to inches. The sign of pfx_x is
    # flipped (presumably to view horizontal break from the pitcher's side).
    df['pfx_x'] = df['pfx_x'] * -12
    df['pfx_z'] = df['pfx_z'] * 12

    # Create 'hard_hit' column. The comparison is done on a temporary numeric
    # view so the missing values in 'launch_speed' itself are preserved
    # (a missing value compares as False, i.e. not hard hit).
    exit_speed = pd.to_numeric(df['launch_speed'], errors='coerce')
    df['hard_hit'] = exit_speed.ge(95).fillna(False).astype(int)

    # Create 'sweet_spot' column (8 <= launch_angle <= 32), again without
    # overwriting the missing values in 'launch_angle'.
    exit_angle = pd.to_numeric(df['launch_angle'], errors='coerce')
    df['sweet_spot'] = exit_angle.between(8, 32).fillna(False).astype(int)

    # Add 'VAA' column. Statcast reports vy0/vz0/ay/az at y = 50 ft from home
    # plate; the ball is projected to the front of the plate (17 inches) using
    # constant-acceleration kinematics: vy_f^2 = vy0^2 - 2 * ay * (y0 - y_f).
    y_start = 50.0
    y_plate = 17 / 12
    vy_f = -1 * np.sqrt(df['vy0'] ** 2 - 2 * df['ay'] * (y_start - y_plate))
    t = (vy_f - df['vy0']) / df['ay']
    vz_f = df['vz0'] + df['az'] * t
    df['VAA'] = np.round(-1 * np.arctan(vz_f / vy_f) * (180 / np.pi), 2)

    # Add 'count' column
    df['count'] = df['balls'].astype(str) + '-' + df['strikes'].astype(str)

    # Add 'count_type' column: look each count up in a table; counts that are
    # not listed (e.g. bad data) get an empty string.
    count_types = {
        '1-0': 'hitter', '2-0': 'hitter', '3-0': 'hitter',
        '2-1': 'hitter', '3-1': 'hitter', '3-2': 'hitter',
        '0-1': 'pitcher', '0-2': 'pitcher', '1-2': 'pitcher',
        '0-0': 'even', '1-1': 'even', '2-2': 'even',
    }
    df['count_type'] = df['count'].map(count_types).fillna('')

    return df

#These are the column names I will be changing to make querying quicker for me

In [20]:
# Map raw Statcast column names (keys) to the shorter names (values) used when
# querying the database. Keys that do not match a column are silently ignored
# by DataFrame.rename, so every key must match the Statcast name exactly.
# NOTE(review): if the database table was already created with the raw
# 'pfx_x' / 'pfx_z' column names, it must be recreated before appending
# frames that use the renamed columns.
column_names_dict = {
    'release_speed': 'velo',
    'release_pos_x': 'release_side',
    'release_pos_z': 'release_height',
    'game_date': 'date',
    # Statcast names its movement columns 'pfx_z' / 'pfx_x'
    'pfx_z': 'ind_vert_break',
    'pfx_x': 'horizontal_break',
    'launch_speed': 'exit_velo',

    # Add more column names as needed
}

#Function to apply column name changes to the data

In [21]:
def rename_statcast_columns(data, column_names_dict):
    """
    Return a copy of the Statcast frame with its columns renamed.

    Parameters:
        data (pd.DataFrame): Statcast data pulled from Savant; left unchanged
        column_names_dict (dict): maps existing column names (keys) to their
                                  replacement names (values); keys that are
                                  not columns of ``data`` are ignored

    Returns:
        pd.DataFrame: a new DataFrame carrying the renamed columns
    """
    renamed = data.rename(mapper=column_names_dict, axis='columns')
    return renamed

In [22]:
# Feed the frame downloaded from pybaseball to 'add_to_savant' to get the
# extra derived columns
modified_data = add_to_savant(df=data)

In [None]:
# Run the new dataframe (modified_data) that contains the new columns from the 'add_to_savant' function through
# the function that renames the columns
data_cleaned = rename_statcast_columns(modified_data, column_names_dict)

# Preview the first rows; ending the cell with the expression (instead of
# print) lets the notebook render the frame as a table
data_cleaned.head()

#We want to clear out any data in the database table before we load anything new. This ensures that if Statcast makes any changes, we will always have the most up-to-date information - thanks to Jeremy Maschino for this suggestion

In [24]:
# Connect to the database
conn = sqlite3.connect('2023Statcast.db')
try:
    # Create a cursor object to execute SQL statements
    cur = conn.cursor()
    try:
        # On a first run the table does not exist yet and DELETE would raise,
        # so check the schema before clearing it
        cur.execute(
            "SELECT 1 FROM sqlite_master WHERE type = 'table' AND name = ?",
            ('statcast_data_2023',),
        )
        if cur.fetchone() is not None:
            # Execute the DELETE statement to remove all data from the table
            cur.execute('DELETE FROM statcast_data_2023')

            # Commit the changes to the database
            conn.commit()
    finally:
        # Close the cursor even if a statement failed
        cur.close()
finally:
    # Always release the database connection, even on error
    conn.close()

#We will now put all of our updated data into the table 'statcast_data_2023' in the '2023Statcast' database

In [25]:
# Connect to the SQLite database 2023Statcast
conn = sqlite3.connect('2023Statcast.db')

# Define the table name
table_name = 'statcast_data_2023'
try:
    # Append the data to the table if it already exists (it is created otherwise)
    data_cleaned.to_sql(table_name, conn, if_exists='append', index=False)
finally:
    # Close the connection even if the write fails
    conn.close()
