In [1]:
import pandas as pd 
import numpy as np 
import datetime
import sqlite3 
import pybaseball as pyb #Pybaseball is where we will be pulling data from

In [3]:
# Work out the previous calendar day, relative to the local system clock
today = datetime.date.today()
yesterday = today + datetime.timedelta(days=-1)

In [None]:
# Render yesterday's date as a 'YYYY-MM-DD' string so it can be handed to the statcast function
yesterday_str = f"{yesterday:%Y-%m-%d}"
# Pull yesterday's data through pybaseball's statcast function
data = pyb.statcast(
    start_dt=yesterday_str,
    end_dt=None,
    team=None,
    verbose=True,
    parallel=True,
)

In [73]:
def add_to_savant(df):
    """
    Return a copy of a Statcast pitch-level frame with derived columns added.

    The input frame is left untouched, so calling this function repeatedly on
    the same raw data always yields the same result.

    Parameters:
        df (pd.DataFrame): Statcast data containing at least the columns
            'pfx_x', 'pfx_z', 'launch_speed', 'launch_angle', 'vy0', 'vz0',
            'ay' and 'az'.

    Returns:
        pd.DataFrame: a new DataFrame where
            - 'pfx_x' is multiplied by -12 and 'pfx_z' by 12 (feet -> inches,
              with the horizontal sign flipped),
            - 'hard_hit' is 1 when launch_speed is 95 or higher, else 0,
            - 'sweet_spot' is 1 when launch_angle is between 8 and 32
              (inclusive), else 0,
            - 'VAA' is the vertical approach angle in degrees, rounded to two
              decimals.
        Missing launch_speed / launch_angle values stay missing; they simply
        score 0 in 'hard_hit' / 'sweet_spot'.

    Raises:
        KeyError: if one of the required columns is absent.
    """
    hard_hit_min_mph = 95
    sweet_spot_min_deg, sweet_spot_max_deg = 8, 32
    # Statcast reports the velocity/acceleration components at y = 50 ft;
    # the front edge of home plate sits 17 inches from its back point.
    y_measured_ft = 50.0
    y_plate_ft = 17 / 12

    # Work on a copy: mutating the caller's frame would re-apply the unit
    # conversions every time the calling cell is re-run.
    out = df.copy()

    # Movement in inches (vectorised instead of a per-row lambda)
    out['pfx_x'] = out['pfx_x'] * -12
    out['pfx_z'] = out['pfx_z'] * 12

    # Hard-hit flag. The raw launch_speed column is NOT overwritten, so pitches
    # without a batted ball keep a missing exit velocity instead of a fake 0.
    exit_velo = pd.to_numeric(out['launch_speed'], errors='coerce')
    out['hard_hit'] = exit_velo.ge(hard_hit_min_mph).fillna(False).astype(int)

    # Sweet-spot flag, same treatment for launch_angle
    launch_angle = pd.to_numeric(out['launch_angle'], errors='coerce')
    in_sweet_spot = launch_angle.between(sweet_spot_min_deg, sweet_spot_max_deg)
    out['sweet_spot'] = in_sweet_spot.fillna(False).astype(int)

    # Vertical approach angle from constant-acceleration kinematics:
    #   vy_f^2 = vy0^2 - 2 * ay * (y0 - yf)   (velocity toward the plate at yf)
    #   t      = (vy_f - vy0) / ay            (flight time from y0 to yf)
    #   vz_f   = vz0 + az * t                 (vertical velocity at yf)
    vy_f = -np.sqrt(out['vy0'] ** 2 - 2 * out['ay'] * (y_measured_ft - y_plate_ft))
    t = (vy_f - out['vy0']) / out['ay']
    vz_f = out['vz0'] + out['az'] * t
    out['VAA'] = np.round(-1 * np.arctan(vz_f / vy_f) * (180 / np.pi), 2)

    return out

In [40]:
# Mapping of raw Statcast column names (keys) to the friendlier names used
# downstream (values). DataFrame.rename silently skips keys that are not
# present, so every key must match the raw column name exactly.
# NOTE(review): the movement keys were previously misspelled ('pf_x_z' /
# 'pf_x_x') and therefore never applied; a database table populated under the
# old, unrenamed 'pfx_z' / 'pfx_x' columns may need migrating before appending.
column_names_dict = {
    'release_speed': 'velo',
    'release_pos_x': 'release_side',
    'release_pos_z': 'release_height',
    'game_date': 'date',
    'pfx_z': 'ind_vert_break',
    'pfx_x': 'horizontal_break',
    'launch_speed': 'exit_velo',

    # Add more column names as needed
}

In [46]:
def rename_statcast_columns(data, column_names_dict):
    """
    Give Statcast columns their project-specific names.

    Parameters:
        data (pd.DataFrame): Statcast data as pulled from Savant
        column_names_dict (dict): maps each current column name (key) to the
                                  name it should carry afterwards (value);
                                  keys that are not columns of `data` are
                                  ignored

    Returns:
        pd.DataFrame: a new DataFrame with the renamed columns; `data` itself
                      is not modified
    """
    renamed = data.rename(mapper=column_names_dict, axis='columns')
    return renamed

In [None]:
# Run the day's pybaseball download through the 'add_to_savant' function.
# A copy is handed over so the raw pull in `data` stays untouched and this
# cell can be re-run without compounding the transformations.
modified_data = add_to_savant(data.copy())

In [None]:
# Take the frame produced by 'add_to_savant' (modified_data, which carries the new columns)
# and pass it through the column-renaming function
data_cleaned = rename_statcast_columns(
    data=modified_data,
    column_names_dict=column_names_dict,
)

In [None]:
# Connect to the SQLite database 2023Statcast
conn = sqlite3.connect('2023Statcast.db')

# Define the table name
table_name = 'statcast_data_2023'

try:
    # If the table already exists the rows are appended to it
    data_cleaned.to_sql(table_name, conn, if_exists='append', index=False)
finally:
    # Close the connection even when the write fails, so the database
    # handle is never left open
    conn.close()
