<a href="https://colab.research.google.com/github/JColeman1550/hammer_points/blob/main/Hammer_Points.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import os
import shutil

import pandas as pd
from google.colab import drive

# Mount Google Drive so the season CSVs stored there are reachable from Colab.
drive.mount('/content/drive')


# Source CSVs, one per season. Note that the 2022 and 2023 files are named
# "_inning" while the earlier ones are named "_innings".
file_paths = [
    '/content/drive/MyDrive/Hammers/2018_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2019_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2020_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2021_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2022_3_inning.csv',
    '/content/drive/MyDrive/Hammers/2023_3_inning.csv'
]

# Create the working directory in Google Drive (no error if it already exists).
# NOTE(review): this path spells the Drive root "My Drive" (with a space) while
# the inputs above use "MyDrive" -- confirm both resolve to the same location.
working_directory = '/content/drive/My Drive/HammerData'
os.makedirs(working_directory, exist_ok=True)

# Copy every season file into the working directory. Unlike a shell `cp`,
# shutil.copy raises when a source file is missing, so a bad path stops the
# notebook here instead of being silently skipped.
for file_path in file_paths:
    file_name = os.path.basename(file_path)
    destination_path = os.path.join(working_directory, file_name)
    shutil.copy(file_path, destination_path)

# Make the working directory the current directory so that relative paths used
# later in the notebook (e.g. the exported CSV) resolve inside it.
os.chdir(working_directory)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [13]:
# Season CSVs to load. These are read straight from the "Hammers" folder on
# Drive (the same paths that the setup cell copies from).
hammer_files = [
    '/content/drive/MyDrive/Hammers/2018_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2019_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2020_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2021_3_innings.csv',
    '/content/drive/MyDrive/Hammers/2022_3_inning.csv',
    '/content/drive/MyDrive/Hammers/2023_3_inning.csv'
]

# Read every season into its own DataFrame, keeping them in file order.
data_list = [pd.read_csv(season_path) for season_path in hammer_files]

# Stack the per-season frames into one frame with a fresh 0..n-1 index.
combined_data = pd.concat(data_list, ignore_index=True)

In [None]:
# Show the combined DataFrame as this cell's output for a quick visual check.
combined_data

In [None]:
# Split each row's comma-separated 'events' string into individual event
# names. Rows whose 'events' value is missing contribute no events, and
# surrounding whitespace / empty tokens are ignored.
event_tokens = (
    combined_data['events']
    .dropna()
    .astype(str)
    .str.split(',')
    .explode()
    .str.strip()
)
event_tokens = event_tokens[event_tokens != '']

# Distinct event names in order of first appearance. The count columns are
# named after the *split* tokens, so every token that is counted below is
# guaranteed to have a column.
unique_events = event_tokens.unique().tolist()

# Count how often each event occurs per row in a single vectorised pass:
# one-hot encode every token, then add the indicators up per original row
# label. Reindexing restores rows without any event (all zeros) and puts the
# columns back into first-appearance order.
event_counts = (
    pd.get_dummies(event_tokens, dtype=int)
    .groupby(level=0)
    .sum()
    .reindex(index=combined_data.index, columns=unique_events, fill_value=0)
)

# Attach one count column per event type to the combined frame.
for event in unique_events:
    combined_data[event] = event_counts[event]

# Define the point system
point_system = {
    'double': 1,
    'triple': 1,
    'walk': 1,
    'hit_by_pitch': 1,
    'home_run': 2,
    'single': 0.5,
    'strikeout': -1,
    'strikeout_double_play': -1
}

# Calculate "Hammer Points" for each row: the weighted sum of the event counts,
# using only the events that have an entry in the point system.
scored_events = [event for event in unique_events if event in point_system]
event_weights = pd.Series(
    {event: point_system[event] for event in scored_events}, dtype=float
)
combined_data['Hammer Points'] = (
    event_counts[scored_events].mul(event_weights, axis=1).sum(axis=1)
)

# Sort the DataFrame in ascending order of Hammer Points (rows with fewer points are better)
combined_data = combined_data.sort_values(by='Hammer Points', ascending=True)

# Save the DataFrame to a new CSV file (relative to the current working directory)
combined_data.to_csv('HammerPoints.csv', index=False)

# Display the top rows with the least number of Hammer Points
print(combined_data.head(10))

In [None]:
# Create a function to calculate and display the total Hammer Points for a specific pitcher
def display_total_hammer_points(pitcher_name, seasons=5, data=None):
    """Print a pitcher's Hammer Points averaged per season.

    Sums the 'Hammer Points' of every row whose 'player_name' equals
    ``pitcher_name`` exactly and divides that total by ``seasons``.

    Args:
        pitcher_name: Name to look up; compared for exact equality with the
            'player_name' column.
        seasons: Number of seasons the total is divided by. Defaults to 5,
            the divisor this notebook uses for its per-season figures.
        data: DataFrame with 'player_name' and 'Hammer Points' columns.
            Defaults to the notebook-level ``combined_data`` frame.

    Raises:
        ValueError: If ``seasons`` is not a positive number.

    Returns:
        None. The result (or a "no data" message) is printed.
    """
    if seasons <= 0:
        raise ValueError("seasons must be a positive number")

    if data is None:
        # Fall back to the frame built by the earlier notebook cells.
        data = combined_data

    pitcher_data = data[data['player_name'] == pitcher_name]

    if pitcher_data.empty:
        print(f"No data found for pitcher '{pitcher_name}'.")
        return

    total_points = pitcher_data['Hammer Points'].sum()  # Sum over all matching rows
    total_points_per_season = total_points / seasons
    # The ':.2f' format already rounds to two decimals, so no separate round() is needed.
    print(f"Total Hammer Points Per Season (2018-23) '{pitcher_name}': {total_points_per_season:.2f}")

# Interactive lookup: keep prompting for a pitcher (entered as "Last, First")
# and print that pitcher's per-season Hammer Points until 'exit' is typed.
keep_asking = True
while keep_asking:
    pitcher_name = input("\nEnter the name of the pitcher (or 'exit' to quit): ").strip()

    # The exit keyword is matched case-insensitively and is never looked up.
    keep_asking = pitcher_name.lower() != 'exit'
    if keep_asking:
        display_total_hammer_points(pitcher_name)

In [18]:
#Packages for scatterplot

%%capture
!pip install plotly
import plotly.express as px
import matplotlib.pyplot as plt

In [20]:
# Average Hammer Points per season for every pitcher: total over all rows of
# that pitcher, divided by the 5 seasons the notebook treats as full seasons.
# reset_index() turns the pitcher names back into a regular column.
pitcher_totals = (
    combined_data.groupby('player_name')['Hammer Points'].sum()
    .div(5)
    .reset_index()
)

# Interactive scatterplot using Plotly: one marker per pitcher. The title uses
# the same 2018-2023 span as the loaded season files and the x-axis label.
fig = px.scatter(pitcher_totals, x='Hammer Points', y='player_name', title='Hammer Points Per Season (2018-2023)',
                 hover_name='player_name', hover_data=['Hammer Points'])

# Hover template: show the pitcher name and the per-season average directly.
fig.update_traces(
    hovertemplate='<br>'.join([
        'Pitcher: %{hovertext}',
        'Avg HP: %{x:.2f}',  # Display Hammer Points directly
    ])
)

# Label the x-axis; hide the y-axis tick labels and title because the pitcher
# names are available through the hover text instead.
fig.update_xaxes(showticklabels=True, title='HP Per Season (2018-23)')
fig.update_yaxes(showticklabels=False,title=None)

fig.show()

In [22]:
# League-wide figure: the mean of the 'Hammer Points' column over every row of
# the combined data.
hammer_points_column = combined_data['Hammer Points']
average_hammer_points = hammer_points_column.mean()

print(f"League average Hammer Points (2018-23): {average_hammer_points:.2f}")

League average Hammer Points (2018-23): 0.09
