# Explore the Transformed Play by Play Data

## Setup - Paths - Dependencies

In [19]:
from config import recent_play_by_play, recent_clean_db

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Load Play by Play data
# `recent_play_by_play` is imported from the project's `config` module and is
# handed to read_csv as the location of the play-by-play CSV.
# low_memory=False makes pandas parse the file in one pass instead of in
# chunks, so each column's dtype is inferred from all of its rows at once.
pbp_raw_df = pd.read_csv(recent_play_by_play, low_memory=False)

# Uncomment to preview the first rows of the raw frame:
# pbp_raw_df.head()


In [20]:
# Rename to df for convenience (this is an alias of the raw frame, not a copy)
df = pbp_raw_df

# Keep only the two event types this analysis needs: faceoffs and goals
faceoff_events = df[df['Event_type'] == 'Faceoff']
goal_events = df[df['Event_type'] == 'Goal']

event_cols = ['Game_ID', 'Period', 'Time', 'Primary_team']

# Pair every faceoff with every goal of the same game and period. Each goal's
# row label travels along as `goal_row` so a goal can still be identified
# after the merge has repeated it once per faceoff in the period.
merged_df = pd.merge(
    faceoff_events[event_cols],
    goal_events[event_cols].rename_axis('goal_row').reset_index(),
    on=['Game_ID', 'Period'],
    suffixes=('_faceoff', '_goal'),
)

# Elapsed time from the faceoff to the goal.
# NOTE(review): the thresholds below treat `Time` as seconds -- confirm
# against the transform step that produced the CSV.
merged_df['time_diff'] = merged_df['Time_goal'] - merged_df['Time_faceoff']

# Keep only faceoffs that precede the goal, then keep a single row per goal:
# the faceoff closest to it. Without this step a goal preceded by two
# faceoffs inside the window would be counted once per faceoff.
merged_df = (
    merged_df[merged_df['time_diff'] > 0]
    .sort_values('time_diff')
    .drop_duplicates(subset='goal_row', keep='first')
    .drop(columns='goal_row')
    .sort_index()  # restore the merge order after the helper sort
)

# Count how often a goal is scored within 10 seconds and 5 seconds of a faceoff
goals_within_10s = (merged_df['time_diff'] <= 10).sum()
goals_within_5s = (merged_df['time_diff'] <= 5).sum()

# Count the number of times each team has scored a goal within 5 seconds of a faceoff
teams_scoring_within_5s = (
    merged_df.loc[merged_df['time_diff'] <= 5, 'Primary_team_goal'].value_counts()
)

# To inspect the results, make this the last line of the cell:
# goals_within_10s, goals_within_5s, teams_scoring_within_5s


## Data Exploration

### Faceoff Danger
- Table of how many times each team has scored within 3, 5, 7, and 10 seconds of a faceoff

In [21]:
# Build the per-team "faceoff danger" table: for every goal, find the faceoff
# that most recently preceded it in the same game and period, then count per
# scoring team how many goals came within each time window of that faceoff.
event_cols = ['Game_ID', 'Period', 'Time', 'Primary_team']
windows_s = (3, 5, 7, 10)  # window lengths, in the same unit as `Time`

# Pair every faceoff with every goal of the same game and period. Each goal's
# row label is carried along as `goal_row` so it can be de-duplicated below.
faceoff_goal_pairs = pd.merge(
    faceoff_events[event_cols],
    goal_events[event_cols].rename_axis('goal_row').reset_index(),
    on=['Game_ID', 'Period'],
    suffixes=('_faceoff', '_goal'),
)

# One row per goal: the closest faceoff that happened strictly before it.
# Counting raw pairs instead would count a goal once for every earlier
# faceoff that falls inside the window. `.assign` builds a new frame, so no
# column is written onto a filtered slice.
merged_df = (
    faceoff_goal_pairs
    .assign(time_diff=lambda pairs: pairs['Time_goal'] - pairs['Time_faceoff'])
    .loc[lambda pairs: pairs['time_diff'] > 0]
    .sort_values('time_diff')
    .drop_duplicates(subset='goal_row', keep='first')
    .drop(columns='goal_row')
    .sort_index()  # restore the merge order after the helper sort
)

# Count goals within specific time frames
goals_within_10s = (merged_df['time_diff'] <= 10).sum()
goals_within_5s = (merged_df['time_diff'] <= 5).sum()

# Count goals per team in each window: one boolean column per window,
# summed within each scoring team.
window_flags = pd.DataFrame(
    {f'within_{w}s': merged_df['time_diff'] <= w for w in windows_s}
)
teams_goal_counts = (
    window_flags
    .groupby(merged_df['Primary_team_goal'])
    .sum()
    .reset_index()
)

# Total goals scored by each team (all goals, not only those near a faceoff)
total_goals_per_team = (
    goal_events['Primary_team']
    .value_counts()
    .rename_axis('Primary_team_goal')
    .reset_index(name='Total_Goals')
)

# Attach the totals; a left merge keeps exactly the teams counted above
teams_goal_counts = teams_goal_counts.merge(
    total_goals_per_team, on='Primary_team_goal', how='left'
)



In [22]:
# Preview the first five rows of the per-team goal-after-faceoff counts.
teams_goal_counts.head()

Unnamed: 0,Primary_team_goal,within_3s,within_5s,within_7s,within_10s,Total_Goals
0,Air Force,0,2,4,8,55
1,Alaska,1,4,6,9,54
2,Alaska Anchorage,0,0,1,2,55
3,American Intl,0,1,1,4,57
4,Arizona State,0,1,1,2,90


In [23]:
### Calculate each window's share of the team's total goals
# The values are plain ratios (count / Total_Goals); they are not multiplied by 100.
goal_totals = teams_goal_counts['Total_Goals']

# (sic) `wintin_3s_pct` keeps its existing spelling so any other cell that
# refers to it by name keeps working.
wintin_3s_pct = teams_goal_counts['within_3s'].div(goal_totals)
within_5s_pct = teams_goal_counts['within_5s'].div(goal_totals)
within_7s_pct = teams_goal_counts['within_7s'].div(goal_totals)
within_10s_pct = teams_goal_counts['within_10s'].div(goal_totals)

teams_goal_counts = teams_goal_counts.assign(
    within_3s_pct=wintin_3s_pct,
    within_5s_pct=within_5s_pct,
    within_7s_pct=within_7s_pct,
    within_10s_pct=within_10s_pct,
)

# Rearrange columns: team, total, then each count followed by its ratio
column_order = ['Primary_team_goal', 'Total_Goals']
for window in ('3s', '5s', '7s', '10s'):
    column_order += [f'within_{window}', f'within_{window}_pct']

teams_goal_counts = teams_goal_counts[column_order]
teams_goal_counts.head()

Unnamed: 0,Primary_team_goal,Total_Goals,within_3s,within_3s_pct,within_5s,within_5s_pct,within_7s,within_7s_pct,within_10s,within_10s_pct
0,Air Force,55,0,0.0,2,0.036364,4,0.072727,8,0.145455
1,Alaska,54,1,0.018519,4,0.074074,6,0.111111,9,0.166667
2,Alaska Anchorage,55,0,0.0,0,0.0,1,0.018182,2,0.036364
3,American Intl,57,0,0.0,1,0.017544,1,0.017544,4,0.070175
4,Arizona State,90,0,0.0,1,0.011111,1,0.011111,2,0.022222


In [24]:
### Top teams for each faceoff window (5 s, 3 s, 7 s, 10 s)
# A notebook cell renders only its last expression, so the earlier rankings
# are shown with explicit display() calls. `display` is provided by the
# IPython kernel, so no import is needed inside the notebook.
top_n = 10

within_5s = teams_goal_counts.sort_values(by='within_5s', ascending=False)
display(within_5s.head(top_n))

# `teams_goal_counts` itself is left ordered by `within_3s`, as it was before,
# in case later cells rely on that ordering.
teams_goal_counts = teams_goal_counts.sort_values(by='within_3s', ascending=False)
display(teams_goal_counts.head(top_n))

within_7s = teams_goal_counts.sort_values(by='within_7s', ascending=False)
display(within_7s.head(top_n))

within_10s = teams_goal_counts.sort_values(by='within_10s', ascending=False)
within_10s.head(top_n)

Unnamed: 0,Primary_team_goal,Total_Goals,within_3s,within_3s_pct,within_5s,within_5s_pct,within_7s,within_7s_pct,within_10s,within_10s_pct
35,Michigan State,94,2,0.021277,6,0.06383,8,0.085106,18,0.191489
64,Wisconsin,79,2,0.025316,4,0.050633,11,0.139241,15,0.189873
27,Long Island,78,3,0.038462,4,0.051282,10,0.128205,14,0.179487
37,Minnesota,111,1,0.009009,2,0.018018,4,0.036036,13,0.117117
46,Ohio State,82,1,0.012195,2,0.02439,5,0.060976,12,0.146341
55,Sacred Heart,91,5,0.054945,6,0.065934,6,0.065934,11,0.120879
54,Robert Morris,86,1,0.011628,6,0.069767,8,0.093023,11,0.127907
15,Clarkson,73,0,0.0,5,0.068493,5,0.068493,11,0.150685
63,Western Michigan,80,1,0.0125,3,0.0375,5,0.0625,10,0.125
5,Army,73,0,0.0,2,0.027397,4,0.054795,9,0.123288
