In [None]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import numpy as np

In [None]:
shots = pd.read_csv('../data/shots_2007-2023.csv')

In [None]:
shots.event.value_counts()

In [None]:
shots.head()

In [None]:
just_shots = shots.loc[shots['event'] == 'SHOT'].reset_index(drop = True)

In [None]:
reg_szn_shots = just_shots.loc[just_shots['isPlayoffGame'] == 0]

In [None]:
reg_szn_shots.head()

In [None]:
shots.shape

In [None]:
# Normalise the dotted team abbreviations to their three-letter forms in one
# step, so the clean-up is a single re-runnable cell.
#
# regex=False is passed explicitly: '.' is a regex wildcard, and the default
# value of `regex` for Series.str.replace is not the same across pandas
# versions, so relying on the default makes the match version-dependent.
#
# NOTE(review): just_shots / reg_szn_shots were sliced from `shots` before this
# step and therefore keep the original codes; homeTeamCode / awayTeamCode are
# not touched here either -- confirm whether they need the same treatment.
TEAM_CODE_FIXES = {'S.J': 'SJS', 'T.B': 'TBL', 'N.J': 'NJD', 'L.A': 'LAK'}

team_codes = shots['teamCode']
for dotted, fixed in TEAM_CODE_FIXES.items():
    team_codes = team_codes.str.replace(dotted, fixed, regex=False)
shots['teamCode'] = team_codes

How many goals are there per season/playoffs?

In [None]:
goals = shots.loc[shots['event'] == 'GOAL'].reset_index(drop = True)

In [None]:
goals.head()

In [None]:
reg_szn_goals = goals.loc[goals['isPlayoffGame'] == 0]

In [None]:
goals_per_season = reg_szn_goals.groupby(['season'])['event'].value_counts().reset_index()

In [None]:
goals_per_season

In [None]:
# Line chart of the regular-season goal count for each season.
ax = goals_per_season.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Goals Scored Per NHL Regular Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Goals Scored Per Regular Season.png', bbox_inches='tight')
plt.show()

NOTES: 

    1) 2012: each team played only 48 games due to a lockout
    
    2) 2017: NHL expansion added VGK (Vegas Golden Knights)
    
    3) 2019: due to Covid-19, teams played between 68 and 71 games (the playoffs included a Qualifying Round)
    

In [None]:
po_goals = goals.loc[goals['isPlayoffGame'] == 1]

In [None]:
po_goals.loc[po_goals['season'] == 2023]

In [None]:
po_goals_zone = po_goals.groupby(['season', 'homeTeamCode', 'awayTeamCode'])['event'].value_counts().reset_index()

In [None]:
po_goals_zone.loc[po_goals_zone['homeTeamCode'] == 'NSH'].sort_values(by = 'count', ascending = False)

In [None]:
po_goals_zone.loc[po_goals_zone['awayTeamCode'] == 'NSH'].sort_values(by = 'count', ascending = False)

In [None]:
goals_per_po = po_goals.groupby(['season'])['event'].value_counts().reset_index()

In [None]:
goals_per_po

In [None]:
# Line chart of the playoff goal count for each season.
ax = goals_per_po.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Goals Scored Per Stanley Cup Playoff 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Goals Scored Per SCP.png', bbox_inches='tight')
plt.show()

In [None]:
most_po_goals = po_goals.loc[po_goals['season'] == 2019]

In [None]:
most_po_goals.groupby(['teamCode'])['event'].value_counts().reset_index()

In [None]:
po_goal_inv = most_po_goals[['homeTeamCode', 'awayTeamCode', 'teamCode', 'event', 'isPlayoffGame']]

In [None]:
po_goal_inv.loc[po_goal_inv['teamCode'] == 'ARI']

How many goals (per season) has each team scored?

In [None]:
reg_szn_goals_by_team = reg_szn_goals.groupby(['teamCode'])['event'].value_counts().reset_index()

In [None]:
reg_szn_goals_order = reg_szn_goals_by_team.sort_values(by = 'count', ascending = False).reset_index(drop = True)

In [None]:
reg_szn_goals_order

In [None]:
# Bar chart of the ten highest regular-season goal totals by team
# (reg_szn_goals_order is sorted in descending order of 'count').
top_ten_teams = reg_szn_goals_order.head(10)
ax = top_ten_teams.plot(x='teamCode', y='count', kind='bar')
ax.set_xlabel("Team Abbreviations", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Teams With the Most Goals Scored From 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Teams With Most Goals.png', bbox_inches='tight')
plt.show()

In [None]:
# Bar chart of the ten lowest regular-season goal totals by team
# (reg_szn_goals_order is sorted descending, so tail(10) is the bottom ten).
ax = reg_szn_goals_order.tail(10).plot(x='teamCode', y='count', kind='bar')
# The x axis shows teamCode values, i.e. abbreviations rather than full names;
# the label now matches the companion "most goals" chart.
ax.set_xlabel("Team Abbreviations", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Teams With the Least Goals Scored From 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Teams With Least Goals.png', bbox_inches='tight')
# plt.show must be called; a bare `plt.show` only references the function.
plt.show()

What is the overall scoring percentage across the whole dataset? What does it look like broken down by year? In the playoffs?

In [None]:
# Number of rows whose event is 'SHOT' (just_shots was filtered on that value).
shot_num = len(just_shots)
print(shot_num)

In [None]:
# Number of rows whose event is 'GOAL', regular season and playoffs combined.
goals_num = len(goals)
print(goals_num)

In [None]:
# Overall scoring percentage: goals as a share of SHOT + GOAL events.
# just_shots holds only event == 'SHOT' rows and goals only event == 'GOAL'
# rows, so the two sets are disjoint. Goals therefore have to be added back
# into the denominator; dividing by shot_num alone gives a goals-to-non-goal
# ratio rather than a percentage of shots.
# NOTE(review): rows with any other event value are excluded from the
# denominator -- confirm that is the intended definition.
(goals_num / (shot_num + goals_num)) * 100

In [None]:
po_just_shots = just_shots.loc[just_shots["isPlayoffGame"] == 1]

In [None]:
po_js_num = po_just_shots.shape[0]

In [None]:
print(po_js_num)

In [None]:
po_just_goals = goals.loc[goals["isPlayoffGame"] == 1]

In [None]:
po_jg_num = po_just_goals.shape[0]

In [None]:
print(po_jg_num)

In [None]:
# Playoff scoring percentage: goals as a share of SHOT + GOAL events.
# po_just_shots (event == 'SHOT') and po_just_goals (event == 'GOAL') are
# disjoint, so goals are added back into the denominator; dividing by
# po_js_num alone gives a goals-to-non-goal ratio rather than a percentage.
# NOTE(review): rows with any other event value are excluded from the
# denominator -- confirm that is the intended definition.
(po_jg_num / (po_js_num + po_jg_num)) * 100

In [None]:
player_goals_rs = reg_szn_goals.groupby(['shooterName'])['event'].value_counts().reset_index()

In [None]:
player_goals_rs

In [None]:
player_goals_rs = player_goals_rs.sort_values(by = 'count', ascending = False).reset_index(drop=True)

In [None]:
player_goals_rs.head(5)

In [None]:
# Bar chart of the ten players with the most regular-season goals
# (player_goals_rs is sorted in descending order of 'count').
ax = player_goals_rs.head(10).plot(x='shooterName', y='count', kind='bar')
ax.set_xlabel("Player Name", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Players With the Most Goals Scored From 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Active Players With Most Goals.png', bbox_inches='tight')
# plt.show must be called; a bare `plt.show` only references the function.
plt.show()

In [None]:
po_goals_player = po_goals.groupby(['shooterName'])['event'].value_counts().reset_index()

In [None]:
player_goals_po = po_goals_player.sort_values(by = 'count', ascending = False).reset_index(drop=True)

In [None]:
player_goals_po.head(5)

In [None]:
# Bar chart of the ten players with the most playoff goals
# (player_goals_po is sorted in descending order of 'count').
ax = player_goals_po.head(10).plot(x='shooterName', y='count', kind='bar')
ax.set_xlabel("Player Name", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Players With the Most Playoff Goals Scored From 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Active Players With Most Playoff Goals.png', bbox_inches='tight')
# plt.show must be called; a bare `plt.show` only references the function.
plt.show()

Trendline snippet (`x` and `y` are placeholders for the plotted series):

```python
z = np.polyfit(x, y, 1)
p = np.poly1d(z)
plt.plot(x, p(x), color='red', label='Trendline')
```


In [None]:
Ovi = reg_szn_goals.loc[reg_szn_goals['shooterName'] == 'Alex Ovechkin']

In [None]:
Ovi.head(3)

In [None]:
Ovi.event.value_counts()

In [None]:
Ovi_goals_szn = Ovi.groupby(['season'])['event'].value_counts().reset_index()

In [None]:
Ovi_goals_szn

In [None]:
# Line chart of Ovechkin's regular-season goal count for each season.
ax = Ovi_goals_szn.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Ovechkin Goals Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Ovi Goals.png', bbox_inches='tight')
plt.show()

In [None]:
Ovi_po = po_goals.loc[po_goals['shooterName'] == 'Alex Ovechkin']

In [None]:
Ovi_po.head(3)

In [None]:
Ovi_po_goals = Ovi_po.groupby(['season'])['event'].value_counts().reset_index()

In [None]:
Ovi_po_goals

In [None]:
# Line chart of Ovechkin's playoff goal count for each season.
ax = Ovi_po_goals.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Goals Scored", fontsize=10)
ax.set_title("Ovechkin Goals Per Stanley Cup Playoffs 2007-2021")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Ovi Playoff Goals.png', bbox_inches='tight')
plt.show()

In [None]:
# Keep only the columns used for the per-team, per-season goal tallies below.
reg = reg_szn_goals.loc[:, ['season', 'teamCode', 'event']]
reg.head()

In [None]:
t_s_g = reg.groupby(['season'])['teamCode'].value_counts().reset_index()

In [None]:
avg_g_year = t_s_g.groupby(['teamCode'])['count'].mean().reset_index()

In [None]:
mean_team_season = t_s_g.groupby(['season', 'teamCode'])['count'].mean().reset_index()

In [None]:
mean_team_season.head()

In [None]:
mean_team_season

In [None]:
mts = mean_team_season.sort_values(by = 'count', ascending = False)

In [None]:
mts.head(5)

In [None]:
avg_g_year.sort_values(by = 'count', ascending = False)

In [None]:
reg_szn_goals.groupby(['teamCode'])['event'].value_counts().reset_index()

In [None]:
# Tally regular-season goals by shot type within each season.
# NOTE(review): the source frame is reg_szn_goals, so these counts are goals
# scored with each shot type, not shot attempts.
st_count = (
    reg_szn_goals
    .groupby(['season'])['shotType']
    .value_counts()
    .reset_index()
)

In [None]:
st_count.head()

In [None]:
esst = reg_szn_goals[['event', 'season', 'shotType']]

In [None]:
type_cps = esst.groupby(['season'])['shotType'].value_counts().reset_index()

In [None]:
type_cps.tail(14)

In [None]:
most_per_year = type_cps.groupby(['season'])['count'].max().reset_index()

In [None]:
most_per_year

In [None]:
# Attach the shot type to each season's maximum count.
# The join must match on season as well as count: joining on 'count' alone
# also pairs a season's maximum with any row from a *different* season that
# happens to have the same count, producing spurious extra rows.
# The season columns are renamed up front so the result keeps the same
# columns as before (season_x, count, season_y, shotType), which later cells
# rely on.
count_type = pd.merge(
    most_per_year.rename(columns={'season': 'season_x'}),
    type_cps.rename(columns={'season': 'season_y'}),
    left_on=['season_x', 'count'],
    right_on=['season_y', 'count'],
    how='left',
)
count_type.head(19)

In [None]:
# Line chart of the largest per-shot-type count in each season.
# NOTE(review): count_type derives from reg_szn_goals, so the y-values are
# goal counts; the "Shot Attempts" wording may need revisiting.
ax = count_type.plot(x='season_x', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shot Attempts", fontsize=10)
ax.set_title("Most Shot Attempts Each Year")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Most Shot Attempts.png', bbox_inches='tight')
plt.show()

In [None]:
WRIST = st_count.loc[st_count['shotType'] == 'WRIST']

In [None]:
# Line chart of the per-season count for the 'WRIST' shot type.
ax = WRIST.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shot Attempts", fontsize=10)
ax.set_title("Wrist Shot Attempts Each Year")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Wrist Shot Attempts.png', bbox_inches='tight')
plt.show()

In [None]:
SLAP = st_count.loc[st_count['shotType'] == 'SLAP']

In [None]:
# Line chart of the per-season count for the 'SLAP' shot type.
ax = SLAP.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shot Attempts", fontsize=10)
ax.set_title("Slap Shots Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Slap Shots Per Year.png', bbox_inches='tight')
plt.show()

In [None]:
WRAP = st_count.loc[st_count['shotType'] == 'WRAP']

In [None]:
# Line chart of the per-season count for the 'WRAP' shot type.
ax = WRAP.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shot Attempts", fontsize=10)
ax.set_title("Wrap Around Attempts Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Wrap Around Attempts.png', bbox_inches='tight')
plt.show()

In [None]:
SNAP = st_count.loc[st_count['shotType'] == 'SNAP']

In [None]:
# Line chart of the per-season count for the 'SNAP' shot type.
ax = SNAP.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shot Attempts", fontsize=10)
ax.set_title("Snap Shots Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Snap Shots Attempts.png', bbox_inches='tight')
plt.show()

In [None]:
DEFL = st_count.loc[st_count['shotType'] == 'DEFL']

In [None]:
# Line chart of the per-season count for the 'DEFL' shot type.
ax = DEFL.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shots Deflected", fontsize=10)
ax.set_title("Deflected Shots Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Deflected Shots.png', bbox_inches='tight')
plt.show()

In [None]:
TIP = st_count.loc[st_count['shotType'] == 'TIP']

In [None]:
# Line chart of the per-season count for the 'TIP' shot type.
ax = TIP.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Shots Tipped", fontsize=10)
ax.set_title("Tipped Shots Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Tip Time.png', bbox_inches='tight')
plt.show()

In [None]:
BACK = st_count.loc[st_count['shotType'] == 'BACK']

In [None]:
# Line chart of the per-season count for the 'BACK' shot type.
ax = BACK.plot(x='season', y='count', kind='line')
ax.set_xlabel("Season", fontsize=10)
ax.set_ylabel("Backhand Shots", fontsize=10)
ax.set_title("Backhand Shots Per Season 2007-2023")
# An empty label list with frameon=False leaves no visible legend.
ax.legend("", frameon=False)
# Uncomment to export the figure:
#plt.savefig('Backhand.png', bbox_inches='tight')
plt.show()