In [2]:
# Gather data to determine how often the winner wins by OVER 1.5 goals

import pandas as pd
import numpy as np

# Source page: hockey-reference game listing for the NHL 2023 season.
url = "https://www.hockey-reference.com/leagues/NHL_2023_games.html"
nhlHTML = pd.read_html(url)

# Work on the first table that pandas parsed out of the page.
df = nhlHTML[0]

# Swap every "." in the header labels for a space (so "G.1" becomes "G 1"),
# then give the goal and overtime columns descriptive names.
df.columns = df.columns.str.replace(".", " ", regex=False)
df.rename(
    columns={
        "G": "Visitor G",
        "G 1": "Home G",
        "Unnamed: 5": "OT/SO",
    },
    inplace=True,
)

# Rows with no overtime/shootout marker get the literal 'No'; a later cell
# filters on that value to select games that did not go to OT.
df.loc[df["OT/SO"].isna(), "OT/SO"] = "No"
df

Unnamed: 0,Date,Visitor,Visitor G,Home,Home G,OT/SO,Att,LOG,Notes
0,2022-10-07,San Jose Sharks,1.0,Nashville Predators,4.0,No,16648.0,2:43,"at (Prague, CZ)"
1,2022-10-08,Nashville Predators,3.0,San Jose Sharks,2.0,No,17023.0,2:33,"at (Prague, CZ)"
2,2022-10-11,Vegas Golden Knights,4.0,Los Angeles Kings,3.0,No,18230.0,2:31,
3,2022-10-11,Tampa Bay Lightning,1.0,New York Rangers,3.0,No,18006.0,2:21,
4,2022-10-12,Seattle Kraken,4.0,Anaheim Ducks,5.0,OT,17530.0,2:28,
...,...,...,...,...,...,...,...,...,...
1307,2023-04-13,Vegas Golden Knights,,Seattle Kraken,,No,,,
1308,2023-04-13,Detroit Red Wings,,Tampa Bay Lightning,,No,,,
1309,2023-04-13,New Jersey Devils,,Washington Capitals,,No,,,
1310,2023-04-14,Buffalo Sabres,,Columbus Blue Jackets,,No,,,


In [19]:
from io import StringIO

import requests

# Browser-like headers sent with the request.
# NOTE(review): presumably the site rejects the default client headers — confirm.
header = {
  "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
  "X-Requested-With": "XMLHttpRequest"
}


oddsURL = 'https://checkbestodds.com/hockey-odds/archive-nhl/?'

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors here instead of letting an error
# page reach the HTML parser.
r = requests.get(oddsURL, headers=header, timeout=30)
r.raise_for_status()

# Wrap the markup in StringIO: passing literal HTML text straight to
# read_html is deprecated in recent pandas releases.
oddsHTML = pd.read_html(StringIO(r.text))
oddsDf = oddsHTML[0]

# The first table is expected to have exactly four columns.
oddsDf.columns = ['game', 'home', 'tie?', 'away']

# Drop rows whose 'home' value equals 1.
# NOTE(review): presumably these are non-game separator/header rows — confirm
# against the page.
oddsDf = oddsDf.loc[oddsDf['home'] != 1]

oddsDf

Unnamed: 0,game,home,tie?,away
1,20:00 Nashville Predators - San Jose Sharks,1.97,4.70,3.54
3,20:00 San Jose Sharks - Nashville Predators,3.45,4.80,1.98
5,01:30 NY Rangers - Tampa Bay Lightning,2.38,4.45,2.78
6,04:00 Los Angeles Kings - Vegas Golden Knights,2.36,4.35,2.50
8,01:00 Carolina Hurricanes - Columbus Blue Jackets,1.62,5.05,4.50
...,...,...,...,...
1214,21:00 Seattle Kraken - Edmonton Oilers,2.85,4.75,2.14
1215,22:00 Philadelphia Flyers - Carolina Hurricanes,4.33,4.85,1.70
1216,23:00 Florida Panthers - New Jersey Devils,2.33,4.65,2.55
1218,00:00 Tampa Bay Lightning - Montreal Canadiens,1.52,5.60,4.75


In [75]:
# Keep only the columns needed for the goal-differential analysis and drop
# every row that has a missing value in any of them (in the displayed data
# these are the rows without scores yet).
#
# The explicit .copy() makes nhlData an independent frame: a later cell adds
# a 'Diff' column to it, and assigning into a frame derived from `df` without
# copying can raise SettingWithCopyWarning.

nhlData = (
    df[["Date", "Visitor", "Visitor G", "Home", "Home G", "OT/SO"]]
    .dropna()
    .copy()
)
nhlData

Unnamed: 0,Date,Visitor,Visitor G,Home,Home G,OT/SO
0,2022-10-07,San Jose Sharks,1.0,Nashville Predators,4.0,No
1,2022-10-08,Nashville Predators,3.0,San Jose Sharks,2.0,No
2,2022-10-11,Vegas Golden Knights,4.0,Los Angeles Kings,3.0,No
3,2022-10-11,Tampa Bay Lightning,1.0,New York Rangers,3.0,No
4,2022-10-12,Seattle Kraken,4.0,Anaheim Ducks,5.0,OT
...,...,...,...,...,...,...
1087,2023-03-16,Boston Bruins,3.0,Winnipeg Jets,0.0,No
1088,2023-03-17,Columbus Blue Jackets,4.0,Anaheim Ducks,7.0,No
1089,2023-03-17,Buffalo Sabres,2.0,Philadelphia Flyers,5.0,No
1090,2023-03-17,Carolina Hurricanes,2.0,Toronto Maple Leafs,5.0,No


In [76]:
# Absolute goal differential per game: |visitor goals - home goals|

nhlData['Diff'] = nhlData['Visitor G'].sub(nhlData['Home G']).abs()
nhlData

Unnamed: 0,Date,Visitor,Visitor G,Home,Home G,OT/SO,Diff
0,2022-10-07,San Jose Sharks,1.0,Nashville Predators,4.0,No,3.0
1,2022-10-08,Nashville Predators,3.0,San Jose Sharks,2.0,No,1.0
2,2022-10-11,Vegas Golden Knights,4.0,Los Angeles Kings,3.0,No,1.0
3,2022-10-11,Tampa Bay Lightning,1.0,New York Rangers,3.0,No,2.0
4,2022-10-12,Seattle Kraken,4.0,Anaheim Ducks,5.0,OT,1.0
...,...,...,...,...,...,...,...
1087,2023-03-16,Boston Bruins,3.0,Winnipeg Jets,0.0,No,3.0
1088,2023-03-17,Columbus Blue Jackets,4.0,Anaheim Ducks,7.0,No,3.0
1089,2023-03-17,Buffalo Sabres,2.0,Philadelphia Flyers,5.0,No,3.0
1090,2023-03-17,Carolina Hurricanes,2.0,Toronto Maple Leafs,5.0,No,3.0


In [78]:
# RESULT #

# Share of all games in nhlData decided by more than one goal. Scores are
# whole numbers, so Diff > 1 is the same as "OVER 1.5 goals".

result = nhlData['Diff'].gt(1).sum()
percent = result / len(nhlData) * 100
print(f"Percent of nhl games where the winner won by OVER 1.5 goals: {percent!s}")

Percent of nhl games where the winner won by OVER 1.5 goals: 59.43223443223443


In [82]:
# Same measure, restricted to games whose OT/SO value is 'No'
# (i.e. games with no overtime/shootout marker).

noOT = nhlData[nhlData['OT/SO'].eq('No')]

result = noOT['Diff'].gt(1).sum()
percent = result / len(noOT) * 100
print(f"Percent of nhl games that did not go into OT where the winner won by OVER 1.5 goals: {percent!s}")

Percent of nhl games that did not go into OT where the winner won by OVER 1.5 goals: 77.26190476190476
