# Load necessary packages

In [9]:
import urllib.request, json 
import pandas as pd
from datetime import datetime, timezone
import time
from dateutil.parser import parse

# Find all the game ids for games being played today

In [10]:
# Read the clock once so the year, month and day all come from the same instant
# (three separate reads could straddle midnight and mix two different dates).
# The unpadded "Y-M-D" form is kept because todays_date also names the CSV outputs.
today = datetime.today()
todays_date = f"{today.year}-{today.month}-{today.day}"
games_links = f"https://statsapi.web.nhl.com/api/v1/schedule?startDate={todays_date}&endDate={todays_date}"
#games_links = f"https://statsapi.web.nhl.com/api/v1/schedule?startDate=2020-01-02&endDate=2020-01-02"
with urllib.request.urlopen(games_links) as url:
    dates = json.loads(url.read().decode())
# The schedule was requested for a single day, so every game sits under dates["dates"][0].
# When totalGames is 0 the comprehension never touches dates["dates"] and the list is empty.
num_of_games = dates["totalGames"]
games_id_list = [dates["dates"][0]["games"][i]["gamePk"] for i in range(num_of_games)]
#games_id_list.sort() #Need to order the game id list so the start times are in order.
print(f"Number of Games on {todays_date}:", len(games_id_list))
print("Game Ids: ", games_id_list)

Number of Games on 2020-1-7: 12
Game Ids:  [2019020663, 2019020664, 2019020665, 2019020667, 2019020668, 2019020666, 2019020669, 2019020670, 2019020671, 2019020672, 2019020673, 2019020674]


# Find the difference in seconds between the different start times

In [None]:
#Use this to only look at select games
#games_id_list = [2019020650,2019020651,2019020652]
#games_id_list = games_id_list[:8]
#print(games_id_list)

In [11]:
# Look up the scheduled start time of every game, whatever the number of games, so that
# game_start_dict always exists (previously it was undefined when there was 0 or 1 game).
game_start_dict = {}
for game_id in games_id_list:
    with urllib.request.urlopen(f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live") as url:
        data = json.loads(url.read().decode())
    game_start_dict[str(game_id)] = data["gameData"]["datetime"]["dateTime"]

# Solution to sorting a dict found here: https://stackoverflow.com/questions/613183/how-do-i-sort-a-dictionary-by-value
# Sort once, by the parsed datetime, so the game order and the gaps below are derived
# from the same ordering. sorted() is stable: games with equal start times keep schedule order.
game_start_dict = {k : v for k, v in sorted(game_start_dict.items(), key=lambda item: parse(item[1]))}
start_times = [parse(start_time) for start_time in game_start_dict.values()]

# Seconds between each game's start and the next one's; the trailing 0 is the entry for
# the last game, which has no successor. With 0 or 1 game this is just [0].
delta_seconds_start_times = [
    (later - earlier).total_seconds() for earlier, later in zip(start_times, start_times[1:])
] + [0]
games_id_list = [int(k) for k in game_start_dict]
print(len(delta_seconds_start_times))
print(delta_seconds_start_times)
#delta_seconds_start_times = [i - 600 for i in delta_seconds_start_times]
#print(delta_seconds_start_times)
print(games_id_list)
#May need to subtract 10 minutes from each delta time. This is 600 seconds.


12
[0.0, 0.0, 0.0, 0.0, 1800.0, 0.0, 1800.0, 0.0, 1800.0, 5400.0, 0.0, 0]
[2019020663, 2019020664, 2019020665, 2019020667, 2019020668, 2019020666, 2019020669, 2019020670, 2019020671, 2019020672, 2019020673, 2019020674]


# Find all the first period stats for the games being played today

In [12]:
#Extract the win, loss, OT records for each team playing.
def _team_records(game_id):
    """Return the league records of the two teams playing in ``game_id``.

    The home team's id is read from the game's live feed, then that team's
    schedule is fetched and the ``leagueRecord`` entries of the matching game
    are read.

    Parameters
    ----------
    game_id : int or str
        The ``gamePk`` of the game.

    Returns
    -------
    list
        The first three ``leagueRecord`` values of the home team followed by the
        first three of the away team (the caller stores them as wins, losses, OT).

    Raises
    ------
    IndexError
        If the team schedule response lists no dates or no games.
    """
    game_link = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live"
    with urllib.request.urlopen(game_link) as url:
        data = json.loads(url.read().decode())
    home_team_id = data["gameData"]["teams"]["home"]["id"]

    # One schedule request is enough: the earlier loop over both team ids overwrote
    # its results, so only the home team's schedule was ever used.
    team_link = f"https://statsapi.web.nhl.com/api/v1/schedule?teamId={home_team_id}"
    with urllib.request.urlopen(team_link) as url:
        team_id_data = json.loads(url.read().decode())
    games = team_id_data["dates"][0]["games"]

    # Prefer the entry for this exact game; fall back to the first listed game
    # (the previous behaviour) when no gamePk matches.
    game = next((g for g in games if str(g.get("gamePk")) == str(game_id)), games[0])
    home_record = list(game["teams"]["home"]["leagueRecord"].values())[:3]
    away_record = list(game["teams"]["away"]["leagueRecord"].values())[:3]
    return(home_record + away_record)

In [13]:
#df.loc[str(game_id)] = home_team + away_team + team_records + home_team_values + away_team_values
def differences(df):
    """Add home-minus-away difference features and rescale the percentage columns.

    The input frame is left untouched; all work happens on a copy. Re-running the
    calling cell therefore no longer divides the percentage columns by 100 a
    second time.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per game, with ``Home_*`` / ``Away_*`` columns for wins, losses,
        OT, goals, shots, blocked, pim, powerPlayGoals, takeaways, giveaways,
        hits, powerPlayPercentage and faceOffWinPercentage.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with eleven ``*_Diff`` columns appended and the four
        percentage columns converted to numeric and divided by 100.
    """
    features = df.copy()

    # (new column, shared suffix of the Home_/Away_ columns it is computed from)
    diff_specs = (
        ("Win_Diff", "wins"),
        ("Loss_Diff", "losses"),
        ("OT_Diff", "OT"),
        ("Goals_Diff", "goals"),
        ("Shots_Diff", "shots"),
        ("Blocked_Diff", "blocked"),
        ("PIM_Diff", "pim"),
        ("PowerPlayGoals_Diff", "powerPlayGoals"),
        ("Takeaways_Diff", "takeaways"),
        ("Giveaways_Diff", "giveaways"),
        ("Hits_Diff", "hits"),
    )
    for diff_column, stat in diff_specs:
        features[diff_column] = features[f"Home_{stat}"] - features[f"Away_{stat}"]

    percentage_columns = (
        "Home_powerPlayPercentage",
        "Away_powerPlayPercentage",
        "Home_faceOffWinPercentage",
        "Away_faceOffWinPercentage",
    )
    for column in percentage_columns:
        # pd.to_numeric handles percentages that arrive as strings.
        features[column] = pd.to_numeric(features[column])/100
    return(features)

In [14]:
#time.sleep(36000)
# Walk through today's games in start-time order. Each game is polled until its first
# intermission (period 1 with time remaining "END"); the box-score team stats at that
# moment are stored as one row of df, indexed by the game id, and df is re-saved to CSV.
game_counter = 0
Start_time_counter = 0
# 30 placeholder columns: the 8 fixed columns below plus the teamSkaterStats keys of both
# teams (assumes 11 keys per team -- TODO confirm). The real names are set on first use.
df = pd.DataFrame(columns = [i for i in range(30)])
for game_id in games_id_list:
    Period = 0
    while Period < 1:
        game_link = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live"
        with urllib.request.urlopen(game_link) as url:
            data = json.loads(url.read().decode())
        linescore = data["liveData"]["linescore"]
        # currentPeriodTimeRemaining is only read once currentPeriod is 1 (short-circuit).
        if linescore["currentPeriod"] == 1 and linescore["currentPeriodTimeRemaining"] == "END":
            team_record = _team_records(game_id)
            some_columns = ["Home_team", "Away_team", "Home_wins", "Home_losses", "Home_OT", "Away_wins", "Away_losses", "Away_OT"]
            home_team_stats = data['liveData']['boxscore']['teams']['home']['teamStats']['teamSkaterStats']
            away_team_stats = data['liveData']['boxscore']['teams']['away']['teamStats']['teamSkaterStats']
            home_team_categories = [f"Home_{i}" for i in home_team_stats.keys()]
            away_team_categories = [f"Away_{i}" for i in away_team_stats.keys()]
            home_team = [data["gameData"]["teams"]["home"]["triCode"]]
            away_team = [data["gameData"]["teams"]["away"]["triCode"]]
            df.columns = some_columns + home_team_categories + away_team_categories
            home_team_values = list(home_team_stats.values())
            away_team_values = list(away_team_stats.values())
            df.loc[str(game_id)] = home_team + away_team + team_record + home_team_values + away_team_values
            Period = 1
            game_counter += 1
            # Save after every game so a crash later in the evening does not lose earlier rows.
            df.to_csv(f"C:\\Users\\David\\OneDrive\\Documents\\OneDrive\\NHL API First period Prediction\\{todays_date}_raw.csv", index = True)
            # Single clock read so hour and minute belong to the same instant.
            now = datetime.today()
            print("Game ", str(game_counter) ,"/", str(len(games_id_list)), f"ID: {game_id} ({away_team}@{home_team}) completed at: ", str(now.hour), ":", str(now.minute))
            # Wait out the gap between this game's start and the next game's start before
            # polling the next game.
            if delta_seconds_start_times[Start_time_counter] != 0:
                print("Now sleeping for:", str(delta_seconds_start_times[Start_time_counter]/60/60), "hours.")
            time.sleep(delta_seconds_start_times[Start_time_counter])
            Start_time_counter += 1
        elif linescore["currentPeriod"] > 1:
            # The first intermission was missed, so the box score already contains later
            # play. Polling would never succeed; skip the game instead of looping forever.
            # No extra sleep: the next game is simply polled straight away.
            print(f"Skipping game ID: {game_id} - already in period", linescore["currentPeriod"], "so first period stats are no longer available.")
            Period = 1
            Start_time_counter += 1
        else:
            Period = 0
            now = datetime.today()
            print("Check Point: ", now.hour, ":", now.minute)
            time.sleep(180)

Game  1 / 12 ID: 2019020663 (['VAN']@['TBL']) completed at:  16 : 47
Check Point:  16 : 47
Game  2 / 12 ID: 2019020664 (['ARI']@['FLA']) completed at:  16 : 50
Game  3 / 12 ID: 2019020665 (['NYI']@['NJD']) completed at:  16 : 50
Game  4 / 12 ID: 2019020667 (['OTT']@['WSH']) completed at:  16 : 50
Check Point:  16 : 50
Game  5 / 12 ID: 2019020668 (['PHI']@['CAR']) completed at:  16 : 53
Now sleeping for: 0.5 hours.
Game  6 / 12 ID: 2019020666 (['COL']@['NYR']) completed at:  17 : 23
Game  7 / 12 ID: 2019020669 (['MTL']@['DET']) completed at:  17 : 23
Now sleeping for: 0.5 hours.
Game  8 / 12 ID: 2019020670 (['SJS']@['STL']) completed at:  17 : 53
Game  9 / 12 ID: 2019020671 (['BOS']@['NSH']) completed at:  17 : 53
Now sleeping for: 0.5 hours.
Game  10 / 12 ID: 2019020672 (['CGY']@['CHI']) completed at:  18 : 23
Now sleeping for: 1.5 hours.
Game  11 / 12 ID: 2019020673 (['PIT']@['VGK']) completed at:  19 : 53
Game  12 / 12 ID: 2019020674 (['CBJ']@['ANA']) completed at:  19 : 53


In [15]:
# Derive the home-vs-away difference features from the raw first-period stats,
# show them, and store them next to the raw CSV.
df_all_features = differences(df)
print(df_all_features)
all_features_csv = f"C:\\Users\\David\\OneDrive\\Documents\\OneDrive\\NHL API First period Prediction\\{todays_date}_df_all_features.csv"
df_all_features.to_csv(all_features_csv, index = True)

           Home_team Away_team Home_wins Home_losses Home_OT Away_wins  \
2019020663       TBL       VAN        24          13       4        23   
2019020664       FLA       ARI        22          15       5        24   
2019020665       NJD       NYI        15          20       6        26   
2019020667       WSH       OTT        29           9       5        16   
2019020668       CAR       PHI        24          16       2        22   
2019020666       NYR       COL        19          18       4        25   
2019020669       DET       MTL        10          30       3        18   
2019020670       STL       SJS        26          10       7        19   
2019020671       NSH       BOS        19          15       7        24   
2019020672       CHI       CGY        19          18       6        22   
2019020673       VGK       PIT        24          15       6        25   
2019020674       ANA       CBJ        17          20       5        20   

           Away_losses Away_OT Home_g

# Report the Winner

In [9]:
#time.sleep(9000)
# For every game, work out which side won and record it on the game's row:
# "Winner" holds the winning team's tri-code, "Winner_binary" is 0 for the home
# team and 1 for the away team.
for game_id in games_id_list:
    game_link = f"https://statsapi.web.nhl.com/api/v1/game/{game_id}/feed/live"
    with urllib.request.urlopen(game_link) as url:
        data = json.loads(url.read().decode())
    home_team = data["gameData"]["teams"]["home"]["triCode"]
    away_team = data["gameData"]["teams"]["away"]["triCode"]
    linescore = data["liveData"]["linescore"]
    periods = linescore["periods"]
    if linescore["hasShootout"] == False:
        # No shootout: compare the goals summed over every period played.
        home_score = sum(int(period["home"]["goals"]) for period in periods)
        away_score = sum(int(period["away"]["goals"]) for period in periods)
    else:
        # Shootout: the shootout scores decide the game.
        home_score = int(linescore["shootoutInfo"]["home"]["scores"])
        away_score = int(linescore["shootoutInfo"]["away"]["scores"])
    # NOTE(review): equal scores (e.g. a game still in progress) fall through to the
    # away team -- confirm this cell is only run once every game is final.
    home_won = home_score > away_score
    winner = home_team if home_won else away_team
    print(f"{winner} Wins")
    df_all_features.loc[str(game_id), "Winner"] = winner
    df_all_features.loc[str(game_id), "Winner_binary"] = 0 if home_won else 1

EDM Wins
WPG Wins
NYI Wins
CBJ Wins


# Prepare the final dataset for analysis by converting everything to floats

In [10]:
# Keep a copy that still has the team codes and the winner's tri-code.
df_all_features_copy = df_all_features.copy()
# Drop the text columns so that every remaining column can be converted to float.
df_all_features = df_all_features.drop(columns = ["Home_team", "Away_team", "Winner"])
# astype returns a new frame rather than converting in place, so the result must be
# assigned; otherwise the CSV below is written without the float conversion.
df_all_features = df_all_features.astype(float)
df_all_features.to_csv(f"C:\\Users\\David\\OneDrive\\Documents\\OneDrive\\NHL API First period Prediction\\{todays_date}_df_all_features_winner.csv", index = True)