## Setup

In [99]:
import pandas as pd
import numpy as np
from datetime import datetime

# Load the data
CSV_FILE = "generated/api_data/games_basic.csv"
df = pd.read_csv(CSV_FILE, parse_dates=["date"])

print(f"Total games loaded: {len(df)}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")

Total games loaded: 3220
Date range: 2023-10-10 00:00:00 to 2025-12-27 00:00:00


## Select 2 Random Games to Verify

We'll select games that have enough history (not from the first 5 games of a season) to ensure L5 stats are properly calculated.

In [100]:
# Filter to games that have sufficient history (at least 210 games into the dataset)
# This ensures teams have played more than 5 games
df_filtered = df.iloc[210:].copy()

# Randomly select 2 games
np.random.seed(42)  # For reproducibility
sample_games = df_filtered.sample(n=2)

print("Selected Games for Verification:")
print("="*80)
for idx, game in sample_games.iterrows():
    print(f"\nGame ID: {game['game_id']}")
    print(f"Date: {game['date']}")
    print(f"Matchup: {game['away_team']} @ {game['home_team']}")
    print(f"Score: {game['away_gf']} - {game['home_gf']}")

Selected Games for Verification:

Game ID: 2025020486
Date: 2025-12-11 00:00:00
Matchup: Ottawa Senators @ Columbus Blue Jackets
Score: 6 - 3

Game ID: 2023020805
Date: 2024-02-10 00:00:00
Matchup: Vancouver Canucks @ Detroit Red Wings
Score: 3 - 4


## Verification Functions

In [101]:
def get_last_5_games(df, team_abbrev, current_date):
    """
    Get the last 5 games for a team before the current date.
    """
    # Get all games for this team before current date
    team_games = df[
        ((df['home_team_abbrev'] == team_abbrev) | (df['away_team_abbrev'] == team_abbrev)) &
        (df['date'] < current_date)
    ].sort_values('date', ascending=False).head(5)
    
    return team_games

def calc_l5_gf_per_game(df, team_abbrev, current_date):
    """
    Calculate goals for per game over last 5 games.
    """
    last_5 = get_last_5_games(df, team_abbrev, current_date)
    
    goals = []
    for _, game in last_5.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            goals.append(game['home_gf'])
        else:
            goals.append(game['away_gf'])
    
    return round(np.mean(goals), 2) if goals else 0

def calc_l5_win_pct(df, team_abbrev, current_date):
    """
    Calculate win percentage over last 5 games.
    """
    last_5 = get_last_5_games(df, team_abbrev, current_date)
    
    wins = 0
    for _, game in last_5.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            wins += game['home_win']
        else:
            wins += (1 - game['home_win'])
    
    return round(wins / 5.0, 2) if len(last_5) >= 5 else round(wins / len(last_5), 2) if len(last_5) > 0 else 0

def calc_l5_powerplay_pct(df, team_abbrev, current_date):
    """
    Calculate average powerplay percentage over last 5 games.
    """
    last_5 = get_last_5_games(df, team_abbrev, current_date)
    
    pp_pcts = []
    for _, game in last_5.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            pp_pcts.append(game['home_powerplay_pct'])
        else:
            pp_pcts.append(game['away_powerplay_pct'])
    
    return round(np.mean(pp_pcts), 2) if pp_pcts else 0

def calc_l5_ga_per_game(df, team_abbrev, current_date):
    """
    Calculate goals against per game over last 5 games.
    """
    last_5 = get_last_5_games(df, team_abbrev, current_date)
    
    goals_against = []
    for _, game in last_5.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            goals_against.append(game['home_ga'])
        else:
            goals_against.append(game['away_ga'])
    
    return round(np.mean(goals_against), 2) if goals_against else 0

def calc_l5_sog_per_game(df, team_abbrev, current_date):
    """
    Calculate shots on goal per game over last 5 games.
    """
    last_5 = get_last_5_games(df, team_abbrev, current_date)
    
    shots = []
    for _, game in last_5.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            shots.append(game['home_sog'])
        else:
            shots.append(game['away_sog'])
    
    return round(np.mean(shots), 2) if shots else 0

def calc_win_pct_season(df, team_abbrev, current_date):
    """
    Calculate season win percentage up to current date.
    """
    # Extract season year from current date
    year = current_date.year
    month = current_date.month
    
    # NHL season runs Oct-Apr, so determine season start
    if month >= 10:
        season_start = pd.Timestamp(f"{year}-10-01")
    else:
        season_start = pd.Timestamp(f"{year-1}-10-01")
    
    # Get all games for this team in current season before current date
    season_games = df[
        ((df['home_team_abbrev'] == team_abbrev) | (df['away_team_abbrev'] == team_abbrev)) &
        (df['date'] >= season_start) &
        (df['date'] < current_date)
    ]
    
    if len(season_games) == 0:
        return 0
    
    wins = 0
    for _, game in season_games.iterrows():
        if game['home_team_abbrev'] == team_abbrev:
            wins += game['home_win']
        else:
            wins += (1 - game['home_win'])
    
    return round(wins / len(season_games), 3)

## Game 1 Verification

In [102]:
# Get first game
game1 = sample_games.iloc[0]

print("="*80)
print(f"GAME 1 VERIFICATION")
print("="*80)
print(f"Game ID: {game1['game_id']}")
print(f"Date: {game1['date']}")
print(f"Matchup: {game1['away_team_abbrev']} @ {game1['home_team_abbrev']}")
print(f"Score: {game1['away_gf']} - {game1['home_gf']}")
print()

GAME 1 VERIFICATION
Game ID: 2025020486
Date: 2025-12-11 00:00:00
Matchup: OTT @ CBJ
Score: 6 - 3



### L5 Stats Verification (Game 1)

In [103]:
print("\n" + "="*80)
print("L5 STATS VERIFICATION")
print("="*80)

# 1. Home GF per game L5
calc_home_gf_l5 = calc_l5_gf_per_game(df, game1['home_team_abbrev'], game1['date'])
stored_home_gf_l5 = game1['home_gf_per_game_l5']
match_1 = "✓" if abs(calc_home_gf_l5 - stored_home_gf_l5) < 0.01 else "✗"
print(f"\n{match_1} Home GF per game L5:")
print(f"  Calculated: {calc_home_gf_l5}")
print(f"  Stored:     {stored_home_gf_l5}")

# 2. Away Win % L5
calc_away_win_pct = calc_l5_win_pct(df, game1['away_team_abbrev'], game1['date'])
stored_away_win_pct = game1['away_win_pct_l5']
match_2 = "✓" if abs(calc_away_win_pct - stored_away_win_pct) < 0.01 else "✗"
print(f"\n{match_2} Away Win % L5:")
print(f"  Calculated: {calc_away_win_pct}")
print(f"  Stored:     {stored_away_win_pct}")

# 3. Home Powerplay % L5
calc_home_pp_l5 = calc_l5_powerplay_pct(df, game1['home_team_abbrev'], game1['date'])
stored_home_pp_l5 = game1['home_powerplay_pct_l5']
match_3 = "✓" if abs(calc_home_pp_l5 - stored_home_pp_l5) < 0.01 else "✗"
print(f"\n{match_3} Home Powerplay % L5:")
print(f"  Calculated: {calc_home_pp_l5}")
print(f"  Stored:     {stored_home_pp_l5}")


L5 STATS VERIFICATION

✓ Home GF per game L5:
  Calculated: 3.6
  Stored:     3.6

✓ Away Win % L5:
  Calculated: 0.2
  Stored:     0.2

✓ Home Powerplay % L5:
  Calculated: 0.32
  Stored:     0.32


### Diff Stats Verification (Game 1)

In [104]:
print("\n" + "="*80)
print("DIFF STATS VERIFICATION")
print("="*80)

# 1. Home Goal Diff L5 (Home GF L5 - Away GF L5)
calc_home_goal_diff = round(game1['home_gf_per_game_l5'] - game1['away_gf_per_game_l5'], 2)
stored_home_goal_diff = game1['home_goal_diff_l5']
match_4 = "✓" if abs(calc_home_goal_diff - stored_home_goal_diff) < 0.01 else "✗"
print(f"\n{match_4} Home Goal Diff L5 (Home GF L5 - Away GF L5):")
print(f"  Calculated: {calc_home_goal_diff}")
print(f"  Stored:     {stored_home_goal_diff}")
print(f"  (Home: {game1['home_gf_per_game_l5']}, Away: {game1['away_gf_per_game_l5']})")

# 2. Home Shot Diff L5 (Home SOG L5 - Away SOG L5)
calc_home_shot_diff = round(game1['home_sog_per_game_l5'] - game1['away_sog_per_game_l5'], 2)
stored_home_shot_diff = game1['home_shot_diff_l5']
match_5 = "✓" if abs(calc_home_shot_diff - stored_home_shot_diff) < 0.01 else "✗"
print(f"\n{match_5} Home Shot Diff L5 (Home SOG L5 - Away SOG L5):")
print(f"  Calculated: {calc_home_shot_diff}")
print(f"  Stored:     {stored_home_shot_diff}")
print(f"  (Home: {game1['home_sog_per_game_l5']}, Away: {game1['away_sog_per_game_l5']})")

# 3. Home GA Diff L5 (Home GA L5 - Away GA L5)
calc_home_ga_diff = round(game1['home_ga_per_game_l5'] - game1['away_ga_per_game_l5'], 2)
stored_home_ga_diff = game1.get('home_ga_diff_l5', np.nan)
if not np.isnan(stored_home_ga_diff):
    match_6 = "✓" if abs(calc_home_ga_diff - stored_home_ga_diff) < 0.01 else "✗"
    print(f"\n{match_6} Home GA Diff L5 (Home GA L5 - Away GA L5):")
    print(f"  Calculated: {calc_home_ga_diff}")
    print(f"  Stored:     {stored_home_ga_diff}")
    print(f"  (Home: {game1['home_ga_per_game_l5']}, Away: {game1['away_ga_per_game_l5']})")
else:
    print("\n(Info) 'home_ga_diff_l5' not present in dataset; skipped.")


DIFF STATS VERIFICATION

✓ Home Goal Diff L5 (Home GF L5 - Away GF L5):
  Calculated: 1.2
  Stored:     1.2
  (Home: 3.6, Away: 2.4)

✓ Home Shot Diff L5 (Home SOG L5 - Away SOG L5):
  Calculated: -0.2
  Stored:     -0.2
  (Home: 30.2, Away: 30.4)

✓ Home GA Diff L5 (Home GA L5 - Away GA L5):
  Calculated: 0.6
  Stored:     0.6
  (Home: 4.2, Away: 3.6)


### Season Stats Verification (Game 1)

In [105]:
print("\n" + "="*80)
print("SEASON STATS VERIFICATION")
print("="*80)

# 1. Home Win % Season
calc_home_win_season = calc_win_pct_season(df, game1['home_team_abbrev'], game1['date'])
stored_home_win_season = game1['home_win_pct_season']
match_7 = "✓" if abs(calc_home_win_season - stored_home_win_season) < 0.01 else "✗"
print(f"\n{match_7} Home Win % Season:")
print(f"  Calculated: {calc_home_win_season}")
print(f"  Stored:     {stored_home_win_season}")

# 2. Home Home Win % (wins at home / games at home)
# This is calculated from the standings API, so we'll just display it
print(f"\n  Home Win % at Home (from API): {game1['home_home_win_pct']}")
print(f"  (This comes from NHL standings API and cannot be independently verified)")

# 3. Away GF per Game Season (from standings API)
print(f"\n  Away GF % Season (from API): {game1['away_gf_per_game_season']}")
print(f"  (This comes from NHL standings API and cannot be independently verified)")


SEASON STATS VERIFICATION

✗ Home Win % Season:
  Calculated: 0.433
  Stored:     0.419

  Home Win % at Home (from API): 0.462
  (This comes from NHL standings API and cannot be independently verified)

  Away GF % Season (from API): 3.2
  (This comes from NHL standings API and cannot be independently verified)


## Game 2 Verification

In [106]:
# Get second game
game2 = sample_games.iloc[1]

print("\n\n" + "="*80)
print(f"GAME 2 VERIFICATION")
print("="*80)
print(f"Game ID: {game2['game_id']}")
print(f"Date: {game2['date']}")
print(f"Matchup: {game2['away_team_abbrev']} @ {game2['home_team_abbrev']}")
print(f"Score: {game2['away_gf']} - {game2['home_gf']}")
print()



GAME 2 VERIFICATION
Game ID: 2023020805
Date: 2024-02-10 00:00:00
Matchup: VAN @ DET
Score: 3 - 4



### L5 Stats Verification (Game 2)

In [107]:
print("\n" + "="*80)
print("L5 STATS VERIFICATION")
print("="*80)

# 1. Home GF per game L5
calc_home_gf_l5 = calc_l5_gf_per_game(df, game2['home_team_abbrev'], game2['date'])
stored_home_gf_l5 = game2['home_gf_per_game_l5']
match_1 = "✓" if abs(calc_home_gf_l5 - stored_home_gf_l5) < 0.01 else "✗"
print(f"\n{match_1} Home GF per game L5:")
print(f"  Calculated: {calc_home_gf_l5}")
print(f"  Stored:     {stored_home_gf_l5}")

# 2. Away Win % L5
calc_away_win_pct = calc_l5_win_pct(df, game2['away_team_abbrev'], game2['date'])
stored_away_win_pct = game2['away_win_pct_l5']
match_2 = "✓" if abs(calc_away_win_pct - stored_away_win_pct) < 0.01 else "✗"
print(f"\n{match_2} Away Win % L5:")
print(f"  Calculated: {calc_away_win_pct}")
print(f"  Stored:     {stored_away_win_pct}")

# 3. Home Powerplay % L5
calc_home_pp_l5 = calc_l5_powerplay_pct(df, game2['home_team_abbrev'], game2['date'])
stored_home_pp_l5 = game2['home_powerplay_pct_l5']
match_3 = "✓" if abs(calc_home_pp_l5 - stored_home_pp_l5) < 0.01 else "✗"
print(f"\n{match_3} Home Powerplay % L5:")
print(f"  Calculated: {calc_home_pp_l5}")
print(f"  Stored:     {stored_home_pp_l5}")


L5 STATS VERIFICATION

✓ Home GF per game L5:
  Calculated: 3.2
  Stored:     3.2

✓ Away Win % L5:
  Calculated: 0.6
  Stored:     0.6

✓ Home Powerplay % L5:
  Calculated: 0.25
  Stored:     0.25


### Diff Stats Verification (Game 2)

In [108]:
print("\n" + "="*80)
print("DIFF STATS VERIFICATION")
print("="*80)

# 1. Home Goal Diff L5 (Home GF L5 - Away GF L5)
calc_home_goal_diff = round(game2['home_gf_per_game_l5'] - game2['away_gf_per_game_l5'], 2)
stored_home_goal_diff = game2['home_goal_diff_l5']
match_4 = "✓" if abs(calc_home_goal_diff - stored_home_goal_diff) < 0.01 else "✗"
print(f"\n{match_4} Home Goal Diff L5 (Home GF L5 - Away GF L5):")
print(f"  Calculated: {calc_home_goal_diff}")
print(f"  Stored:     {stored_home_goal_diff}")
print(f"  (Home: {game2['home_gf_per_game_l5']}, Away: {game2['away_gf_per_game_l5']})")

# 2. Home Shot Diff L5 (Home SOG L5 - Away SOG L5)
calc_home_shot_diff = round(game2['home_sog_per_game_l5'] - game2['away_sog_per_game_l5'], 2)
stored_home_shot_diff = game2['home_shot_diff_l5']
match_5 = "✓" if abs(calc_home_shot_diff - stored_home_shot_diff) < 0.01 else "✗"
print(f"\n{match_5} Home Shot Diff L5 (Home SOG L5 - Away SOG L5):")
print(f"  Calculated: {calc_home_shot_diff}")
print(f"  Stored:     {stored_home_shot_diff}")
print(f"  (Home: {game2['home_sog_per_game_l5']}, Away: {game2['away_sog_per_game_l5']})")

# 3. Home GA Diff L5 (Home GA L5 - Away GA L5)
calc_home_ga_diff = round(game2['home_ga_per_game_l5'] - game2['away_ga_per_game_l5'], 2)
stored_home_ga_diff = game2.get('home_ga_diff_l5', np.nan)
if not np.isnan(stored_home_ga_diff):
    match_6 = "✓" if abs(calc_home_ga_diff - stored_home_ga_diff) < 0.01 else "✗"
    print(f"\n{match_6} Home GA Diff L5 (Home GA L5 - Away GA L5):")
    print(f"  Calculated: {calc_home_ga_diff}")
    print(f"  Stored:     {stored_home_ga_diff}")
    print(f"  (Home: {game2['home_ga_per_game_l5']}, Away: {game2['away_ga_per_game_l5']})")
else:
    print("\n(Info) 'home_ga_diff_l5' not present in dataset; skipped.")


DIFF STATS VERIFICATION

✓ Home Goal Diff L5 (Home GF L5 - Away GF L5):
  Calculated: 0.6
  Stored:     0.6
  (Home: 3.2, Away: 2.6)

✓ Home Shot Diff L5 (Home SOG L5 - Away SOG L5):
  Calculated: 1.8
  Stored:     1.8
  (Home: 28.0, Away: 26.2)

✓ Home GA Diff L5 (Home GA L5 - Away GA L5):
  Calculated: -0.6
  Stored:     -0.6
  (Home: 2.2, Away: 2.8)


### Season Stats Verification (Game 2)

In [109]:
print("\n" + "="*80)
print("SEASON STATS VERIFICATION")
print("="*80)

# 1. Home Win % Season
calc_home_win_season = calc_win_pct_season(df, game2['home_team_abbrev'], game2['date'])
stored_home_win_season = game2['home_win_pct_season']
match_7 = "✓" if abs(calc_home_win_season - stored_home_win_season) < 0.01 else "✗"
print(f"\n{match_7} Home Win % Season:")
print(f"  Calculated: {calc_home_win_season}")
print(f"  Stored:     {stored_home_win_season}")

# 2. Home Home Win % (wins at home / games at home)
print(f"\n  Home Win % at Home (from API): {game2['home_home_win_pct']}")
print(f"  (This comes from NHL standings API and cannot be independently verified)")

# 3. Away GF per Game Season (from standings API)
print(f"\n  Away GF % Season (from API): {game2['away_gf_per_game_season']}")
print(f"  (This comes from NHL standings API and cannot be independently verified)")


SEASON STATS VERIFICATION

✓ Home Win % Season:
  Calculated: 0.52
  Stored:     0.529

  Home Win % at Home (from API): 0.536
  (This comes from NHL standings API and cannot be independently verified)

  Away GF % Season (from API): 3.692
  (This comes from NHL standings API and cannot be independently verified)


## Summary

This notebook verified:
- **3 L5 stats**: Goals for per game, Win %, Powerplay %
- **3 Diff stats**: Home goal diff (GF L5 - GF L5), Home shot diff (SOG L5 - SOG L5), Home GA diff (GA L5 - GA L5)
- **3 Season stats**: Win %, Home/Away splits, Goals for (some from API)

For **2 randomly selected games**.

Check marks (✓) indicate calculations match the stored values within 0.01 tolerance.