In [3]:
"""
Verify Season Coverage - Check that we have data for all 82 games per team

Analyzes the combined props dataset to verify:
- All 30 NBA teams are present
- Each team has data for all 82 regular season games
"""

import pandas as pd
from pathlib import Path
from collections import Counter


# def verify_season_coverage():
"""Verify we have complete 82-game coverage for all 30 teams"""

print("="*70)
print("NBA SEASON COVERAGE VERIFICATION")
print("="*70)
print()

# Load combined props
# combined_file = Path(__file__).parent.parent / 'historical_props' / 'combined_props_player_threes.csv'
from pathlib import Path

# If your notebook is in the 'notebooks' folder:
combined_file = Path('../historical_props/combined_props_player_threes.csv')

# Or more explicitly:
# notebook_dir = Path.cwd()  # Current working directory
# project_root = notebook_dir.parent  # Go up one level to project root
# combined_file = project_root / 'historical_props' / 'combined_props_player_threes.csv'

# # Or just use the absolute path:
# combined_file = Path('/Users/thomasmyles/dev/betting/historical_props/combined_props_player_threes.csv')

# ....

if not combined_file.exists():
    print(f"‚ùå File not found: {combined_file}")
    raise ValueError(f"File not found: {combined_file}")

print(f"Loading data from: {combined_file.name}")
df = pd.read_csv(combined_file)

print(f"Total rows: {len(df):,}")
print(f"Date range: {df['date'].min()} to {df['date'].max()}")
print()

# Get unique game+date combinations (each game on each date)
unique_game_dates = df[['game', 'date']].drop_duplicates()
unique_matchups = df['game'].nunique()

print(f"Unique matchups (e.g., 'Hawks @ Celtics'): {unique_matchups:,}")
print(f"Total games (including rematches): {len(unique_game_dates):,}")
print()

# Split each game+date at ' @ ' to get both teams
all_teams = []

for game in unique_game_dates['game']:
    if ' @ ' in game:
        away_team, home_team = game.split(' @ ')
        all_teams.append(away_team.strip())
        all_teams.append(home_team.strip())
    else:
        print(f"‚ö†Ô∏è  Warning: Game format unexpected: {game}")

# Count games per team
team_counts = Counter(all_teams)

print("="*70)
print("GAMES PER TEAM")
print("="*70)
print()

# Sort by team name
sorted_teams = sorted(team_counts.items(), key=lambda x: x[0])

complete_teams = []
incomplete_teams = []

for team, count in sorted_teams:
    if count == 82:
        status = "‚úÖ"
        complete_teams.append(team)
    else:
        status = "‚ö†Ô∏è "
        incomplete_teams.append((team, count))
    
    print(f"{status} {team:30} - {count:3} games")

print()
print("="*70)
print("SUMMARY")
print("="*70)
print()
print(f"Total unique teams: {len(team_counts)}")
print(f"Expected teams: 30")
print()
print(f"‚úÖ Teams with complete 82-game data: {len(complete_teams)}")
print(f"‚ö†Ô∏è  Teams with incomplete data: {len(incomplete_teams)}")

if incomplete_teams:
    print()
    print("="*70)
    print("INCOMPLETE TEAMS DETAIL")
    print("="*70)
    print()
    for team, count in incomplete_teams:
        missing = 82 - count
        print(f"  {team:30} - {count:3}/82 games ({missing} missing)")
    
    # Calculate average missing
    avg_missing = sum(82 - count for _, count in incomplete_teams) / len(incomplete_teams)
    print()
    print(f"Average games missing per incomplete team: {avg_missing:.1f}")

print()
print("="*70)

if len(team_counts) == 30 and len(incomplete_teams) == 0:
    print("üéâ SUCCESS! All 30 teams have complete 82-game coverage!")
elif len(team_counts) == 30:
    print("‚ö†Ô∏è  All 30 teams present, but some have incomplete coverage")
else:
    print(f"‚ö†Ô∏è  Only {len(team_counts)}/30 teams found in dataset")

print("="*70)

team_counts


# if __name__ == "__main__":
#     verify_season_coverage()



NBA SEASON COVERAGE VERIFICATION

Loading data from: combined_props_player_threes.csv
Total rows: 64,567
Date range: 2024-10-22 to 2025-04-13

Unique matchups (e.g., 'Hawks @ Celtics'): 870
Total games (including rematches): 1,231

GAMES PER TEAM

✅ Atlanta Hawks                  -  82 games
✅ Boston Celtics                 -  82 games
✅ Brooklyn Nets                  -  82 games
⚠️  Charlotte Hornets              -  83 games
✅ Chicago Bulls                  -  82 games
⚠️  Cleveland Cavaliers            -  81 games
✅ Dallas Mavericks               -  82 games
✅ Denver Nuggets                 -  82 games
✅ Detroit Pistons                -  82 games
✅ Golden State Warriors          -  82 games
⚠️  Houston Rockets                -  83 games
✅ Indiana Pacers                 -  82 games
✅ Los Angeles Clippers           -  82 games
⚠️  Los Angeles Lakers             -  83 games
✅ Memphis Grizzlies              -  82 games
✅ Miami Heat                 

Counter({'Los Angeles Lakers': 83,
         'Milwaukee Bucks': 83,
         'Charlotte Hornets': 83,
         'Houston Rockets': 83,
         'New Orleans Pelicans': 83,
         'Boston Celtics': 82,
         'Minnesota Timberwolves': 82,
         'Indiana Pacers': 82,
         'Detroit Pistons': 82,
         'Brooklyn Nets': 82,
         'Atlanta Hawks': 82,
         'Toronto Raptors': 82,
         'Miami Heat': 82,
         'Philadelphia 76ers': 82,
         'Chicago Bulls': 82,
         'Memphis Grizzlies': 82,
         'Utah Jazz': 82,
         'Golden State Warriors': 82,
         'Portland Trail Blazers': 82,
         'Phoenix Suns': 82,
         'Los Angeles Clippers': 82,
         'Washington Wizards': 82,
         'San Antonio Spurs': 82,
         'Dallas Mavericks': 82,
         'Oklahoma City Thunder': 82,
         'Denver Nuggets': 82,
         'Sacramento Kings': 82,
         'New York Knicks': 81,
         'Cleveland Cavaliers': 81,
         'Orlando Magic': 81})

In [4]:
"""
Check Home/Away Split - Each team should have 41 home and 41 away games
"""

print("="*70)
print("HOME/AWAY SPLIT VERIFICATION")
print("="*70)
print()

# Parse home and away teams from each game
home_games = {}
away_games = {}

for game in unique_game_dates['game']:
    if ' @ ' in game:
        away_team, home_team = game.split(' @ ')
        away_team = away_team.strip()
        home_team = home_team.strip()
        
        # Count away games
        if away_team not in away_games:
            away_games[away_team] = 0
        away_games[away_team] += 1
        
        # Count home games
        if home_team not in home_games:
            home_games[home_team] = 0
        home_games[home_team] += 1

# Combine and display results
print("TEAM                           HOME  AWAY  TOTAL  STATUS")
print("-" * 70)

all_teams_split = set(list(home_games.keys()) + list(away_games.keys()))
split_issues = []

for team in sorted(all_teams_split):
    home_count = home_games.get(team, 0)
    away_count = away_games.get(team, 0)
    total = home_count + away_count
    
    # Check if perfectly balanced
    if home_count == 41 and away_count == 41:
        status = "‚úÖ"
    else:
        status = "‚ö†Ô∏è "
        split_issues.append((team, home_count, away_count))
    
    print(f"{status} {team:30} {home_count:4} {away_count:4} {total:5}")

print()
print("="*70)
print("SUMMARY")
print("="*70)
print()
print(f"Teams with perfect 41-41 split: {len(all_teams_split) - len(split_issues)}")
print(f"Teams with imbalanced split: {len(split_issues)}")

if split_issues:
    print()
    print("IMBALANCED TEAMS:")
    for team, home, away in split_issues:
        home_diff = home - 41
        away_diff = away - 41
        print(f"  {team:30} - Home: {home:2} ({home_diff:+2}), Away: {away:2} ({away_diff:+2})")
else:
    print()
    print("üéâ All teams have perfect 41 home / 41 away split!")

print("="*70)


HOME/AWAY SPLIT VERIFICATION

TEAM                           HOME  AWAY  TOTAL  STATUS
----------------------------------------------------------------------
⚠️  Atlanta Hawks                    40   42    82
✅ Boston Celtics                   41   41    82
✅ Brooklyn Nets                    41   41    82
⚠️  Charlotte Hornets                41   42    83
✅ Chicago Bulls                    41   41    82
⚠️  Cleveland Cavaliers              41   40    81
⚠️  Dallas Mavericks                 40   42    82
✅ Denver Nuggets                   41   41    82
✅ Detroit Pistons                  41   41    82
✅ Golden State Warriors            41   41    82
⚠️  Houston Rockets                  41   42    83
✅ Indiana Pacers                   41   41    82
✅ Los Angeles Clippers             41   41    82
⚠️  Los Angeles Lakers               42   41    83
✅ Memphis Grizzlies                41   41    82
✅ Miami Heat                       41   41    82
⚠️

In [5]:
"""
Check for Multiple Games on Same Day - Teams shouldn't play 2 games on the same date
"""

print("="*70)
print("DUPLICATE GAME DATE CHECK")
print("="*70)
print()

# For each team, check if they appear more than once on any date
duplicates_found = []

for team in sorted(all_teams_split):
    # Get all games for this team with dates
    team_games = unique_game_dates[unique_game_dates['game'].str.contains(team, regex=False)]
    
    # Count games per date
    date_counts = team_games['date'].value_counts()
    
    # Find dates with more than 1 game
    duplicate_dates = date_counts[date_counts > 1]
    
    if len(duplicate_dates) > 0:
        duplicates_found.append((team, duplicate_dates))
        print(f"‚ö†Ô∏è  {team}:")
        for date, count in duplicate_dates.items():
            games_on_date = team_games[team_games['date'] == date]['game'].tolist()
            print(f"    {date}: {count} games")
            for game in games_on_date:
                print(f"      - {game}")
        print()

print("="*70)
print("SUMMARY")
print("="*70)
print()

if duplicates_found:
    print(f"‚ö†Ô∏è  Found {len(duplicates_found)} teams with multiple games on same day!")
    print()
    print("This likely indicates:")
    print("  - Data duplication issue")
    print("  - Rescheduled games entered twice")
    print("  - API data anomaly")
else:
    print("‚úÖ No teams have multiple games on the same day!")
    print("   Each team plays at most 1 game per day (as expected)")

print("="*70)


DUPLICATE GAME DATE CHECK

SUMMARY

✅ No teams have multiple games on the same day!
   Each team plays at most 1 game per day (as expected)


In [None]:
# check home/away now
# ...
