# 1. Install Required Libraries
(Skip if already installed)

In [None]:
# Uncomment below if needed:
# !pip install pandas openpyxl

# 2. Import Data Classes

In [None]:
from golf_classes import Player, PlayerRoundInfo, Tournament, Round, Team


# 3. Load Cleaned Excel Files

In [None]:

import pandas as pd

# Update paths to where you saved your cleaned files
# NOTE: these are absolute, machine-specific locations.
path_2023 = '/home/justin/JustInternetAI/Calcutta/Data/2023_cleaned_events_fixed_V2.xlsx'
path_2024 = '/home/justin/JustInternetAI/Calcutta/Data/2024_cleaned_events_V2.xlsx'

# Only the sheet names are needed here. Opening each workbook in a `with`
# block closes its file handle as soon as the names have been collected.
with pd.ExcelFile(path_2023) as workbook_2023:
    sheets_2023 = workbook_2023.sheet_names
with pd.ExcelFile(path_2024) as workbook_2024:
    sheets_2024 = workbook_2024.sheet_names


In [None]:

# --- Course Setup ---
# Per-hole handicap rating, listed in hole order (index 0 is hole 1). A player
# receives a stroke on a hole once their handicap reaches that hole's rating.
hole_handicap_ratings = [5, 13, 17, 3, 11, 9, 1, 15, 7, 10, 6, 18, 14, 2, 16, 4, 12, 8]
# Par for each hole, in the same hole order.
hole_pars = [4, 4, 3, 4, 3, 4, 4, 5, 4, 4, 4, 3, 5, 4, 3, 4, 5, 4]



# --- Compute strokes received per hole ---
def strokes_received_per_hole(player_handicap):
    """Return the number of handicap strokes a player receives on each hole.

    A hole with rating ``r`` yields one stroke when ``player_handicap >= r``,
    a second when ``player_handicap >= r + 18``, a third when
    ``player_handicap >= r + 36``, and so on, so handicaps above 36 are
    allocated in full instead of being capped at two strokes per hole.

    Args:
        player_handicap: The player's handicap (int or float). Values below
            the lowest hole rating (including zero, negatives and NaN) yield
            no strokes.

    Returns:
        A list of ints, one per entry of ``hole_handicap_ratings``, in hole
        order.
    """
    strokes = []
    for rating in hole_handicap_ratings:
        if player_handicap >= rating:
            # One stroke for reaching the rating, plus one more for every
            # additional full 18 the handicap sits above it.
            strokes.append(int((player_handicap - rating) // 18) + 1)
        else:
            strokes.append(0)
    return strokes

# 4. Build Player and Tournament Structures

In [None]:
# Module-level registries that are filled in while events are loaded.
players = {}      # player name -> Player
tournaments = {}  # tournament name -> Tournament


def get_or_create_player(player_name):
    """Return the Player registered under *player_name*, creating it on first use."""
    try:
        return players[player_name]
    except KeyError:
        # First time this name is seen: build the Player and remember it.
        new_player = Player(player_name)
        players[player_name] = new_player
        return new_player

def load_event(file_path, sheet_name, year, verbose=True):
    """Load one event sheet into the module-level ``players``/``tournaments`` registries.

    The sheet is read with pandas. It must contain ``Player``, ``Handicap``
    and ``Tee`` columns, a gross-total column named ``Total``, ``Gross`` or
    ``Gross Score`` (matched case-insensitively, ignoring surrounding
    whitespace) and, optionally, a ``Net``/``Net Score`` column. The 18
    hole-by-hole scores are taken by position from the 5th through 22nd
    columns.

    Whether the hole scores are gross or net is decided from the first row
    only: if the holes sum to the reported total they are gross; if the sum
    plus the handicap equals the total they are net. Net hole scores are
    converted to gross by adding ``strokes_received_per_hole(handicap)``.

    Every row becomes a ``PlayerRoundInfo`` (round number 1) that is appended
    to the player's ``rounds`` and to the event's single ``Round``; the
    resulting ``Tournament`` is stored in ``tournaments`` under
    ``"<year> <sheet_name>"``.

    Args:
        file_path: Path of the Excel workbook.
        sheet_name: Name of the sheet to load.
        year: Year used as the prefix of the tournament name.
        verbose: When true, print a one-line summary of the detected format.

    Raises:
        ValueError: If no gross-total column is found, the sheet has no rows,
            the sheet has fewer than 22 columns, or the first row matches
            neither the gross nor the net interpretation.
    """
    import pandas as pd

    # 0-based positions of the hole 1-18 score columns (end is exclusive).
    hole_start, hole_end = 4, 22

    def _hole_scores(record):
        # Slice by position explicitly: plain integer keys on a label-indexed
        # row rely on a deprecated pandas fallback and may be treated as
        # column labels instead.
        return list(record.iloc[hole_start:hole_end])

    df = pd.read_excel(file_path, sheet_name=sheet_name)

    tournament_name = f"{year} {sheet_name}"
    tournament = Tournament(tournament_name)
    round_obj = Round(tournament_name, round_number=1)

    # --- Detect gross and net column names (flexible matching) ---
    gross_col = next((col for col in df.columns if str(col).strip().lower() in ['total', 'gross', 'gross score']), None)
    net_col = next((col for col in df.columns if str(col).strip().lower() in ['net', 'net score']), None)

    if gross_col is None:
        raise ValueError(f"❌ Could not find a 'Total' or 'Gross' column in sheet: {sheet_name}")

    # Fail with a clear message instead of an IndexError further down.
    if df.empty:
        raise ValueError(f"Sheet contains no player rows: {sheet_name}")
    if df.shape[1] < hole_end:
        raise ValueError(
            f"Sheet has {df.shape[1]} columns; expected at least {hole_end} "
            f"(hole scores in columns {hole_start + 1}-{hole_end}): {sheet_name}"
        )

    # --- Determine if scores are gross or net by checking one row ---
    first_row = df.iloc[0]
    first_handicap = first_row['Handicap']
    total_hole_score_first = sum(_hole_scores(first_row))
    reported_gross = first_row[gross_col]

    if abs(total_hole_score_first - reported_gross) < 1e-3:
        scores_are_gross = True
    elif abs(total_hole_score_first + first_handicap - reported_gross) < 1e-3:
        scores_are_gross = False
    else:
        raise ValueError(f"Unable to determine if scores are gross or net for sheet: {sheet_name}")

    if verbose:
        score_format = 'GROSS' if scores_are_gross else 'NET'
        print(f"📄 Loaded '{sheet_name}' ({year}) as {score_format} hole-by-hole scoring using column '{gross_col}'")

    # --- Process all rows ---
    for _, row in df.iterrows():
        player = get_or_create_player(row['Player'])
        handicap = row['Handicap']
        raw_hole_scores = _hole_scores(row)

        if scores_are_gross:
            adjusted_hole_scores = raw_hole_scores
        else:
            # Net hole scores: add back the strokes received on each hole.
            strokes = strokes_received_per_hole(handicap)
            adjusted_hole_scores = [
                score + extra for score, extra in zip(raw_hole_scores, strokes)
            ]

        player_round = PlayerRoundInfo(
            player=player,
            tournament_name=tournament_name,
            round_number=1,
            handicap=handicap,
            tee=row['Tee'],
            hole_scores=adjusted_hole_scores,
            total=row[gross_col],
            net=row[net_col] if net_col is not None else None,
        )

        player.rounds.append(player_round)
        round_obj.player_rounds.append(player_round)

    tournament.rounds.append(round_obj)
    tournaments[tournament_name] = tournament


# 5. Load All Events

In [None]:

# Load every sheet of each season's workbook: all of 2023 first, then 2024.
for workbook_path, sheet_names, season in (
    (path_2023, sheets_2023, 2023),
    (path_2024, sheets_2024, 2024),
):
    for sheet in sheet_names:
        load_event(workbook_path, sheet, year=season)


In [None]:

# Player summary: total count plus the first 10 names
player_names = list(players)
print(f"Loaded {len(players)} players.")
print(player_names[:10])

# Tournament summary: total count plus the first 5 names
tournament_names = list(tournaments)
print(f"Loaded {len(tournaments)} tournaments.")
print(tournament_names[:5])

# For every player, list each round played together with its net score
for player_name in players:
    player_obj = players[player_name]
    print(f"\n{player_name} played {len(player_obj.rounds)} rounds:")
    for round_info in player_obj.rounds:
        print(f"- {round_info.tournament_name} (Net {round_info.net})")

# 6. Save Players and Tournaments to a File

In [None]:


import pickle

# Persist both registries to disk as a single pickle
def save_data(players, tournaments, filename='golf_data.pkl'):
    """Pickle *players* and *tournaments* together into *filename*.

    The file holds one dict with the keys ``'players'`` and ``'tournaments'``.
    """
    payload = {'players': players, 'tournaments': tournaments}
    with open(filename, 'wb') as out_file:
        pickle.dump(payload, out_file)

# Read the registries back from a pickle written by save_data
def load_data(filename='golf_data.pkl'):
    """Return ``(players, tournaments)`` unpickled from *filename*.

    The file must contain a dict with ``'players'`` and ``'tournaments'``
    keys. Unpickling can execute arbitrary code, so only load files you
    created yourself.
    """
    with open(filename, 'rb') as in_file:
        payload = pickle.load(in_file)
    stored_players = payload['players']
    stored_tournaments = payload['tournaments']
    return stored_players, stored_tournaments

In [None]:
# Round-trip check: write the data out, read it back, report the player count.
save_data(players, tournaments)
restored = load_data()
loaded_players, loaded_tournaments = restored
print(f"Loaded {len(loaded_players)} players from file.")

In [None]:
# --- Helper to find corrupted rounds ---

def find_corrupt_rounds(players):
    """Collect every hole score that cannot be interpreted as a number.

    Args:
        players: dict of Player objects (its values are scanned) or any
            iterable of Player objects.

    Returns:
        A list of ``(player name, offending score)`` tuples, in scan order.
    """
    def _looks_numeric(value):
        # ints and floats are accepted as-is; anything else must survive float().
        if isinstance(value, (int, float)):
            return True
        try:
            float(value)
        except (ValueError, TypeError):
            return False
        return True

    roster = players.values() if isinstance(players, dict) else players

    flagged = []
    for member in roster:
        for played_round in member.rounds:
            for hole_score in played_round.hole_scores:
                if _looks_numeric(hole_score):
                    continue
                flagged.append((member.name, hole_score))
    return flagged

# Example usage: report each unparseable score found across all players
corrupt = find_corrupt_rounds(players)
for entry in corrupt:
    player_name, bad_score = entry
    print(f"Corrupt score '{bad_score}' found for player {player_name}")

In [None]:


# --- Helper to Print Sections of Player Data ---

def inspect_player_rounds(players, start_idx=0, num_players=5):
    """
    Print a window of players with every round's hole scores for manual review.

    Args:
        players: dict or list of Player objects
        start_idx: index of the first player to show
        num_players: number of players to show from start_idx onward
    """
    roster = list(players.values()) if isinstance(players, dict) else list(players)
    window = roster[start_idx:start_idx + num_players]

    for position, member in enumerate(window, start=start_idx):
        print(f"\n=== Player {position}: {member.name} ===")
        for round_pos, round_info in enumerate(member.rounds):
            print(f"  Round {round_pos}: Tournament={round_info.tournament_name}, RoundNum={round_info.round_number}, Handicap={round_info.handicap}")
            print(f"    Hole Scores: {round_info.hole_scores}")

            # Call out any score that cannot be converted to a number
            for hole_number, score in enumerate(round_info.hole_scores, start=1):
                try:
                    float(score)
                except (ValueError, TypeError):
                    print(f"    ⚠️ Suspicious score at hole {hole_number}: '{score}'")



In [None]:
# Show the first five players (indices 0 through 4)
inspect_player_rounds(players, 0, 5)

# See players 10–15
#inspect_player_rounds(players, start_idx=10, num_players=5)

# See players 50–60
#inspect_player_rounds(players, start_idx=50, num_players=10)

In [None]:
# --- Find and Print Hole-by-Hole Gross Score Outliers ---

def find_hole_score_outliers(players, max_hole_score=12):
    """
    Print a table of every hole score greater than max_hole_score.

    Scores that cannot be converted to float are ignored. When nothing
    exceeds the limit, a single confirmation line is printed instead.

    Args:
        players: dict or list of Player objects
        max_hole_score: largest gross score considered reasonable for one hole
    """
    def _as_float(value):
        # None marks a score that is not numeric and must be skipped.
        try:
            return float(value)
        except (ValueError, TypeError):
            return None

    roster = players.values() if isinstance(players, dict) else players
    outliers = []

    for member in roster:
        for played_round in member.rounds:
            # Holes are reported 1-indexed
            for hole_number, raw_score in enumerate(played_round.hole_scores, start=1):
                gross = _as_float(raw_score)
                if gross is None or not gross > max_hole_score:
                    continue
                outliers.append((
                    member.name,
                    played_round.tournament_name,
                    played_round.round_number,
                    hole_number,
                    gross,
                ))

    if outliers:
        print(f"\n🚨 Found {len(outliers)} outlier hole scores exceeding {max_hole_score} strokes:\n")
        print(f"{'Player Name':<25} {'Tournament':<30} {'Round':<6} {'Hole':<5} {'Gross Score':>8}")
        print("-" * 80)
        for name, tournament, round_num, hole_num, gross_score in outliers:
            print(f"{name:<25} {tournament:<30} {round_num:<6} {hole_num:<5} {gross_score:>8.1f}")
    else:
        print(f"✅ No hole scores exceeded {max_hole_score} strokes.")

# --- Example Usage ---
# Scan every loaded player for single-hole scores above 12 strokes.
find_hole_score_outliers(players, 12)