# 1. Install Required Libraries
(Skip if already installed)

In [None]:
# Uncomment below if needed:
# !pip install pandas openpyxl

# 2. Import Data Classes

In [None]:
from golf_classes import Player, PlayerRoundInfo, Tournament, Round, Team


In [None]:
# Pickle file used as the default path by save_data() / load_data().
golf_data_file = "golf_data.pkl"

# Workbook of team pairings, passed to load_teams_from_excel().
mm_team_file = "/home/justin/JustInternetAI/Calcutta/Data/MMTeamData.xlsx"
# Paths to the cleaned event workbooks (update these to match your machine).
path_2023 = '/home/justin/JustInternetAI/Calcutta/Data/2023_cleaned_events_fixed_V2.xlsx'
path_2024 = '/home/justin/JustInternetAI/Calcutta/Data/2024_cleaned_events_V2.xlsx'

# Paths to the cleaned hole-by-hole workbooks ("_ind" = individual rounds).
path_2025_ind = '/home/justin/JustInternetAI/Calcutta/Data/cleaned_hole_by_hole_2025.xlsx'
path_2024_ind = '/home/justin/JustInternetAI/Calcutta/Data/cleaned_hole_by_hole_2024.xlsx'


In [None]:
# Shared containers. ``players``, ``tournaments`` and ``teams`` are the very
# same objects stored inside ``calcutta_data``, so mutating one in place is
# visible through the other.
players = {}
tournaments = {}
teams = []  # Optional global team structure if used

# Single container holding everything; this is the object save_data() pickles.
calcutta_data = {
    "players": players,
    "tournaments": tournaments,
    "teams": teams,
}

In [None]:
import importlib
import golf_classes 

# Re-execute the golf_classes module so the names below refer to its current
# contents.
importlib.reload(golf_classes)

# Rebind the class names imported earlier to the classes of the reloaded module.
# NOTE(review): objects created before the reload are presumably instances of
# the previous class objects, so isinstance() checks against these names may
# fail for them — confirm before trusting check_for_invalid_players() output.
Player = golf_classes.Player
PlayerRoundInfo = golf_classes.PlayerRoundInfo
Tournament = golf_classes.Tournament
Round = golf_classes.Round
Team = golf_classes.Team

# 3. Load Cleaned Excel Files

In [None]:

import pandas as pd


def _read_sheet_names(workbook_path):
    """Return the list of worksheet names in the workbook at *workbook_path*."""
    # pd.ExcelFile keeps the workbook open; the ``with`` block closes it as
    # soon as the names are read instead of leaving the handle to the
    # garbage collector.
    with pd.ExcelFile(workbook_path) as workbook:
        return workbook.sheet_names


# Worksheet names of every input workbook; each sheet is later passed to
# load_event() together with the matching path.
sheets_2023 = _read_sheet_names(path_2023)
sheets_2024 = _read_sheet_names(path_2024)
sheets_2024_ind = _read_sheet_names(path_2024_ind)
sheets_2025_ind = _read_sheet_names(path_2025_ind)


In [None]:

# --- Course Setup ---
# Handicap rating of holes 1..18 (list index 0 is hole 1). A hole with a lower
# rating receives handicap strokes before a hole with a higher rating.
hole_handicap_ratings = [5, 13, 17, 3, 11, 9, 1, 15, 7, 10, 6, 18, 14, 2, 16, 4, 12, 8]
# Par of holes 1..18, in the same order.
hole_pars = [4, 4, 3, 4, 3, 4, 4, 5, 4, 4, 4, 3, 5, 4, 3, 4, 5, 4]


# --- Compute strokes received per hole ---
def strokes_received_per_hole(player_handicap):
    """Return the handicap strokes a player receives on each of the 18 holes.

    A hole rated ``r`` earns one stroke once the handicap reaches ``r`` and
    one more for every further 18 handicap points (``r + 18``, ``r + 36``,
    ...). Handicaps from 0 to 36 therefore give the same result as the
    previous two-tier rule, while larger handicaps are no longer capped at
    two strokes per hole.

    Args:
        player_handicap: numeric handicap. Values below the lowest hole
            rating (including negative values) and NaN yield no strokes.

    Returns:
        A list of 18 ints, index 0 being hole 1.
    """
    strokes = []
    for hole_rating in hole_handicap_ratings:
        if player_handicap >= hole_rating:
            # One stroke for reaching the rating, plus one per full extra
            # lap of 18 handicap points beyond it.
            extra_laps = int((player_handicap - hole_rating) // 18)
            strokes.append(extra_laps + 1)
        else:
            # Also reached for NaN, because every comparison with NaN is False.
            strokes.append(0)
    return strokes

# 4. Build Player and Tournament Structures

In [None]:

def get_or_create_player(player_name):
    """Return the Player stored under *player_name*, registering a new one if absent.

    The shared ``players`` dict is only mutated in place, so no ``global``
    declaration is required.
    """
    if player_name in players:
        return players[player_name]

    new_player = Player(player_name)
    players[player_name] = new_player
    return new_player

def check_for_invalid_players():
    """Print every entry of the shared ``players`` dict whose value is not a Player."""
    print("\n🔍 Scanning for invalid player entries...")
    suspicious = (
        (key, value)
        for key, value in players.items()
        if not isinstance(value, Player)
    )
    for key, value in suspicious:
        print(f"⚠️ Invalid entry: key={key} | type={type(value)} | value={value}")

def load_event(file_path, sheet_name, year, verbose=False):
    """Load one worksheet of hole-by-hole scores into the shared data structures.

    Every usable row becomes a ``PlayerRoundInfo`` that is appended both to
    the player's ``rounds`` and to a single ``Round`` (round number 1). The
    round is attached to a ``Tournament`` named after the first row that has
    an event name, and that tournament is stored in
    ``calcutta_data["tournaments"]`` under its name, replacing any entry
    already stored under the same name.

    Whether the per-hole numbers are gross or net is decided once per sheet:
    if the first data row's hole scores do not add up to its reported gross,
    the whole sheet is treated as net and each row's handicap strokes are
    added back to obtain gross hole scores.

    Args:
        file_path: path of the Excel workbook.
        sheet_name: worksheet to read.
        year: only used in the verbose summary line.
        verbose: print details about the first data row and the detected
            score format.

    Raises:
        ValueError: if the sheet has no 'total', 'gross' or 'gross score' column.

    Sheets with no data rows, no hole columns, or an unreadable first row are
    reported and skipped. Rows that fail to load are reported and skipped
    individually; loading continues with the next row.
    """
    import pandas as pd
    import re

    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df.columns = df.columns.map(str)  # Normalize all column headers to strings

    # --- Detect gross column ---
    gross_col = next(
        (col for col in df.columns if col.strip().lower() in ('total', 'gross', 'gross score')),
        None,
    )
    if gross_col is None:
        raise ValueError(f"❌ Could not find a 'Gross' column in sheet: {sheet_name}")

    # --- Determine hole column naming pattern ---
    hole_cols = [str(i) for i in range(1, 19) if str(i) in df.columns]  # 2023 style
    if not hole_cols:
        # 2024 style: Hole1 .. Hole18. The pattern accepts exactly 1-18 and the
        # columns are ordered by hole number so they line up with the per-hole
        # stroke allocation regardless of their order in the sheet.
        hole_pattern = re.compile(r'Hole(1[0-8]|[1-9])')
        hole_matches = [m for m in map(hole_pattern.fullmatch, df.columns) if m]
        hole_matches.sort(key=lambda m: int(m.group(1)))
        hole_cols = [m.group(0) for m in hole_matches]
    if not hole_cols:
        print(f"❌ Could not find hole-by-hole columns in sheet: {sheet_name}. Skipping this sheet.")
        return

    # Completely blank rows carry no data; dropping them keeps the original
    # index labels, so Excel row numbers reported below stay correct.
    data_rows = df.dropna(how='all')
    if data_rows.empty:
        print(f"❌ No data rows in sheet: {sheet_name}. Skipping this sheet.")
        return

    # --- Identify score type by comparing sum to reported gross ---
    first_data_row = data_rows.iloc[0]
    try:
        hole_scores_first = [int(first_data_row[col]) for col in hole_cols]
    except (ValueError, TypeError):
        print(f"❌ Invalid hole score in first row of sheet: {sheet_name}. Skipping this sheet.")
        return

    total_hole_score_first = sum(hole_scores_first)
    reported_gross = first_data_row[gross_col]
    scores_are_net = abs(total_hole_score_first - reported_gross) > 1e-3

    if verbose:
        score_format = 'NET' if scores_are_net else 'GROSS'
        print(f"📄 First Row in '{sheet_name}':")
        print(f"Player: {first_data_row['Player']}, Handicap: {first_data_row['Handicap']}, Tee: {first_data_row['Tee']}")
        print(f"Hole Columns: {hole_cols}")
        print(f"Hole Scores: {hole_scores_first}")
        print(f"Sum of Hole Scores: {total_hole_score_first}")
        print(f"Reported Gross: {reported_gross}")
        print(f"📄 Loaded '{sheet_name}' ({year}) as {score_format} hole-by-hole scoring using column '{gross_col}'")

    issue_count = 0
    round_obj = None  # Created together with the tournament on the first valid row
    tournament = None
    tournament_name = None
    for idx, row in data_rows.iterrows():
        try:
            event_name = row.get('Event')
            # A blank cell is read as NaN, which is truthy, so it needs its
            # own check in addition to the None / empty-string test.
            event_is_nan = event_name is not None and pd.isnull(event_name)
            if not event_is_nan and not event_name:
                raise ValueError("Missing event name in 'Event' column.")

            if tournament is None:
                if event_is_nan:
                    # Never name the tournament after a blank cell ("nan").
                    raise ValueError("Missing event name in 'Event' column.")
                candidate_name = f"{event_name}"
                new_tournament = Tournament(candidate_name)
                new_round = Round(candidate_name, round_number=1)
                # Assign only after both objects exist so a failure cannot
                # leave a tournament without its round.
                tournament_name = candidate_name
                tournament = new_tournament
                round_obj = new_round

            handicap = row['Handicap']
            tee = row['Tee']
            date = row.get('Date') if 'Date' in row and pd.notnull(row['Date']) else None
            gross = row[gross_col]
            raw_hole_scores = [int(row[col]) for col in hole_cols]

            if scores_are_net:
                # Add the handicap strokes back to turn net hole scores into gross.
                strokes = strokes_received_per_hole(handicap)
                adjusted_hole_scores = [raw_hole_scores[i] + strokes[i] for i in range(18)]
            else:
                adjusted_hole_scores = raw_hole_scores

            player = get_or_create_player(row['Player'])
            player_round = PlayerRoundInfo(
                player=player,
                tournament_name=tournament_name,
                round_number=1,
                handicap=handicap,
                tee=tee,
                hole_scores=adjusted_hole_scores,
                total=gross,
                net=None,
                date=date
            )

            player.rounds.append(player_round)
            round_obj.player_rounds.append(player_round)
        except Exception as e:
            # Per-row boundary: report the row and carry on with the next one.
            # idx is the zero-based data row; +2 accounts for the header row
            # and Excel's one-based numbering.
            issue_count += 1
            print(f"❌ Skipping row: error in sheet '{sheet_name}', row {idx + 2} (Excel row number).\n  Error: {e}\n  Row content: {row.to_dict()}")

    if issue_count:
        print(f"⚠️ {issue_count} row(s) skipped in sheet '{sheet_name}'.")

    if tournament and round_obj:
        tournament.rounds.append(round_obj)
        calcutta_data["tournaments"][tournament.name] = tournament


In [None]:
def reset_all_data():
    """Empty the shared players, tournaments and teams containers in place."""
    # Clearing in place (rather than rebinding) keeps calcutta_data pointing
    # at the same, now empty, objects.
    for container in (players, tournaments, teams):
        container.clear()
    print("🧹 All data has been reset.")

In [None]:
# Start from empty players / tournaments / teams containers.
reset_all_data()

# 5. Load All Events

In [None]:

# Clear everything first so re-running this cell does not append the same
# rounds twice.
reset_all_data()

# Load all 2023 events
for sheet in sheets_2023:
    load_event(path_2023, sheet, year=2023)

# Load all 2024 events
for sheet in sheets_2024:
    load_event(path_2024, sheet, year=2024)

# Load all 2024 individual (hole-by-hole) rounds
for sheet in sheets_2024_ind:
    load_event(path_2024_ind, sheet, year=2024)

# Load all 2025 individual (hole-by-hole) rounds
for sheet in sheets_2025_ind:
    load_event(path_2025_ind, sheet, year=2025)

# NOTE: mark_duplicate_individual_rounds() and load_teams_from_excel() are
# defined in later cells of this file; those cells must be run before this one.
mark_duplicate_individual_rounds()   

load_teams_from_excel(mm_team_file)



In [None]:

# Report how many players were loaded
print(f"Loaded {len(players)} players.")
print(list(players.keys())[:10])  # Show first 10 players

# Report how many tournaments were loaded
print(f"Loaded {len(tournaments)} tournaments.")
print(list(tournaments.keys())[:5])  # Show first 5 tournament names

# Loop through all players and print their name and all rounds they played.
# NOTE: load_event() creates every round with net=None, so rounds loaded that
# way print as "Net None".
for player_name, player_obj in players.items():
    print(f"\n{player_name} played {len(player_obj.rounds)} rounds:")
    for round_info in player_obj.rounds:
        print(f"- {round_info.tournament_name} (Net {round_info.net})")

In [None]:
# Sizes as seen through the module-level aliases ...
print(f'players: {len(players)}, tournaments: {len(tournaments)}, teams: {len(teams)}')

# ... and as seen through calcutta_data. The two lines should agree; a
# mismatch means an alias no longer refers to the object held in calcutta_data.
print(f" calcutta_data players: {len(calcutta_data['players'])}, teams: {len(calcutta_data['teams'])}, tournaments: {len(calcutta_data['tournaments'])}")

# 6. Save Players, Tournaments, and Teams to a File

In [None]:

import pickle

def save_data(path=golf_data_file):
    """Pickle the whole ``calcutta_data`` dict to *path* and print a size summary.

    Args:
        path: destination file; defaults to the value ``golf_data_file`` had
            when this function was defined.
    """
    with open(path, "wb") as out_file:
        pickle.dump(calcutta_data, out_file)

    print("✅ Data saved successfully.")
    player_count = len(calcutta_data['players'])
    team_count = len(calcutta_data['teams'])
    tournament_count = len(calcutta_data['tournaments'])
    print(f"players: {player_count}, teams: {team_count}, tournaments: {tournament_count}")

def load_data(path=golf_data_file):
    """Read a pickled data dict from *path* into ``calcutta_data`` and refresh the aliases.

    Missing keys fall back to an empty dict (players, tournaments) or an
    empty list (teams). The module-level names ``players``, ``tournaments``
    and ``teams`` are rebound to the freshly loaded objects.

    Args:
        path: pickle file to read; defaults to the value ``golf_data_file``
            had when this function was defined.
    """
    global calcutta_data, players, tournaments, teams

    # NOTE: unpickling executes code embedded in the file, so only load
    # pickles written by save_data() from a trusted location.
    with open(path, "rb") as in_file:
        loaded = pickle.load(in_file)

    for key, fallback in (("players", {}), ("tournaments", {}), ("teams", [])):
        calcutta_data[key] = loaded.get(key, fallback)

    # Rebind the globals for convenience
    players, tournaments, teams = (
        calcutta_data["players"],
        calcutta_data["tournaments"],
        calcutta_data["teams"],
    )

    print("✅ Data loaded successfully.")

In [None]:
# Load existing data (populates calcutta_data and rebinds the global
# players / tournaments / teams names to the loaded objects)
load_data(golf_data_file)

# Rebuild the teams from the workbook using the players just loaded.
# load_teams_from_excel() fills the global ``teams`` list in place, so the
# result is read from there rather than from a return value.
load_teams_from_excel(mm_team_file)
new_teams = teams

# Make sure calcutta_data refers to the rebuilt list
calcutta_data["teams"] = new_teams

# Save everything (players, tournaments, teams)
save_data(golf_data_file)

print(f"✅ Loaded {len(new_teams)} teams and updated player dictionary.")

In [None]:
# Save everything (players, tournaments, teams) held in calcutta_data to the
# pickle file named by golf_data_file.
save_data(golf_data_file)


In [None]:
# --- Helper to find corrupted rounds ---

def find_corrupt_rounds():
    """Collect hole scores that are neither numbers nor convertible to float.

    Returns:
        A list of ``(player name, offending score)`` tuples, in the order
        the scores are encountered.
    """
    def is_number_like(value):
        # int/float pass as-is; anything else must survive float().
        if isinstance(value, (int, float)):
            return True
        try:
            float(value)
        except (ValueError, TypeError):
            return False
        return True

    roster = players.values() if isinstance(players, dict) else players

    corrupt = []
    for player in roster:
        for rnd in player.rounds:
            for score in rnd.hole_scores:
                if is_number_like(score):
                    continue
                corrupt.append((player.name, score))
    return corrupt

# Example usage: list every non-numeric hole score together with its player.
# Nothing is printed when no corrupt score is found.
corrupt = find_corrupt_rounds()
for player_name, bad_score in corrupt:
    print(f"Corrupt score '{bad_score}' found for player {player_name}")

In [None]:


# --- Helper to Print Sections of Player Data ---

def inspect_player_rounds(start_idx=0, num_players=5):
    """
    Print a window of the shared ``players`` collection for manual inspection.

    For every player in the window, each round's tournament, round number,
    handicap and hole scores are printed, followed by a warning for any hole
    score that cannot be converted to float.

    Args:
        start_idx: index of the first player to show
        num_players: how many players to show
    """
    if isinstance(players, dict):
        roster = list(players.values())
    else:
        roster = list(players)

    window = roster[start_idx:start_idx + num_players]
    for position, player in enumerate(window, start=start_idx):
        print(f"\n=== Player {position}: {player.name} ===")
        for round_idx, round_info in enumerate(player.rounds):
            print(f"  Round {round_idx}: Tournament={round_info.tournament_name}, RoundNum={round_info.round_number}, Handicap={round_info.handicap}")
            print(f"    Hole Scores: {round_info.hole_scores}")

            # Flag anything that is not number-like
            for hole_number, score in enumerate(round_info.hole_scores, start=1):
                try:
                    float(score)
                except (ValueError, TypeError):
                    print(f"    ⚠️ Suspicious score at hole {hole_number}: '{score}'")



In [None]:
def inspect_player_by_name(search_name):
    """
    Print the round details of every player whose name contains *search_name*.

    The shared ``players`` dict is searched; entries that are not Player
    instances are ignored and entries that raise while being examined are
    reported and skipped.

    Args:
        search_name: name or partial name of the player (case insensitive)
    """
    found = []
    for key, candidate in players.items():
        try:
            if not isinstance(candidate, Player):
                continue
            if search_name.lower() in candidate.name.lower():
                found.append(candidate)
        except Exception as e:
            print(f"⚠️ Skipping corrupted entry in players: {key} ({type(candidate)}): {e}")

    if len(found) == 0:
        print(f"❌ No players found matching '{search_name}'.")
        return

    for player in found:
        print(f"\n=== Player: {player.name} ===")
        for round_idx, info in enumerate(player.rounds):
            print(f"  Round {round_idx}: Tournament={info.tournament_name}, RoundNum={info.round_number}, Handicap={info.handicap}, Date={info.date}, Duplicate={info.duplicate}")
            print(f"    Tee: {info.tee}")
            print(f"    Hole Scores: {info.hole_scores}")
            print(f"    Gross: {info.total}, Net: {info.net}")

In [None]:
# Example: show every round of the players whose name contains "Chris Taylor".
inspect_player_by_name("Chris Taylor")

In [None]:
def check_for_invalid_players():
    """Report entries of the shared ``players`` dict that do not hold a Player.

    This redefines the helper of the same name that appears earlier in the
    file; the behavior is the same.
    """
    print("\n🔍 Scanning for invalid player entries...")
    for key in players:
        value = players[key]
        if isinstance(value, Player):
            continue
        print(f"⚠️ Invalid entry: key={key} | type={type(value)} | value={value}")

In [None]:
# Print any players entry whose value is not a Player instance.
check_for_invalid_players()

In [None]:
# See first 5 players (indices 0–4)
inspect_player_rounds(start_idx=0, num_players=5)

# See players 10–14 (the function reads the global players; it takes no players argument)
#inspect_player_rounds(start_idx=10, num_players=5)

# See players 50–59
#inspect_player_rounds(start_idx=50, num_players=10)

In [None]:
def mark_duplicate_individual_rounds():
    """Flag 'Individual' rounds that repeat the hole scores of an event round.

    For each player in the shared ``players`` dict, rounds with identical
    hole scores are compared. When one round of such a pair belongs to the
    tournament named "individual" (case-insensitive) and the other does not,
    the Individual round gets ``duplicate = True``. Two rounds of the same
    kind are left untouched, and flags are never reset to False.
    """
    print("🔍 Scanning for duplicate 'Individual' rounds...")
    for player in players.values():
        seen_scores = {}  # hole-score tuple -> reference round for that score line
        for round_info in player.rounds:
            key = tuple(round_info.hole_scores)
            existing_round = seen_scores.get(key)
            if existing_round is None:
                seen_scores[key] = round_info
                continue

            is_individual = round_info.tournament_name.lower() == "individual"
            existing_is_individual = existing_round.tournament_name.lower() == "individual"

            if is_individual and not existing_is_individual:
                round_info.duplicate = True
            elif existing_is_individual and not is_individual:
                existing_round.duplicate = True
                # Make the event round the reference from now on; otherwise a
                # later Individual copy would be compared with the already
                # flagged Individual round and never be marked.
                seen_scores[key] = round_info

    print("✅ Duplicate marking complete.")


In [None]:
# Flag Individual rounds whose hole scores repeat one of the player's event rounds.
mark_duplicate_individual_rounds()

In [None]:
# --- Find and Print Hole-by-Hole Gross Score Outliers ---

def find_hole_score_outliers(max_hole_score=12):
    """
    Print a table of every hole score in the shared ``players`` collection
    that is greater than *max_hole_score*.

    Scores that cannot be converted to float are ignored. Nothing is returned.

    Args:
        max_hole_score: maximum reasonable gross score allowed for a single hole
    """
    roster = players.values() if isinstance(players, dict) else players

    flagged = []
    for player in roster:
        for rnd in player.rounds:
            for hole_number, raw_score in enumerate(rnd.hole_scores, start=1):  # 1-indexed hole number
                try:
                    strokes = float(raw_score)
                except (ValueError, TypeError):
                    continue
                if strokes > max_hole_score:
                    flagged.append(
                        (player.name, rnd.tournament_name, rnd.round_number, hole_number, strokes)
                    )

    if flagged:
        print(f"\n🚨 Found {len(flagged)} outlier hole scores exceeding {max_hole_score} strokes:\n")
        print(f"{'Player Name':<25} {'Tournament':<30} {'Round':<6} {'Hole':<5} {'Gross Score':>8}")
        print("-" * 80)
        for name, tournament, round_num, hole_num, gross_score in flagged:
            print(f"{name:<25} {tournament:<30} {round_num:<6} {hole_num:<5} {gross_score:>8.1f}")
    else:
        print(f"✅ No hole scores exceeded {max_hole_score} strokes.")

# --- Example Usage ---
# Print every hole score above 12 strokes.

find_hole_score_outliers(max_hole_score=12)

In [None]:
def load_teams_from_excel(xlsx_path, player_lookup=None):
    """Rebuild the shared ``teams`` list from a workbook of paired player names.

    The second column of the first worksheet is read without a header row;
    empty cells are dropped and the remaining names are paired in order
    (1st with 2nd, 3rd with 4th, ...). Each pair becomes a ``Team`` named
    ``"<name1> / <name2>"``. Names missing from the player dict get a new
    ``Player`` registered under that name.

    Args:
        xlsx_path: path of the team workbook.
        player_lookup: optional dict mapping player name to Player, used for
            look-ups and extended with new players. Defaults to the shared
            ``players`` dict.

    Returns:
        The shared ``teams`` list, cleared and refilled in place.

    A pair that fails to load is reported and skipped; a trailing name
    without a partner is reported and ignored.
    """
    import pandas as pd

    if player_lookup is None:
        player_lookup = players

    df = pd.read_excel(xlsx_path, usecols=[1], header=None)  # Column 1 (second column)
    df.dropna(inplace=True)

    # With header=None the first sheet row has index label 0, so the Excel row
    # number is label + 1. dropna() keeps the labels, which keeps the reported
    # row numbers right even when blank cells were removed.
    entries = [
        (int(label) + 1, str(value).strip())
        for label, value in zip(df.index, df.iloc[:, 0])
    ]

    teams.clear()  # Clear any existing teams first
    for start in range(0, len(entries), 2):
        pair = entries[start:start + 2]
        if len(pair) < 2:
            excel_row, lone_name = pair[0]
            print(f"⚠️ Error loading team at row {excel_row}: '{lone_name}' has no partner (odd number of names).")
            break

        (row1, name1), (row2, name2) = pair
        try:
            members = []
            for name in (name1, name2):
                # Create a Player only when the name is not registered yet.
                if name not in player_lookup:
                    player_lookup[name] = Player(name)
                members.append(player_lookup[name])

            team_name = f"{name1} / {name2}"
            teams.append(Team(team_name, members))
        except Exception as e:
            print(f"⚠️ Error loading team at rows {row1}–{row2}: {e}")

    return teams

In [None]:
# Rebuild the global teams list from the team workbook and report its size.
load_teams_from_excel(mm_team_file)
print(f'teams: {len(teams)}')

In [None]:
# Set of accepted 3-tuples; membership can be tested with `in`.
# NOTE(review): each tuple looks like (tee name, course rating, slope rating)
# — confirm against the source of these numbers. Nothing visible in this part
# of the file reads this set yet.
valid_combos = {
    ('I', 72.8, 133),
    ('I/II', 71.7, 131),
    ('II', 70.5, 127),
    ('II', 70.5, 128),   # 👈 NEWLY ADDED
    ('II/III', 68.1, 126),
    ('III', 66.9, 119),
}
