# 1. Install Required Libraries
(Skip if already installed)

In [None]:
# Uncomment the line below if the packages are missing
# (%pip installs into the environment of the running kernel):
# %pip install pandas openpyxl

# 2. Import Data Classes and Helper Modules

In [None]:
import pickle
import random
import numpy as np
import sys
from pathlib import Path
import importlib

# Ensure correct path
sys.path.append(str(Path().resolve().parent / "src"))

# Reload golf_classes FIRST
import golf_classes
importlib.reload(golf_classes)

# THEN import the class definitions
from golf_classes import Player, PlayerRoundInfo, Tournament, Round, MMTeam, CTeam

# Reload other modules AFTER class objects are clean
import golf_utils
importlib.reload(golf_utils)
import golf_scoring
importlib.reload(golf_scoring)

# THEN import your functions
from golf_utils import get_player_by_name, get_close_player_matches, rebind_team_players



In [None]:

from pathlib import Path

# All input/output files live in a "Data" folder next to the current working directory.
data_dir = Path().resolve().parent.joinpath("Data")

# Pickle files that hold the player/tournament model and the team model.
golf_player_data_file = data_dir.joinpath("golf_player_data.pkl")
golf_team_data_file = data_dir.joinpath("golf_team_data.pkl")

# Team workbooks and the 2023 event workbook.
mm_team_xls_file = data_dir.joinpath("MMTeamData.xlsx")
c_team_xls_file = data_dir.joinpath("CTeamData.xlsx")
path_2023 = data_dir.joinpath("2023_cleaned_events_fixed_V2.xlsx")

# Posted-score reports, one workbook per year.
path_2024_posted = data_dir.joinpath("Cleaned_Posted_Report_2024.xlsx")
path_2025_posted = data_dir.joinpath("Cleaned_Posted_Report_2025.xlsx")




In [None]:
# Player/tournament model (this is the object pickled to golf_player_data_file).
player_data = dict(
    players={},      # str -> Player
    tournaments={},  # str -> Tournament
)

# Team model (this is the object pickled to golf_team_data_file).
team_data = dict(
    mm_teams={},  # str -> MMTeam
    c_teams={},   # str -> CTeam
)

# Short aliases: these names refer to the very same dict objects stored above,
# so mutating an alias mutates player_data / team_data as well.
players, tournaments = player_data["players"], player_data["tournaments"]
mm_teams, c_teams = team_data["mm_teams"], team_data["c_teams"]

# 3. Load Cleaned Excel Files

In [None]:

import pandas as pd


# Collect the sheet names inside a context manager so the workbook handle is
# closed as soon as the names have been read (a bare pd.ExcelFile(...) keeps
# the file open until it is garbage-collected).
with pd.ExcelFile(path_2023) as xls_2023:
    sheets_2023 = xls_2023.sheet_names
#sheets_2024 = pd.ExcelFile(path_2024).sheet_names
#sheets_2024_ind = pd.ExcelFile(path_2024_ind).sheet_names
#sheets_2025_ind = pd.ExcelFile(path_2025_ind).sheet_names


In [None]:

# --- Course Setup ---
# Stroke-allocation rating of every hole, listed hole 1 first. A rating of 1 is
# the first hole on which a player receives a handicap stroke.
hole_handicap_ratings = [5, 13, 17, 3, 11, 9, 1, 15, 7, 10, 6, 18, 14, 2, 16, 4, 12, 8]
# Par of every hole, in the same hole order.
hole_pars = [4, 4, 3, 4, 3, 4, 4, 5, 4, 4, 4, 3, 5, 4, 3, 4, 5, 4]



# --- Compute strokes received per hole ---
def strokes_received_per_hole(player_handicap):
    """
    Spread a course handicap over the holes of ``hole_handicap_ratings``.

    Every full multiple of the hole count gives one stroke on each hole; the
    remainder gives one extra stroke on the holes whose rating is less than or
    equal to it. The returned strokes therefore always sum to the handicap.

    For handicaps 0-36 the result is identical to the previous two-pass
    implementation. Handicaps above 36 are no longer capped at two strokes per
    hole, and plus (negative) handicaps now give strokes back on the
    highest-rated holes instead of silently returning all zeros.

    Args:
        player_handicap: Course handicap as a number.

    Returns:
        list[int]: strokes received on each hole, in hole order.

    Raises:
        TypeError: if ``player_handicap`` is None.
        ValueError: if ``player_handicap`` is NaN.
    """
    if player_handicap is None:
        raise TypeError("player_handicap must be a number, not None")
    if player_handicap != player_handicap:  # NaN is the only value unequal to itself
        raise ValueError("player_handicap must not be NaN")

    hole_count = len(hole_handicap_ratings)
    # Python's floor division makes this work for negative handicaps too:
    # divmod(-2, 18) == (-1, 16), i.e. -1 on the two highest-rated holes.
    full_passes, remainder = divmod(player_handicap, hole_count)
    base = int(full_passes)
    return [base + (1 if rating <= remainder else 0) for rating in hole_handicap_ratings]

# 4. Build Player and Tournament Structures

In [None]:

def get_or_create_player(player_name):
    """Return the Player stored under ``player_name``, registering a new one on first use."""
    if player_name in players:
        return players[player_name]
    new_player = Player(player_name)
    players[player_name] = new_player
    return new_player

def check_for_invalid_players():
    """Print every entry of the global ``players`` dict whose value is not a ``Player``."""
    print("\n🔍 Scanning for invalid player entries...")
    suspicious = ((k, v) for k, v in players.items() if not isinstance(v, Player))
    for key, value in suspicious:
        print(f"⚠️ Invalid entry: key={key} | type={type(value)} | value={value}")

def load_event(file_path, sheet_name, year, verbose=False):
    """
    Load one event sheet into the global player / tournament model.

    Each usable row becomes a ``PlayerRoundInfo`` that is appended to the
    player's ``rounds`` and to a single ``Round`` (round 1). The round is
    attached to a ``Tournament`` named after the 'Event' value of the first
    usable row, and the tournament is stored in ``player_data["tournaments"]``.

    Hole scores are treated as NET when the first data row's hole scores do not
    add up to its gross column; in that case the strokes received per hole are
    added back so the stored hole scores are gross.

    Args:
        file_path: Path of the Excel workbook.
        sheet_name: Sheet to read.
        year: Only used in the verbose summary line.
        verbose: Print diagnostics about the first data row when True.

    Returns:
        None. The sheet is skipped (with a printed message) when it has no
        rows, when 18 hole columns cannot be identified, or when the first
        data row holds a non-integer hole score.

    Raises:
        ValueError: if no 'total' / 'gross' / 'gross score' column exists.
    """
    import pandas as pd
    import re

    df = pd.read_excel(file_path, sheet_name=sheet_name)
    df.columns = df.columns.map(str)  # Normalize all column headers to strings

    # --- Detect gross column ---
    gross_col = next((col for col in df.columns if col.strip().lower() in ['total', 'gross', 'gross score']), None)
    if gross_col is None:
        raise ValueError(f"❌ Could not find a 'Gross' column in sheet: {sheet_name}")

    # An empty sheet has no first row to inspect; skip it instead of crashing on iloc[0].
    if df.empty:
        print(f"❌ No data rows in sheet: {sheet_name}. Skipping this sheet.")
        return

    # --- Determine hole column naming pattern ---
    numeric_holes = [str(i) for i in range(1, 19)]
    prefixed_holes = [f"Hole{i}" for i in range(1, 19)]
    if all(h in df.columns for h in numeric_holes):
        # Exact numeric strings: "1", "2", ..., "18"
        hole_cols = numeric_holes
    elif all(h in df.columns for h in prefixed_holes):
        # "Hole1" through "Hole18"
        hole_cols = prefixed_holes
    else:
        # Fallback: headers that, once stripped, are 1-18 with an optional "Hole" prefix.
        # The alternation is grouped so the prefix applies to 10-18 as well, and the
        # columns are ordered by hole number because later code indexes them by hole.
        hole_pattern = re.compile(r"(?:Hole)?([1-9]|1[0-8])")
        col_by_hole = {}
        for col in df.columns:
            found = hole_pattern.fullmatch(col.strip())
            if found:
                col_by_hole.setdefault(int(found.group(1)), col)  # keep first column per hole
        hole_cols = [col_by_hole[hole] for hole in sorted(col_by_hole)]

    if len(hole_cols) != 18:
        print(f"❌ Expected 18 hole columns in sheet: {sheet_name}, found {len(hole_cols)}. Skipping this sheet.")
        return

    # --- Identify score type by comparing sum to reported gross ---
    # Use the second row when the first one is entirely blank (and a second row exists).
    first_is_blank = df.iloc[0].isnull().all()
    first_data_row = df.iloc[1] if first_is_blank and len(df) > 1 else df.iloc[0]
    try:
        hole_scores_first = [int(first_data_row[col]) for col in hole_cols]
    except (ValueError, TypeError):
        print(f"❌ Invalid hole score in first row of sheet: {sheet_name}. Skipping this sheet.")
        return

    total_hole_score_first = sum(hole_scores_first)
    reported_gross = first_data_row[gross_col]
    scores_are_net = abs(total_hole_score_first - reported_gross) > 1e-3

    if verbose:
        score_format = 'NET' if scores_are_net else 'GROSS'
        print(f"📄 First Row in '{sheet_name}':")
        print(f"Player: {first_data_row['Player']}, Handicap: {first_data_row['Handicap']}, Tee: {first_data_row['Tee']}")
        print(f"Hole Columns: {hole_cols}")
        print(f"Hole Scores: {hole_scores_first}")
        print(f"Sum of Hole Scores: {total_hole_score_first}")
        print(f"Reported Gross: {reported_gross}")
        print(f"📄 Loaded '{sheet_name}' ({year}) as {score_format} hole-by-hole scoring using column '{gross_col}'")

    round_obj = None  # Created together with the tournament on the first usable row
    tournament = None
    for idx, row in df.iterrows():
        try:
            event_name = row.get('Event')
            # pd.isna also catches NaN, which is truthy and would otherwise
            # produce a tournament literally named "nan".
            if pd.isna(event_name) or not event_name:
                raise ValueError("Missing event name in 'Event' column.")

            if str(row['Handicap']).strip().upper() == "NH":
                print(f"⚠️ Skipping row with NH handicap in sheet '{sheet_name}', row {idx + 2} (Excel row number).\n  Row content: {row.to_dict()}")
                continue

            if tournament is None:
                tournament_name = f"{event_name}"
                tournament = Tournament(tournament_name)
                round_obj = Round(tournament_name, round_number=1)
            handicap = int(row['Handicap']) if pd.notnull(row['Handicap']) else None
            tee = row['Tee']
            date = pd.to_datetime(row['Date']).date() if 'Date' in row and pd.notnull(row['Date']) else None
            cr = float(row['CR']) if 'CR' in row and pd.notnull(row['CR']) else None
            sr = int(row['SR']) if 'SR' in row and pd.notnull(row['SR']) else None
            index = float(row['Index']) if 'Index' in row and pd.notnull(row['Index']) else None
            gross = int(row[gross_col]) if pd.notnull(row[gross_col]) else None
            raw_hole_scores = [int(row[col]) for col in hole_cols]

            # Net score and stroke allocation both need these two values; fail
            # with a readable message instead of an opaque TypeError.
            if handicap is None or gross is None:
                raise ValueError("Missing handicap or gross score.")

            if scores_are_net:
                strokes = strokes_received_per_hole(handicap)
                adjusted_hole_scores = [raw_hole_scores[i] + strokes[i] for i in range(18)]
            else:
                adjusted_hole_scores = raw_hole_scores

            player = get_or_create_player(row['Player'])
            player_round = PlayerRoundInfo(
                player=player,
                tournament_name=tournament_name,
                tournament_flag=True,
                round_number=1,
                handicap=handicap,
                tee=tee,
                hole_scores=adjusted_hole_scores,
                total=gross,
                net=gross - handicap,
                index=index,
                cr=cr,
                sr=sr,
                date=date,
            )
            # A round counts as completed only when every hole has a positive score.
            player_round.completed = all(score > 0 for score in raw_hole_scores)

            player.rounds.append(player_round)
            round_obj.player_rounds.append(player_round)
        except Exception as e:
            # NOTE(review): despite the "Aborting load" wording, only this row is
            # dropped; the loop carries on with the next row.
            print(f"❌ Aborting load: error in sheet '{sheet_name}', row {idx + 2} (Excel row number).\n  Error: {e}\n  Row content: {row.to_dict()}")

    if tournament and round_obj:
        tournament.rounds.append(round_obj)
        player_data["tournaments"][tournament.name] = tournament



In [None]:
def reset_all_data():
    """Empty the four global model dictionaries in place and announce it."""
    for store in (players, tournaments, mm_teams, c_teams):
        store.clear()
    print("🧹 All data has been reset.")

In [None]:
reset_all_data()

In [None]:
import re

def load_posted_rounds_into_model(file_path):
    """
    Load a posted-score report into the model as non-tournament rounds.

    Every usable row becomes a ``PlayerRoundInfo`` (tournament name
    "individual", ``tournament_flag=False``, no hole scores) appended to the
    player's ``rounds`` and to a new ``Round``. That round is attached to the
    "individual" tournament in ``player_data['tournaments']``; an already
    registered "individual" tournament is reused so that loading a second
    report no longer replaces the rounds of the first one.

    Args:
        file_path: Path of the Excel report.

    Returns:
        None. Prints how many rounds were loaded from this file.
    """
    df = pd.read_excel(file_path)
    tournament_name = "individual"

    # Reuse the tournament from an earlier call instead of overwriting it.
    tournament = player_data['tournaments'].get(tournament_name)
    if tournament is None:
        tournament = Tournament(tournament_name)
    round_obj = Round(tournament_name, round_number=1)

    for idx, row in df.iterrows():
        # NOTE(review): the first data row is skipped on purpose by the original
        # code — presumably a sub-header row in the report; confirm.
        if idx <= 0:
            continue

        raw_name = row['Golfer Name']
        if pd.isna(raw_name):
            # str(NaN) would otherwise register a player literally named "nan".
            print(f"⚠️ Skipping row {idx + 2} (Excel row number): missing 'Golfer Name'.")
            continue

        # 👇 Clean up "Mr. " prefix from names
        name = re.sub(r"^Mr\.?\s+", "", str(raw_name)).strip()

        player = get_or_create_player(name)

        round_info = PlayerRoundInfo(
            player=player,
            tournament_name=tournament_name,
            tournament_flag=False,
            round_number=1,
            handicap=row['Course Handicap'],
            tee=None,
            hole_scores=[],
            total=row['AGS'],
            net=row['AGS'] - row['Course Handicap'],
            index=row['Handicap Index'],
            cr=row['Course Rating'],
            sr=row['Slope Rating'],
            date=pd.to_datetime(row['Date Played']).date() if pd.notnull(row['Date Played']) else None,
            course_played=row['Course Played']
        )
        round_info.completed = row['Holes Played'] == 18

        player.rounds.append(round_info)
        round_obj.player_rounds.append(round_info)

    tournament.rounds.append(round_obj)
    player_data['tournaments'][tournament.name] = tournament
    print(f"✅ Loaded {len(round_obj.player_rounds)} individual rounds into model.")


In [None]:
from datetime import datetime
from collections import defaultdict

def rename_tournaments_by_date(players, tournaments):
    """
    Re-label rounds played on known event dates and file them under that event.

    A round whose ``date`` matches one of the hard-coded event dates gets the
    event's name as ``tournament_name`` and ``tournament_flag = True``. The
    round is then appended to ``tournaments[event].rounds``, creating the
    Tournament on first use. A per-event count is printed at the end.

    NOTE(review): the per-player round object itself is appended to
    ``Tournament.rounds`` here, whereas load_event appends ``Round`` objects to
    that list — confirm that consumers accept both.

    Args:
        players (dict[str, Player]): Player objects with round data.
        tournaments (dict[str, Tournament]): Tournament dictionary to update.
    """
    event_by_date = {
        datetime(2024, 7, 10).date(): "July Stag 24",
        datetime(2024, 6, 12).date(): "US Open Stag 24",
        datetime(2024, 6, 7).date(): "24 mm Day 1 24",
        datetime(2024, 6, 8).date(): "24 mm Day 2 24",
        datetime(2024, 5, 8).date(): "Husky Stag 24",
        datetime(2024, 8, 23).date(): "Club Champ Friday 24",
        datetime(2024, 8, 24).date(): "Club Champ Saturday 24",
        datetime(2024, 9, 25).date(): "Trophy Stag 24",
        datetime(2024, 4, 13).date(): "Masters 24",
        datetime(2025, 4, 12).date(): "Masters 2025",
        datetime(2025, 5, 7).date(): "Husky Stag 2025",
    }

    renamed = defaultdict(int)

    # Walk every round of every player in dictionary order.
    every_round = (rnd for person in players.values() for rnd in person.rounds)
    for rnd in every_round:
        event = event_by_date.get(rnd.date)
        if not event:
            continue

        rnd.tournament_name = event
        rnd.tournament_flag = True
        renamed[event] += 1

        # Add or update the Tournament object
        if event not in tournaments:
            tournaments[event] = Tournament(name=event)
        tournaments[event].rounds.append(rnd)

    print("\n📅 Updated Tournament Names and Populated Tournaments:")
    for event, total in renamed.items():
        print(f"  {event}: {total} rounds")


In [None]:
from collections import defaultdict

def load_teams_from_excel(xlsx_path, player_lookup=None):
    """
    Rebuild the global ``mm_teams`` dict from a workbook of paired player names.

    The first column is read without a header row. Rows containing any missing
    value are dropped, and the remaining rows are consumed two at a time as
    (player 1, player 2). Names written as "Last, First" are turned into
    "First Last" before the exact-key lookup. Pairs whose players cannot be
    found are reported and skipped.

    Args:
        xlsx_path: Path of the workbook.
        player_lookup: Optional dict of name -> Player used for the lookup.
            Defaults to the global ``players`` dict.

    Returns:
        dict: the global ``mm_teams`` dict (cleared and refilled in place).
    """
    import pandas as pd

    roster = players if player_lookup is None else player_lookup
    print(f"number of players : {len(roster)}")

    def normalize_name(name):
        """Return "First Last" for "Last, First"; otherwise the stripped text (None if missing)."""
        if pd.isna(name):
            return None
        text = str(name)  # tolerate non-string cells such as numbers
        parts = [part.strip() for part in text.split(",")]
        return f"{parts[1]} {parts[0]}" if len(parts) == 2 else text.strip()

    df = pd.read_excel(xlsx_path, header=None).dropna()

    mm_teams.clear()

    if len(df) % 2:
        # The pairing loop below cannot use a trailing single row.
        print(f"⚠️ Odd number of rows ({len(df)}): the last row has no partner and is ignored.")

    for i in range(0, len(df) - 1, 2):
        player1_name = None
        player2_name = None
        try:
            player1_name = normalize_name(df.iloc[i, 0])
            player2_name = normalize_name(df.iloc[i + 1, 0])

            if not player1_name or not player2_name:
                raise ValueError("Missing player name")

            player1 = roster.get(player1_name)
            player2 = roster.get(player2_name)

            if not player1 or not player2:
                raise ValueError("Missing player object(s)")

            team_name = f"{player1_name}/{player2_name}"
            mm_teams[team_name] = MMTeam(name=team_name, player1=player1, player2=player2)

        except Exception as e:
            print(f"⚠️ Error loading team at rows {i}-{i+1}: {e}. player1: {player1_name}, player2: {player2_name}")

    return mm_teams







In [None]:
def load_c_teams_from_excel(file_path):
    """
    Rebuild the global ``c_teams`` and ``mm_teams`` dicts from the C-team workbook.

    Each data row holds a team name in the first column and three player pairs
    in columns (1, 3), (5, 7) and (9, 11). Every pair becomes an ``MMTeam`` and
    the three of them form one ``CTeam``. Players are resolved with
    ``get_player_by_name(..., fuzzy=True)``.

    A row is applied all-or-nothing: if any pair fails, nothing from that row
    is stored, so ``mm_teams`` never contains teams of a rejected C-team.

    Args:
        file_path: Path of the workbook (first row is the header).

    Returns:
        None. Prints one line per rejected row and a final summary.
    """
    import pandas as pd

    df = pd.read_excel(file_path, header=0)

    c_teams.clear()
    mm_teams.clear()

    # (player1_col, player2_col) index pairs based on file inspection
    pair_columns = [(1, 3), (5, 7), (9, 11)]

    for idx, row in df.iterrows():
        try:
            if pd.isna(row.iloc[0]):
                # str(NaN) would otherwise register a team literally named "nan".
                raise ValueError("Missing team name")
            c_team_name = str(row.iloc[0])  # Column A: Team name

            # Build every pair first; commit only when the whole row is valid.
            row_pairs = []
            for col_p1, col_p2 in pair_columns:
                player1_name = row.iloc[col_p1]
                player2_name = row.iloc[col_p2]

                if not isinstance(player1_name, str) or not isinstance(player2_name, str):
                    raise ValueError("Missing or invalid player name")

                player1 = get_player_by_name(player1_name, players, fuzzy=True)
                player2 = get_player_by_name(player2_name, players, fuzzy=True)

                if not player1 or not player2:
                    raise ValueError(f"Missing player object(s): {player1_name}, {player2_name}")

                mm_team_name = f"{player1_name}/{player2_name}"
                row_pairs.append(
                    (mm_team_name, MMTeam(name=mm_team_name, player1=player1, player2=player2))
                )

            c_team = CTeam(name=c_team_name, mm_teams=[team for _, team in row_pairs])
            for mm_team_name, mm_team in row_pairs:
                mm_teams[mm_team_name] = mm_team
            c_teams[c_team_name] = c_team

        except Exception as e:
            print(f"⚠️ Error on row {idx + 2}: {e}")

    print(f"✅ Loaded {len(c_teams)} CTeams and {len(mm_teams)} MMTeams from file.")




# 5. Load All Events

In [None]:

# Start from empty players / tournaments / teams dictionaries.
reset_all_data()

# Load all 2023 events
for sheet in sheets_2023:
    load_event(path_2023, sheet, year=2023)


# Add the posted (non-tournament) rounds from the yearly reports.
load_posted_rounds_into_model(path_2025_posted)   
load_posted_rounds_into_model(path_2024_posted)
  

# Re-label rounds played on known event dates, then build the C-teams
# (load_c_teams_from_excel looks players up, so it runs after the players are loaded).
rename_tournaments_by_date(player_data["players"],player_data["tournaments"])
load_c_teams_from_excel(c_team_xls_file)



In [None]:
print(f" players : {len(players)}, tournaments : {len(tournaments)}")
print(f" mm teams : {len(mm_teams)}, c teams : {len(c_teams)}")

In [None]:

def check_unmatched_cteam_players(file_path, players):
    """
    Report the names in the C-team workbook that match no known player.

    Names are collected from columns 1, 3, 5, 7, 9 and 11 and looked up with
    ``get_player_by_name(..., fuzzy=False)``. For every miss, suggestions are
    the known players sharing the last name, or the closest spellings found by
    ``difflib.get_close_matches`` when there are none.

    Args:
        file_path: Path of the workbook (first row is the header).
        players: dict of name -> Player to match against.

    Returns:
        pandas.DataFrame with columns "Unmatched Name" and "Suggestions",
        ordered alphabetically by name so repeated runs give the same table.
    """
    import pandas as pd
    from collections import defaultdict
    from difflib import get_close_matches

    # Read the file
    df = pd.read_excel(file_path, header=0)

    # Use exact column indices if known
    player_columns = [1, 3, 5, 7, 9, 11]

    # Collect all player names from those columns
    names_in_sheet = set()
    for col in player_columns:
        if col < len(df.columns):
            names_in_sheet.update(df.iloc[:, col].dropna().astype(str))

    print(f"✅ Found {len(names_in_sheet)} unique player names in columns {player_columns}")

    # Index the known players by lower-cased last name. Non-string keys are
    # ignored: they cannot be split and would break get_close_matches.
    player_names = [key for key in players.keys() if isinstance(key, str)]
    players_by_last_name = defaultdict(list)
    for full_name in player_names:
        parts = full_name.strip().split()
        if len(parts) >= 2:
            players_by_last_name[parts[-1].lower()].append(full_name)

    # Iterate in sorted order: a set has no stable order between runs.
    not_found_report = []
    for name in sorted(names_in_sheet):
        if get_player_by_name(name, players, fuzzy=False):
            continue  # found via normalized match

        parts = name.strip().split()
        suggestions = players_by_last_name.get(parts[-1].lower(), []) if len(parts) >= 2 else []
        if not suggestions:
            suggestions = get_close_matches(name, player_names, n=3, cutoff=0.7)

        not_found_report.append((name, suggestions))

    return pd.DataFrame(not_found_report, columns=["Unmatched Name", "Suggestions"])



In [None]:
# List the C-team workbook names that match no entry in `players`
# (run the load cell first so `players` is populated).
# The shared c_team_xls_file path is used so this cell reads the same workbook
# as load_c_teams_from_excel instead of a path relative to the working directory.
file_path = c_team_xls_file
report_df = check_unmatched_cteam_players(file_path, players)
display(report_df)

In [None]:
# Rebuild the player model from scratch, then list the C-team names that match no player.
reset_all_data()

# Load all 2023 events
for sheet in sheets_2023:
    load_event(path_2023, sheet, year=2023)

load_posted_rounds_into_model(path_2025_posted)
load_posted_rounds_into_model(path_2024_posted)
report_df = check_unmatched_cteam_players(c_team_xls_file, player_data["players"])
display(report_df)

# reset_all_data() above also wiped the renamed tournaments and both team
# dictionaries. Restore them so that running the notebook top to bottom does
# not pickle an incomplete model in the save cell further down.
rename_tournaments_by_date(player_data["players"], player_data["tournaments"])
load_c_teams_from_excel(c_team_xls_file)


In [None]:

# List all players
print(f"Loaded {len(players)} players.")
print(list(players.keys())[:10])  # Show first 10 players

# List all tournaments
print(f"Loaded {len(tournaments)} tournaments.")
print(list(tournaments.keys())[:5])

# Loop through all players and print their name and all rounds they played
for player_name, player_obj in players.items():
    print(f"\n{player_name} played {len(player_obj.rounds)} rounds:")
    for round_info in player_obj.rounds:
        print(f"- {round_info.tournament_name} (Net {round_info.net})")

# 6. Save Players, Tournaments, and Teams to Pickle Files

In [None]:
from golf_utils import save_pickle, load_pickle

# Save data
save_pickle(player_data, golf_player_data_file)
save_pickle(team_data, golf_team_data_file)


In [None]:
# Empty the in-memory dictionaries before replacing them with the saved copies.
reset_all_data()

# Load data
# Fall back to an empty structure when load_pickle returns a falsy value.
# NOTE(review): load_pickle presumably unpickles the file; unpickling can run
# arbitrary code, so only load files written by this notebook.
player_data = load_pickle(golf_player_data_file) or {"players": {}, "tournaments": {}}
team_data = load_pickle(golf_team_data_file) or {"mm_teams": {}, "c_teams": {}}

# Access individual components
# Rebind the module-level aliases: player_data / team_data are new objects now,
# so the old aliases would otherwise keep pointing at the previous dictionaries.
players = player_data["players"]
tournaments = player_data["tournaments"]
mm_teams = team_data["mm_teams"]
c_teams = team_data["c_teams"]

In [None]:


# Load new teams using current players.
# load_teams_from_excel takes the workbook path and refills the global mm_teams
# dict in place (the previous call used an undefined path variable and a return
# value the function does not provide).
load_teams_from_excel(mm_team_xls_file)
new_teams = mm_teams

# Keep team_data pointing at the freshly loaded teams
team_data["mm_teams"] = new_teams

print(f"✅ Loaded {len(new_teams)} mm_teams and updated player dictionary.")

In [None]:
# --- Helper to find corrupted rounds ---

def find_corrupt_rounds():
    """
    Collect hole scores that cannot be read as numbers.

    Returns:
        list[tuple]: (player name, offending score) for every hole score that is
        neither an int/float nor convertible with ``float()``.
    """
    def _is_numeric(value):
        """True for ints/floats and for anything float() accepts."""
        if isinstance(value, (int, float)):
            return True
        try:
            float(value)
        except (ValueError, TypeError):
            return False
        return True

    # `players` may be a dict of Player objects or already an iterable of them.
    roster = players.values() if isinstance(players, dict) else players
    return [
        (member.name, score)
        for member in roster
        for rnd in member.rounds
        for score in rnd.hole_scores
        if not _is_numeric(score)
    ]

# Example usage
corrupt = find_corrupt_rounds()
for player_name, bad_score in corrupt:
    print(f"Corrupt score '{bad_score}' found for player {player_name}")

In [None]:


# --- Helper to Print Sections of Player Data ---

def inspect_player_rounds(start_idx=0, num_players=5):
    """
    Print a slice of the global ``players`` collection for manual inspection.

    For each selected player every round is printed with its hole scores, and
    any hole score that ``float()`` rejects is flagged.

    Args:
        start_idx: position of the first player to show
        num_players: how many players to show
    """
    roster = list(players.values()) if isinstance(players, dict) else list(players)
    selected = roster[start_idx:start_idx + num_players]

    position = start_idx
    for member in selected:
        print(f"\n=== Player {position}: {member.name} ===")
        position += 1
        for round_no, info in enumerate(member.rounds):
            print(f"  Round {round_no}: Tournament={info.tournament_name}, RoundNum={info.round_number}, Handicap={info.handicap}")
            print(f"    Hole Scores: {info.hole_scores}")

            # Flag anything that is not readable as a number
            for hole_no, score in enumerate(info.hole_scores, start=1):
                try:
                    float(score)
                except (ValueError, TypeError):
                    print(f"    ⚠️ Suspicious score at hole {hole_no}: '{score}'")



In [None]:

from datetime import datetime, date

def inspect_player_by_name(search_name):
    """
    Print the round details of every player whose name contains ``search_name``.

    The search runs over the global ``players`` dict and ignores case. Entries
    that are not ``Player`` objects are skipped; entries that raise while being
    examined are reported and skipped.

    Args:
        search_name: name or partial name of the player (case insensitive)
    """
    found = []
    for key, candidate in players.items():
        try:
            if not isinstance(candidate, Player):
                continue
            if search_name.lower() in candidate.name.lower():
                found.append(candidate)
        except Exception as e:
            print(f"⚠️ Skipping corrupted entry in players: {key} ({type(candidate)}): {e}")

    if not found:
        print(f"❌ No players found matching '{search_name}'.")
        return

    for member in found:
        print(f"\n=== Player: {member.name} ===")
        for round_no, info in enumerate(member.rounds):
            print(f"  Round {round_no}: Tournament={info.tournament_name}, RoundNum={info.round_number}, Handicap={info.handicap}, Date={info.date}, Completed={info.completed}, Duplicate={info.duplicate}")
            print(f"    Tee: {info.tee}")
            print(f"    Hole Scores: {info.hole_scores}")
            print(f"    Gross: {info.total}, Net: {info.net}")

In [None]:
inspect_player_by_name("Jim Ridgeway")

In [None]:
matches = get_close_player_matches("Bob Berry", players)

if matches:
    print("🔍 Close matches:")
    for score, name, player in matches:
        print(f"  {name} (score={score:.2f}) — {player.name}")
else:
    print("❌ No close matches found.")



In [None]:
def check_for_invalid_players():
    """Report each key of the global ``players`` dict whose value is not a ``Player``."""
    print("\n🔍 Scanning for invalid player entries...")
    for key in players:
        value = players[key]
        if isinstance(value, Player):
            continue
        print(f"⚠️ Invalid entry: key={key} | type={type(value)} | value={value}")

In [None]:
check_for_invalid_players()

In [None]:
# See first 5 players
inspect_player_rounds(start_idx=0, num_players=5)

# See players 10–15
#inspect_player_rounds(players, start_idx=10, num_players=5)

# See players 50–60
#inspect_player_rounds(players, start_idx=50, num_players=10)

In [None]:
# --- Find and Print Hole-by-Hole Gross Score Outliers ---

def find_hole_score_outliers(max_hole_score=12):
    """
    Print every hole score in the global ``players`` data above ``max_hole_score``.

    Scores that ``float()`` cannot convert are ignored. When nothing exceeds
    the limit a single confirmation line is printed instead of the table.

    Args:
        max_hole_score: largest score on a single hole that is not reported
    """
    def _as_number(raw):
        """Return float(raw), or None when it cannot be converted."""
        try:
            return float(raw)
        except (ValueError, TypeError):
            return None

    roster = players.values() if isinstance(players, dict) else players

    flagged = []
    for member in roster:
        for rnd in member.rounds:
            # Holes are reported 1-indexed.
            for hole_no, raw in enumerate(rnd.hole_scores, start=1):
                strokes = _as_number(raw)
                if strokes is None or not strokes > max_hole_score:
                    continue
                flagged.append((member.name, rnd.tournament_name, rnd.round_number, hole_no, strokes))

    if not flagged:
        print(f"✅ No hole scores exceeded {max_hole_score} strokes.")
        return

    print(f"\n🚨 Found {len(flagged)} outlier hole scores exceeding {max_hole_score} strokes:\n")
    print(f"{'Player Name':<25} {'Tournament':<30} {'Round':<6} {'Hole':<5} {'Gross Score':>8}")
    print("-" * 80)

    for who, event, round_no, hole_no, strokes in flagged:
        print(f"{who:<25} {event:<30} {round_no:<6} {hole_no:<5} {strokes:>8.1f}")

# --- Example Usage ---

find_hole_score_outliers(max_hole_score=12)

In [None]:
load_teams_from_excel(mm_team_xls_file)
# Teams are stored in team_data; player_data only has "players" and "tournaments".
# Double quotes inside the single-quoted f-string keep this valid before Python 3.12.
print(f'teams: {len(team_data["mm_teams"])}')

In [None]:
# Accepted three-value combinations.
# NOTE(review): the values look like (tee, course rating, slope rating) — confirm
# against the code that consumes valid_combos.
valid_combos = {
    ("I", 72.8, 133),
    ("I/II", 71.7, 131),
    ("II", 70.5, 127),
    ("II", 70.5, 128),  # second accepted third value for "II"
    ("II/III", 68.1, 126),
    ("III", 66.9, 119),
}
