# 📓 01_player_sampling_and_stats.ipynb
**Purpose:** Load player data, inspect it, and prepare for performance modeling.

In [None]:
import pickle
import random
import numpy as np
import sys
from pathlib import Path
import importlib
import pandas as pd

# Ensure correct path
sys.path.append(str(Path().resolve().parent / "src"))

# Reload golf_classes FIRST
import golf_classes
importlib.reload(golf_classes)

# THEN import the class definitions
from golf_classes import Player, PlayerRoundInfo, Tournament, Round, MMTeam, CTeam

# Reload other modules AFTER class objects are clean
import golf_utils
importlib.reload(golf_utils)
import golf_scoring
importlib.reload(golf_scoring)

# THEN import your functions
from golf_utils import get_player_by_name


from golf_scoring import compute_real_stableford, simulate_round,  stableford_points, calibrated_simulate_round, hole_pars

from golf_utils import save_pickle, load_pickle, compute_all_sandbag_factors, rebind_team_players, score_randomness_test, sandbag_report, print_top_sandbaggers


In [None]:
# Empty in-memory containers, shaped like the two pickle payloads.
#
# Player payload (noted in this notebook as calcutta_data_players.pkl):
#   "players":     str -> Player
#   "tournaments": str -> Tournament
player_data = dict(players={}, tournaments={})

# Team payload (noted in this notebook as calcutta_data_teams.pkl):
#   "mm_teams": str -> MMTeam
#   "c_teams":  str -> CTeam
team_data = dict(mm_teams={}, c_teams={})

# Short aliases pointing at the very same dict objects held by the payloads.
players, tournaments = player_data["players"], player_data["tournaments"]
mm_teams, c_teams = team_data["mm_teams"], team_data["c_teams"]

In [None]:

from pathlib import Path

# "Data" directory that sits next to the current working directory's parent.
data_dir = Path(".").resolve().parent.joinpath("Data")

# Pickle files holding the player payload and the team payload.
golf_player_data_file = data_dir.joinpath("golf_player_data.pkl")
golf_team_data_file = data_dir.joinpath("golf_team_data.pkl")

In [None]:
def reset_all_data():
    """Empty the four module-level containers in place and print a confirmation.

    The containers are cleared, not rebound, so any other reference to the
    same objects sees them emptied as well.
    """
    for container in (players, tournaments, mm_teams, c_teams):
        container.clear()
    print("🧹 All data has been reset.")

In [None]:
import os
# Sanity check before loading: prints True when the player pickle path exists.
print(os.path.exists(golf_player_data_file))


In [None]:
# Empty the current containers first so nothing from a previous load lingers.
reset_all_data()

# Load both pickles; when load_pickle returns a falsy value, fall back to an
# empty payload with the expected keys.
player_data = load_pickle(golf_player_data_file) or dict(players={}, tournaments={})
team_data = load_pickle(golf_team_data_file) or dict(mm_teams={}, c_teams={})

# Rebind the short aliases to the freshly loaded payloads.
players, tournaments = player_data["players"], player_data["tournaments"]
mm_teams, c_teams = team_data["mm_teams"], team_data["c_teams"]

rebind_team_players(players, mm_teams, c_teams)

# The arguments 2 and 0.5 are passed through as-is; their meaning is defined
# by compute_all_sandbag_factors in golf_utils.
passCount, errorCount = compute_all_sandbag_factors(players, 2, 0.5)

print(f"players : with SB Factors {passCount}  : without SB factors {errorCount}")

# Count MM teams by whether both of their first two players have a factor.
team_with_sbf = team_without_sbf = 0

for team in mm_teams.values():
    team.compute_aggregate_sand_bag_factor()
    if (
        team.players[0].sand_bag_factor is not None
        and team.players[1].sand_bag_factor is not None
    ):
        team_with_sbf += 1
    else:
        team_without_sbf += 1

print(f"Loaded {len(mm_teams)} mm teams : with sbf {team_with_sbf}, without sbf {team_without_sbf} and {len(c_teams)} c teams")

print(f"Loaded {len(players)} players and {len(tournaments)} tournaments")

In [None]:
# --- Inspect Example Player ---
# Take the first player yielded by the collection (dict values or the
# collection itself).
_player_pool = players.values() if isinstance(players, dict) else players
example_player = next(iter(_player_pool))

print(f"Example Player: {example_player.name}")
print(f"Total Rounds Recorded: {len(example_player.rounds)}")

# Label -> attribute name, in the order the details are printed.
_round_fields = (
    ("Tournament", "tournament_name"),
    ("Round Number", "round_number"),
    ("Handicap", "handicap"),
    ("Tee", "tee"),
    ("Net Score", "net"),
    ("Total Gross Score", "total"),
)

# Show details for at most the first five rounds.
for round_no, round_info in enumerate(example_player.rounds[:5], start=1):
    print(f" Round {round_no}:")
    for label, attr in _round_fields:
        print(f"  {label}: {getattr(round_info, attr)}")

In [None]:
# --- Statistics ---

# `players` may be a dict (values are Player objects) or a plain collection.
player_list = players.values() if isinstance(players, dict) else players

# Number of players
num_players = len(player_list)

# Number of rounds recorded for each player
rounds_per_player = [len(player.rounds) for player in player_list]

# Raw net scores, plus handicaps that are not null/NaN, across every round
all_net_scores = []
all_handicaps = []

for player in player_list:
    for round_info in player.rounds:
        all_net_scores.append(round_info.net)
        if pd.notnull(round_info.handicap):
            all_handicaps.append(round_info.handicap)

# Print statistics
print("\nStatistics:")
print(f"Total Players: {num_players}")
if rounds_per_player:
    print(f"Average Rounds per Player: {np.mean(rounds_per_player):.2f}")
    print(f"Min Rounds per Player: {np.min(rounds_per_player)}")
    print(f"Max Rounds per Player: {np.max(rounds_per_player)}")
else:
    # np.min / np.max raise ValueError on an empty sequence.
    print("\n⚠️ Warning: No players loaded.")

# Keep numeric net scores only, and drop NaN the same way handicaps are
# filtered above; a single NaN would otherwise turn the mean/min/max into NaN.
valid_net_scores = [
    s for s in all_net_scores
    if isinstance(s, (int, float)) and pd.notnull(s)
]
if valid_net_scores:
    print("\nNet Score Statistics:")
    print(f"  Average Net Score: {np.mean(valid_net_scores):.2f}")
    print(f"  Net Score Range: {np.min(valid_net_scores)} to {np.max(valid_net_scores)}")
else:
    print("\n⚠️ Warning: No valid net scores found.")

# all_handicaps is already null-filtered; keep only the numeric entries.
valid_handicaps = [h for h in all_handicaps if isinstance(h, (int, float))]
if valid_handicaps:
    print("\nHandicap Statistics:")
    print(f"  Average Handicap: {np.mean(valid_handicaps):.2f}")
    print(f"  Handicap Range: {np.min(valid_handicaps)} to {np.max(valid_handicaps)}")
else:
    print("\n⚠️ Warning: No valid handicaps found.")


In [None]:

import matplotlib.pyplot as plt
# --- Plot Net Score Distribution ---
plt.hist(valid_net_scores, bins=30, edgecolor='black')
plt.title('Distribution of Net Scores (All Players)')
plt.xlabel('Net Score')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# --- Plot Handicap Distribution ---
# Derive the bin edges from the same list that is plotted. The unfiltered
# all_handicaps list may hold non-numeric entries, which would make
# min()/max()/int() fail or give edges that do not match the plotted data.
if valid_handicaps:
    handicap_bins = range(int(min(valid_handicaps)) - 1, int(max(valid_handicaps)) + 2)
    plt.hist(valid_handicaps, bins=handicap_bins, edgecolor='black')
    plt.title('Distribution of Handicaps (All Players)')
    plt.xlabel('Handicap')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.show()
else:
    # min()/max() raise ValueError on an empty list, so skip the plot.
    print("\n⚠️ Warning: No valid handicaps to plot.")

In [None]:
# --- Optional: Plot rounds per player ---
# Bin edges step by 2 from 0 up to (but excluding) the largest round count + 5.
_rounds_bin_edges = range(0, max(rounds_per_player) + 5, 2)
plt.hist(rounds_per_player, bins=_rounds_bin_edges, edgecolor='black')
plt.title('Distribution of Rounds per Player')
plt.xlabel('Number of Rounds')
plt.ylabel('Number of Players')
plt.grid(True)
plt.show()

In [None]:
# --- Ready for Sampling Model ---

def sample_score_simple(player):
    """Return one element of ``player.historical_scores`` chosen by ``random.choice``.

    Args:
        player: Object exposing a non-empty ``historical_scores`` sequence.

    Returns:
        A single entry from that sequence.
    """
    history = player.historical_scores
    return random.choice(history)

def sample_score_weighted(player, decay_factor=0.9):
    """Draw one score from ``player.historical_scores`` with position-based weights.

    The last entry gets weight ``decay_factor ** 0`` and each step towards the
    front of the sequence raises the exponent by one.

    Args:
        player: Object exposing a non-empty ``historical_scores`` sequence.
        decay_factor: Base of the per-position weight.

    Returns:
        A single entry from the sequence, drawn with ``random.choices``.
    """
    scores = player.historical_scores
    last_index = len(scores) - 1
    weights = [
        decay_factor ** (last_index - position)
        for position, _ in enumerate(scores)
    ]
    (picked,) = random.choices(scores, weights=weights, k=1)
    return picked

In [None]:
# --- Test Sampling ---
# Draw one score with each sampler for the example player and print it.
# NOTE(review): both samplers read `example_player.historical_scores`; the
# other cells in this notebook only use `.rounds`, so confirm that Player
# actually exposes `historical_scores`.
print("\n🧪 Sampling Test:")
print(f"Simple Sample for {example_player.name}: {sample_score_simple(example_player)}")
print(f"Weighted Sample for {example_player.name}: {sample_score_weighted(example_player)}")

In [None]:
# --- Save player names (optional) ---
# Works whether `players` is a dict (its values are used) or a plain collection.
_named_players = players.values() if isinstance(players, dict) else players
player_names = [entry.name for entry in _named_players]

In [None]:
def inspect_player_net_stats(player_name):
    """Print a net-score summary for one player, split by round type.

    A round counts as "individual" when its ``tournament_name`` lower-cases to
    ``"individual"``; every other round counts as a tournament round. Only
    rounds that are ``completed`` and whose ``net`` is an int or float are used.

    Args:
        player_name: Key looked up in the module-level ``players`` dict.

    Returns:
        None. Output is printed; a message is printed and the function returns
        early when the lookup yields nothing (or a falsy value).
    """
    import numpy as np

    player = players.get(player_name)
    if not player:
        print(f"❌ Player '{player_name}' not found.")
        return

    # Single pass: route each usable net score into its bucket.
    indiv_nets, tour_nets = [], []
    for rnd in player.rounds:
        is_individual = rnd.tournament_name.lower() == "individual"
        if rnd.completed and isinstance(rnd.net, (int, float)):
            bucket = indiv_nets if is_individual else tour_nets
            bucket.append(rnd.net)

    print(f"\n📋 Net Score Summary for {player.name}")
    print(f"  ✅ Completed Individual Rounds: {len(indiv_nets)}")
    print(f"  ✅ Completed Tournament Rounds: {len(tour_nets)}")

    avg_indiv = np.mean(indiv_nets) if indiv_nets else None
    if avg_indiv is None:
        print("  🧍 No individual net scores found.")
    else:
        print(f"  🧍 Avg Individual Net: {avg_indiv:.2f}")

    avg_tour = np.mean(tour_nets) if tour_nets else None
    if avg_tour is None:
        print("  🏌️ No tournament net scores found.")
    else:
        print(f"  🏌️ Avg Tournament Net: {avg_tour:.2f}")

    # The difference is only meaningful when both averages exist.
    if avg_indiv is None or avg_tour is None:
        return
    print(f"  🔁 Difference (Individual - Tournament): {avg_indiv - avg_tour:.2f}")

In [None]:
# Print the individual-vs-tournament net summary for one player; the name is
# used as the lookup key in `players`.
inspect_player_net_stats("Jim Ridgeway")

In [None]:
def compare_net_stats_individual_vs_tournament(top_n=20, min_rounds=2):
    """Print players ranked by (average individual net - average tournament net).

    For every player in the module-level ``players`` dict, completed rounds
    with an int/float ``net`` are split by ``tournament_flag``. Players with
    enough rounds of both kinds are listed, largest signed difference first
    (a positive difference means a lower average net in tournaments).

    Args:
        top_n (int): Maximum number of rows to print.
        min_rounds (int): Minimum number of usable rounds required in each
            category. At least one round per category is always required,
            because an average over zero rounds is NaN.

    Returns:
        None. The table is printed.
    """
    import numpy as np

    def _usable_nets(player, in_tournament):
        # Completed rounds of the requested kind with a numeric net score.
        return [
            r.net for r in player.rounds
            if r.tournament_flag == in_tournament
            and r.completed
            and isinstance(r.net, (int, float))
        ]

    # Guard against min_rounds <= 0: np.mean([]) is NaN and would corrupt
    # both the printed rows and the sort order.
    required = max(min_rounds, 1)

    player_diffs = []
    for player in players.values():
        indiv_nets = _usable_nets(player, False)
        tour_nets = _usable_nets(player, True)
        if len(indiv_nets) < required or len(tour_nets) < required:
            continue

        avg_indiv = np.mean(indiv_nets)
        avg_tour = np.mean(tour_nets)
        player_diffs.append(
            (player.name, avg_tour, avg_indiv, avg_indiv - avg_tour, len(tour_nets), len(indiv_nets))
        )

    # Sort by the SIGNED difference, descending (not by absolute value).
    player_diffs.sort(key=lambda row: row[3], reverse=True)

    # Print
    header = "{:<5} {:<25} {:>15} {:>15} {:>12} {:>10} {:>10}".format(
        "Rank", "Player", "Avg Tournament", "Avg Individual", "Difference", "Tour Rds", "Indiv Rds"
    )
    print(f"\n📊 Top {top_n} Players with Largest Net Score Differences (Min {min_rounds} Rounds Each):\n")
    print(header)
    print("-" * len(header))

    for i, (name, avg_tour, avg_indiv, diff, tour_ct, indiv_ct) in enumerate(player_diffs[:top_n], start=1):
        print("{:<5} {:<25} {:>15.2f} {:>15.2f} {:>12.2f} {:>10} {:>10}".format(
            i, name, avg_tour, avg_indiv, diff, tour_ct, indiv_ct
        ))

In [None]:
# --- Example Usage ---
# Show how much data is loaded before printing the comparison table.
print(f"players: {len(players)}, teams: {len(mm_teams)}, tournaments: {len(tournaments)}")

# NOTE(review): total_players is not read by any cell visible in this notebook.
total_players = len(players)
# Top 50 rows; a player needs at least 3 usable rounds of each kind to appear.
compare_net_stats_individual_vs_tournament(top_n=50, min_rounds=3)

In [None]:
def calculate_and_print_player_net_stats(top_n=20, min_rounds=5):
    """Print players ordered by ascending average net score.

    Only rounds whose ``net`` is an int or float and whose ``completed``
    attribute is truthy (treated as False when absent) are used. Players with
    fewer than ``min_rounds`` such rounds are left out.

    Args:
        top_n (int): Maximum number of rows to print.
        min_rounds (int): Minimum number of usable rounds a player needs.

    Returns:
        None. The table is printed.
    """
    import numpy as np

    roster = players.values() if isinstance(players, dict) else players

    collected = []
    for player in roster:
        net_scores = [
            rnd.net for rnd in player.rounds
            if isinstance(rnd.net, (int, float)) and getattr(rnd, "completed", False)
        ]
        if len(net_scores) < min_rounds:
            continue
        collected.append((player.name, np.mean(net_scores), np.std(net_scores), len(net_scores)))

    # Lowest average net score first.
    player_stats = sorted(collected, key=lambda entry: entry[1])

    header = "{:<5} {:<25} {:>15} {:>12} {:>10}".format("Rank", "Player Name", "Avg Net Score", "Std Dev", "Rounds")
    print(f"\n🏌️ Top {top_n} Players by Average Net Score (Minimum {min_rounds} Completed Rounds):\n")
    print(header)
    print("-" * len(header))

    row_template = "{:<5} {:<25} {:>15.2f} {:>12.2f} {:>10}"
    for rank, stats in enumerate(player_stats[:top_n], start=1):
        print(row_template.format(rank, *stats))

In [None]:
def print_cteam_sbf_summary(cteam):
    """Print a tree view of one CTeam's sandbag factors.

    For each MMTeam in ``cteam.mm_teams`` the players' ``sand_bag_factor``
    values and the team's ``aggregate_sand_bag_factor`` are printed, followed
    by the CTeam's own ``aggregate_sand_bag_factor``. Missing values are shown
    as the text ``None``; others use two decimals.

    Args:
        cteam: Object with ``name``, ``mm_teams`` and
            ``aggregate_sand_bag_factor`` attributes.
    """
    def _fmt(value):
        # Two decimals, or the literal text "None" when no value is set.
        return "None" if value is None else f"{value:.2f}"

    print(f"\n🟦 CTeam: {cteam.name}")
    for position, mm_team in enumerate(cteam.mm_teams, 1):
        print(f"  ├─ MMTeam {position}: {mm_team.name}")
        for player in mm_team.players:
            print(f"  │   ├─ Player: {player.name}, SBF: {_fmt(player.sand_bag_factor)}")
        print(f"  │   └─ MMTeam Aggregate SBF: {_fmt(mm_team.aggregate_sand_bag_factor)}")
    print(f"  └─ CTeam Aggregate SBF (best 2 MMTeams): {_fmt(cteam.aggregate_sand_bag_factor)}")


In [None]:

# --- Example Usage ---
# Show how much data is loaded, then print the 50 lowest average net scores
# among players with at least 4 usable rounds.
print(f"players: {len(players)}, teams: {len(mm_teams)}, tournaments: {len(tournaments)}")
calculate_and_print_player_net_stats(top_n=50, min_rounds=4)

In [None]:
def rank_cteams_by_sandbag_factor(c_teams, top_n=10):
    """
    Recompute sandbag aggregates and print the CTeams with the lowest values.

    Every MMTeam's aggregate is recomputed first, then every CTeam's. CTeams
    whose aggregate is None are reported on their own line and left out of
    the ranking; the rest are printed in ascending order of
    aggregate_sand_bag_factor via print_cteam_sbf_summary.

    Args:
        c_teams (dict[str, CTeam]): Mapping of CTeam names to CTeam objects.
        top_n (int): Number of teams to print (sorted by lowest sand_bag_factor).
    """
    teams = c_teams.values()

    # Pass 1: refresh the aggregate of every MMTeam.
    for cteam in teams:
        for mm_team in cteam.mm_teams:
            mm_team.compute_aggregate_sand_bag_factor()

    # Pass 2: refresh every CTeam and report the ones left without a value.
    for cteam in teams:
        cteam.compute_aggregate_score()
        if cteam.aggregate_sand_bag_factor is None:
            print(f"team: {cteam.name} team sb : None")

    # Keep teams that have a value, lowest factor first
    # (lower is better — more sandbaggy).
    ranked = sorted(
        (cteam for cteam in teams if cteam.aggregate_sand_bag_factor is not None),
        key=lambda cteam: cteam.aggregate_sand_bag_factor,
    )

    # Display
    print(f"\n🏆 Top {min(top_n, len(ranked))} CTeams by Sandbag Factor:")
    for team in ranked[:top_n]:
        print_cteam_sbf_summary(team)


In [None]:
def rank_mmteams_by_sandbag_factor(c_teams, top_n=10):
    """
    Compute and rank MMTeams by their aggregate sand_bag_factor.

    The MMTeams are gathered from every CTeam in ``c_teams`` (each MMTeam
    object is considered once, even if several CTeams reference it), their
    aggregate is recomputed, and the teams with a non-None aggregate are
    printed in ascending order together with their players' factors.
    MMTeams whose aggregate is None are reported on their own line and
    left out of the ranking.

    Args:
        c_teams (dict[str, CTeam]): Mapping of CTeam names to CTeam objects.
        top_n (int): Number of MMTeams to print (sorted by lowest sand_bag_factor).
    """
    # Step 1: Collect each MMTeam once and recompute its sandbag factor
    seen_ids = set()
    mm_pool = []
    for cteam in c_teams.values():
        for mm_team in cteam.mm_teams:
            if id(mm_team) in seen_ids:
                continue
            seen_ids.add(id(mm_team))
            mm_team.compute_aggregate_sand_bag_factor()
            mm_pool.append(mm_team)

    # Step 2: Report MMTeams that ended up without an aggregate value
    for mm_team in mm_pool:
        if mm_team.aggregate_sand_bag_factor is None:
            print(f"team: {mm_team.name} team sb : None")

    # Step 3: Keep teams with a value, sorted by sand_bag_factor
    # (lower is better — more sandbaggy)
    ranked = sorted(
        (mm_team for mm_team in mm_pool if mm_team.aggregate_sand_bag_factor is not None),
        key=lambda mm_team: mm_team.aggregate_sand_bag_factor,
    )

    # Step 4: Display, using the same tree layout as print_cteam_sbf_summary
    print(f"\n🏆 Top {min(top_n, len(ranked))} MMTeams by Sandbag Factor:")
    for rank, mm_team in enumerate(ranked[:top_n], start=1):
        print(f"  ├─ MMTeam {rank}: {mm_team.name}")
        for player in mm_team.players:
            sbf = player.sand_bag_factor
            sbf_str = f"{sbf:.2f}" if sbf is not None else "None"
            print(f"  │   ├─ Player: {player.name}, SBF: {sbf_str}")
        print(f"  │   └─ MMTeam Aggregate SBF: {mm_team.aggregate_sand_bag_factor:.2f}")


In [None]:
# Show how much data is loaded, then print the 20 CTeams with the lowest
# aggregate sandbag factor.
print(f"Loaded {len(mm_teams)} mm teams and {len(c_teams)} c teams")
print(f"Loaded {len(players)} players and {len(tournaments)} tournaments")

rank_cteams_by_sandbag_factor(c_teams, top_n=20)


In [None]:
# Count how many objects at each level currently hold a sandbag factor.
valid_players = list(
    filter(lambda player: player.sand_bag_factor is not None, players.values())
)
print(f"✅ {len(valid_players)} players have valid sand_bag_factor")

valid_mm_teams = list(
    filter(lambda mm_team: mm_team.aggregate_sand_bag_factor is not None, mm_teams.values())
)
print(f"✅ {len(valid_mm_teams)} MMTeams have valid aggregate_sand_bag_factor")

valid_c_teams = list(
    filter(lambda cteam: cteam.aggregate_sand_bag_factor is not None, c_teams.values())
)
print(f"✅ {len(valid_c_teams)} CTeams have valid aggregate_sand_bag_factor")




In [None]:
# List every MM team in which at least one of the two players has no factor.
for name, mm in mm_teams.items():
    p1, p2 = mm.players  # unpacking requires exactly two players per team
    if p1.sand_bag_factor is not None and p2.sand_bag_factor is not None:
        continue
    print(f"❌ {name}: {p1.name}={p1.sand_bag_factor}, {p2.name}={p2.sand_bag_factor}")


In [None]:
# Recompute each player's factor, then list the MM teams that still have a
# player without one.
# NOTE(review): compute_sand_bag_factor() is called with no arguments here,
# while the load cell uses compute_all_sandbag_factors(players, 2, .5);
# confirm the method's defaults give the intended result.
for name, mm in mm_teams.items():
    p1, p2 = mm.players  # unpacking requires exactly two players per team
    for member in (p1, p2):
        member.compute_sand_bag_factor()
    if p1.sand_bag_factor is not None and p2.sand_bag_factor is not None:
        continue
    print(f"❌ {name}: {p1.name}={p1.sand_bag_factor}, {p2.name}={p2.sand_bag_factor}")

In [None]:
# Run the score randomness test for one player, looked up by name.
player_name = "Robby Tonkin"
player = players.get(player_name)
if player:
    result = score_randomness_test(player)
    print(result)
    # Print the sandbag report only when the "p_mwu" entry is below 0.05;
    # a missing entry is treated as 1, so no report is printed.
    if result.get("p_mwu", 1) < 0.05:
        print(f"{sandbag_report(result,player_name)}")
else:
    print(f"❌ Player '{player_name}' not found.")

In [None]:

# Print the top-30 sandbagger list; n and alpha are passed straight through to
# golf_utils.print_top_sandbaggers, which defines their meaning.
print_top_sandbaggers(players, n=30, alpha=0.75)