# 📓 01_player_sampling_and_stats.ipynb
**Purpose:** Load player data, inspect it, and prepare for performance modeling.

In [None]:
import pickle
import random
import matplotlib.pyplot as plt
import numpy as np
from golf_classes import Player, PlayerRoundInfo, Tournament, Round, Team

In [None]:
# --- Configuration ---
PICKLE_FILE = 'golf_data.pkl'

# --- Load Player and Tournament Data ---
def load_data(filename=PICKLE_FILE):
    """Read the pickled dataset and return its players and tournaments.

    Args:
        filename: Path of the pickle file to open. Defaults to PICKLE_FILE.

    Returns:
        A ``(players, tournaments)`` tuple taken from the ``'players'`` and
        ``'tournaments'`` entries of the unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If the unpickled mapping lacks one of the two entries.
    """
    # NOTE: unpickling can execute arbitrary code, so only point this at
    # files that come from a trusted source.
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)

    player_data = payload['players']
    tournament_data = payload['tournaments']
    return player_data, tournament_data

In [None]:
# Load both collections once; the cells below reuse `players` and `tournaments`.
players, tournaments = load_data(PICKLE_FILE)

# Report how many records of each kind came out of the pickle file.
print(f"✅ Loaded {len(players)} players and {len(tournaments)} tournaments from {PICKLE_FILE}")



In [None]:
# --- Inspect Example Player ---
# Use whichever player the collection yields first. A dict is walked by its
# values; any other container is iterated directly.
if isinstance(players, dict):
    example_player = next(iter(players.values()))
else:
    example_player = next(iter(players))

print(f"Example Player: {example_player.name}")
print(f"Total Rounds Recorded: {len(example_player.rounds)}")

# Preview at most the first five recorded rounds, one field per line.
for i, round_info in enumerate(example_player.rounds[:5]):
    print(f" Round {i+1}:")
    print(f"  Tournament: {round_info.tournament_name}")
    print(f"  Round Number: {round_info.round_number}")
    print(f"  Handicap: {round_info.handicap}")
    print(f"  Tee: {round_info.tee}")
    print(f"  Net Score: {round_info.net}")
    print(f"  Total Gross Score: {round_info.total}")

In [None]:
# --- Statistics ---


def _is_valid_number(value):
    """Return True when ``value`` is neither ``None`` nor NaN."""
    return value is not None and not np.isnan(value)


# Materialize the players as a list so the collection can be sized and
# iterated repeatedly, whether it was loaded as a dict or as another iterable.
player_list = list(players.values() if isinstance(players, dict) else players)

# Number of players
num_players = len(player_list)

# Number of rounds per player
rounds_per_player = [len(player.rounds) for player in player_list]

# Valid net scores and valid handicaps across all players and rounds.
# Missing values (None / NaN) are skipped for both series so that a single
# incomplete round cannot break the summary statistics or the histograms.
all_net_scores = []
all_handicaps = []

for player in player_list:
    for round_info in player.rounds:
        if _is_valid_number(round_info.net):
            all_net_scores.append(round_info.net)
        if _is_valid_number(round_info.handicap):
            all_handicaps.append(round_info.handicap)

# Print statistics
print("\nStatistics:")
print(f"Total Players: {num_players}")

# np.min / np.max raise ValueError on an empty sequence, so only summarize
# the per-player round counts when at least one player was loaded.
if rounds_per_player:
    print(f"Average Rounds per Player: {np.mean(rounds_per_player):.2f}")
    print(f"Min Rounds per Player: {np.min(rounds_per_player)}")
    print(f"Max Rounds per Player: {np.max(rounds_per_player)}")
else:
    print("\n⚠️ Warning: No players found.")

if all_net_scores:
    print("\nNet Score Statistics:")
    print(f"  Average Net Score: {np.mean(all_net_scores):.2f}")
    print(f"  Net Score Range: {np.min(all_net_scores)} to {np.max(all_net_scores)}")
else:
    print("\n⚠️ Warning: No valid net scores found.")

if all_handicaps:
    print("\nHandicap Statistics:")
    print(f"  Average Handicap: {np.mean(all_handicaps):.2f}")
    print(f"  Handicap Range: {np.min(all_handicaps)} to {np.max(all_handicaps)}")
else:
    print("\n⚠️ Warning: No valid handicaps found.")


In [None]:
# --- Plot Net Score Distribution ---
plt.hist(all_net_scores, bins=30, edgecolor='black')
plt.title('Distribution of Net Scores (All Players)')
plt.xlabel('Net Score')
plt.ylabel('Frequency')
plt.grid(True)
plt.show()

# --- Plot Handicap Distribution ---
# min()/max() raise ValueError on an empty list, and the statistics step
# already allows for the case where no valid handicap exists, so skip the
# plot instead of crashing when there is nothing to draw.
if all_handicaps:
    # One-unit-wide bins, padded by one unit below the smallest and above
    # the largest (integer-truncated) handicap.
    handicap_bins = range(int(min(all_handicaps)) - 1, int(max(all_handicaps)) + 2)
    plt.hist(all_handicaps, bins=handicap_bins, edgecolor='black')
    plt.title('Distribution of Handicaps (All Players)')
    plt.xlabel('Handicap')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.show()
else:
    print("\n⚠️ Warning: No valid handicaps to plot.")

In [None]:
# --- Optional: Plot rounds per player ---
# Bin edges run from zero in steps of two and extend past the largest round
# count, so every player falls inside a bin.
most_rounds = max(rounds_per_player)
round_bins = range(0, most_rounds + 5, 2)

plt.hist(rounds_per_player, bins=round_bins, edgecolor='black')
plt.title('Distribution of Rounds per Player')
plt.xlabel('Number of Rounds')
plt.ylabel('Number of Players')
plt.grid(True)
plt.show()

In [None]:
# --- Ready for Sampling Model ---

def sample_score_simple(player):
    """Draw one score uniformly at random from a player's history.

    Args:
        player: Object exposing a ``historical_scores`` sequence.
            NOTE(review): that attribute is not shown in this notebook;
            confirm it exists on ``Player``.

    Returns:
        A single element of ``player.historical_scores``.

    Raises:
        IndexError: If the history is empty (raised by ``random.choice``).
    """
    history = player.historical_scores
    return random.choice(history)

def sample_score_weighted(player, decay_factor=0.9):
    """Draw one score from a player's history, favoring later entries.

    The last entry gets weight ``decay_factor ** 0`` and each step towards
    the front of the list raises the exponent by one.

    Args:
        player: Object exposing a ``historical_scores`` sequence
            (presumably ordered oldest to newest; verify against the data).
        decay_factor: Base of the geometric weighting.

    Returns:
        A single element of ``player.historical_scores``.

    Raises:
        IndexError: If the history is empty (raised by ``random.choices``).
    """
    history = player.historical_scores
    newest = len(history) - 1
    # The exponent is the distance from the final entry, so it shrinks to
    # zero as the position approaches the end of the list.
    decay_weights = [
        decay_factor ** (newest - position) for position, _ in enumerate(history)
    ]
    (picked,) = random.choices(history, weights=decay_weights, k=1)
    return picked

In [None]:
# --- Test Sampling ---
# Smoke-test both samplers on `example_player`. No random seed is set in this
# section, so the printed scores can differ from run to run.
# NOTE(review): both samplers read `example_player.historical_scores`, while
# the inspection and statistics cells only use `.rounds` — confirm that the
# attribute exists on Player before relying on this cell.
print("\n🧪 Sampling Test:")
print(f"Simple Sample for {example_player.name}: {sample_score_simple(example_player)}")
print(f"Weighted Sample for {example_player.name}: {sample_score_weighted(example_player)}")

In [None]:
# --- Save player names (optional) ---
# Collect every player's name; a dict is walked by its values, any other
# container is iterated directly.
if isinstance(players, dict):
    player_names = [entry.name for entry in players.values()]
else:
    player_names = [entry.name for entry in players]