# Rookie of the Year Award

In [16]:
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from pathlib import Path

players_teams = pd.read_csv("../players_teams.csv")
awards = pd.read_csv("../awards_players.csv")
players_teams_clean = pd.read_csv("awards_data/players_teams_clean.csv")

### Get Rookies per Year

In [17]:
# Function to identify rookies by year
def get_rookies_by_year(players_teams):
    players_teams = players_teams.sort_values(by=['year'])
    rookies_by_year = {}
    min_year = players_teams['year'].min()
    max_year = players_teams['year'].max()
    previous_years_players = set()

    for year in range(min_year + 1, max_year + 1):
        current_years = players_teams[players_teams['year'] == year]
        current_years_players = set(current_years['playerID'])

        rookies = current_years_players - previous_years_players
        rookies_by_year[year] = rookies

        previous_years_players.update(current_years_players)

    return rookies_by_year

rookies_by_year = get_rookies_by_year(players_teams)

for year, rookies in rookies_by_year.items():
    print(f"Year: {year}")
    print(f"Rookies: {rookies}")

Year: 2
Rookies: {'schumke01w', 'mccrani01w', 'smithka01w', 'johnsja01w', 'darlihe01w', 'campbed01w', 'farriba01w', 'dickeke01w', 'mcgheca01w', 'vealkr01w', 'rizzoje01w', 'stepama01w', 'mcculda01w', 'santoke01w', 'johnssh01w', 'wautean01w', 'melvich01w', 'thompti01w', 'spornra01w', 'barnequ01w', 'saureau01w', 'cantydo01w', 'marcimi01w', 'pavlimi01w', 'rileyru01w', 'hillec01w', 'feastal01w', 'wynneda01w', 'korstil01w', 'dossaci01w', 'clearmi01w', 'whitmta01w', 'dixonta01w', 'luzhe01w', 'jacksla01w', 'jonesme01w', 'hicksje01w', 'edwarto01w', 'willina01w', 'stedika01w', 'nygaava01w', 'powelel01w', 'blodgci01w', 'pridely01w', 'martima01w', 'boltoru01w', 'randase01w', 'clinest01w', 'wolteka01w', 'griffyo01w', 'amachma01w', 'hendene01w', 'santoal01w', 'harrili01w', 'frettla01w', 'andrame01w', 'streiju01w', 'shakiel01w', 'timmsmi01w', 'badertr01w', 'enissh01w', 'burgean01w', 'walsema01w', 'johnspo01w', 'barnead01w', 'jacksta02w', 'goodsad01w', 'kingija01w', 'hallvi01w', 'smithch03w', 'stafftr

### Team and Player Performance around Rookie

In [18]:
# Function to calculate player yearly score
def get_player_year_score(players_teams_df, player_id, year):
    """
    Compute a player's Game Score metrics for a given year and return native Python types.

    Parameters:
        players_teams_df (pd.DataFrame): The players_teams dataset.
        player_id (str): The player ID.
        year (int): The season year.

    Returns:
        dict: {
            'playerID': str,
            'year': int,
            'tmID': str,
            'Game_Score_Total': float,
            'Game_Score_Per_Game': float,
            'Game_Score_Per_Minute': float
        }
    """
    df_player = players_teams_df[
        (players_teams_df["playerID"] == player_id) &
        (players_teams_df["year"] == year)
    ].copy()

    if df_player.empty:
        return {
            "playerID": player_id,
            "year": year,
            "tmID": None,
            "Game_Score_Total": None,
            "Game_Score_Per_Game": None,
            "Game_Score_Per_Minute": None
        }

    # Apply Game Score formula
    base_score = (
        df_player["points"]
        + 0.4 * df_player["fgMade"]
        - 0.7 * df_player["fgAttempted"]
        - 0.4 * (df_player["ftAttempted"] - df_player["ftMade"])
        + 0.7 * df_player["oRebounds"]
        + 0.3 * df_player["dRebounds"]
        + df_player["steals"]
        + 0.7 * df_player["assists"]
        + 0.7 * df_player["blocks"]
        - 0.4 * df_player["PF"]
        - df_player["turnovers"]
    )

    df_player["Game_Score_Total"] = base_score
    df_player["Game_Score_Per_Game"] = base_score / df_player["GP"].replace(0, np.nan)
    df_player["Game_Score_Per_Minute"] = base_score / df_player["minutes"].replace(0, np.nan)

    # If the player switched teams mid-season, aggregate
    result = {
        "playerID": player_id,
        "year": int(year),
        "tmID": ",".join(df_player["tmID"].unique()),
        "Game_Score_Total": float(df_player["Game_Score_Total"].sum(skipna=True)),
        "Game_Score_Per_Game": float(df_player["Game_Score_Per_Game"].mean(skipna=True)),
        "Game_Score_Per_Minute": float(df_player["Game_Score_Per_Minute"].mean(skipna=True)),
    }

    return result

# Function to get all players for a team in a given year
def get_players_for_team(players_teams_df, year, team_id):
    """
    Return all players who played for a specific team in a given year.

    Parameters:
        players_teams_df (pd.DataFrame): The full players_teams dataset.
        year (int): The season year.
        team_id (str): The team ID (e.g., "LAL", "BOS").

    Returns:
        pd.DataFrame: Filtered DataFrame of players for that team and year.
    """
    df_team = players_teams_df[
        (players_teams_df["year"] == year) &
        (players_teams_df["tmID"] == team_id)
    ].copy()
    return df_team

# Function to calculate team's yearly score
def get_team_weighted_avg_score(players_teams_df, year, team_id):
    """
    Calculate the weighted average Game Score per minute for a team in a given year.
    Uses player-level Game Scores weighted by each player's total Game Score.

    Parameters:
        players_teams_df (pd.DataFrame): The full players_teams dataset.
        year (int): The season year.
        team_id (str): The team ID.

    Returns:
        dict: {
            'tmID': str,
            'year': int,
            'Team_Weighted_Avg_Per_Minute': float,
            'Num_Players': int
        }
    """
    # Get all players for that team and year
    df_team = get_players_for_team(players_teams_df, year, team_id)
    if df_team.empty:
        return {
            "tmID": team_id,
            "year": int(year),
            "Team_Weighted_Avg_Per_Minute": None,
            "Num_Players": 0
        }

    # Compute game scores for each player
    player_scores = []
    for player_id in df_team["playerID"].unique():
        player_result = get_player_year_score(players_teams_df, player_id, year - 1)
        player_scores.append(player_result)

    df_scores = pd.DataFrame(player_scores)

    # Weighted average: weight = Game_Score_Total
    total_weight = df_scores["Game_Score_Total"].sum(skipna=True)
    if total_weight == 0 or np.isnan(total_weight):
        weighted_avg = None
    else:
        weighted_avg = (
            (df_scores["Game_Score_Per_Minute"] * df_scores["Game_Score_Total"]).sum(skipna=True)
            / total_weight
        )

    return {
        "tmID": team_id,
        "year": int(year),
        "Team_Weighted_Avg_Per_Minute": float(weighted_avg) if weighted_avg is not None else None,
        "Num_Players": int(len(df_scores))
    }

## Create Table for Rookie of the Year Award

In [19]:
# Each rookie in players_teams should be evaluated for their previous year's team performance
def evaluate_rookies_team_performance(players_teams_df, rookies_by_year):
    rookie_evaluations = []

    for year, rookies in rookies_by_year.items():
        for rookie_id in rookies:
            # Get the team(s) the rookie played for in their rookie year
            df_rookie = players_teams_df[
                (players_teams_df["playerID"] == rookie_id) &
                (players_teams_df["year"] == year)
            ]

            for team_id in df_rookie["tmID"].unique():
                team_performance = get_team_weighted_avg_score(players_teams_df, year, team_id)
                evaluation = {
                    "playerID": rookie_id,
                    "year": year,
                    "tmID": team_id,
                    "Team_Weighted_Avg_Per_Minute_Previous_Year": team_performance["Team_Weighted_Avg_Per_Minute"],
                    "Num_Players_Previous_Year": team_performance["Num_Players"]
                }
                rookie_evaluations.append(evaluation)

    return pd.DataFrame(rookie_evaluations)

rookie_evaluations_df = evaluate_rookies_team_performance(players_teams, rookies_by_year)
rookie_evaluations_df.to_csv("awards_data/rookie_evaluations.csv", index=False)


In [20]:
# Add teammate scores as a list to each rookie's record
def add_teammate_scores_to_rookies(players_teams_df, rookie_evaluations_df):
    """
    For each rookie, collect all their teammates' previous year performance scores
    and store them as a list in the rookie_evaluations_df.
    """
    updated_records = []

    for _, row in rookie_evaluations_df.iterrows():
        rookie_id = row['playerID']
        year = row['year']
        team_id = row['tmID']

        # Get all players for that team and year (excluding the rookie)
        df_team = get_players_for_team(players_teams_df, year, team_id)
        teammate_scores = []

        for player_id in df_team['playerID'].unique():
            if player_id != rookie_id:
                player_score = get_player_year_score(players_teams_df, player_id, year - 1)
                teammate_scores.append({
                    'playerID': player_id,
                    'Game_Score_Total': player_score["Game_Score_Total"],
                    'Game_Score_Per_Game': player_score["Game_Score_Per_Game"],
                    'Game_Score_Per_Minute': player_score["Game_Score_Per_Minute"]
                })

        # Create updated record with original data plus teammate scores list
        updated_record = row.to_dict()
        updated_record['teammate_scores'] = teammate_scores
        updated_records.append(updated_record)

    return pd.DataFrame(updated_records)

# Apply the function to add teammate scores as lists
rookie_evaluations_with_teammates = add_teammate_scores_to_rookies(players_teams, rookie_evaluations_df)

# Save to CSV (note: lists will be stored as strings in CSV)
rookie_evaluations_with_teammates.to_csv("awards_data/rookie_evaluations_with_teammates.csv", index=False)

print(f"Rookie evaluations with teammate scores shape: {rookie_evaluations_with_teammates.shape}")
print(f"\nSample record with teammate scores:")
print(f"Rookie: {rookie_evaluations_with_teammates.iloc[0]['playerID']}")
print(f"Number of teammates: {len(rookie_evaluations_with_teammates.iloc[0]['teammate_scores'])}")
print(f"Teammate scores sample: {rookie_evaluations_with_teammates.iloc[0]['teammate_scores'][:2]}")

Rookie evaluations with teammate scores shape: (513, 6)

Sample record with teammate scores:
Rookie: schumke01w
Number of teammates: 12
Teammate scores sample: [{'playerID': 'brazian01w', 'Game_Score_Total': 20.400000000000006, 'Game_Score_Per_Game': 0.9272727272727276, 'Game_Score_Per_Minute': 0.10049261083743845}, {'playerID': 'grubigo01w', 'Game_Score_Total': 131.20000000000002, 'Game_Score_Per_Game': 4.524137931034483, 'Game_Score_Per_Minute': 0.18222222222222226}]


In [24]:
# Convert teammate_scores list into ML-ready features
def extract_teammate_features(df_with_teammates):
    """
    Extract fixed-size features from the variable-length teammate_scores list.
    Creates aggregate statistics that can be used in ML models.
    """
    features = []
    
    for _, row in df_with_teammates.iterrows():
        teammate_scores = row['teammate_scores']
        
        if len(teammate_scores) == 0:
            # No teammates with valid scores
            features.append({
                'num_teammates': 0,
                'avg_teammate_total': None,
                'max_teammate_total': None,
                'min_teammate_total': None,
                'std_teammate_total': None,
                'avg_teammate_per_game': None,
                'max_teammate_per_game': None,
                'avg_teammate_per_minute': None,
                'max_teammate_per_minute': None,
                'top3_avg_total': None  # Average of top 3 teammates (star power)
            })
        else:
            # Extract scores into lists
            totals = [s['Game_Score_Total'] for s in teammate_scores if s['Game_Score_Total'] is not None]
            per_game = [s['Game_Score_Per_Game'] for s in teammate_scores if s['Game_Score_Per_Game'] is not None]
            per_minute = [s['Game_Score_Per_Minute'] for s in teammate_scores if s['Game_Score_Per_Minute'] is not None]
            
            # Calculate aggregate features
            features.append({
                'num_teammates': len(teammate_scores),
                'avg_teammate_total': np.mean(totals) if totals else None,
                'max_teammate_total': np.max(totals) if totals else None,
                'min_teammate_total': np.min(totals) if totals else None,
                'std_teammate_total': np.std(totals) if len(totals) > 1 else 0,
                'avg_teammate_per_game': np.mean(per_game) if per_game else None,
                'max_teammate_per_game': np.max(per_game) if per_game else None,
                'avg_teammate_per_minute': np.mean(per_minute) if per_minute else None,
                'max_teammate_per_minute': np.max(per_minute) if per_minute else None,
                'top3_avg_total': np.mean(sorted(totals, reverse=True)[:3]) if len(totals) >= 3 else (np.mean(totals) if totals else None)
            })
    
    return pd.DataFrame(features)

# Extract ML-ready features
teammate_features = extract_teammate_features(rookie_evaluations_with_teammates)

# Combine with original rookie data
rookie_evaluations_ml_ready = pd.concat([
    rookie_evaluations_df.reset_index(drop=True),
    teammate_features
], axis=1)

print(f"ML-ready features shape: {rookie_evaluations_ml_ready.shape}")
print(f"\nNew teammate features:")
print(teammate_features.columns.tolist())
print(f"\nSample of ML-ready data:")
print(rookie_evaluations_ml_ready.head())
print(f"\nFeature statistics:")
print(teammate_features.describe())

# Put this into a csv file
rookie_evaluations_ml_ready.to_csv("awards_data/rookie_evaluations_ml_ready.csv", index=False)

ML-ready features shape: (513, 15)

New teammate features:
['num_teammates', 'avg_teammate_total', 'max_teammate_total', 'min_teammate_total', 'std_teammate_total', 'avg_teammate_per_game', 'max_teammate_per_game', 'avg_teammate_per_minute', 'max_teammate_per_minute', 'top3_avg_total']

Sample of ML-ready data:
     playerID  year tmID  Team_Weighted_Avg_Per_Minute_Previous_Year  \
0  schumke01w     2  IND                                    0.235686   
1  mccrani01w     2  WAS                                    0.277743   
2  smithka01w     2  MIN                                    0.305028   
3  johnsja01w     2  ORL                                    0.269138   
4  darlihe01w     2  CLE                                    0.242529   

   Num_Players_Previous_Year  num_teammates  avg_teammate_total  \
0                         13             12          138.490000   
1                         14             13          155.114286   
2                         12             11          

## Prediction Models

### Rookie of the Year

#### 1. Logistic Regression

In [21]:
# Prepare data for Logistic Regression model to predict Rookie of the Year

# Step 1: Get rookies from players_teams_clean
rookies_by_year_clean = get_rookies_by_year(players_teams_clean)

# Step 2: Create a dataset with all rookies and their stats
all_rookies_data = []

for year, rookies in rookies_by_year_clean.items():
    for rookie in rookies:
        rookie_stats = players_teams_clean[(players_teams_clean['year'] == year) & 
                                           (players_teams_clean['playerID'] == rookie)]
        
        if not rookie_stats.empty:
            # Get the rookie's stats
            stats_dict = {
                'playerID': rookie,
                'year': year,
                'minutes': rookie_stats['minutes'].values[0],
                'games_played': rookie_stats['games_played'].values[0],
                'total_points': rookie_stats['total_points'].values[0],
                'total_rebounds': rookie_stats['total_rebounds'].values[0],
                'total_assists': rookie_stats['total_assists'].values[0],
                'points_per_min': rookie_stats['points_per_min'].values[0],
                'assists_per_min': rookie_stats['assists_per_min'].values[0],
                'rebounds_per_min': rookie_stats['rebounds_per_min'].values[0],
                'steals_per_min': rookie_stats['steals_per_min'].values[0],
                'blocks_per_min': rookie_stats['blocks_per_min'].values[0],
                'turnovers_per_min': rookie_stats['turnovers_per_min'].values[0],
                'FG%': rookie_stats['FG%'].values[0],
                'FT%': rookie_stats['FT%'].values[0],
                'Three%': rookie_stats['Three%'].values[0],
                'Three Rate': rookie_stats['Three Rate'].values[0],
            }
            all_rookies_data.append(stats_dict)

# Create DataFrame with all rookies
df_rookies = pd.DataFrame(all_rookies_data)

# Step 3: Add target variable (1 if won ROY, 0 otherwise)
roy_winners = awards[awards['award'] == 'Rookie of the Year'][['year', 'playerID']]

# Create ROY_winner column
df_rookies['ROY_winner'] = df_rookies.apply(
    lambda row: 1 if ((roy_winners['year'] == row['year']) & 
                      (roy_winners['playerID'] == row['playerID'])).any() else 0,
    axis=1
)

# Step 4: Filter out players with too few minutes (anomalies)
MIN_MINUTES_FOR_MODEL = 300  # Adjust as needed
df_rookies = df_rookies[df_rookies['minutes'] >= MIN_MINUTES_FOR_MODEL].copy()

print(f"Total rookies in dataset: {len(df_rookies)}")
print(f"ROY winners in dataset: {df_rookies['ROY_winner'].sum()}")
print(f"\nClass distribution:")
print(df_rookies['ROY_winner'].value_counts())
print(f"\nDataset shape: {df_rookies.shape}")
print(f"\nSample of data:")
print(df_rookies.head())

Total rookies in dataset: 256
ROY winners in dataset: 9

Class distribution:
ROY_winner
0    247
1      9
Name: count, dtype: int64

Dataset shape: (256, 18)

Sample of data:
     playerID  year  minutes  games_played  total_points  total_rebounds  \
0  schumke01w     2      380            28           112              70   
1  mccrani01w     2      828            32           351              56   
2  smithka01w     2     1234            32           739             122   
4  darlihe01w     2      778            32           196              76   
5  campbed01w     2      854            32           260              85   

   total_assists  points_per_min  assists_per_min  rebounds_per_min  \
0             10        0.294737         0.026316          0.184211   
1             47        0.423913         0.056763          0.067633   
2             70        0.598865         0.056726          0.098865   
4            109        0.251928         0.140103          0.097686   
5            

In [22]:
# Train Logistic Regression model for Rookie of the Year prediction

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Select features for the model
# OPTION 1: Use combination of per-minute and totals (recommended)
feature_columns_combined = [
    'minutes',  # Playing time is important
    'total_points', 'total_rebounds', 'total_assists',  # Raw production
    'points_per_min', 'rebounds_per_min', 'assists_per_min',  # Efficiency
    'steals_per_min', 'blocks_per_min',  # Defense
    'FG%', 'Three%',  # Shooting efficiency
]

# OPTION 2: Use per-minute stats only (efficiency focus)
feature_columns_efficiency = [
    'points_per_min', 'rebounds_per_min', 'assists_per_min',
    'steals_per_min', 'blocks_per_min', 'turnovers_per_min',
    'FG%', 'FT%', 'Three%', 'Three Rate',
]

# Choose which features to use
#feature_columns = feature_columns_combined  # Change to feature_columns_efficiency to test
feature_columns = feature_columns_efficiency  # Change to feature_columns_efficiency to test

# Prepare X and y
X = df_rookies[feature_columns]
y = df_rookies['ROY_winner']

# Split into train/test by year (more realistic than random split)
train_years = [2, 3, 4, 5, 8, 9, 10]
test_years = [6, 7]

X_train = df_rookies[df_rookies['year'].isin(train_years)][feature_columns]
y_train = df_rookies[df_rookies['year'].isin(train_years)]['ROY_winner']
X_test = df_rookies[df_rookies['year'].isin(test_years)][feature_columns]
y_test = df_rookies[df_rookies['year'].isin(test_years)]['ROY_winner']

print(f"Training set: {len(X_train)} rookies, {y_train.sum()} winners")
print(f"Test set: {len(X_test)} rookies, {y_test.sum()} winners")

# Scale features (important for logistic regression)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train model with class_weight='balanced' to handle imbalanced data
model = LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
model.fit(X_train_scaled, y_train)

# Predict probabilities
y_train_pred_prob = model.predict_proba(X_train_scaled)[:, 1]
y_test_pred_prob = model.predict_proba(X_test_scaled)[:, 1]

# Predictions (using default threshold 0.5)
y_train_pred = model.predict(X_train_scaled)
y_test_pred = model.predict(X_test_scaled)

print("\n" + "=" * 80)
print("MODEL EVALUATION:")
print("=" * 80)

print("\nTRAINING SET PERFORMANCE:")
print(classification_report(y_train, y_train_pred, target_names=['Not ROY', 'ROY Winner']))

print("\nTEST SET PERFORMANCE:")
print(classification_report(y_test, y_test_pred, target_names=['Not ROY', 'ROY Winner']))

print("\nConfusion Matrix (Test Set):")
print(confusion_matrix(y_test, y_test_pred))

# Feature importance (coefficients)
print("\n" + "=" * 80)
print("FEATURE IMPORTANCE (Model Coefficients):")
print("=" * 80)
feature_importance = pd.DataFrame({
    'Feature': feature_columns,
    'Coefficient': model.coef_[0]
}).sort_values('Coefficient', ascending=False)

print(feature_importance.to_string(index=False))

Training set: 229 rookies, 7 winners
Test set: 27 rookies, 2 winners

MODEL EVALUATION:

TRAINING SET PERFORMANCE:
              precision    recall  f1-score   support

     Not ROY       1.00      0.86      0.92       222
  ROY Winner       0.18      1.00      0.30         7

    accuracy                           0.86       229
   macro avg       0.59      0.93      0.61       229
weighted avg       0.97      0.86      0.90       229


TEST SET PERFORMANCE:
              precision    recall  f1-score   support

     Not ROY       0.96      0.92      0.94        25
  ROY Winner       0.33      0.50      0.40         2

    accuracy                           0.89        27
   macro avg       0.65      0.71      0.67        27
weighted avg       0.91      0.89      0.90        27


Confusion Matrix (Test Set):
[[23  2]
 [ 1  1]]

FEATURE IMPORTANCE (Model Coefficients):
          Feature  Coefficient
   points_per_min     2.488463
 rebounds_per_min     1.434410
           Three%     0.

In [23]:
# Predict Rookie of the Year for each test year and compare with actual winners

print("=" * 100)
print("ROOKIE OF THE YEAR PREDICTIONS BY YEAR (Test Set)")
print("=" * 100)

# Get predictions for test years
test_data = df_rookies[df_rookies['year'].isin(test_years)].copy()
test_data_scaled = scaler.transform(test_data[feature_columns])
test_data['predicted_prob'] = model.predict_proba(test_data_scaled)[:, 1]

# For each test year, find the rookie with highest probability
for year in test_years:
    year_rookies = test_data[test_data['year'] == year].copy()
    year_rookies = year_rookies.sort_values('predicted_prob', ascending=False)
    
    # Get actual winner
    actual_winner = roy_winners[roy_winners['year'] == year]['playerID'].values
    actual_winner = actual_winner[0] if len(actual_winner) > 0 else None
    
    # Get predicted winner (highest probability)
    predicted_winner = year_rookies.iloc[0]['playerID']
    predicted_prob = year_rookies.iloc[0]['predicted_prob']
    
    # Check if correct
    is_correct = predicted_winner == actual_winner
    
    # Get actual winner's probability and rank
    actual_winner_row = year_rookies[year_rookies['playerID'] == actual_winner]
    if not actual_winner_row.empty:
        actual_prob = actual_winner_row['predicted_prob'].values[0]
        actual_rank = (year_rookies['predicted_prob'] > actual_prob).sum() + 1
    else:
        actual_prob = None
        actual_rank = None
    
    print(f"\nYear {year}:")
    print(f"  Predicted: {predicted_winner} (probability: {predicted_prob:.6f})")
    if not is_correct and actual_prob is not None:
        print(f"  Actual:    {actual_winner} (probability: {actual_prob:.6f}, rank: #{actual_rank})")
        print(f"  Difference: {abs(predicted_prob - actual_prob):.6f} (very close!)" if abs(predicted_prob - actual_prob) < 0.01 else "")
    else:
        print(f"  Actual:    {actual_winner}")
    print(f"  Result:    {'✅ CORRECT' if is_correct else '❌ INCORRECT'}")
    
    # Show top 5 rookies by predicted probability
    print(f"\n  Top 5 candidates:")
    for idx, (i, row) in enumerate(year_rookies.head(5).iterrows(), 1):
        is_actual = row['playerID'] == actual_winner
        marker = "⭐" if is_actual else "  "
        print(f"    {marker} {idx}. {row['playerID']:20s} - Prob: {row['predicted_prob']:.6f} "
              f"(Pts/min: {row['points_per_min']:.3f}, Min: {row['minutes']:.0f})")

ROOKIE OF THE YEAR PREDICTIONS BY YEAR (Test Set)

Year 6:
  Predicted: braxtka01w (probability: 0.902084)
  Actual:    johnste01w (probability: 0.114437, rank: #4)

  Result:    ❌ INCORRECT

  Top 5 candidates:
       1. braxtka01w           - Prob: 0.902084 (Pts/min: 0.499, Min: 455)
       2. lyttlsa01w           - Prob: 0.450048 (Pts/min: 0.304, Min: 460)
       3. batkosu01w           - Prob: 0.277327 (Pts/min: 0.432, Min: 461)
    ⭐ 4. johnste01w           - Prob: 0.114437 (Pts/min: 0.324, Min: 973)
       5. whiteta01w           - Prob: 0.017949 (Pts/min: 0.349, Min: 693)

Year 7:
  Predicted: augusse01w (probability: 0.971254)
  Actual:    augusse01w
  Result:    ✅ CORRECT

  Top 5 candidates:
    ⭐ 1. augusse01w           - Prob: 0.971254 (Pts/min: 0.662, Min: 1124)
       2. pondeca01w           - Prob: 0.953448 (Pts/min: 0.585, Min: 1067)
       3. currimo01w           - Prob: 0.483462 (Pts/min: 0.402, Min: 844)
       4. youngso01w           - Prob: 0.410293 (Pts/min: 0.386