# 🏀 March Mania 2025 - Starter Notebook


# In [1]:

# 📌 Goal of the Competition:
# Predict the probability that the smaller TeamID wins a given matchup
# using seed rankings.

# ✅ Import Required Libraries
import pandas as pd
import numpy as np
import re

# 🏆 Helper Function: Parse Seed Value
def parse_seed(seed):
    """
    Return the numeric part of a tournament seed string as an int.

    All non-digit characters are stripped and the remaining digits are
    converted, e.g. "W01" -> 1 and "M16a" -> 16.

    Falls back to 16 when `seed` is not a string or contains no digits.
    """
    # Anything that is not text (None, NaN, numbers, ...) gets the default.
    if not isinstance(seed, str):
        return 16

    digits_only = re.sub(r"\D", "", seed)  # keep only the digit characters
    if not digits_only:
        return 16  # text without any digit -> default seed
    return int(digits_only)

# ======================================================================
# 1️⃣ Load Seed Data & Add Gender Column
# ======================================================================

# 📥 Load Tournament Seed Data (men's AND women's tournaments)
# NOTE: per the competition data description, the first character of a raw
# `Seed` string (e.g. "W01") is the bracket REGION (W/X/Y/Z), not the team's
# gender, so gender cannot be parsed from it. Each seed file is instead tagged
# with the gender of the tournament it describes: the "M" file holds the men's
# seeds and the "W" file the women's seeds. Without the women's file every
# women's matchup would silently fall back to the default seed.
seed_data = pd.concat(
    [
        pd.read_csv(path).assign(Gender=gender)
        for gender, path in (
            ('M', '/kaggle/input/march-machine-learning-mania-2025/MNCAATourneySeeds.csv'),
            ('W', '/kaggle/input/march-machine-learning-mania-2025/WNCAATourneySeeds.csv'),
        )
    ],
    ignore_index=True,  # one clean RangeIndex over the stacked rows
)

# 🎯 Convert Seed Values to Numeric
# parse_seed strips the region letter / play-in suffix, e.g. "W01" -> 1, "X16a" -> 16
seed_data['Seed'] = seed_data['Seed'].apply(parse_seed)

# ✅ **At this stage:**
# - `Seed` column is now an integer (e.g., W01 → 1, M16a → 16)
# - `Gender` column is added ('M' for men's teams, 'W' for women's teams)

# ======================================================================
# 2️⃣ Load Sample Submission for 2025 Matchups
# ======================================================================

# 📥 Read the sample submission; each `ID` looks like "2025_1101_1104"
submission = pd.read_csv('/kaggle/input/march-machine-learning-mania-2025/SampleSubmissionStage2.csv')

# 🏀 Break the ID into Season / TeamID_1 / TeamID_2 and cast all three
# pieces to integers in a single pass
sub_pairs = (
    submission['ID']
    .str.split('_', expand=True)
    .set_axis(['Season', 'TeamID_1', 'TeamID_2'], axis=1)
    .astype(int)
)

# 🏀 Tag each matchup with a gender: a first TeamID whose decimal text begins
# with '1' is labelled 'M' (men), anything else 'W' (women)
sub_pairs['Gender'] = np.where(
    sub_pairs['TeamID_1'].astype(str).str.startswith('1'),
    'M',
    'W',
)

# ✅ **At this stage:**
# - We have separated `Season`, `TeamID_1`, `TeamID_2` from the ID column.
# - We have added a `Gender` column for filtering men’s and women’s matchups.

# ======================================================================
# 3️⃣ Add Seed Information & Predictions
# ======================================================================

# 🎯 **Create a Lookup Table for Team Seeds**
# Seed values indexed by (Season, Gender, TeamID)
seed_lookup = seed_data.set_index(['Season', 'Gender', 'TeamID'])['Seed']

# 🔎 **Fetch Seed Values for Each Team**
# A vectorized left join replaces the row-by-row `apply` lookup: it resolves
# every matchup in one pass instead of running a Python lambda per row.
# - how='left' keeps one output row per matchup, in the original order.
# - validate='many_to_one' raises if a (Season, Gender, TeamID) key is
#   duplicated in the seed table, which would otherwise multiply rows.
# - Teams with no seed entry come back as NaN and get the default seed 16.
for team_col, seed_col in (('TeamID_1', 'Seed_1'), ('TeamID_2', 'Seed_2')):
    matched = sub_pairs[['Season', 'Gender', team_col]].merge(
        seed_lookup.reset_index(),
        how='left',
        left_on=['Season', 'Gender', team_col],
        right_on=['Season', 'Gender', 'TeamID'],
        validate='many_to_one',
    )
    # to_numpy() assigns positionally, independent of either frame's index
    sub_pairs[seed_col] = matched['Seed'].fillna(16).astype(int).to_numpy()

# 🔢 **Compute Seed Difference**
# Positive when team 1 has the better (numerically smaller) seed
sub_pairs['Seed_Diff'] = sub_pairs['Seed_2'] - sub_pairs['Seed_1']

# ✅ **At this stage:**
# - `Seed_1` and `Seed_2` are added for each team.
# - `Seed_Diff` represents the difference between the two teams.

# ======================================================================
# 4️⃣ Compute Win Probability
# ======================================================================

# 🎯 **Turn the Seed Difference into a Win Probability**
# Linear model: P(win) = 0.5 + (0.03 × Seed Difference),
# then limited to the range [0.05, 0.95] so no prediction is ever
# fully certain in either direction.
sub_pairs['Pred'] = (
    sub_pairs['Seed_Diff']
    .mul(0.03)
    .add(0.5)
    .clip(lower=0.05, upper=0.95)
)

# ✅ **At this stage:**
# - Higher-seeded teams (lower seed number) get a **higher probability of winning**.
# - Probability is **capped between 5% and 95%**.

# ======================================================================
# 5️⃣ Final Submission
# ======================================================================

# 📜 Attach the predictions (aligned on the row index) and write the CSV
submission = submission.assign(Pred=sub_pairs['Pred'])
submission.to_csv('submission.csv', index=False)

# ✅ Let the user know the file was written
print("✅ Submission file created successfully! 🏀")


# Output: ✅ Submission file created successfully! 🏀
