# **March Madness Machine Learning 2025** #
##### By: CoNiya Butler & Daniel Davis #####

### **Introduction** ###

This notebook explores historical NCAA Division I men's and women's basketball data to predict the outcomes of March Madness tournament games. We'll leverage provided datasets containing team information, game results, and tournament seeds to build a predictive model. Data files are prefixed with 'M' for men's, 'W' for women's,and some span both.

##### Goal: #####


Minimize the Brier score, the evaluation metric for this competition.


##### Approach: #####

1. Start with a model with the features on the list provided below.
    - Seeding differences
    - Average points Per Game
    - Win percentage regular season
    - Win percentage tournament
    - Location
    - Average scoring difference per game
2. Apply feature engineering and model tuning techniques

### **Import Libraries** ###

In [2]:
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import log_loss, brier_score_loss, mean_squared_error, roc_curve, auc
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss, accuracy_score
import warnings
warnings.filterwarnings("ignore")

### **Load Data** ###

Load the datasets need for the model M is for men's basketball and W is for women's basketball

In [5]:
M_seed_df = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/MNCAATourneySeeds.csv")
W_seed_df = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/WNCAATourneySeeds.csv")
M_regular_results = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/MRegularSeasonDetailedResults.csv")
M_tourney_results = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/MNCAATourneyDetailedResults.csv")
W_regular_results = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/WRegularSeasonDetailedResults.csv")
W_tourney_results = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/WNCAATourneyDetailedResults.csv")
submissionstage2_df = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/SampleSubmissionStage2.csv")
submissionstage1_df = pd.read_csv("/app/March-Madness-Prediction/Data/march-machine-learning-mania-2025/SampleSubmissionStage1.csv")

### **Data Preprocessing** ###

In [6]:
# Join the men's and women's dataset together
regular_results = pd.concat([M_regular_results, W_regular_results])
tourney_results = pd.concat([M_tourney_results, W_tourney_results])
seed_df = pd.concat([M_seed_df, W_seed_df])
submission_df = pd.concat([submissionstage1_df, submissionstage2_df])

In [7]:
#This function will caculate the teams' average win score, loss score, points and point differential, for each season.
def compute_stats(results):
    wins = results.groupby(["Season", "WTeamID"])["WScore"].agg(["count", "mean"]).rename(columns={"count": "Wins", "mean": "AvgWinScore"}).reset_index()
    wins = wins.rename(columns={'WTeamID': 'Team1'})
    losses = results.groupby(["Season", "LTeamID"])["LScore"].agg(["count", "mean"]).rename(columns={"count": "Losses", "mean": "AvgLossScore"}).reset_index()
    losses = losses.rename(columns={'LTeamID' : 'Team1'})

    stats = wins.merge(losses, on=["Season", "Team1"]).fillna(0)
    stats["TotalGames"] = stats["Wins"] + stats["Losses"]
    stats["WinRate"] = stats["Wins"] / stats["TotalGames"]
    
   
    stats["AvgScore"] = (stats["Wins"] * stats["AvgWinScore"] + stats["Losses"] * stats["AvgLossScore"]) / stats["TotalGames"]
    

    stats["AvgScoreDiff"] = stats["AvgWinScore"] - stats["AvgLossScore"]
    
    return stats

season_stats = compute_stats(regular_results)
tourney_stats = compute_stats(tourney_results)

In [8]:
season_stats

Unnamed: 0,Season,Team1,Wins,AvgWinScore,Losses,AvgLossScore,TotalGames,WinRate,AvgScore,AvgScoreDiff
0,2003,1102,12,68.750000,16,48.625000,28,0.428571,57.250000,20.125000
1,2003,1103,13,87.769231,14,70.428571,27,0.481481,78.777778,17.340659
2,2003,1104,17,74.705882,11,60.909091,28,0.607143,69.285714,13.796791
3,2003,1105,7,79.428571,19,68.947368,26,0.269231,71.769231,10.481203
4,2003,1106,13,68.307692,15,59.533333,28,0.464286,63.607143,8.774359
...,...,...,...,...,...,...,...,...,...,...
13527,2025,3476,8,72.250000,11,56.818182,19,0.421053,63.315789,15.431818
13528,2025,3477,2,77.500000,14,61.928571,16,0.125000,63.875000,15.571429
13529,2025,3478,2,73.000000,17,47.647059,19,0.105263,50.315789,25.352941
13530,2025,3479,4,69.250000,12,59.666667,16,0.250000,62.062500,9.583333


In [9]:
def feature_selection(results_df, df):
    df['Season'] = df['ID'].map(lambda x: x.split('_')[0]).astype(int)
    df['Team1'] = df['ID'].map(lambda x: x.split('_')[1]).astype(int)
    df['Team2'] = df['ID'].map(lambda x: x.split('_')[2]).astype(int)

    feature_df = df.merge(results_df[['Season', 'Team1' , 'WinRate', 'AvgScore', 'AvgScoreDiff']], on=['Season', 'Team1'])
    feature_df = feature_df.rename(columns={'WinRate': 'Team1WinRatio', 'AvgScore': 'Team1AvgScore', 'AvgScoreDiff': 'Team1AvgScoreDiff'})
    results_df = results_df.rename(columns={'Team1': 'Team2'})
    feature_df = feature_df.merge(results_df[['Season', 'Team2' , 'WinRate', 'AvgScore', 'AvgScoreDiff']], on=['Season', 'Team2'])
    feature_df = feature_df.rename(columns={'WinRate': 'Team2WinRatio', 'AvgScore': 'Team2AvgScore', 'AvgScoreDiff': 'Team2AvgScoreDiff'})
    feature_df['Team1WinPrediction'] = 1 / (1 + 10 ** ((feature_df['Team2WinRatio']*1000 - feature_df['Team1WinRatio']*1000) / 400))
    
    feature_df = feature_df.drop(['ID','Pred'], axis=1)
    return feature_df


In [79]:
season_training_df = feature_selection(season_stats, submission_df)
tourney_training_df = feature_selection(tourney_stats, submission_df)
tourney_training_df['ST'] = 1
season_training_df['ST'] = 0

In [80]:
training_df = pd.concat([season_training_df, tourney_training_df])
training_df

Unnamed: 0,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
0,2021,1101,1102,0.826087,76.304348,15.197368,0.200000,58.720000,8.350000,0.973508,0
1,2021,1101,1103,0.826087,76.304348,15.197368,0.619048,76.571429,8.596154,0.767065,0
2,2021,1101,1104,0.826087,76.304348,15.197368,0.800000,79.566667,19.666667,0.537472,0
3,2021,1101,1105,0.826087,76.304348,15.197368,0.400000,63.333333,21.666667,0.920763,0
4,2021,1101,1106,0.826087,76.304348,15.197368,0.222222,64.222222,5.821429,0.970001,0
...,...,...,...,...,...,...,...,...,...,...,...
4422,2024,3428,3439,0.500000,67.000000,2.000000,0.500000,82.000000,20.000000,0.500000,1
4423,2024,3428,3452,0.500000,67.000000,2.000000,0.500000,58.500000,9.000000,0.500000,1
4424,2024,3435,3439,0.500000,67.500000,9.000000,0.500000,82.000000,20.000000,0.500000,1
4425,2024,3435,3452,0.500000,67.500000,9.000000,0.500000,58.500000,9.000000,0.500000,1


In [81]:
training_df[(training_df['Season'] == 2021) & (training_df['ST'] == 1)]

Final_training_df = training_df.query('Season >= 2023')
Final_training_df

Unnamed: 0,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
240956,2023,1101,1102,0.346154,71.192308,11.601307,0.437500,66.937500,14.460317,0.371490,0
240957,2023,1101,1103,0.346154,71.192308,11.601307,0.645161,74.161290,13.495455,0.151713,0
240958,2023,1101,1104,0.346154,71.192308,11.601307,0.852941,82.176471,15.213793,0.051305,0
240959,2023,1101,1105,0.346154,71.192308,11.601307,0.400000,67.966667,4.638889,0.423124,0
240960,2023,1101,1106,0.346154,71.192308,11.601307,0.233333,62.433333,9.683230,0.656886,0
...,...,...,...,...,...,...,...,...,...,...,...
4422,2024,3428,3439,0.500000,67.000000,2.000000,0.500000,82.000000,20.000000,0.500000,1
4423,2024,3428,3452,0.500000,67.000000,2.000000,0.500000,58.500000,9.000000,0.500000,1
4424,2024,3435,3439,0.500000,67.500000,9.000000,0.500000,82.000000,20.000000,0.500000,1
4425,2024,3435,3452,0.500000,67.500000,9.000000,0.500000,58.500000,9.000000,0.500000,1


### **Exploratory Data Analysis (EDA)** ###

In [82]:
Final_training_df.columns

Index(['Season', 'Team1', 'Team2', 'Team1WinRatio', 'Team1AvgScore',
       'Team1AvgScoreDiff', 'Team2WinRatio', 'Team2AvgScore',
       'Team2AvgScoreDiff', 'Team1WinPrediction', 'ST'],
      dtype='object')

### **Model Training** ###

In [99]:
# Split features and target
X = training_df[['Season', 'Team1', 'Team2', 'Team1WinRatio', 'Team1AvgScore', 'Team1AvgScoreDiff', 'Team2WinRatio', 'Team2AvgScore', 'Team2AvgScoreDiff', 'ST']]
y = training_df["Team1WinPrediction"]

# Split into training and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the numerical features (excluding TeamID)
scaler = StandardScaler()
X_train.iloc[:, 3:] = scaler.fit_transform(X_train.iloc[:, 3:])
X_test.iloc[:, 3:] = scaler.transform(X_test.iloc[:, 3:])

# Convert to DMatrix for XGBoost
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# Define model parameters
params = {
    "objective": "binary:logistic",  # Probability prediction
    "eval_metric": "logloss",
    "eta": 0.1,
    "max_depth": 6,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "seed": 42
}

# Train model
num_rounds = 100
bst = xgb.train(params, dtrain, num_rounds)

# Predict probabilities
y_pred_proba = bst.predict(dtest)

print("Example Predictions:", y_pred_proba[:5])

Example Predictions: [0.5359901  0.69231    0.7469562  0.09507716 0.91091955]


In [100]:
print("Training features:", dtrain.feature_names)
print("Test features:", dtest.feature_names)

Training features: ['Season', 'Team1', 'Team2', 'Team1WinRatio', 'Team1AvgScore', 'Team1AvgScoreDiff', 'Team2WinRatio', 'Team2AvgScore', 'Team2AvgScoreDiff', 'ST']
Test features: ['Season', 'Team1', 'Team2', 'Team1WinRatio', 'Team1AvgScore', 'Team1AvgScoreDiff', 'Team2WinRatio', 'Team2AvgScore', 'Team2AvgScoreDiff', 'ST']


### **Prediction Submission** ###

In [85]:
submissionstage2_df['Season'] = submissionstage2_df['ID'].map(lambda x: x.split('_')[0]).astype(int)
submissionstage2_df['Team1'] = submissionstage2_df['ID'].map(lambda x: x.split('_')[1]).astype(int)
submissionstage2_df['Team2'] = submissionstage2_df['ID'].map(lambda x: x.split('_')[2]).astype(int)
submissionstage2_df

Unnamed: 0,ID,Pred,Season,Team1,Team2
0,2025_1101_1102,0.5,2025,1101,1102
1,2025_1101_1103,0.5,2025,1101,1103
2,2025_1101_1104,0.5,2025,1101,1104
3,2025_1101_1105,0.5,2025,1101,1105
4,2025_1101_1106,0.5,2025,1101,1106
...,...,...,...,...,...
131402,2025_3477_3479,0.5,2025,3477,3479
131403,2025_3477_3480,0.5,2025,3477,3480
131404,2025_3478_3479,0.5,2025,3478,3479
131405,2025_3478_3480,0.5,2025,3478,3480


In [86]:
season_training_df
total_stat_df = pd.concat([season_training_df, tourney_training_df])

In [87]:
# Prediction dataset
pred = total_stat_df[(total_stat_df['Season'] == 2025) | (total_stat_df['Season'] == 2024)].groupby(['Team1', "Team2"]).mean().reset_index()
pred['Season'] = 2025
pred

Unnamed: 0,Team1,Team2,Season,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
0,1101,1102,2025,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,0.733181,0.0
1,1101,1103,2025,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,0.150461,0.0
2,1101,1104,2025,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,0.142010,0.0
3,1101,1105,2025,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,0.650697,0.0
4,1101,1106,2025,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,0.549345,0.0
...,...,...,...,...,...,...,...,...,...,...,...
131374,3477,3479,2025,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,0.327490,0.0
131375,3477,3480,2025,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,0.212130,0.0
131376,3478,3479,2025,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,0.302975,0.0
131377,3478,3480,2025,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,0.193762,0.0


In [88]:
final_pred_df = pd.merge(submissionstage2_df, pred, on=['Season', 'Team1', 'Team2'], how='left')

In [89]:
final_pred_df

Unnamed: 0,ID,Pred,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
0,2025_1101_1102,0.5,2025,1101,1102,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,0.733181,0.0
1,2025_1101_1103,0.5,2025,1101,1103,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,0.150461,0.0
2,2025_1101_1104,0.5,2025,1101,1104,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,0.142010,0.0
3,2025_1101_1105,0.5,2025,1101,1105,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,0.650697,0.0
4,2025_1101_1106,0.5,2025,1101,1106,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,0.549345,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
131402,2025_3477_3479,0.5,2025,3477,3479,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,0.327490,0.0
131403,2025_3477_3480,0.5,2025,3477,3480,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,0.212130,0.0
131404,2025_3478_3479,0.5,2025,3478,3479,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,0.302975,0.0
131405,2025_3478_3480,0.5,2025,3478,3480,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,0.193762,0.0


In [90]:
final_pred_df.fillna(0)

Unnamed: 0,ID,Pred,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
0,2025_1101_1102,0.5,2025,1101,1102,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,0.733181,0.0
1,2025_1101_1103,0.5,2025,1101,1103,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,0.150461,0.0
2,2025_1101_1104,0.5,2025,1101,1104,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,0.142010,0.0
3,2025_1101_1105,0.5,2025,1101,1105,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,0.650697,0.0
4,2025_1101_1106,0.5,2025,1101,1106,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,0.549345,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
131402,2025_3477_3479,0.5,2025,3477,3479,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,0.327490,0.0
131403,2025_3477_3480,0.5,2025,3477,3480,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,0.212130,0.0
131404,2025_3478_3479,0.5,2025,3478,3479,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,0.302975,0.0
131405,2025_3478_3480,0.5,2025,3478,3480,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,0.193762,0.0


In [91]:
final_pred_df['ST'] = 1
final_pred_df

Unnamed: 0,ID,Pred,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST
0,2025_1101_1102,0.5,2025,1101,1102,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,0.733181,1
1,2025_1101_1103,0.5,2025,1101,1103,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,0.150461,1
2,2025_1101_1104,0.5,2025,1101,1104,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,0.142010,1
3,2025_1101_1105,0.5,2025,1101,1105,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,0.650697,1
4,2025_1101_1106,0.5,2025,1101,1106,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,0.549345,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
131402,2025_3477_3479,0.5,2025,3477,3479,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,0.327490,1
131403,2025_3477_3480,0.5,2025,3477,3480,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,0.212130,1
131404,2025_3478_3479,0.5,2025,3478,3479,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,0.302975,1
131405,2025_3478_3480,0.5,2025,3478,3480,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,0.193762,1


In [92]:
final_pred_df.drop(columns=['ID', 'Pred', 'Team1WinPrediction'])

Unnamed: 0,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,ST
0,2025,1101,1102,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,1
1,2025,1101,1103,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,1
2,2025,1101,1104,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,1
3,2025,1101,1105,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,1
4,2025,1101,1106,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,1
...,...,...,...,...,...,...,...,...,...,...
131402,2025,3477,3479,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,1
131403,2025,3477,3480,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,1
131404,2025,3478,3479,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,1
131405,2025,3478,3480,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,1


In [101]:
# Split features and target
X = final_pred_df[['Season', 'Team1', 'Team2', 'Team1WinRatio', 'Team1AvgScore', 'Team1AvgScoreDiff', 'Team2WinRatio', 'Team2AvgScore', 'Team2AvgScoreDiff', 'ST']]
scaler_X = pd.DataFrame(scaler.fit_transform(X.iloc[:, 3:]), columns=X.columns[3:])

scaler_X.insert(0, "Season", X['Season'].values)
scaler_X.insert(1, "Team1", X['Team1'].values)
scaler_X.insert(2, "Team2", X['Team2'].values)


# Scale the numerical features (excluding TeamID)

dnew = xgb.DMatrix(scaler_X)

In [102]:
dnew.num_col()

10

In [103]:

# Predict probabilities
final_pred_df["Team1_Win_Probability"] = bst.predict(dnew)  # Convert to %

final_pred_df

Unnamed: 0,ID,Pred,Season,Team1,Team2,Team1WinRatio,Team1AvgScore,Team1AvgScoreDiff,Team2WinRatio,Team2AvgScore,Team2AvgScoreDiff,Team1WinPrediction,ST,Team1_Win_Probability
0,2025_1101_1102,0.5,2025,1101,1102,0.392473,68.039427,10.760154,0.216590,64.747312,12.133838,0.733181,1,0.745743
1,2025_1101_1103,0.5,2025,1101,1103,0.392473,68.039427,10.760154,0.704861,77.588542,12.965385,0.150461,1,0.115861
2,2025_1101_1104,0.5,2025,1101,1104,0.392473,68.039427,10.760154,0.753125,90.550000,16.503820,0.142010,1,0.083368
3,2025_1101_1105,0.5,2025,1101,1105,0.392473,68.039427,10.760154,0.284314,70.024955,11.413462,0.650697,1,0.654044
4,2025_1101_1106,0.5,2025,1101,1106,0.392473,68.039427,10.760154,0.356624,69.692377,10.366917,0.549345,1,0.535065
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131402,2025_3477_3479,0.5,2025,3477,3479,0.125000,63.875000,15.571429,0.250000,62.062500,9.583333,0.327490,1,0.335959
131403,2025_3477_3480,0.5,2025,3477,3480,0.125000,63.875000,15.571429,0.352941,67.176471,4.878788,0.212130,1,0.206337
131404,2025_3478_3479,0.5,2025,3478,3479,0.105263,50.315789,25.352941,0.250000,62.062500,9.583333,0.302975,1,0.284986
131405,2025_3478_3480,0.5,2025,3478,3480,0.105263,50.315789,25.352941,0.352941,67.176471,4.878788,0.193762,1,0.169852


In [115]:
submission_df = final_pred_df[['ID', 'Team1_Win_Probability']]
submission_df = submission_df.rename(columns={'Team1_Win_Probability': 'Pred'})
submission_df

Unnamed: 0,ID,Pred
0,2025_1101_1102,0.745743
1,2025_1101_1103,0.115861
2,2025_1101_1104,0.083368
3,2025_1101_1105,0.654044
4,2025_1101_1106,0.535065
...,...,...
131402,2025_3477_3479,0.335959
131403,2025_3477_3480,0.206337
131404,2025_3478_3479,0.284986
131405,2025_3478_3480,0.169852


In [116]:
submission_df.to_csv('FinalSubmissionMarchMadness.csv', index=False)