# Cleaning both Men and Women

In [8]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import brier_score_loss
from xgboost import XGBClassifier
from skopt import BayesSearchCV
from skopt.space import Real, Integer
import json
from datetime import datetime
from datetime import datetime
import re

In [9]:
def get_bracket_data(CATEGORY):
    if CATEGORY == "mens":
        games = """
1 Auburn vs. 16 Alabama State/St. Francis PA
8 Louisville vs. 9 Creighton
5 Michigan vs. 12 UC San Diego
4 Texas A&M vs. 13 Yale
6 Ole Miss vs. 11 SDSU/North Carolina
3 Iowa State vs. 14 Lipscomb
7 Marquette vs. 10 New Mexico
2 Michigan State vs. 15 Bryant
1 Florida vs. 16 Norfolk State
8 UConn vs. 9 Oklahoma
5 Memphis vs. 12 Colorado State
4 Maryland vs. 13 Grand Canyon
6 Missouri vs. 11 Drake
3 Texas Tech vs. 14 UNC Wilmington
7 Kansas vs. 10 Arkansas
2 St. John’s vs. 15 Omaha
1 Duke vs. 16 American/Mount St. Mary’s
8 Mississippi State vs. 9 Baylor
5 Oregon vs. 12 Liberty
4 Arizona vs. 13 Akron
6 BYU vs. 11 VCU
3 Wisconsin vs. 14 Montana
7 St. Mary’s vs. 10 Vanderbilt
2 Alabama vs. 15 Robert Morris
1 Houston vs. 16 SIUE
8 Gonzaga vs. 9 Georgia
5 Clemson vs. 12 McNeese
4 Purdue vs. 13 High Point
6 Illinois vs. 11 Texas/Xavier
3 Kentucky vs. 14 Troy
7 UCLA vs. 10 Utah State
2 Tennessee vs. 15 Wofford
        """

        matchups = games.split("\n")[1:-1]
        m = [m.split(" vs. ") for m in matchups]
        brck = [re.sub(r'^\d+\s+', '', team) for pair in m for team in pair]



        bmap = {
            "Ole Miss":"Mississippi",
            "Iowa State":"Iowa St",
            "Michigan State":"Michigan St",
            "Norfolk State":"Norfolk St",
            "UConn":"Connecticut",
            "Colorado State":"Colorado St",
            "St. John’s":"St John's",
            "Omaha":"NE Omaha",
            "Mississippi State":"Mississippi St",
            "St. Mary’s":"St Mary's CA",
            "McNeese":"McNeese St",
            "Utah State":"Utah St",
            "SDSU":"South Dakota",
            "Mount St. Mary’s":"Mt St Mary's",
            "St. Francis PA":"St Francis PA",
            "Alabama State":"Alabama St",
            "American":"American Univ"}
    else:
        games = """
No. 1 UCLA vs. No. 16 UC San Diego/Southern
No. 8 Richmond vs. No. 9 Georgia Tech
No. 4 Baylor vs. No. 13 Grand Canyon
No. 5 Ole Miss vs. No. 12 Ball State
No. 3 LSU vs. No. 14 San Diego State
No. 6 Florida State vs. No. 11 George Mason
No. 2 NC State vs. No. 15 Vermont
No. 7 Michigan State vs. No. 10 Harvard
No. 1 USC vs. No. 16 UNC Greensboro
No. 8 California vs. No. 9 Mississippi State
No. 4 Kentucky vs. No. 13 Liberty
No. 5 Kansas State vs. No. 12 Fairfield
No. 3 Oklahoma vs. No. 14 Florida Gulf Coast
No. 6 Iowa vs. No. 11 Murray State
No. 2 UConn vs. No. 15 Arkansas State
No. 7 Oklahoma State vs. No. 10 South Dakota State
No. 1 South Carolina vs. No. 16 Tennessee Tech
No. 8 Utah vs. No. 9 Indiana
No. 4 Maryland vs. No. 13 Norfolk State
No. 5 Alabama vs. No. 12 Green Bay
No. 3 North Carolina vs. No. 14 Oregon State
No. 6 West Virginia vs. No. 11 Columbia/Washington
No. 2 Duke vs. No. 15 Lehigh
No. 7 Vanderbilt vs. No. 10 Oregon
No. 1 Texas vs. No. 16 High Point/William & Mary
No. 8 Illinois vs. No. 9 Creighton
No. 4 Ohio State vs. No. 13 Montana State
No. 5 Tennessee vs. No. 12 South Florida
No. 3 Notre Dame vs. No. 14 Stephen F. Austin
No. 6 Michigan vs. No. 11 Iowa State/Princeton
No. 2 TCU vs. No. 15 Fairleigh Dickinson
No. 7 Louisville vs. No. 10 Nebraska
        """

        matchups = games.split("\n")[1:-1]
        m = [m.split(" vs. ") for m in matchups]
        brck = [re.sub(r'^(No\.\s*)?\d+\s+', '', team).strip() for pair in m for team in pair]

        bmap = {
            "Ole Miss":"Mississippi",
            "Ball State":"Ball St",
            "San Diego State":"San Diego St",
            "Florida State":"Florida St",
            "Michigan State":"Michigan St",
            "Mississippi State":"Mississippi St",
            "Kansas State":"Kansas St",
            "Florida Gulf Coast":"FGCU",
            "Murray State":"Murray St",
            "UConn":"Connecticut",
            "Arkansas State":"Arkansas St",
            "Oklahoma State":"Oklahoma St",
            "South Dakota State":"South Dakota",
            "Norfolk State":"Norfolk St",
            "Green Bay":"WI Green Bay",
            "Oregon State":"Oregon St",
            "Ohio State":"Ohio St",
            "Montana State":"Montana St",
            "Stephen F. Austin":"SF Austin",
            "Fairleigh Dickinson":"F Dickinson",
            "Iowa State":"Iowa St",
            "Southern":"Southern Univ"
        }

    return bmap, brck

In [10]:
def data_cleaning(season, tournament, conferences):
    all_matches = pd.concat([season, tournament], axis=0).sort_values(["Season", "DayNum"]).reset_index(drop=True)
    all_matches.tail()
    all_matches["LLoc"] = all_matches.WLoc
    all_matches = all_matches.replace({"LLoc":{"H":"A", "A":"H", "N":"N"}})
    all_matches.head()
    cols = ["Season", "first_id", "second_id"]
    all_matches["first_id"] = all_matches[['WTeamID','LTeamID']].min(axis=1)
    all_matches["second_id"] = all_matches[['WTeamID','LTeamID']].max(axis=1)
    all_matches["prob"] = 0
    all_matches.loc[all_matches.first_id == all_matches.WTeamID, "prob"] = 1
    all_matches["game_id"] = all_matches[cols].apply(lambda row: '_'.join(row.values.astype(str)), axis=1)
    all_matches.head()
    adf = all_matches.drop(columns=["first_id", "second_id"])
    winning_cols = [c for c in adf.columns if c.startswith("W")]
    losing_cols = [c for c in adf.columns if c.startswith("L")]
    neutral_cols = [c for c in adf.columns if not(c.startswith("W") or c.startswith("L"))]

    #Figure out location for losing team

    df_w = adf[neutral_cols+winning_cols+["LScore"]].copy()
    df_l = adf[neutral_cols+losing_cols+["WScore"]].copy()

    df_w = df_w.rename(columns=lambda x:x[1:] if x.startswith("W") else x)
    df_l = df_l.rename(columns=lambda x:x[1:] if x.startswith("L") else x)
    df_l = df_l.rename(columns={"WScore":"points_allowed"})
    df_w = df_w.rename(columns={"LScore":"points_allowed"})

    df_w["result"] = 1
    df_l["result"] = 0

    df = pd.concat([df_w, df_l], ignore_index=True)
    df = df.sort_values(by=["Season", "TeamID", "DayNum"]).reset_index(drop=True)
    display(df.isna().sum())
    saved_df = df.copy()
    df = saved_df.copy()
    df = df.sort_values(by=["Season", "TeamID", "DayNum"]).reset_index(drop=True)
    stats = ["Score", "FGM", "FGA", "FGM3", "FGA3", "FTM", "FTA", "OR", "DR", "Ast", "TO", "Stl", "Blk", "PF", "points_allowed"]
    cum_stats_cols = [f"cum_{s}" for s in stats]

    for stat in stats:
        df[f"cum_{stat}"] = df.groupby(["Season", "TeamID"])[stat].cumsum().shift(fill_value=0)

    df["games_won"] = df.groupby(["Season", "TeamID"])["result"].cumsum().shift(fill_value=0)

    df["games_played"] = df.groupby(["Season", "TeamID"]).cumcount()

    df["prev_TeamID"] = df["TeamID"].shift(1)
    for stat in [*cum_stats_cols, "games_played", "games_won"]:
        df.loc[df["TeamID"] != df["prev_TeamID"], stat] = 0

    df["games_lost"] = df["games_played"] - df["games_won"]

    df["win_percentage"] = df["games_won"]/df["games_played"]

    df = df.drop(columns=["prev_TeamID"])

    df = pd.merge(df, conferences, how="left", left_on=["Season", "TeamID"], right_on=["Season", "TeamID"])
    print(df.columns)
    df.head()
    cum_stats = df.drop(columns=stats)

    averages = ["cum_Score", "cum_OR", "cum_DR", "cum_Ast", "cum_TO", "cum_Stl", "cum_Blk", "cum_PF", "cum_points_allowed"]
    percentages = [("cum_FGM", "cum_FGA", "FG%"), ("cum_FGM3", "cum_FGA3", "FG3%"), ("cum_FTM", "cum_FTA", "FT%")]

    print(cum_stats[cum_stats.games_played == 0].shape)

    cum_stats = cum_stats[cum_stats.games_played != 0]

    for col in averages:
        colname = "avg_" + col[4:]
        cum_stats[colname] = cum_stats[col] / cum_stats["games_played"]

    for make, attempt, new_col in percentages:
        cum_stats[new_col] = cum_stats[make] / cum_stats[attempt]

    averages = cum_stats.drop(columns=cum_stats_cols)

    unimportant_cols = ["prob", "NumOT", "result", "Loc"]

    averages_to_merge = averages.drop(columns=unimportant_cols)
    safe = ['Season', 'DayNum', 'first_id', 'second_id', 'prob', 'game_id', 'NumOT']
    fdf = all_matches[safe]
    fdf.head()

    df_merged = pd.merge(fdf, averages_to_merge, how="left", left_on=["Season", "DayNum", "game_id", "first_id"], right_on=["Season", "DayNum", "game_id", "TeamID"])
    df_merged = df_merged.drop(columns=["TeamID"])
    for col in df_merged.columns:
        if col not in safe:
            df_merged[f"first_{col}"] = df_merged[col]
            df_merged = df_merged.drop(columns=[col])

    df_merged = pd.merge(df_merged, averages_to_merge, how="left", left_on=["Season", "DayNum", "game_id", "second_id"], right_on=["Season", "DayNum", "game_id", "TeamID"])
    df_merged.drop(columns=["TeamID"])
    for col in df_merged.columns:
        if col in safe or col.startswith("first_"):
            continue
        df_merged[f"second_{col}"] = df_merged[col]
        df_merged = df_merged.drop(columns=[col])

    nfg_df = df_merged.dropna()
    return nfg_df, averages

In [11]:
def model(df, cat):
    not_needed = ["Season", "first_id", "second_TeamID", "second_id", "game_id", "NumOT", "DayNum"]

    df = df.drop(columns=not_needed)

    X = df.drop(columns=["prob"])
    y = df["prob"]

    # Identify categorical columns
    categorical_features = X.select_dtypes(include=['object', 'category']).columns

    # ColumnTransformer to apply OneHotEncoder only to categorical columns
    preprocessor = ColumnTransformer(
        transformers=[
            ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical_features)
        ],
        remainder="passthrough"  # Keep non-categorical columns as they are
    )

    param_grid = {
        'n_estimators': Integer(10, 1000),
        'max_depth': Integer(3, 25),
        'learning_rate': Real(0.01, 0.5, prior='log-uniform'),
        'subsample': Real(0.5, 1.0),
        'colsample_bytree': Real(0.5, 1.0)
    }

    classifier = XGBClassifier(device="cuda")

    grid_search = BayesSearchCV(classifier, param_grid, scoring="neg_brier_score", cv=5, verbose=3, n_iter=25)

    pipe = Pipeline([
        ("preprocessing", preprocessor),
        ("classifier", grid_search)  
    ])

    pipe.fit(X, y)

    print("Best set of hyperparameters: ", pipe.named_steps["classifier"].best_params_)
    print("Best score: ", -pipe.named_steps["classifier"].best_score_)

    with open(f"./model_params/{cat}_{datetime.isoformat(datetime.now())}.json", "w") as f:
        temp = pipe.named_steps["classifier"].best_params_
        temp["score"] = -pipe.named_steps["classifier"].best_score_
        json.dump(temp, f)

    return pipe

In [12]:
def get_data(CATEGORY):
    if CATEGORY == "mens":
        season = pd.read_csv("./data/MRegularSeasonDetailedResults.csv")
        tournament = pd.read_csv("./data/MNCAATourneyDetailedResults.csv")
        conferences = pd.read_csv("./data/MTeamConferences.csv")
    else:
        season = pd.read_csv("./data/WRegularSeasonDetailedResults.csv")
        tournament = pd.read_csv("./data/WNCAATourneyDetailedResults.csv")
        conferences = pd.read_csv("./data/WTeamConferences.csv")
    return season, tournament, conferences

In [13]:
def make_predictions(CATEGORY, averages, pipe):
    submission = pd.read_csv("./data/SampleSubmissionStage2.csv")
    teams = pd.DataFrame(submission["ID"].str.split("_").to_list())
    cols = ["Season", "first_team_id", "second_team_id"]
    teams.columns = cols
    teams = pd.concat([teams, submission], axis=1)
    for c in cols:
        teams[c] = teams[c].astype("int64")

    if CATEGORY == "mens":
        teams = teams[teams.first_team_id < 2000]
    else:
        teams = teams[teams.first_team_id > 2000]

    temp_avgs = averages.drop(columns=["NumOT", "prob", "Loc", "result"])
    data2025 = temp_avgs[temp_avgs.Season == 2025]
    last_info = data2025.loc[data2025.groupby(["TeamID"])["DayNum"].idxmax()]

    d = pd.merge(teams, last_info, how="left", left_on="first_team_id", right_on="TeamID", suffixes=("", "_first"))
    d=d.drop(columns=["Season_first", "DayNum", "TeamID", "game_id"])
    d.columns = [f"first_{col}" if col not in teams.columns else col for col in d.columns ]
    d.head()
    d = pd.merge(d, last_info, how="left", left_on="second_team_id", right_on="TeamID", suffixes=("", "_second"))
    d = d.drop(columns=["Season_second", "DayNum", "game_id", "TeamID"])
    d.columns = [f"second_{col}" if col in last_info.columns else col for col in d.columns ]
    d = d.drop(columns=["second_Season", "first_team_id", "second_team_id"])
    d.head()

    preds = pipe.predict_proba(d)[:,1]

    teams["Pred"] = preds

    submission = teams[["ID", "Pred"]]
    return submission, teams

In [14]:
def get_tm_id(tm_name, cpy_tms):
    cpy_tms = cpy_tms[["TeamName", "TeamID"]]
    return cpy_tms.loc[cpy_tms.TeamName == tm_name, "TeamID"].iloc[0]

def get_result(round, team_df, map, team_names):
    round = [
        [map.get(round[0], round[0]), 0], 
        [map.get(round[1], round[1]), 0]
    ]
    round[0][1] = get_tm_id(round[0][0], team_names)
    round[1][1] = get_tm_id(round[1][0], team_names)
    round.sort(key=lambda x:x[1])
    first = round[0][0]
    second = round[1][0]
    pred = team_df.loc[(team_df.first_team_name == first) & (team_df.second_team_name == second), "Pred"]
    # print(pred)
    pred = pred.iloc[0]
    print(first if pred > .5 else  second, "beat", second if pred > .5 else first, "with a prediction of", pred if pred > .5 else 1-pred)
    return first if pred > .5 else second


def team_prediction(bracket, df, map, team_names):
    # One round for play ins:
    next_round = []
    for tm in bracket:
        winner = tm
        if "/" in tm:
            winner = get_result(tm.split("/"), df, map, team_names)
        next_round.append(winner)
    bracket = next_round

    #Loop through since they are all 0 when %2
    while len(bracket) != 1:
        next_round = []
        while len(bracket) != 0:
            curr_round = []
            for i in range(2):
                curr_round.append(bracket.pop(0))
            winner = get_result(curr_round, df, map, team_names)
            next_round.append(winner)
        bracket = next_round

def print_bracket_results(teams, CATEGORY, brck, bmap):

    if CATEGORY == "mens":
        tms = pd.read_csv("./data/MTeams.csv")
    else:
        tms = pd.read_csv("./data/WTeams.csv")


    cpy_tms = tms.copy()
    tms = tms[["TeamID", "TeamName"]]

    teams = teams.merge(tms, how="left", left_on="first_team_id", right_on="TeamID")
    teams["first_team_name"] = teams.TeamName
    teams = teams.drop(columns=["TeamName", "TeamID"])

    teams = teams.merge(tms, how="left", left_on="second_team_id", right_on="TeamID")
    teams["second_team_name"] = teams.TeamName
    teams = teams.drop(columns=["TeamName", "TeamID"])

    print(f"{CATEGORY} BRACKET PREDICTIONS")
    team_prediction(brck, teams, bmap, cpy_tms)

Index(['Season', 'DayNum', 'NumOT', 'prob', 'game_id', 'TeamID', 'Score',
       'Loc', 'FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR', 'Ast',
       'TO', 'Stl', 'Blk', 'PF', 'points_allowed', 'result', 'cum_Score',
       'cum_FGM', 'cum_FGA', 'cum_FGM3', 'cum_FGA3', 'cum_FTM', 'cum_FTA',
       'cum_OR', 'cum_DR', 'cum_Ast', 'cum_TO', 'cum_Stl', 'cum_Blk', 'cum_PF',
       'cum_points_allowed', 'games_won', 'games_played', 'games_lost',
       'win_percentage', 'ConfAbbrev'],
      dtype='object')
(7981, 28)
Modeling...
Fitting 5 folds for each of 1 candidates, totalling 5 fits
/home/madeline/march_madness/.venv/lib/python3.10/site-packages/xgboost/core.py:158: UserWarning: [16:30:45] WARNING: /workspace/src/common/error_msg.cc:58: Falling back to prediction using DMatrix due to mismatched devices. This might lead to higher memory usage and slower performance. XGBoost is running on: cuda:0, while the input data is on: cpu.
Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.

This warning will only be shown once.

  warnings.warn(smsg, UserWarning)
[CV 1/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.199 total time=   2.5s
[CV 2/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.194 total time=   1.3s
[CV 3/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.196 total time=   1.3s
[CV 4/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.198 total time=   1.3s
[CV 5/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.203 total time=   1.6s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.7567926355213533, learning_rate=0.24215877969143518, max_depth=13, n_estimators=371, subsample=0.9743764672194786;, score=-0.238 total time=  28.4s
[CV 2/5] END colsample_bytree=0.7567926355213533, learning_rate=0.24215877969143518, max_depth=13, n_estimators=371, subsample=0.9743764672194786;, score=-0.230 total time=  26.8s
[CV 3/5] END colsample_bytree=0.7567926355213533, learning_rate=0.24215877969143518, max_depth=13, n_estimators=371, subsample=0.9743764672194786;, score=-0.230 total time=  28.3s
[CV 4/5] END colsample_bytree=0.7567926355213533, learning_rate=0.24215877969143518, max_depth=13, n_estimators=371, subsample=0.9743764672194786;, score=-0.236 total time=  28.1s
[CV 5/5] END colsample_bytree=0.7567926355213533, learning_rate=0.24215877969143518, max_depth=13, n_estimators=371, subsample=0.9743764672194786;, score=-0.244 total time=  28.3s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.7452359399706149, learning_rate=0.04976762602121671, max_depth=16, n_estimators=867, subsample=0.527636971848315;, score=-0.221 total time= 1.9min
[CV 2/5] END colsample_bytree=0.7452359399706149, learning_rate=0.04976762602121671, max_depth=16, n_estimators=867, subsample=0.527636971848315;, score=-0.215 total time= 1.9min
[CV 3/5] END colsample_bytree=0.7452359399706149, learning_rate=0.04976762602121671, max_depth=16, n_estimators=867, subsample=0.527636971848315;, score=-0.215 total time= 1.9min
[CV 4/5] END colsample_bytree=0.7452359399706149, learning_rate=0.04976762602121671, max_depth=16, n_estimators=867, subsample=0.527636971848315;, score=-0.220 total time= 1.9min
[CV 5/5] END colsample_bytree=0.7452359399706149, learning_rate=0.04976762602121671, max_depth=16, n_estimators=867, subsample=0.527636971848315;, score=-0.225 total time= 1.8min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9083182797725724, learning_rate=0.14487253680485826, max_depth=5, n_estimators=214, subsample=0.7244039767418845;, score=-0.200 total time=   2.2s
[CV 2/5] END colsample_bytree=0.9083182797725724, learning_rate=0.14487253680485826, max_depth=5, n_estimators=214, subsample=0.7244039767418845;, score=-0.195 total time=   2.2s
[CV 3/5] END colsample_bytree=0.9083182797725724, learning_rate=0.14487253680485826, max_depth=5, n_estimators=214, subsample=0.7244039767418845;, score=-0.196 total time=   2.3s
[CV 4/5] END colsample_bytree=0.9083182797725724, learning_rate=0.14487253680485826, max_depth=5, n_estimators=214, subsample=0.7244039767418845;, score=-0.199 total time=   2.3s
[CV 5/5] END colsample_bytree=0.9083182797725724, learning_rate=0.14487253680485826, max_depth=5, n_estimators=214, subsample=0.7244039767418845;, score=-0.205 total time=   5.0s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9102168048755042, learning_rate=0.3998797701650731, max_depth=7, n_estimators=573, subsample=0.5517081527352217;, score=-0.272 total time=   9.3s
[CV 2/5] END colsample_bytree=0.9102168048755042, learning_rate=0.3998797701650731, max_depth=7, n_estimators=573, subsample=0.5517081527352217;, score=-0.256 total time=  10.1s
[CV 3/5] END colsample_bytree=0.9102168048755042, learning_rate=0.3998797701650731, max_depth=7, n_estimators=573, subsample=0.5517081527352217;, score=-0.260 total time=  10.0s
[CV 4/5] END colsample_bytree=0.9102168048755042, learning_rate=0.3998797701650731, max_depth=7, n_estimators=573, subsample=0.5517081527352217;, score=-0.263 total time=  11.0s
[CV 5/5] END colsample_bytree=0.9102168048755042, learning_rate=0.3998797701650731, max_depth=7, n_estimators=573, subsample=0.5517081527352217;, score=-0.277 total time=   9.2s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.7507618473191144, learning_rate=0.3535917305934535, max_depth=20, n_estimators=885, subsample=0.624908831212835;, score=-0.269 total time=  57.1s
[CV 2/5] END colsample_bytree=0.7507618473191144, learning_rate=0.3535917305934535, max_depth=20, n_estimators=885, subsample=0.624908831212835;, score=-0.260 total time=  58.1s
[CV 3/5] END colsample_bytree=0.7507618473191144, learning_rate=0.3535917305934535, max_depth=20, n_estimators=885, subsample=0.624908831212835;, score=-0.260 total time=  55.1s
[CV 4/5] END colsample_bytree=0.7507618473191144, learning_rate=0.3535917305934535, max_depth=20, n_estimators=885, subsample=0.624908831212835;, score=-0.263 total time=  58.8s
[CV 5/5] END colsample_bytree=0.7507618473191144, learning_rate=0.3535917305934535, max_depth=20, n_estimators=885, subsample=0.624908831212835;, score=-0.273 total time=  56.5s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.5949486627374695, learning_rate=0.04616889380056079, max_depth=18, n_estimators=100, subsample=0.7551629433654146;, score=-0.204 total time=  39.2s
[CV 2/5] END colsample_bytree=0.5949486627374695, learning_rate=0.04616889380056079, max_depth=18, n_estimators=100, subsample=0.7551629433654146;, score=-0.201 total time=  36.9s
[CV 3/5] END colsample_bytree=0.5949486627374695, learning_rate=0.04616889380056079, max_depth=18, n_estimators=100, subsample=0.7551629433654146;, score=-0.201 total time=  40.3s
[CV 4/5] END colsample_bytree=0.5949486627374695, learning_rate=0.04616889380056079, max_depth=18, n_estimators=100, subsample=0.7551629433654146;, score=-0.204 total time=  37.8s
[CV 5/5] END colsample_bytree=0.5949486627374695, learning_rate=0.04616889380056079, max_depth=18, n_estimators=100, subsample=0.7551629433654146;, score=-0.209 total time=  37.7s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9881152654495023, learning_rate=0.0981261550052628, max_depth=18, n_estimators=489, subsample=0.9311807768312271;, score=-0.228 total time= 1.3min
[CV 2/5] END colsample_bytree=0.9881152654495023, learning_rate=0.0981261550052628, max_depth=18, n_estimators=489, subsample=0.9311807768312271;, score=-0.221 total time= 1.3min
[CV 3/5] END colsample_bytree=0.9881152654495023, learning_rate=0.0981261550052628, max_depth=18, n_estimators=489, subsample=0.9311807768312271;, score=-0.222 total time= 1.3min
[CV 4/5] END colsample_bytree=0.9881152654495023, learning_rate=0.0981261550052628, max_depth=18, n_estimators=489, subsample=0.9311807768312271;, score=-0.226 total time= 1.2min
[CV 5/5] END colsample_bytree=0.9881152654495023, learning_rate=0.0981261550052628, max_depth=18, n_estimators=489, subsample=0.9311807768312271;, score=-0.232 total time= 1.3min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.8946918904000343, learning_rate=0.013290206460693262, max_depth=4, n_estimators=806, subsample=0.7931730831533674;, score=-0.201 total time=   5.8s
[CV 2/5] END colsample_bytree=0.8946918904000343, learning_rate=0.013290206460693262, max_depth=4, n_estimators=806, subsample=0.7931730831533674;, score=-0.196 total time=   5.8s
[CV 3/5] END colsample_bytree=0.8946918904000343, learning_rate=0.013290206460693262, max_depth=4, n_estimators=806, subsample=0.7931730831533674;, score=-0.198 total time=   5.8s
[CV 4/5] END colsample_bytree=0.8946918904000343, learning_rate=0.013290206460693262, max_depth=4, n_estimators=806, subsample=0.7931730831533674;, score=-0.201 total time=   5.8s
[CV 5/5] END colsample_bytree=0.8946918904000343, learning_rate=0.013290206460693262, max_depth=4, n_estimators=806, subsample=0.7931730831533674;, score=-0.204 total time=   8.5s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9846841437276848, learning_rate=0.010690594363866742, max_depth=20, n_estimators=35, subsample=0.6984926672869631;, score=-0.228 total time=  21.4s
[CV 2/5] END colsample_bytree=0.9846841437276848, learning_rate=0.010690594363866742, max_depth=20, n_estimators=35, subsample=0.6984926672869631;, score=-0.225 total time=  21.2s
[CV 3/5] END colsample_bytree=0.9846841437276848, learning_rate=0.010690594363866742, max_depth=20, n_estimators=35, subsample=0.6984926672869631;, score=-0.226 total time=  22.7s
[CV 4/5] END colsample_bytree=0.9846841437276848, learning_rate=0.010690594363866742, max_depth=20, n_estimators=35, subsample=0.6984926672869631;, score=-0.227 total time=  19.7s
[CV 5/5] END colsample_bytree=0.9846841437276848, learning_rate=0.010690594363866742, max_depth=20, n_estimators=35, subsample=0.6984926672869631;, score=-0.230 total time=  23.4s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.859875153926642, learning_rate=0.02089469229801169, max_depth=6, n_estimators=928, subsample=0.9701079282465561;, score=-0.198 total time=  12.4s
[CV 2/5] END colsample_bytree=0.859875153926642, learning_rate=0.02089469229801169, max_depth=6, n_estimators=928, subsample=0.9701079282465561;, score=-0.194 total time=  10.3s
[CV 3/5] END colsample_bytree=0.859875153926642, learning_rate=0.02089469229801169, max_depth=6, n_estimators=928, subsample=0.9701079282465561;, score=-0.195 total time=  10.4s
[CV 4/5] END colsample_bytree=0.859875153926642, learning_rate=0.02089469229801169, max_depth=6, n_estimators=928, subsample=0.9701079282465561;, score=-0.198 total time=  13.2s
[CV 5/5] END colsample_bytree=0.859875153926642, learning_rate=0.02089469229801169, max_depth=6, n_estimators=928, subsample=0.9701079282465561;, score=-0.203 total time=  10.3s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=1.0, learning_rate=0.10507287392524874, max_depth=3, n_estimators=10, subsample=0.5139204651682341;, score=-0.218 total time=   0.6s
[CV 2/5] END colsample_bytree=1.0, learning_rate=0.10507287392524874, max_depth=3, n_estimators=10, subsample=0.5139204651682341;, score=-0.215 total time=   0.6s
[CV 3/5] END colsample_bytree=1.0, learning_rate=0.10507287392524874, max_depth=3, n_estimators=10, subsample=0.5139204651682341;, score=-0.216 total time=   0.7s
[CV 4/5] END colsample_bytree=1.0, learning_rate=0.10507287392524874, max_depth=3, n_estimators=10, subsample=0.5139204651682341;, score=-0.218 total time=   0.7s
[CV 5/5] END colsample_bytree=1.0, learning_rate=0.10507287392524874, max_depth=3, n_estimators=10, subsample=0.5139204651682341;, score=-0.219 total time=   0.6s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.5, learning_rate=0.030885403452728673, max_depth=5, n_estimators=10, subsample=0.8378602349947626;, score=-0.233 total time=   0.7s
[CV 2/5] END colsample_bytree=0.5, learning_rate=0.030885403452728673, max_depth=5, n_estimators=10, subsample=0.8378602349947626;, score=-0.232 total time=   0.7s
[CV 3/5] END colsample_bytree=0.5, learning_rate=0.030885403452728673, max_depth=5, n_estimators=10, subsample=0.8378602349947626;, score=-0.233 total time=   0.6s
[CV 4/5] END colsample_bytree=0.5, learning_rate=0.030885403452728673, max_depth=5, n_estimators=10, subsample=0.8378602349947626;, score=-0.233 total time=   0.7s
[CV 5/5] END colsample_bytree=0.5, learning_rate=0.030885403452728673, max_depth=5, n_estimators=10, subsample=0.8378602349947626;, score=-0.234 total time=   0.7s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.8632295146086023, learning_rate=0.010109770550575053, max_depth=25, n_estimators=377, subsample=0.6173244673859046;, score=-0.203 total time= 3.2min
[CV 2/5] END colsample_bytree=0.8632295146086023, learning_rate=0.010109770550575053, max_depth=25, n_estimators=377, subsample=0.6173244673859046;, score=-0.198 total time= 3.3min
[CV 3/5] END colsample_bytree=0.8632295146086023, learning_rate=0.010109770550575053, max_depth=25, n_estimators=377, subsample=0.6173244673859046;, score=-0.199 total time= 3.2min
[CV 4/5] END colsample_bytree=0.8632295146086023, learning_rate=0.010109770550575053, max_depth=25, n_estimators=377, subsample=0.6173244673859046;, score=-0.203 total time= 3.2min
[CV 5/5] END colsample_bytree=0.8632295146086023, learning_rate=0.010109770550575053, max_depth=25, n_estimators=377, subsample=0.6173244673859046;, score=-0.206 total time= 3.2min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.6423206560386095, learning_rate=0.01, max_depth=25, n_estimators=1000, subsample=0.5285320827765579;, score=-0.205 total time= 5.6min
[CV 2/5] END colsample_bytree=0.6423206560386095, learning_rate=0.01, max_depth=25, n_estimators=1000, subsample=0.5285320827765579;, score=-0.201 total time= 5.6min
[CV 3/5] END colsample_bytree=0.6423206560386095, learning_rate=0.01, max_depth=25, n_estimators=1000, subsample=0.5285320827765579;, score=-0.201 total time= 5.5min
[CV 4/5] END colsample_bytree=0.6423206560386095, learning_rate=0.01, max_depth=25, n_estimators=1000, subsample=0.5285320827765579;, score=-0.205 total time= 5.5min
[CV 5/5] END colsample_bytree=0.6423206560386095, learning_rate=0.01, max_depth=25, n_estimators=1000, subsample=0.5285320827765579;, score=-0.208 total time= 5.5min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.5162590910800271, learning_rate=0.04340942902978675, max_depth=25, n_estimators=258, subsample=0.889186782957693;, score=-0.211 total time= 1.7min
[CV 2/5] END colsample_bytree=0.5162590910800271, learning_rate=0.04340942902978675, max_depth=25, n_estimators=258, subsample=0.889186782957693;, score=-0.206 total time= 1.7min
[CV 3/5] END colsample_bytree=0.5162590910800271, learning_rate=0.04340942902978675, max_depth=25, n_estimators=258, subsample=0.889186782957693;, score=-0.206 total time= 1.8min
[CV 4/5] END colsample_bytree=0.5162590910800271, learning_rate=0.04340942902978675, max_depth=25, n_estimators=258, subsample=0.889186782957693;, score=-0.211 total time= 1.7min
[CV 5/5] END colsample_bytree=0.5162590910800271, learning_rate=0.04340942902978675, max_depth=25, n_estimators=258, subsample=0.889186782957693;, score=-0.214 total time= 1.7min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.6392475582944646, learning_rate=0.01704794997943932, max_depth=25, n_estimators=1000, subsample=0.8416232717882166;, score=-0.211 total time= 5.3min
[CV 2/5] END colsample_bytree=0.6392475582944646, learning_rate=0.01704794997943932, max_depth=25, n_estimators=1000, subsample=0.8416232717882166;, score=-0.208 total time= 5.3min
[CV 3/5] END colsample_bytree=0.6392475582944646, learning_rate=0.01704794997943932, max_depth=25, n_estimators=1000, subsample=0.8416232717882166;, score=-0.207 total time= 5.3min
[CV 4/5] END colsample_bytree=0.6392475582944646, learning_rate=0.01704794997943932, max_depth=25, n_estimators=1000, subsample=0.8416232717882166;, score=-0.212 total time= 5.4min
[CV 5/5] END colsample_bytree=0.6392475582944646, learning_rate=0.01704794997943932, max_depth=25, n_estimators=1000, subsample=0.8416232717882166;, score=-0.215 total time= 5.3min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9209312471042967, learning_rate=0.01, max_depth=3, n_estimators=460, subsample=0.5033336379918557;, score=-0.206 total time=   3.1s
[CV 2/5] END colsample_bytree=0.9209312471042967, learning_rate=0.01, max_depth=3, n_estimators=460, subsample=0.5033336379918557;, score=-0.201 total time=   3.0s
[CV 3/5] END colsample_bytree=0.9209312471042967, learning_rate=0.01, max_depth=3, n_estimators=460, subsample=0.5033336379918557;, score=-0.202 total time=   3.0s
[CV 4/5] END colsample_bytree=0.9209312471042967, learning_rate=0.01, max_depth=3, n_estimators=460, subsample=0.5033336379918557;, score=-0.205 total time=   3.0s
[CV 5/5] END colsample_bytree=0.9209312471042967, learning_rate=0.01, max_depth=3, n_estimators=460, subsample=0.5033336379918557;, score=-0.208 total time=   3.0s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.8930099635437758, learning_rate=0.026063156291301653, max_depth=11, n_estimators=1000, subsample=0.9757408049134444;, score=-0.204 total time=  58.1s
[CV 2/5] END colsample_bytree=0.8930099635437758, learning_rate=0.026063156291301653, max_depth=11, n_estimators=1000, subsample=0.9757408049134444;, score=-0.199 total time=  57.5s
[CV 3/5] END colsample_bytree=0.8930099635437758, learning_rate=0.026063156291301653, max_depth=11, n_estimators=1000, subsample=0.9757408049134444;, score=-0.199 total time=  57.1s
[CV 4/5] END colsample_bytree=0.8930099635437758, learning_rate=0.026063156291301653, max_depth=11, n_estimators=1000, subsample=0.9757408049134444;, score=-0.203 total time=  54.5s
[CV 5/5] END colsample_bytree=0.8930099635437758, learning_rate=0.026063156291301653, max_depth=11, n_estimators=1000, subsample=0.9757408049134444;, score=-0.208 total time=  57.4s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.936152084853387, learning_rate=0.01, max_depth=25, n_estimators=632, subsample=0.7155064092132014;, score=-0.205 total time= 4.9min
[CV 2/5] END colsample_bytree=0.936152084853387, learning_rate=0.01, max_depth=25, n_estimators=632, subsample=0.7155064092132014;, score=-0.200 total time= 4.8min
[CV 3/5] END colsample_bytree=0.936152084853387, learning_rate=0.01, max_depth=25, n_estimators=632, subsample=0.7155064092132014;, score=-0.201 total time= 4.8min
[CV 4/5] END colsample_bytree=0.936152084853387, learning_rate=0.01, max_depth=25, n_estimators=632, subsample=0.7155064092132014;, score=-0.204 total time= 4.9min
[CV 5/5] END colsample_bytree=0.936152084853387, learning_rate=0.01, max_depth=25, n_estimators=632, subsample=0.7155064092132014;, score=-0.207 total time= 4.9min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=1.0, learning_rate=0.01, max_depth=3, n_estimators=1000, subsample=0.5;, score=-0.202 total time=   8.5s
[CV 2/5] END colsample_bytree=1.0, learning_rate=0.01, max_depth=3, n_estimators=1000, subsample=0.5;, score=-0.198 total time=   5.5s
[CV 3/5] END colsample_bytree=1.0, learning_rate=0.01, max_depth=3, n_estimators=1000, subsample=0.5;, score=-0.199 total time=   5.6s
[CV 4/5] END colsample_bytree=1.0, learning_rate=0.01, max_depth=3, n_estimators=1000, subsample=0.5;, score=-0.203 total time=   5.5s
[CV 5/5] END colsample_bytree=1.0, learning_rate=0.01, max_depth=3, n_estimators=1000, subsample=0.5;, score=-0.205 total time=   5.6s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.5465182941436568, learning_rate=0.15355533105669106, max_depth=23, n_estimators=1000, subsample=0.7572954723091451;, score=-0.244 total time= 1.7min
[CV 2/5] END colsample_bytree=0.5465182941436568, learning_rate=0.15355533105669106, max_depth=23, n_estimators=1000, subsample=0.7572954723091451;, score=-0.239 total time= 1.6min
[CV 3/5] END colsample_bytree=0.5465182941436568, learning_rate=0.15355533105669106, max_depth=23, n_estimators=1000, subsample=0.7572954723091451;, score=-0.238 total time= 1.6min
[CV 4/5] END colsample_bytree=0.5465182941436568, learning_rate=0.15355533105669106, max_depth=23, n_estimators=1000, subsample=0.7572954723091451;, score=-0.241 total time= 1.7min
[CV 5/5] END colsample_bytree=0.5465182941436568, learning_rate=0.15355533105669106, max_depth=23, n_estimators=1000, subsample=0.7572954723091451;, score=-0.245 total time= 1.6min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.5228139987554592, learning_rate=0.24128478473922038, max_depth=25, n_estimators=10, subsample=0.6138611760691632;, score=-0.222 total time=   5.8s
[CV 2/5] END colsample_bytree=0.5228139987554592, learning_rate=0.24128478473922038, max_depth=25, n_estimators=10, subsample=0.6138611760691632;, score=-0.216 total time=   5.6s
[CV 3/5] END colsample_bytree=0.5228139987554592, learning_rate=0.24128478473922038, max_depth=25, n_estimators=10, subsample=0.6138611760691632;, score=-0.221 total time=   5.5s
[CV 4/5] END colsample_bytree=0.5228139987554592, learning_rate=0.24128478473922038, max_depth=25, n_estimators=10, subsample=0.6138611760691632;, score=-0.222 total time=   5.4s
[CV 5/5] END colsample_bytree=0.5228139987554592, learning_rate=0.24128478473922038, max_depth=25, n_estimators=10, subsample=0.6138611760691632;, score=-0.227 total time=   8.2s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9600409814283288, learning_rate=0.01852852869335224, max_depth=13, n_estimators=541, subsample=0.7217628005105772;, score=-0.202 total time= 1.1min
[CV 2/5] END colsample_bytree=0.9600409814283288, learning_rate=0.01852852869335224, max_depth=13, n_estimators=541, subsample=0.7217628005105772;, score=-0.197 total time= 1.1min
[CV 3/5] END colsample_bytree=0.9600409814283288, learning_rate=0.01852852869335224, max_depth=13, n_estimators=541, subsample=0.7217628005105772;, score=-0.198 total time= 1.1min
[CV 4/5] END colsample_bytree=0.9600409814283288, learning_rate=0.01852852869335224, max_depth=13, n_estimators=541, subsample=0.7217628005105772;, score=-0.201 total time= 1.1min
[CV 5/5] END colsample_bytree=0.9600409814283288, learning_rate=0.01852852869335224, max_depth=13, n_estimators=541, subsample=0.7217628005105772;, score=-0.206 total time= 1.0min
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.9753955984918199, learning_rate=0.01, max_depth=3, n_estimators=725, subsample=1.0;, score=-0.204 total time=   4.5s
[CV 2/5] END colsample_bytree=0.9753955984918199, learning_rate=0.01, max_depth=3, n_estimators=725, subsample=1.0;, score=-0.200 total time=   4.3s
[CV 3/5] END colsample_bytree=0.9753955984918199, learning_rate=0.01, max_depth=3, n_estimators=725, subsample=1.0;, score=-0.201 total time=   4.3s
[CV 4/5] END colsample_bytree=0.9753955984918199, learning_rate=0.01, max_depth=3, n_estimators=725, subsample=1.0;, score=-0.204 total time=   4.3s
[CV 5/5] END colsample_bytree=0.9753955984918199, learning_rate=0.01, max_depth=3, n_estimators=725, subsample=1.0;, score=-0.207 total time=   4.3s
Best set of hyperparameters:  OrderedDict([('colsample_bytree', 0.859875153926642), ('learning_rate', 0.02089469229801169), ('max_depth', 6), ('n_estimators', 928), ('subsample', 0.9701079282465561)])
Best score:  0.1975681930931877
Making Predictions...
mens BRACKET PREDICTIONS
Alabama St beat St Francis PA with a prediction of 0.5152937
North Carolina beat South Dakota with a prediction of 0.81057984
Mt St Mary's beat American Univ with a prediction of 0.509496
Texas beat Xavier with a prediction of 0.5605127
Auburn beat Alabama St with a prediction of 0.9636424
Louisville beat Creighton with a prediction of 0.59849626
Michigan beat UC San Diego with a prediction of 0.52104735
Yale beat Texas A&M with a prediction of 0.55304223
Mississippi beat North Carolina with a prediction of 0.6390433
Iowa St beat Lipscomb with a prediction of 0.70225596
Marquette beat New Mexico with a prediction of 0.52168447
Michigan St beat Bryant with a prediction of 0.83779716
Florida beat Norfolk St with a prediction of 0.89871526
Connecticut beat Oklahoma with a prediction of 0.68535477
Memphis beat Colorado St with a prediction of 0.55927634
Maryland beat Grand Canyon with a prediction of 0.68470454
Missouri beat Drake with a prediction of 0.5275874
Texas Tech beat UNC Wilmington with a prediction of 0.68105143
Kansas beat Arkansas with a prediction of 0.5931146
St John's beat NE Omaha with a prediction of 0.8857349
Duke beat Mt St Mary's with a prediction of 0.9305454
Mississippi St beat Baylor with a prediction of 0.6084884
Oregon beat Liberty with a prediction of 0.66313314
Arizona beat Akron with a prediction of 0.5544213
VCU beat BYU with a prediction of 0.7003275
Wisconsin beat Montana with a prediction of 0.88052833
St Mary's CA beat Vanderbilt with a prediction of 0.63441986
Alabama beat Robert Morris with a prediction of 0.6469547
Houston beat SIUE with a prediction of 0.88632613
Gonzaga beat Georgia with a prediction of 0.69100076
Clemson beat McNeese St with a prediction of 0.681075
Purdue beat High Point with a prediction of 0.53558606
Illinois beat Texas with a prediction of 0.60994
Kentucky beat Troy with a prediction of 0.65769255
Utah St beat UCLA with a prediction of 0.54260963
Tennessee beat Wofford with a prediction of 0.8456709
Auburn beat Louisville with a prediction of 0.6660342
Michigan beat Yale with a prediction of 0.54593396
Iowa St beat Mississippi with a prediction of 0.7072686
Michigan St beat Marquette with a prediction of 0.6985786
Florida beat Connecticut with a prediction of 0.61776114
Maryland beat Memphis with a prediction of 0.7135941
Texas Tech beat Missouri with a prediction of 0.53481585
St John's beat Kansas with a prediction of 0.7506169
Duke beat Mississippi St with a prediction of 0.7595846
Arizona beat Oregon with a prediction of 0.5310278
Wisconsin beat VCU with a prediction of 0.513296
St Mary's CA beat Alabama with a prediction of 0.5441308
Gonzaga beat Houston with a prediction of 0.5239203
Clemson beat Purdue with a prediction of 0.6186115
Illinois beat Kentucky with a prediction of 0.67685
Tennessee beat Utah St with a prediction of 0.63342243
Auburn beat Michigan with a prediction of 0.7088824
Michigan St beat Iowa St with a prediction of 0.6270033
Florida beat Maryland with a prediction of 0.5379876
St John's beat Texas Tech with a prediction of 0.58152586
Duke beat Arizona with a prediction of 0.7112074
Wisconsin beat St Mary's CA with a prediction of 0.5602983
Gonzaga beat Clemson with a prediction of 0.59236413
Tennessee beat Illinois with a prediction of 0.6709318
Auburn beat Michigan St with a prediction of 0.51428944
St John's beat Florida with a prediction of 0.5682152
Duke beat Wisconsin with a prediction of 0.67446977
Gonzaga beat Tennessee with a prediction of 0.5302639
St John's beat Auburn with a prediction of 0.56962526
Duke beat Gonzaga with a prediction of 0.59986836
Duke beat St John's with a prediction of 0.6167959
Cleaning data...

In [15]:
# Running everything
results = pd.DataFrame()
for cat in ["mens", "womens"]:
    print("Cleaning data...")
    season, tournament, conferences = get_data(cat)
    df, averages = data_cleaning(season, tournament, conferences)
    print("Modeling...")
    pipe = model(df, cat)
    print("Making Predictions...")
    bmap, brck = get_bracket_data(cat)
    submission, teams = make_predictions(cat, averages, pipe)
    submission.to_csv(f"./individual_submissions/{cat}_{datetime.isoformat(datetime.now())}", index=False)
    print_bracket_results(teams, cat, brck, bmap)
    results = pd.concat([results, submission], axis=0)

Cleaning data...


Season            0
DayNum            0
NumOT             0
prob              0
game_id           0
TeamID            0
Score             0
Loc               0
FGM               0
FGA               0
FGM3              0
FGA3              0
FTM               0
FTA               0
OR                0
DR                0
Ast               0
TO                0
Stl               0
Blk               0
PF                0
points_allowed    0
result            0
dtype: int64

Index(['Season', 'DayNum', 'NumOT', 'prob', 'game_id', 'TeamID', 'Score',
       'Loc', 'FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR', 'Ast',
       'TO', 'Stl', 'Blk', 'PF', 'points_allowed', 'result', 'cum_Score',
       'cum_FGM', 'cum_FGA', 'cum_FGM3', 'cum_FGA3', 'cum_FTM', 'cum_FTA',
       'cum_OR', 'cum_DR', 'cum_Ast', 'cum_TO', 'cum_Stl', 'cum_Blk', 'cum_PF',
       'cum_points_allowed', 'games_won', 'games_played', 'games_lost',
       'win_percentage', 'ConfAbbrev'],
      dtype='object')
(7981, 28)
Modeling...
Fitting 5 folds for each of 1 candidates, totalling 5 fits


Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




[CV 1/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.199 total time=   2.5s
[CV 2/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.194 total time=   1.3s
[CV 3/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.196 total time=   1.3s
[CV 4/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.198 total time=   1.3s
[CV 5/5] END colsample_bytree=0.5240788463623524, learning_rate=0.17095758926751753, max_depth=4, n_estimators=154, subsample=0.8118731565165702;, score=-0.203 total time=   1.6s
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.75679263552135

Season            0
DayNum            0
NumOT             0
prob              0
game_id           0
TeamID            0
Score             0
Loc               0
FGM               0
FGA               0
FGM3              0
FGA3              0
FTM               0
FTA               0
OR                0
DR                0
Ast               0
TO                0
Stl               0
Blk               0
PF                0
points_allowed    0
result            0
dtype: int64

Index(['Season', 'DayNum', 'NumOT', 'prob', 'game_id', 'TeamID', 'Score',
       'Loc', 'FGM', 'FGA', 'FGM3', 'FGA3', 'FTM', 'FTA', 'OR', 'DR', 'Ast',
       'TO', 'Stl', 'Blk', 'PF', 'points_allowed', 'result', 'cum_Score',
       'cum_FGM', 'cum_FGA', 'cum_FGM3', 'cum_FGA3', 'cum_FTM', 'cum_FTA',
       'cum_OR', 'cum_DR', 'cum_Ast', 'cum_TO', 'cum_Stl', 'cum_Blk', 'cum_PF',
       'cum_points_allowed', 'games_won', 'games_played', 'games_lost',
       'win_percentage', 'ConfAbbrev'],
      dtype='object')
(5602, 28)
Modeling...
Fitting 5 folds for each of 1 candidates, totalling 5 fits
[CV 1/5] END colsample_bytree=0.8452473300942064, learning_rate=0.0627903460535773, max_depth=17, n_estimators=358, subsample=0.8995668303438928;, score=-0.203 total time=  50.5s
[CV 2/5] END colsample_bytree=0.8452473300942064, learning_rate=0.0627903460535773, max_depth=17, n_estimators=358, subsample=0.8995668303438928;, score=-0.197 total time=  53.6s
[CV 3/5] END colsample_bytree=0.84524733009420

In [18]:
#Creating submission
mens = pd.read_csv("individual_submissions/mens_2025-03-19T19:05:15.844130.csv")
womens = pd.read_csv("individual_submissions/womens_2025-03-19T20:00:51.139226.csv")
combined = pd.concat([mens, womens], axis=0).reset_index(drop=True)
combined.to_csv("./submissions/final_submission.csv", index=False)