# NCAA Bracket Challenge 2024

## Team Starford

Install required libraries

In [1]:
#pip install -r requirements.txt

## Import libraries

In [2]:
import random
import pandas as pd
import numpy as np

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate,  Dropout, BatchNormalization, LeakyReLU
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError
from tensorflow.keras.models import load_model
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping
from tensorflow.keras.regularizers import l2

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


Set seeds

In [3]:
seed = 21
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
tf.experimental.numpy.random.seed(seed)

## Load data

In [4]:
teamname_df = pd.read_csv('data/MTeams.csv')
teamseeds_df = pd.read_csv('data/MNCAATourneySeeds.csv')
gameresults_df = pd.read_csv('data/MRegularSeasonCompactResults.csv')
teamseeds_2024_df = pd.read_csv('data/2024_BracketSeeds.csv')
teams_2024_df = pd.read_csv('data/Teams_2024.csv')

## Data Preprocessing

Label Encoding

In [5]:
def label_encoding(df):
    """
    parameters:
    df: dataframe to be encoded

    returns:
    labels: encoded dataframe
    num: number of unique characters in the dataframe

    """
    label_encoder = LabelEncoder()
    unique_chars = list(set(df.tolist()))
    num =len(unique_chars)
    labels = label_encoder.fit(unique_chars)
    return labels, num

In [6]:
team_encode, num_teams = label_encoding(teamname_df['TeamID'])
teamname_df['TeamID_encoded'] = team_encode.transform(teamname_df['TeamID'])

In [7]:
teamname_df.head()

Unnamed: 0,TeamID,TeamName,FirstD1Season,LastD1Season,TeamID_encoded
0,1101,Abilene Chr,2014,2023,0
1,1102,Air Force,1985,2023,1
2,1103,Akron,1985,2023,2
3,1104,Alabama,1985,2023,3
4,1105,Alabama A&M,2000,2023,4


In [8]:
teams_2024_df['TeamID_encoded']= team_encode.transform(teams_2024_df['TeamID'])
teams_2024_df.head()

Unnamed: 0,TeamName,TeamID,TeamID_encoded
0,Akron,1103,2
1,Alabama,1104,3
2,Arizona,1112,11
3,Auburn,1120,19
4,Baylor,1124,23


Get game results of previous season matches to train the model

In [9]:
gameresults_df['WTeamID_encoded'] = team_encode.transform(gameresults_df['WTeamID'])
gameresults_df['LTeamID_encoded'] = team_encode.transform(gameresults_df['LTeamID'])

In [10]:
gameresults_df.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WTeamID_encoded,LTeamID_encoded
0,1985,20,1228,81,1328,64,N,0,127,227
1,1985,25,1106,77,1354,70,H,0,5,253
2,1985,25,1112,63,1223,56,H,0,11,122
3,1985,25,1165,70,1432,54,H,0,64,331
4,1985,25,1192,86,1447,74,H,0,91,346


In [11]:
gameresults_df.shape

(181682, 10)

Train the model using game results until 2023 and predict for 2024

Get match history from 1985 to 2023 of the teams playing in 2024

In [12]:
result_history_df_w = gameresults_df.merge(teams_2024_df[['TeamID_encoded']], left_on='WTeamID_encoded', right_on='TeamID_encoded', how='inner').drop(columns=['TeamID_encoded'])

# Merge for 'LTeamID_encoded'
result_history_df_l = gameresults_df.merge(teams_2024_df[['TeamID_encoded']], left_on='LTeamID_encoded', right_on='TeamID_encoded', how='inner').drop(columns=['TeamID_encoded'])

# Concatenate the two merged dataframes
result_history_df = pd.concat([result_history_df_w, result_history_df_l])

result_history_df.head()

Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WTeamID_encoded,LTeamID_encoded
0,1985,20,1228,81,1328,64,N,0,127,227
1,1985,25,1112,63,1223,56,H,0,11,122
2,1985,25,1228,64,1226,44,N,0,127,125
3,1985,25,1242,58,1268,56,N,0,141,167
4,1985,25,1305,97,1424,89,H,0,204,323


In [13]:
result_history_df.shape

(75108, 10)

In [14]:
def data_filter(row):

    """
    parameters:
    row: row of dataframe

    returns:
    new_row: new dataframe with filtered data
    """
    if np.random.uniform() < 0.5:
        new_row = {
            'team_a': row['WTeamID'],
            'team_a_encoded': row['WTeamID_encoded'],
            'team_a_score': row['WScore'],
            'team_b': row['LTeamID'],
            'team_b_encoded': row['LTeamID_encoded'],
            'team_b_score': row['LScore']

        }
    else:
        new_row = {
            'team_b': row['WTeamID'],
            'team_b_encoded': row['WTeamID_encoded'],
            'team_b_score': row['WScore'],
            'team_a': row['LTeamID'],
            'team_a_encoded': row['LTeamID_encoded'],
            'team_a_score': row['LScore']

        }
    return new_row

In [15]:
result_history_df = result_history_df.apply(data_filter, axis=1).tolist()
result_history_df = pd.DataFrame(result_history_df)
result_history_df.head()

Unnamed: 0,team_a,team_a_encoded,team_a_score,team_b,team_b_encoded,team_b_score
0,1228,127,81,1328,227,64
1,1112,11,63,1223,122,56
2,1226,125,44,1228,127,64
3,1242,141,58,1268,167,56
4,1305,204,97,1424,323,89


In [16]:
def calculate_score_diff(row):
    """
    parameters:
    row: row of dataframe

    returns:
    new_row: new dataframe with filtered data
    """
    
    return row['team_a_score'] - row['team_b_score']

In [17]:
result_history_df["score_diff"] = result_history_df.apply(calculate_score_diff, axis=1)
result_history_df.head()

Unnamed: 0,team_a,team_a_encoded,team_a_score,team_b,team_b_encoded,team_b_score,score_diff
0,1228,127,81,1328,227,64,17
1,1112,11,63,1223,122,56,7
2,1226,125,44,1228,127,64,-20
3,1242,141,58,1268,167,56,2
4,1305,204,97,1424,323,89,8


Split data into train and test

In [18]:
features = ['team_a_encoded', 'team_b_encoded']
target = 'score_diff'

X = result_history_df[features]
y = result_history_df[target]


In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(len(X_train), len(X_test), len(y_train), len(y_test))

60086 15022 60086 15022


## Build model

In [20]:

def get_model(num_teams, embedding_dim=32):
    """
    parameters:
    num_teams: number of teams in dataset
    embedding_dim: dimension of embedding layer

    returns:
    model: compiled tensorflow model
    
    """
    team_a = Input(shape=(1,), name='team_a')
    team_b = Input(shape=(1,), name='team_b')

    team_a_embedding = Embedding(num_teams, embedding_dim, name='team_a_embedding')(team_a)
    team_b_embedding = Embedding(num_teams, embedding_dim, name='team_b_embedding')(team_b)
    
    team_a_flat = Flatten()(team_a_embedding)
    team_b_flat = Flatten()(team_b_embedding)
    merged = Concatenate()([team_a_flat, team_b_flat])
    
    dense_1 = Dense(1024, kernel_regularizer=l2(0.001))(merged)  
    dense_1 = BatchNormalization()(dense_1)
    dense_1 = LeakyReLU()(dense_1)
    dense_1 = Dropout(0.6)(dense_1)  
    
    dense_2 = Dense(512, kernel_regularizer=l2(0.001))(dense_1)  
    dense_2 = BatchNormalization()(dense_2)
    dense_2 = LeakyReLU()(dense_2)
    dense_2 = Dropout(0.6)(dense_2)  

    dense_3 = Dense(256, kernel_regularizer=l2(0.001))(dense_2)  
    dense_3 = BatchNormalization()(dense_3)
    dense_3 = LeakyReLU()(dense_3)
    dense_3 = Dropout(0.6)(dense_3)  

    output_layer = Dense(1, activation='linear', name='output')(dense_3)

    model = Model(inputs=[team_a, team_b], outputs=output_layer)

    optimizer = Adam(learning_rate=0.0001)  
    loss_fn = MeanSquaredError()
    metrics = [MeanAbsoluteError()]
    
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
    early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

    model.compile(optimizer=optimizer, loss=loss_fn, metrics=metrics)
    
    return model, reduce_lr, early_stop



Fit the model

In [21]:
Brackets_model, reduce_lr, early_stop = get_model(num_teams)   
Brackets_model.summary()

In [22]:
Brackets_model.fit(
    [X_train['team_a_encoded'], X_train['team_b_encoded']],
    y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.2,
    callbacks=[reduce_lr, early_stop]
)

Epoch 1/100
[1m752/752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 9ms/step - loss: 254.7673 - mean_absolute_error: 12.5216 - val_loss: 213.9269 - val_mean_absolute_error: 11.4730 - learning_rate: 1.0000e-04
Epoch 2/100
[1m752/752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - loss: 205.1811 - mean_absolute_error: 11.3009 - val_loss: 182.8196 - val_mean_absolute_error: 10.6580 - learning_rate: 1.0000e-04
Epoch 3/100
[1m752/752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - loss: 189.1259 - mean_absolute_error: 10.8719 - val_loss: 179.4461 - val_mean_absolute_error: 10.5633 - learning_rate: 1.0000e-04
Epoch 4/100
[1m752/752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - loss: 185.4089 - mean_absolute_error: 10.7641 - val_loss: 178.4012 - val_mean_absolute_error: 10.5357 - learning_rate: 1.0000e-04
Epoch 5/100
[1m752/752[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - loss: 184.2686 - mean_absolute_erro

<keras.src.callbacks.history.History at 0x1e06a65b1d0>

Model Prediction

In [23]:
prediction = Brackets_model.predict([X_test['team_a_encoded'], X_test['team_b_encoded']])

[1m470/470[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [24]:
#Calculate the model accuracy

preds = prediction.reshape(-1).tolist()
total_preds = len(prediction)
correct_preds = 0

y_test_list = y_test.tolist() if isinstance(y_test, np.ndarray) else y_test

for y_pred, y_true in zip(preds, y_test_list):
    if y_pred > 0 and y_true > 0:
        correct_preds += 1
    elif y_pred < 0 and y_true < 0:
        correct_preds += 1

accuracy = correct_preds / total_preds * 100

print(f"Model Accuracy: {accuracy:.2f}%")

Model Accuracy: 65.42%


Save model

In [25]:
Brackets_model.save('Brackets_model.keras')

### 2024 Brackets results

Now we have trained the model, we can use the trained model to predict the results of the 2024 tournament brackets

In [18]:
# load the model

Brackets_model = load_model('Brackets_model.keras')

In [19]:
teamseeds_2024_df["TeamID_encoded"] = team_encode.transform(teamseeds_2024_df["TeamID"])


In [20]:
teamseeds_2024_df

Unnamed: 0,Seed,TeamName,TeamID,TeamID_encoded
0,W01,Uconn,1163,62
1,W02,Iowa State,1235,134
2,W03,Illinois,1228,127
3,W04,Auburn,1120,19
4,W05,San Diego St,1361,260
...,...,...,...,...
59,Z12,McNeese,1270,169
60,Z13,Samford,1359,258
61,Z14,Akron,1103,2
62,Z15,Saint Peter's,1389,288


In [21]:
# create the match pairs
def pair_teams(df,col):
    """
    parameters:
    df: dataframe to be encoded
    col: column to be used for encoding
    
    returns:
    match_df: encoded dataframe
    
    """
    num = len(df)
    match_pairs = [(df[col].iloc[i], df[col].iloc[num - 1 - i]) for i in range(num // 2)]
    match_df = pd.DataFrame(match_pairs, columns=['TeamA', 'TeamB'])
    return match_df

def team_pair_names(df):
    """
    parameters:
    df: dataframe to get team names
    
    returns:
    df: dataframe with team names
    
    """
    df['TeamA_name'] = df.merge(teamseeds_2024_df[['TeamID_encoded', 'TeamName']], left_on='TeamA', right_on='TeamID_encoded', how='left')['TeamName']
    df['TeamB_name'] = df.merge(teamseeds_2024_df[['TeamID_encoded', 'TeamName']], left_on='TeamB', right_on='TeamID_encoded', how='left')['TeamName']
    return df

In [22]:
# Function to determine the winning team based on predictions
def determine_winner(row):
    """
    parameters:
    row: row of dataframe
    
    returns:
    new_row: new dataframe with filtered data

    """
    if row['predictions'] > 0:
        return row['TeamA']
    else:
        return row['TeamB']

def preds(model,df,col1,col2):
    """
    parameters:
    model: model to be used
    df: dataframe to be predicted
    col1: column 1 to be used
    col2: column 2 to be used
    
    returns:
    df: predicted dataframe

    """
    prediction = model.predict([df[col1], df[col2]])
    prediction = prediction.reshape(-1).tolist()
    df['predictions'] = prediction

    df['Team_won'] = df.apply(lambda row: int(determine_winner(row)), axis=1)
    return df

def get_winning_teamname(df):
    """
    parameters:
    df: dataframe to get team names
    
    returns:
    df: dataframe with team names
    
    """
    df = df.merge(teamseeds_2024_df[['TeamID_encoded', 'TeamName']], left_on='Team_won', right_on='TeamID_encoded', how='left')
    df = df.drop(columns=['TeamID_encoded'])
    df = df.rename(columns={'TeamName': 'TeamName_won'})
    return df

In [23]:
W_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'W']
X_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'X']
Y_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'Y']
Z_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'Z']

  W_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'W']
  X_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'X']
  Y_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'Y']
  Z_team_seeds_df = teamseeds_2024_df[teamseeds_df['Seed'].str[0] == 'Z']


### Round of 64

East region brackets

In [24]:
W01_pairs_df = pair_teams(W_team_seeds_df, 'TeamID_encoded')
W01_pairs_df = team_pair_names(W01_pairs_df)
W01_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,62,290,Uconn,Stetson
1,134,254,Iowa State,S Dakota St
2,127,186,Illinois,Morehead St
3,19,362,Auburn,Yale
4,260,311,San Diego St,UAB
5,39,81,BYU,Duquesne
6,349,78,Washington St,Drake
7,93,220,FAU,Northwestern


In [25]:
W_1st_round_df = preds(Brackets_model,W01_pairs_df,'TeamA','TeamB')
W_1st_round_df=get_winning_teamname(W_1st_round_df)
W_1st_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 312ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,62,290,Uconn,Stetson,20.562836,62,Uconn
1,134,254,Iowa State,S Dakota St,11.244709,134,Iowa State
2,127,186,Illinois,Morehead St,22.994284,127,Illinois
3,19,362,Auburn,Yale,10.990559,19,Auburn
4,260,311,San Diego St,UAB,-1.127488,311,UAB
5,39,81,BYU,Duquesne,8.951199,39,BYU
6,349,78,Washington St,Drake,5.258483,349,Washington St
7,93,220,FAU,Northwestern,-12.080555,220,Northwestern


West region brackets

In [26]:
X01_pairs_df = pair_teams(X_team_seeds_df,'TeamID_encoded')
X01_pairs_df = team_pair_names(X01_pairs_df)
X01_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,213,346,North Carolina,WAG
1,11,152,Arizona,Long Beach St
2,23,58,Baylor,Colgate
3,3,48,Alabama,Charleston
4,287,112,Saint Mary's,Grand Canyon
5,54,206,Clemson,New Mexico
6,72,204,Dayton,Nevada
7,179,176,Mississippi St,Michigan St


In [27]:
X_1st_round_df = preds(Brackets_model,X01_pairs_df,'TeamA','TeamB')
X_1st_round_df = get_winning_teamname(X_1st_round_df)
X_1st_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,213,346,North Carolina,WAG,30.163134,213,North Carolina
1,11,152,Arizona,Long Beach St,18.349806,11,Arizona
2,23,58,Baylor,Colgate,14.477116,23,Baylor
3,3,48,Alabama,Charleston,20.968664,3,Alabama
4,287,112,Saint Mary's,Grand Canyon,5.854482,287,Saint Mary's
5,54,206,Clemson,New Mexico,4.607848,54,Clemson
6,72,204,Dayton,Nevada,1.666485,72,Dayton
7,179,176,Mississippi St,Michigan St,-5.063118,176,Michigan St


South region brackets

In [28]:
Y01_pairs_df = pair_teams(Y_team_seeds_df, 'TeamID_encoded')
Y01_pairs_df = team_pair_names(Y01_pairs_df)
Y01_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,121,154,Houston,Longwood
1,165,342,Marquette,Western KY
2,145,223,Kentucky,Oakland
3,80,335,Duke,Vermont
4,357,140,Wisconsin,James Madison
5,302,200,Texas Tech,NC State
6,95,59,Florida,COL
7,203,300,Nebraska,Texas A&M


In [29]:
Y_1st_round_df = preds(Brackets_model,Y01_pairs_df,'TeamA','TeamB')
Y_1st_round_df = get_winning_teamname(Y_1st_round_df)
Y_1st_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,121,154,Houston,Longwood,22.04833,121,Houston
1,165,342,Marquette,Western KY,5.622268,165,Marquette
2,145,223,Kentucky,Oakland,18.993193,145,Kentucky
3,80,335,Duke,Vermont,28.79957,80,Duke
4,357,140,Wisconsin,James Madison,13.066276,357,Wisconsin
5,302,200,Texas Tech,NC State,-2.259185,200,NC State
6,95,59,Florida,COL,8.380654,95,Florida
7,203,300,Nebraska,Texas A&M,0.001762,203,Nebraska


Midwest region brackets

In [30]:
Z01_pairs_df = pair_teams(Z_team_seeds_df, 'TeamID_encoded')
Z01_pairs_df = team_pair_names(Z01_pairs_df)
Z01_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,244,111,Purdue,GRAM
1,296,288,Tennessee,Saint Peter's
2,65,2,Creighton,Akron
3,141,258,Kansas,Samford
4,110,169,Gonzaga,McNeese
5,275,231,South Carolina,Oregon
6,299,60,Texas,CSU
7,328,294,Utah State,TCU


In [31]:
Z_1st_round_df = preds(Brackets_model,Z01_pairs_df,'TeamA','TeamB')
Z_1st_round_df = get_winning_teamname(Z_1st_round_df)
Z_1st_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,244,111,Purdue,GRAM,37.093803,244,Purdue
1,296,288,Tennessee,Saint Peter's,12.814701,296,Tennessee
2,65,2,Creighton,Akron,3.088022,65,Creighton
3,141,258,Kansas,Samford,28.70973,141,Kansas
4,110,169,Gonzaga,McNeese,21.977791,110,Gonzaga
5,275,231,South Carolina,Oregon,-1.936162,231,Oregon
6,299,60,Texas,CSU,9.919846,299,Texas
7,328,294,Utah State,TCU,-0.216908,294,TCU


### Round of 32

East region brackets

In [32]:
W02_pairs_df = pair_teams(W_1st_round_df,'Team_won')
W02_pairs_df = team_pair_names(W02_pairs_df)
W02_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,62,220,Uconn,Northwestern
1,134,349,Iowa State,Washington St
2,127,39,Illinois,BYU
3,19,311,Auburn,UAB


In [33]:
W_2nd_round_df = preds(Brackets_model,W02_pairs_df,'TeamA','TeamB')
W_2nd_round_df = get_winning_teamname(W_2nd_round_df)
W_2nd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 132ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,62,220,Uconn,Northwestern,9.537054,62,Uconn
1,134,349,Iowa State,Washington St,6.767318,134,Iowa State
2,127,39,Illinois,BYU,2.961244,127,Illinois
3,19,311,Auburn,UAB,0.450184,19,Auburn


West region brackets

In [34]:
X02_pairs_df = pair_teams(X_1st_round_df,'Team_won')
X02_pairs_df = team_pair_names(X02_pairs_df)
X02_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,213,176,North Carolina,Michigan St
1,11,72,Arizona,Dayton
2,23,54,Baylor,Clemson
3,3,287,Alabama,Saint Mary's


In [35]:
X_2nd_round_df = preds(Brackets_model,X02_pairs_df,'TeamA','TeamB')
X_2nd_round_df = get_winning_teamname(X_2nd_round_df)
X_2nd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,213,176,North Carolina,Michigan St,5.852607,213,North Carolina
1,11,72,Arizona,Dayton,11.764378,11,Arizona
2,23,54,Baylor,Clemson,-1.86329,54,Clemson
3,3,287,Alabama,Saint Mary's,7.304039,3,Alabama


South region brackets

In [36]:
Y02_pairs_df = pair_teams(Y_1st_round_df,'Team_won')
Y02_pairs_df = team_pair_names(Y02_pairs_df)
Y02_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,121,203,Houston,Nebraska
1,165,95,Marquette,Florida
2,145,200,Kentucky,NC State
3,80,357,Duke,Wisconsin


In [37]:
Y_2nd_round_df = preds(Brackets_model,Y02_pairs_df,'TeamA','TeamB')
Y_2nd_round_df = get_winning_teamname(Y_2nd_round_df)
Y_2nd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,121,203,Houston,Nebraska,0.771986,121,Houston
1,165,95,Marquette,Florida,-2.228626,95,Florida
2,145,200,Kentucky,NC State,6.002376,145,Kentucky
3,80,357,Duke,Wisconsin,13.841864,80,Duke


Midwest region brackets

In [38]:
Z02_pairs_df = pair_teams(Z_1st_round_df,'Team_won')
Z02_pairs_df = team_pair_names(Z02_pairs_df)
Z02_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,244,294,Purdue,TCU
1,296,299,Tennessee,Texas
2,65,231,Creighton,Oregon
3,141,110,Kansas,Gonzaga


In [39]:
Z_2nd_round_df = preds(Brackets_model,Z02_pairs_df,'TeamA','TeamB')
Z_2nd_round_df = get_winning_teamname(Z_2nd_round_df)
Z_2nd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,244,294,Purdue,TCU,10.229565,244,Purdue
1,296,299,Tennessee,Texas,-1.049932,299,Texas
2,65,231,Creighton,Oregon,-0.7894,231,Oregon
3,141,110,Kansas,Gonzaga,4.432876,141,Kansas


### Sweet 16

East region brackets

In [40]:
W_03_pairs_df = pair_teams(W_2nd_round_df,'Team_won')
W_03_pairs_df = team_pair_names(W_03_pairs_df)
W_03_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,62,19,Uconn,Auburn
1,134,127,Iowa State,Illinois


In [41]:
W_3rd_round_df = preds(Brackets_model,W_03_pairs_df,'TeamA','TeamB')
W_3rd_round_df = get_winning_teamname(W_3rd_round_df)
W_3rd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,62,19,Uconn,Auburn,6.91515,62,Uconn
1,134,127,Iowa State,Illinois,-4.760874,127,Illinois


West region brackets

In [42]:
X03_pairs_df = pair_teams(X_2nd_round_df,'Team_won')
X03_pairs_df = team_pair_names(X03_pairs_df)
X03_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,213,3,North Carolina,Alabama
1,11,54,Arizona,Clemson


In [43]:
X_3rd_round_df = preds(Brackets_model,X03_pairs_df,'TeamA','TeamB')
X_3rd_round_df = get_winning_teamname(X_3rd_round_df)
X_3rd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,213,3,North Carolina,Alabama,6.100444,213,North Carolina
1,11,54,Arizona,Clemson,6.946853,11,Arizona


South region brackets

In [44]:
Y03_pairs_df = pair_teams(Y_2nd_round_df,'Team_won')
Y03_pairs_df = team_pair_names(Y03_pairs_df)
Y03_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,121,80,Houston,Duke
1,95,145,Florida,Kentucky


In [45]:
Y_3rd_round_df = preds(Brackets_model,Y03_pairs_df,'TeamA','TeamB')
Y_3rd_round_df = get_winning_teamname(Y_3rd_round_df)
Y_3rd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,121,80,Houston,Duke,-11.859605,80,Duke
1,95,145,Florida,Kentucky,-4.515151,145,Kentucky


Midwest region brackets

In [46]:
Z03_pairs_df = pair_teams(Z_2nd_round_df,'Team_won')
Z03_pairs_df = team_pair_names(Z03_pairs_df)
Z03_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,244,141,Purdue,Kansas
1,299,231,Texas,Oregon


In [47]:
Z_3rd_round_df = preds(Brackets_model,Z03_pairs_df,'TeamA','TeamB')
Z_3rd_round_df = get_winning_teamname(Z_3rd_round_df)
Z_3rd_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,244,141,Purdue,Kansas,-1.328283,141,Kansas
1,299,231,Texas,Oregon,2.547154,299,Texas


### Round of 8

East region brackets

In [48]:
W_04_pairs_df = pair_teams(W_3rd_round_df,'Team_won')
W_04_pairs_df = team_pair_names(W_04_pairs_df)
W_04_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,62,127,Uconn,Illinois


In [49]:
W_4th_round_df = preds(Brackets_model,W_04_pairs_df,'TeamA','TeamB')
W_4th_round_df = get_winning_teamname(W_4th_round_df)
W_4th_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,62,127,Uconn,Illinois,-0.613336,127,Illinois


West region brackets

In [50]:
X04_pairs_df = pair_teams(X_3rd_round_df,'Team_won')
X04_pairs_df = team_pair_names(X04_pairs_df)
X04_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,213,11,North Carolina,Arizona


In [51]:
X_4th_round_df = preds(Brackets_model,X04_pairs_df,'TeamA','TeamB')
X_4th_round_df = get_winning_teamname(X_4th_round_df)
X_4th_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,213,11,North Carolina,Arizona,2.203191,213,North Carolina


South region brackets

In [52]:
Y04_pairs_df = pair_teams(Y_3rd_round_df,'Team_won')
Y04_pairs_df = team_pair_names(Y04_pairs_df)
Y04_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,80,145,Duke,Kentucky


In [53]:
Y_4th_round_df = preds(Brackets_model,Y04_pairs_df,'TeamA','TeamB')
Y_4th_round_df = get_winning_teamname(Y_4th_round_df)
Y_4th_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,80,145,Duke,Kentucky,3.654013,80,Duke


Midwest region brackets

In [54]:
Z04_pairs_df = pair_teams(Z_3rd_round_df,'Team_won')
Z04_pairs_df = team_pair_names(Z04_pairs_df)
Z04_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,141,299,Kansas,Texas


In [55]:
Z_4th_round_df = preds(Brackets_model,Z04_pairs_df,'TeamA','TeamB')
Z_4th_round_df = get_winning_teamname(Z_4th_round_df)
Z_4th_round_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,141,299,Kansas,Texas,6.527709,141,Kansas


Concatting teams selected for the playoffs from each region

In [56]:
columns = ['Teams_selected']
Playoffs_selected_teams_df = pd.DataFrame(columns=columns)

In [57]:
Playoffs_selected_teams_df.loc[len(Playoffs_selected_teams_df)] = W_4th_round_df['Team_won'].values
Playoffs_selected_teams_df.loc[len(Playoffs_selected_teams_df)] = Z_4th_round_df['Team_won'].values
Playoffs_selected_teams_df.loc[len(Playoffs_selected_teams_df)] = Y_4th_round_df['Team_won'].values
Playoffs_selected_teams_df.loc[len(Playoffs_selected_teams_df)] = X_4th_round_df['Team_won'].values
Playoffs_selected_teams_df

Unnamed: 0,Teams_selected
0,127
1,141
2,80
3,213


### Semi-finals

In [58]:
semifinals_pairs_df = pair_teams(Playoffs_selected_teams_df, 'Teams_selected')
semifinals_pairs_df = team_pair_names(semifinals_pairs_df)
semifinals_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,127,213,Illinois,North Carolina
1,141,80,Kansas,Duke


In [59]:
semifinals_results_df = preds(Brackets_model,semifinals_pairs_df,'TeamA','TeamB')
semifinals_results_df = get_winning_teamname(semifinals_results_df)
semifinals_results_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,127,213,Illinois,North Carolina,-3.807022,213,North Carolina
1,141,80,Kansas,Duke,0.268894,141,Kansas


### Finals

In [60]:
Finals_pairs_df = pair_teams(semifinals_results_df,'Team_won')
Finals_pairs_df = team_pair_names(Finals_pairs_df)
Finals_pairs_df

Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name
0,213,141,North Carolina,Kansas


In [61]:
Finals_results_df = preds(Brackets_model,Finals_pairs_df,'TeamA','TeamB')
Finals_results_df = get_winning_teamname(Finals_results_df)
Finals_results_df

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step


Unnamed: 0,TeamA,TeamB,TeamA_name,TeamB_name,predictions,Team_won,TeamName_won
0,213,141,North Carolina,Kansas,1.724854,213,North Carolina


In [62]:
print("Team won the tournament: " + Finals_results_df['TeamName_won'].values[0])

Team won the tournament: North Carolina
