In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import pandas as pd
import numpy as np
import io
import random
import math
import scipy.stats
import matplotlib.pylab as plt
import seaborn as sns

  from IPython.core.display import display, HTML


In [2]:
# Initialize year variable
year = 2025 #(2019, 2021, 2022, 2023, 2024, 2025)

In [3]:
# Read in data based on the year
if year == 2019:
    data = pd.read_csv('data/18_19tourneycs.csv')
elif year == 2021:
    data = pd.read_csv('data/20_21tourneycs.csv')
elif year == 2022:
    data = pd.read_csv('data/21_22tourneycs.csv')
elif year == 2023:
    data = pd.read_csv('data/22_23tourneycs.csv')
elif year == 2024:
    data = pd.read_csv('data/23_24tourneycs.csv')
elif year == 2025:
    data = pd.read_csv('data/24_25tourneycs.csv')
data.head()

Unnamed: 0,Region,Seed,School,SRS
0,1,1,Auburn,29.58
1,1,16,Alabama State,-9.27
2,1,8,Louisville,19.3
3,1,9,Creighton,16.36
4,1,4,Texas A&M,20.05


In [4]:
# Rename columns of data DataFrame
data.columns = ['Region', 'Seed', 'Team', 'SRS']
data

Unnamed: 0,Region,Seed,Team,SRS
0,1,1,Auburn,29.58
1,1,16,Alabama State,-9.27
2,1,8,Louisville,19.30
3,1,9,Creighton,16.36
4,1,4,Texas A&M,20.05
...,...,...,...,...
59,4,11,Xavier,15.54
60,4,7,UCLA,19.24
61,4,10,Utah State,14.48
62,4,2,Tennessee,25.04


In [5]:
num_of_sims = 10000  # Number of simulations
teams = data.to_numpy().tolist()  # Convert DataFrame to a nested list
team_dict = {}  # Dictionary to store team data

# Iterate through each team
for i in range(0, len(teams)):
    # Initialize an empty list for each team in the team_dict
    team_dict[teams[i][2]] = [0, 0, 0, 0, 0, 0, 0]
    # The team name is accessed using teams[i][2], assuming the team name is in the third column of the data

In [6]:
"""
Given a list of teams in a specific round and the round number,
find_winners randomly determines the winners of each matchup based on the
SRS difference between the teams and returns a list of the winners
"""
def find_winners(input_list, round):

    winners = []

    # Iterate over the input list of teams
    for i in range(0, len(input_list)):
        # Skips if the index is odd (represents the second team in a matchup)
        if i % 2 != 0:
            pass
        else:
            # Get the first team and the second team in the matchup
            team1 = input_list[i]
            team2 = input_list[i+1]

            # Calculate the SRS difference between the teams
            srs_diff = team1[3] - team2[3]

            # Calculate the win probability of Team 1 using the SRS difference
            team1_prob = scipy.stats.norm(0, 10.36).cdf(srs_diff)

            # Generate a random draw between 0 and 1
            draw = random.random()

            # Determine the winner based on the draw and win probability
            if draw <= team1_prob:
                # Team 1 wins
                winners.append(team1)
                team_dict[team1[2]][round] += 1
            else:
                # Team 2 wins
                winners.append(team2)
                team_dict[team2[2]][round] += 1

    return winners


In [7]:
"""
Prints out the bracket representation of the tournament rounds
based on the provided round_list
"""
def bracket_printout(round_list):

    for i in range(0, len(round_list)):
        temp_list = round_list[i]

        # Iterate over the matchups in the current round
        for j in range(0, len(temp_list)):
            # Print the team seed, name, and SRS for each matchup
            print(str(temp_list[j][1]) + " " + temp_list[j][2] + " " + str(temp_list[j][3]))

            # Print an empty line after every even-indexed matchup
            if j % 2 != 0:
                print("")

        # Print empty lines between each round
        print("")
        print("")
        print("")


In [8]:
"""
Simulates a tournament by finding winners in each round and printing the bracket
"""
def tourney_sim(teams):
    # Increment the count of tournaments for each team
    for x in team_dict:
        team_dict[x][0] += 1

    # Find winners in the Round of 64
    round_32 = find_winners(teams, 1)

    # Find winners in the Round of 32
    sweet_16 = find_winners(round_32, 2)

    # Find winners in the Sweet 16
    elite_8 = find_winners(sweet_16, 3)

    # Find winners in the Elite 8
    final_4 = find_winners(elite_8, 4)

    # Find winners in the Final 4
    championship = find_winners(final_4, 5)

    # Find the winner in the Championship
    winner = find_winners(championship, 6)

    # Print the bracket for all rounds
    # Uncomment below to see bracket format of simulation. Heads up: it is a lot of output!
    #bracket_printout([teams, round_32, sweet_16, elite_8, final_4, championship, winner])


In [9]:
%%time
# Run the monte-carlo tournament simulation
for i in range(1, num_of_sims+1):
    tourney_sim(teams)

# Calculate the average statistics for each team
for x in team_dict:
    # Divide the count of wins in each round by the total number of simulations
    for i in range(0, len(team_dict[x])):
        team_dict[x][i] = team_dict[x][i] / num_of_sims

    # Print the team name and its average statistics
    print(x + " " + str(team_dict[x]))


Auburn [1.0, 1.0, 0.8652, 0.7159, 0.5532, 0.3687, 0.2153]
Alabama State [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Louisville [1.0, 0.6104, 0.0949, 0.0475, 0.018, 0.0052, 0.0016]
Creighton [1.0, 0.3896, 0.0399, 0.0149, 0.0047, 0.0007, 0.0001]
Texas A&M [1.0, 0.9055, 0.5077, 0.1238, 0.0569, 0.0177, 0.0043]
Yale [1.0, 0.0945, 0.013, 0.0006, 0.0, 0.0, 0.0]
Michigan [1.0, 0.7506, 0.4068, 0.0916, 0.0412, 0.0127, 0.003]
UC San Diego [1.0, 0.2494, 0.0725, 0.0057, 0.001, 0.0001, 0.0]
Iowa State [1.0, 0.9566, 0.6554, 0.3823, 0.1431, 0.0636, 0.0219]
Lipscomb [1.0, 0.0434, 0.0049, 0.0001, 0.0, 0.0, 0.0]
Mississippi [1.0, 0.501, 0.1668, 0.0659, 0.0133, 0.0034, 0.0004]
North Carolina [1.0, 0.499, 0.1729, 0.0644, 0.0126, 0.0031, 0.0007]
Marquette [1.0, 0.6289, 0.2365, 0.0965, 0.0222, 0.0066, 0.0022]
New Mexico [1.0, 0.3711, 0.0913, 0.0241, 0.004, 0.0011, 0.0002]
Michigan State [1.0, 0.9908, 0.6719, 0.3667, 0.1298, 0.0544, 0.0181]
Bryant [1.0, 0.0092, 0.0003, 0.0, 0.0, 0.0, 0.0]
Florida [1.0, 0.9988, 0.8569

In [10]:
expected_games = {}

for team, probs in team_dict.items():
    # Sum only the first six probabilities
    expected_games[team] = sum(probs[:6])

# Print results
for team, exp in expected_games.items():
    print(f"{team}: {exp:.3f}")

Auburn: 4.503
Alabama State: 1.000
Louisville: 1.776
Creighton: 1.450
Texas A&M: 2.612
Yale: 1.108
Michigan: 2.303
UC San Diego: 1.329
Iowa State: 3.201
Lipscomb: 1.048
Mississippi: 1.750
North Carolina: 1.752
Marquette: 1.991
New Mexico: 1.492
Michigan State: 3.214
Bryant: 1.010
Florida: 4.141
Norfolk State: 1.001
Connecticut: 1.741
Oklahoma: 1.473
Maryland: 3.332
Grand Canyon: 1.049
Memphis: 1.528
Colorado State: 1.683
Texas Tech: 3.292
UNC Wilmington: 1.019
Missouri: 2.425
Drake: 1.145
Kansas: 2.139
Arkansas: 1.498
St. John's: 2.984
Omaha: 1.013
Duke: 4.425
Mount St Mary's: 1.000
Mississippi State: 1.504
Baylor: 1.704
Arizona: 3.035
Akron: 1.035
Oregon: 2.131
Liberty: 1.205
Wisconsin: 2.913
Montana: 1.016
BYU: 2.112
VCU: 1.446
Saint Mary's: 1.787
Vanderbilt: 1.482
Alabama: 3.771
Robert Morris: 1.007
Houston: 3.953
SIU Edwardsville: 1.000
Gonzaga: 2.212
Georgia: 1.330
Purdue: 2.776
High Point: 1.084
Clemson: 2.311
McNeese State: 1.202
Kentucky: 2.839
Troy: 1.040
Illinois: 2.341
Xavie

In [11]:
point_system = [0, 1, 2, 3, 5, 8, 13]  # Predefined point system for each round

expected_values = {}  # Dictionary to store the expected value for each team

# Calculate the expected value for each team
for team, probabilities in team_dict.items():
    expected_value = sum(probabilities[i] * point_system[i] for i in range(len(probabilities)))
    expected_values[team] = expected_value

# Sort teams by expected value in descending order
sorted_teams = sorted(expected_values.items(), key=lambda x: x[1], reverse=True)

# Print the expected value for each team
for team, expected_value in sorted_teams:
    print(f"{team}: {expected_value:.3f}")

Auburn: 13.393
Duke: 13.113
Florida: 10.313
Houston: 9.747
Alabama: 7.517
Tennessee: 6.495
Texas Tech: 5.405
Maryland: 5.322
Iowa State: 4.923
Michigan State: 4.754
Arizona: 4.024
St. John's: 3.925
Wisconsin: 3.514
Kentucky: 3.470
Purdue: 3.181
Texas A&M: 2.774
Illinois: 2.510
Missouri: 2.443
Gonzaga: 2.396
Michigan: 2.186
Clemson: 2.018
Kansas: 1.999
BYU: 1.815
Marquette: 1.584
Oregon: 1.556
UCLA: 1.524
North Carolina: 1.135
Mississippi: 1.131
Louisville: 1.095
Saint Mary's: 1.077
Connecticut: 0.994
Baylor: 0.940
Colorado State: 0.840
Arkansas: 0.717
New Mexico: 0.657
Mississippi State: 0.627
Memphis: 0.618
VCU: 0.606
Vanderbilt: 0.590
Oklahoma: 0.571
Creighton: 0.544
Xavier: 0.523
Utah State: 0.496
UC San Diego: 0.417
Georgia: 0.414
McNeese State: 0.232
Liberty: 0.223
Drake: 0.158
Yale: 0.122
High Point: 0.098
Grand Canyon: 0.060
Lipscomb: 0.053
Troy: 0.042
Akron: 0.040
UNC Wilmington: 0.020
Montana: 0.016
Omaha: 0.013
Bryant: 0.010
Wofford: 0.009
Robert Morris: 0.008
Norfolk State: 

In [12]:
# Convert dictionary to DataFrame
df_expected_values = pd.DataFrame.from_dict(expected_values, orient='index', columns=['Expected Value'])

# Sort teams by expected value in descending order
df_expected_values = df_expected_values.sort_values(by='Expected Value', ascending=False)

# Reset the index
df_expected_values = df_expected_values.reset_index().rename(columns={'index': 'Team'})

# Display the DataFrame
display(df_expected_values)

Unnamed: 0,Team,Expected Value
0,Auburn,13.3926
1,Duke,13.1127
2,Florida,10.3128
3,Houston,9.7474
4,Alabama,7.5174
...,...,...
59,Robert Morris,0.0076
60,Norfolk State,0.0012
61,SIU Edwardsville,0.0005
62,Mount St Mary's,0.0002


In [13]:
# Merge the dataframes based on the 'Team' column
df_merged = data.merge(df_expected_values, on='Team')

# Display the merged DataFrame
df_merged.head()

Unnamed: 0,Region,Seed,Team,SRS,Expected Value
0,1,1,Auburn,29.58,13.3926
1,1,16,Alabama State,-9.27,0.0
2,1,8,Louisville,19.3,1.0951
3,1,9,Creighton,16.36,0.5445
4,1,4,Texas A&M,20.05,2.7743


In [14]:
# Define the mapping of seed values to cost values
seed_cost_mapping = {
    1: 75,
    2: 40,
    3: 25,
    4: 20,
    5: 17,
    6: 15,
    7: 12,
    8: 10,
    9: 9,
    10: 8,
    11: 7,
    12: 6,
    13: 5,
    14: 4,
    15: 3,
    16: 1,
}

# Add a new column 'Cost' to df_merged based on the 'Seed' column
df_merged['Cost'] = df_merged['Seed'].map(seed_cost_mapping)

# Display the merged DataFrame
df_merged.head()

Unnamed: 0,Region,Seed,Team,SRS,Expected Value,Cost
0,1,1,Auburn,29.58,13.3926,75
1,1,16,Alabama State,-9.27,0.0,1
2,1,8,Louisville,19.3,1.0951,10
3,1,9,Creighton,16.36,0.5445,9
4,1,4,Texas A&M,20.05,2.7743,20


In [15]:
# Define the knapsack function
def knapsack(items, budget):
    n = len(items)
    dp = [[0] * (budget + 1) for _ in range(n + 1)]

    for i in range(1, n + 1):
        for j in range(1, budget + 1):
            cost = items[i - 1]['Cost']
            value = items[i - 1]['Expected Value']
            if cost > j:
                dp[i][j] = dp[i - 1][j]
            else:
                dp[i][j] = max(dp[i - 1][j], value + dp[i - 1][j - cost])

    selected = []
    j = budget
    for i in range(n, 0, -1):
        if dp[i][j] != dp[i - 1][j]:
            selected.append(items[i - 1])
            j -= items[i - 1]['Cost']

    return selected

# Run the knapsack optimization on df_merged
selected_teams = knapsack(df_merged.to_dict('records'), budget=224)

# Create a new DataFrame for the selected teams
df_selected = pd.DataFrame(selected_teams)

# Display the selected teams and their information
display(df_selected)

# Calculate the total sum of the cost and expected value columns
total_cost = df_selected['Cost'].sum()
total_expected_value = df_selected['Expected Value'].sum()

# Print the total sums
print(f"Total Cost: {total_cost}")
print(f"Total Expected Value: {total_expected_value:.3f}")

Unnamed: 0,Region,Seed,Team,SRS,Expected Value,Cost
0,4,8,Gonzaga,23.63,2.3958,10
1,4,16,SIU Edwardsville,-5.83,0.0005,1
2,3,2,Alabama,25.97,7.5174,40
3,3,4,Arizona,23.14,4.0244,20
4,2,3,Texas Tech,23.91,5.4047,25
5,2,4,Maryland,23.54,5.322,20
6,2,16,Norfolk State,-3.61,0.0012,1
7,1,11,North Carolina,17.79,1.1349,7
8,1,3,Iowa State,23.05,4.9233,25
9,1,1,Auburn,29.58,13.3926,75


Total Cost: 224
Total Expected Value: 44.117
