In [2]:
import pandas as pd

df = pd.read_csv('marchmadness_model24.csv')

Unnamed: 0,SEED_1,TEAM_1,SEED_2,TEAM_2,Team1W_Prob
0,1.0,Connecticut,1.0,Houston,0.360
1,1.0,Connecticut,1.0,North Carolina,0.778
2,1.0,Connecticut,1.0,Purdue,0.547
3,1.0,Connecticut,2.0,Arizona,0.690
4,1.0,Connecticut,2.0,Iowa St.,0.611
...,...,...,...,...,...
4551,16.0,Wagner,16.0,Grambling St.,0.418
4552,16.0,Wagner,16.0,Howard,0.492
4553,16.0,Wagner,16.0,Longwood,0.212
4554,16.0,Wagner,16.0,Montana St.,0.388


In [8]:
def get_team1_winning_probability(df, team1, team2):
    """
    Searches for the match between team1 and team2 in the dataframe and prints out the probability
    of team1 winning.
    
    Parameters:
    - df: Pandas DataFrame containing the match data
    - team1: Name of Team 1
    - team2: Name of Team 2
    """
    # Filter the dataframe for the specific match
    match = df[((df['TEAM_1'] == team1) & (df['TEAM_2'] == team2)) | ((df['TEAM_1'] == team2) & (df['TEAM_2'] == team1))]
    
    # Check if the match is found and print the probability
    if not match.empty:
        # Determine if team1 is actually TEAM_1 in the data for the correct probability
        if match.iloc[0]['TEAM_1'] == team1:
            print(f"Probability of {team1} winning against {team2}: {match.iloc[0]['Team1W_Prob']:.3f}")
        else:
            print(f"Probability of {team1} winning against {team2}: {1 - match.iloc[0]['Team1W_Prob']:.3f}")
    else:
        print("Match not found.")

# Example usage:
get_team1_winning_probability(df, "", "Marquette")


Probability of Texas Tech winning against Marquette: 0.332


In [15]:
def print_all_matchups_for_team1(df, team1):
    """
    Searches for all matches involving team1 and prints out the matchups along with the probability
    of team1 winning each match.
    
    Parameters:
    - df: Pandas DataFrame containing the match data
    - team1: Name of Team 1
    """
    # Filter the dataframe for matches involving team1 either as TEAM_1 or TEAM_2
    matches = df[(df['TEAM_1'] == team1) | (df['TEAM_2'] == team1)]
    
    # Check if there are matches found
    if not matches.empty:
        print(f"Matchups for {team1} and their winning probabilities:")
        for index, row in matches.iterrows():
            # Determine if team1 is TEAM_1 or TEAM_2 in the data
            if row['TEAM_1'] == team1:
                opponent = row['TEAM_2']
                prob = row['Team1W_Prob']
            else:
                opponent = row['TEAM_1']
                prob = 1 - row['Team1W_Prob']
            print(f"{team1} vs {opponent}: {prob:.3f}")
    else:
        print("No matchups found for this team.")

# Example usage
print_all_matchups_for_team1(df, "Arizona")


Matchups for Arizona and their winning probabilities:
Arizona vs Connecticut: 0.310
Arizona vs Houston: 0.202
Arizona vs North Carolina: 0.611
Arizona vs Purdue: 0.352
Arizona vs Connecticut: 0.310
Arizona vs Houston: 0.202
Arizona vs North Carolina: 0.611
Arizona vs Purdue: 0.352
Arizona vs Iowa St.: 0.414
Arizona vs Marquette: 0.557
Arizona vs Tennessee: 0.561
Arizona vs Baylor: 0.632
Arizona vs Creighton: 0.720
Arizona vs Illinois: 0.619
Arizona vs Kentucky: 0.723
Arizona vs Alabama: 0.626
Arizona vs Auburn: 0.464
Arizona vs Duke: 0.628
Arizona vs Kansas: 0.705
Arizona vs Gonzaga: 0.656
Arizona vs Saint Mary's: 0.680
Arizona vs San Diego St.: 0.735
Arizona vs Wisconsin: 0.675
Arizona vs BYU: 0.651
Arizona vs Clemson: 0.803
Arizona vs South Carolina: 0.756
Arizona vs Texas Tech: 0.717
Arizona vs Dayton: 0.765
Arizona vs Florida: 0.684
Arizona vs Texas: 0.704
Arizona vs Washington St.: 0.781
Arizona vs Florida Atlantic: 0.776
Arizona vs Mississippi St.: 0.745
Arizona vs Nebraska: 0.72

In [9]:
def print_upset_predictions(df):
    """
    Searches for and prints out every matchup where team1 is a lower seed than team2 (meaning a higher
    numerical seed value, which indicates a lower rank) but the model predicts that team2 will win
    (Team1W_Prob is less than 0.50).
    """
    # Filter the dataframe for matches where TEAM_1 has a higher seed number than TEAM_2 and
    # Team1W_Prob is less than 0.50
    upset_matches = df[(df['SEED_1'] < df['SEED_2']) & (df['Team1W_Prob'] < 0.55) & (df['Team1W_Prob'] > 0.45)]
    
    # Check if there are any such matches
    if not upset_matches.empty:
        print("Matchups where a lower-seeded TEAM_1 is predicted to lose to a higher-seeded TEAM_2:")
        for index, row in upset_matches.iterrows():
            print(f"Seed {int(row['SEED_1'])} {row['TEAM_1']} vs Seed {int(row['SEED_2'])} {row['TEAM_2']}: TEAM_2 Winning Probability: {1 - row['Team1W_Prob']:.3f}")
    else:
        print("No such matchups found.")

# Call the function to print upset predictions
print_upset_predictions(df)


Matchups where a lower-seeded TEAM_1 is predicted to lose to a higher-seeded TEAM_2:
Seed 1 North Carolina vs Seed 3 Baylor: TEAM_2 Winning Probability: 0.478
Seed 1 North Carolina vs Seed 3 Illinois: TEAM_2 Winning Probability: 0.492
Seed 1 North Carolina vs Seed 4 Alabama: TEAM_2 Winning Probability: 0.484
Seed 1 North Carolina vs Seed 4 Duke: TEAM_2 Winning Probability: 0.482
Seed 1 North Carolina vs Seed 5 Gonzaga: TEAM_2 Winning Probability: 0.452
Seed 1 North Carolina vs Seed 6 BYU: TEAM_2 Winning Probability: 0.457
Seed 1 North Carolina vs Seed 11 New Mexico: TEAM_2 Winning Probability: 0.479
Seed 2 Arizona vs Seed 4 Auburn: TEAM_2 Winning Probability: 0.536
Seed 3 Baylor vs Seed 4 Alabama: TEAM_2 Winning Probability: 0.506
Seed 3 Baylor vs Seed 4 Duke: TEAM_2 Winning Probability: 0.504
Seed 3 Baylor vs Seed 5 Gonzaga: TEAM_2 Winning Probability: 0.474
Seed 3 Baylor vs Seed 5 Wisconsin: TEAM_2 Winning Probability: 0.452
Seed 3 Baylor vs Seed 6 BYU: TEAM_2 Winning Probability: 0.

In [25]:
def print_matchups_for_team1_with_max_prob(df, team1, max_prob):
    """
    Searches for all matches involving team1 and prints out the matchups along with the probability
    of team1 winning each match, filtering to only include matches where team1's winning probability
    is at most the specified maximum probability.
    
    Parameters:
    - df: Pandas DataFrame containing the match data
    - team1: Name of Team 1
    - max_prob: Maximum winning probability (as a decimal) to include in the output
    """
    # Filter the dataframe for matches involving team1 either as TEAM_1 or TEAM_2
    matches = df[(df['TEAM_1'] == team1) | (df['TEAM_2'] == team1)]
    
    print(f"Matchups for {team1} with winning probabilities of {max_prob*100}% or lower:")
    
    # Track if any matches are found that meet the criteria
    matches_found = False

    for index, row in matches.iterrows():
        # Determine if team1 is TEAM_1 or TEAM_2 in the data
        if row['TEAM_1'] == team1:
            opponent = row['TEAM_2']
            prob = row['Team1W_Prob']
        else:
            opponent = row['TEAM_1']
            prob = 1 - row['Team1W_Prob']
        
        # Print only if the probability is at most the maximum threshold
        if prob <= max_prob:
            print(f"{team1} vs {opponent}: {prob:.3f}")
            matches_found = True
    
    if not matches_found:
        print(f"No matchups found for {team1} with winning probabilities of {max_prob*100}% or lower.")

# Example usage with a maximum probability threshold
print_matchups_for_team1_with_max_prob(df, "Purdue", 0.60)


Matchups for Purdue with winning probabilities of 60.0% or lower:
Purdue vs Connecticut: 0.453
Purdue vs Houston: 0.318
Purdue vs Connecticut: 0.453
Purdue vs Houston: 0.318
Purdue vs Iowa St.: 0.566
Purdue vs Iowa St.: 0.566
