In [72]:
import numpy as np
import pandas as pd
from scipy.special import comb

In [65]:
def generate_revenue(num_games,round_str,df_revenue,teams):
    """Return the combined revenue of one series that lasted ``num_games`` games.

    Parameters
    ----------
    num_games : series length; 4, 5, 6 and 7 are the only recognised values.
    round_str : column label of ``df_revenue`` to read (e.g. 'Round1_Gate').
    df_revenue : frame indexed by team label holding one figure per round column.
    teams : pair of team labels; the caller must put the higher seed first.

    Returns
    -------
    The weighted sum of both teams' figures, or None for any other length.
    """
    per_game_high = df_revenue.loc[teams[0]].loc[round_str]
    per_game_low = df_revenue.loc[teams[1]].loc[round_str]

    # (series length, weight for teams[0], weight for teams[1]) --
    # presumably the number of games each side hosts; the higher seed
    # gets the extra one in odd-length series.
    hosting_splits = (
        (4, 2., 2.),
        (5, 3., 2.),
        (6, 3., 3.),
        (7, 4., 3.),
    )
    for series_length, high_weight, low_weight in hosting_splits:
        if num_games == series_length:
            return high_weight*per_game_high + low_weight*per_game_low
    return None

In [206]:
# Venue of games 1..7 from the point of view of teams[0] (True = teams[0] at
# home). This is the schedule implied by the per-length formulas this function
# has always used (two home/two away in the first four games, then
# home/away/home), i.e. consistent with a 2-2-1-1-1 format.
_TEAM1_AT_HOME = (True, True, False, False, True, False, True)


def _prob_exact_wins(wins, home_games, away_games, p_H, p_A):
    """Probability that team 1 wins exactly ``wins`` of the given home/away games."""
    total = 0.
    # home_wins ranges over every split of `wins` that fits both venues.
    first = max(0, wins - away_games)
    last = min(home_games, wins)
    for home_wins in range(first, last + 1):
        away_wins = wins - home_wins
        at_home = comb(home_games, home_wins) * p_H**home_wins * (1. - p_H)**(home_games - home_wins)
        on_road = comb(away_games, away_wins) * p_A**away_wins * (1. - p_A)**(away_games - away_wins)
        total += at_home * on_road
    return total


def generate_probabilites(num_games,df_probs,teams):
    """Probability that each team wins a best-of-seven in exactly ``num_games``.

    Parameters
    ----------
    num_games : series length, one of 4, 5, 6, 7.
    df_probs : frame whose rows are keyed by ``(teams[0], teams[1])`` and which
        has the columns 'Prob1WinsHome' and 'Prob1WinsAway' (the single-game
        win probabilities of teams[0] at home and away).
    teams : pair of team labels; teams[0] is the side with home advantage.

    Returns
    -------
    (probA, probB) : probability that teams[0], respectively teams[1], takes
        the series in exactly ``num_games`` games.

    Raises
    ------
    ValueError : if ``num_games`` is not 4, 5, 6 or 7 (previously this surfaced
        as an UnboundLocalError at the return statement).
    """
    matchup = df_probs.loc[(teams[0],teams[1])]
    p_H = matchup.loc['Prob1WinsHome']
    p_A = matchup.loc['Prob1WinsAway']

    if num_games not in (4, 5, 6, 7):
        raise ValueError("num_games must be 4, 5, 6 or 7, got %r" % (num_games,))
    num_games = int(num_games)

    # Split the games before the clincher by venue.
    earlier_games = _TEAM1_AT_HOME[:num_games - 1]
    home_games = sum(earlier_games)
    away_games = len(earlier_games) - home_games
    # Team 1's chance of winning the deciding game depends on where it is played.
    p_clincher = p_H if _TEAM1_AT_HOME[num_games - 1] else p_A

    # The series winner takes the last game plus exactly three earlier ones.
    # For team 2 that means team 1 won the remaining (num_games - 1) - 3 games.
    probA = _prob_exact_wins(3, home_games, away_games, p_H, p_A) * p_clincher
    probB = _prob_exact_wins(num_games - 4, home_games, away_games, p_H, p_A) * (1. - p_clincher)
    return probA,probB

In [207]:
def series_probs(teams,df_probs):
    """List every (length, winner) outcome of a series with its probability.

    Returns two parallel lists of eight entries each: the probabilities and the
    matching ``(num_games, winning_team)`` labels, ordered by series length
    (4 to 7) with teams[0] before teams[1] for each length.
    """
    outcome_probs = []
    outcome_labels = []
    for series_length in (4, 5, 6, 7):
        outcome_labels.extend([(series_length, teams[0]), (series_length, teams[1])])
        first_wins, second_wins = generate_probabilites(series_length, df_probs, teams)
        outcome_probs.extend([first_wins, second_wins])
    return outcome_probs, outcome_labels

In [221]:
def simulate_round(teams, round_num, df_revenue, df_probs):
    """Randomly play out every series of one round.

    Parameters
    ----------
    teams : iterable of team pairs, each passed on unchanged to series_probs
        and generate_revenue.
    round_num : revenue column label for this round (handed to generate_revenue).
    df_revenue, df_probs : lookup frames forwarded to the helper functions.

    Returns
    -------
    (revenue, advancing_teams) : summed revenue of all series and the list of
        series winners in the same order as ``teams``.
    """
    winners = []
    round_revenue = 0
    for matchup in teams:
        weights, labels = series_probs(matchup, df_probs)
        # Draw one of the eight (length, winner) outcomes according to its probability.
        drawn = np.random.choice(np.arange(8), p=weights)
        games_played, winner = labels[drawn]
        winners.append(winner)
        round_revenue += generate_revenue(games_played, round_num, df_revenue, matchup)
    return round_revenue, winners

In [391]:
def compare_seeding(team1, team2):
    """Order two team labels by their final character (the seed digit).

    The label whose last character compares lower comes first; on a tie the
    argument order is kept. Returns the ordered pair as a tuple.
    """
    seed1, seed2 = team1[-1], team2[-1]
    keep_order = seed1 < seed2 or seed1 == seed2
    return (team1, team2) if keep_order else (team2, team1)

In [390]:
def construct_bracket(bracket):
    """Pair up consecutive winners to form the next round's matchups.

    A single remaining entry is returned as-is (not wrapped in a list).
    Otherwise entries 0/1, 2/3, ... are paired, each pair ordered by
    compare_seeding, printed, and collected as a two-element list. A trailing
    unpaired entry is ignored.
    """
    if len(bracket) == 1:
        return bracket[0]

    next_round = []
    for first, second in zip(bracket[0::2], bracket[1::2]):
        favourite, underdog = compare_seeding(first, second)
        print(favourite, underdog)
        next_round.append([favourite, underdog])
    return next_round

In [364]:
def generate_bracket(division):
    """Build the opening matchups for one division.

    Seed ``s`` faces seed ``9 - s``. The pairs are listed in the order
    1v8, 4v5, 3v6, 2v7 so that neighbouring winners meet in the next round.
    Each matchup is a two-element list of labels such as 'East1'.
    """
    top_seed_order = (1, 4, 3, 2)
    return [[division + str(seed), division + str(9 - seed)] for seed in top_seed_order]

In [365]:
# Sample pairing and revenue column used to try out simulate_round.
teams = [('East1','East2')]
round_num = 'Round1_Gate'
# DataFrame.from_csv was deprecated in pandas 0.21 and removed in 1.0;
# read_csv is the supported replacement. from_csv also defaulted to
# parse_dates=True, which is dropped here because both indexes hold team labels.
# Revenue table: rows are looked up by team label.
df_revenue = pd.read_csv("Business-Track-Application-Datasets.csv", index_col=0)
# Win probabilities: rows are looked up by (team1, team2), so the first two
# columns form a two-level index.
df_probs = pd.read_csv('win_probabilities.csv', index_col=[0, 1])

In [366]:
# Try one round on the sample pairing; the cell displays (revenue, advancing teams).
simulate_round(teams=teams, round_num=round_num, df_revenue=df_revenue, df_probs=df_probs)

(10435716.0, ['East2'])

In [518]:
def build_final_probs():
    """Build win probabilities for West-vs-East pairings with the West team listed first.

    Reads 'win_probabilities.csv' from the working directory, looks up every
    (East1..8, West1..8) row and flips it to the West team's point of view.

    Returns
    -------
    DataFrame with the columns 'index' (a ``(west_team, east_team)`` tuple),
    'Prob1WinsHome' and 'Prob1WinsAway'. Rows are ordered by East seed, then
    West seed, and every row carries the index label 0.
    """
    # DataFrame.from_csv was removed in pandas 1.0; read_csv with a two-column
    # index is the supported replacement.
    df_probs = pd.read_csv('win_probabilities.csv', index_col=[0, 1])

    # Collect plain rows and build the frame once: DataFrame.append was removed
    # in pandas 2.0 and growing a frame row by row is quadratic anyway.
    rows = []
    for east_seed in range(1,9):
        team1 = 'East'+str(east_seed)
        for west_seed in range(1,9):
            team2 = 'West'+str(west_seed)
            east_first = df_probs.loc[(team1,team2)]
            # The West team playing at home is the East team playing away (and
            # vice versa), so each value is the complement at the other venue.
            home_prob = 1-east_first.loc['Prob1WinsAway']
            away_prob = 1-east_first.loc['Prob1WinsHome']
            rows.append([(team2,team1), home_prob, away_prob])

    # The all-zero index reproduces the labels that appending one single-row
    # frame per pairing used to produce, so existing consumers see the same frame.
    return pd.DataFrame(rows, columns=['index', 'Prob1WinsHome', 'Prob1WinsAway'], index=[0]*len(rows))

In [517]:
# NOTE(review): df2 is not assigned in any visible cell -- presumably
# df2 = build_final_probs(); confirm, otherwise this cell fails on a fresh kernel.
df2

Unnamed: 0,index,Prob1WinsHome,Prob1WinsAway
0,"(West1, East1)",0.53,0.52
0,"(West2, East1)",0.44,0.37
0,"(West3, East1)",0.38,0.34
0,"(West4, East1)",0.33,0.26
0,"(West5, East1)",0.29,0.21
0,"(West6, East1)",0.20,0.16
0,"(West7, East1)",0.16,0.09
0,"(West8, East1)",0.08,0.04
0,"(West1, East2)",0.54,0.52
0,"(West2, East2)",0.52,0.45


In [460]:
def simulate_playoff(df_probs, df_revenue):
    """Simulate a full playoff: three rounds per division, then the final.

    Each division starts from generate_bracket and is played with
    simulate_round using the revenue column 'Round<n>_Gate'; the winners are
    re-paired with construct_bracket. The final uses 'Round4_Gate'.

    Returns
    -------
    (total_revenue, teams) : revenue summed over all rounds and the winner
        list returned by the final's simulate_round call.
    """
    brackets = {
        'East': generate_bracket('East'),
        'West': generate_bracket('West'),
    }
    total_revenue = 0

    for round_index in (1, 2, 3):
        gate_column = 'Round' + str(round_index) + '_Gate'
        # East is always played before West within a round.
        for division in ('East', 'West'):
            gate, winners = simulate_round(brackets[division], gate_column, df_revenue, df_probs)
            total_revenue += gate
            # After the third round this collapses to the division champion's label.
            brackets[division] = construct_bracket(winners)

    # NOTE(review): the East champion is always listed first here, regardless
    # of seeding -- confirm that is the intended home-advantage rule for the final.
    final_pairing = [(brackets['East'], brackets['West'])]
    gate, champion = simulate_round(final_pairing, 'Round4_Gate', df_revenue, df_probs)
    total_revenue += gate
    return total_revenue, champion

In [468]:
# Run one full playoff; the cell displays (total revenue, [champion]).
# NOTE(review): no random seed is set in the visible cells, so this result changes between runs.
simulate_playoff(df_probs=df_probs, df_revenue=df_revenue)

East1 East4
East2 East3
West1 West5
West2 West6
East1 East2
West1 West2


(229332937.0, ['West1'])