In [1]:
import pandas as pd
import numpy as np
import random
import warnings
warnings.filterwarnings('ignore')

from IPython.display import display, HTML


#Writing File. Select top for Production. Bottom for Debug

# Prod - jupyter nbconvert --to html PBCCompBal.ipynb --no-input --no-prompt
# Debug - jupyter nbconvert --to html PBCCompBal.ipynb

# PBC Competitive Balance Draft
***

The PBC Competitive Balance Draft Rounds A and B are designed to counteract imbalances in the league due simply to differences in the local market sizes of our teams. The 10 teams with the smallest markets (as determined by local media revenue from the previous season) will be eligible for these picks. The order of these picks will be determined by a lottery drawing, with each team receiving a certain number of balls based on two components: Market Size and Total Revenue. There will be 10 picks awarded: 6 at the end of the first round, and 4 at the end of the second round. The rest of this document outlines this process in detail, and shows the results at each step.


In [19]:
# Lottery-ball weights per rank slot: slot 1 receives the most balls, slot 10 the fewest.
index = list(range(1, 11))
balls_base = pd.DataFrame(
    {'Balls': [250, 200, 150, 100, 80, 70, 60, 40, 30, 20]},
    index=index,
)
balls_base

Unnamed: 0,Balls
1,250
2,200
3,150
4,100
5,80
6,70
7,60
8,40
9,30
10,20


Balls will be assigned according to the above weights twice using the following two components:

1. **Market Size.** This is the team's Media Revenue during the prior season. The bottom 10 Media Revenues will be granted eligibility into the competitive balance pool. Balls are assigned by lowest Media Revenue to highest among these bottom 10 teams.

    > Tiebreakers are handled by giving every tied team the average number of chances available across the tied positions. For example, if three teams tie for 2nd, 3rd, and 4th, the lottery chances for those positions are 200, 150, and 100 respectively. Since those chances sum to 450, each team receives 150 chances in the lottery draw.

    
2. **Total Revenue.** Total Revenue a team earns in a given season. This is simply sorted from lowest revenue, to highest to determine eligibility. Lowest Revenue gets the most, decreasing down to 10 teams.


Note, no playoff teams are eligible. They are removed immediately from any eligibility.


Below is a list of all teams, sorted alphabetically with all criteria for selection. 

In [3]:
# Load the season standings and the competitive-balance inputs, join them per team,
# and build `teams`: the alphabetical table of non-playoff clubs used by later cells.
standings = pd.read_csv('PBC_standings_2031.csv', index_col = 0)[['team', 'wins']]
comps = pd.read_csv('PBC_comp_balance_2031.csv', index_col = 0)
comps = comps.merge(standings, on = 'team')

# Title-case every header ('team' -> 'Team', 'playoffs' -> 'Playoffs') for display.
# Every lookup after this line must use the title-cased spelling.
comps.columns = comps.columns.str.title()

# NOTE(review): assumes the comp-balance CSV carries a 'playoffs' column where 1 marks a
# playoff team -- confirm against the file. It is read as 'Playoffs' here because the
# headers were title-cased above; the lower-case name no longer exists at this point.
comps['Playoff Team'] = np.where(comps['Playoffs'] == 1, 'Y', 'N')

# Selecting several columns needs a list of labels (double brackets); a bare tuple
# is treated as one single key and raises KeyError on a flat column index.
comp = comps[['Team', 'Wins', 'Market Size', 'Revenue', 'Playoff Team']]

# Playoff teams are not eligible for competitive-balance picks.
comp = comp[comp['Playoff Team'] == 'N']

# Alphabetical listing, renumbered from 1 for display.
teams = comp.sort_values('Team')
teams.index = np.arange(1, len(teams) + 1)
teams

Unnamed: 0,Team,Wins,Market Size,Revenue,Playoff Team
1,Arizona Diamondbacks,62,5,132698668,N
2,Atlanta Braves,81,6,138163547,N
3,Baltimore Orioles,74,6,141867483,N
4,Chicago Cubs,85,10,247745944,N
5,Cincinnati Reds,78,5,126116052,N
6,Cleveland Clevelands,71,5,137665267,N
7,Colorado Rockies,52,7,148484457,N
8,Kansas City Royals,84,6,177730833,N
9,London Cannons,63,8,156604831,N
10,Los Angeles Angels,80,8,192971219,N


## Market Size Eligibility

Below are the lowest 10 teams, sorted by Market Size. These 10 teams are eligible for a competitive balance draft pick in the upcoming draft.

If the Market size is tied, then the teams tied will split all available balls for the slots they fall into. For example: if teams 2 through 5 have a market size of 4, then Spot 2, 3, 4, 5 total balls are added up, and divided by 4.

In [18]:
# Order the eligible teams by Market Size (smallest first), using Wins (fewest first)
# to order teams that share a Market Size, then keep the first ten rows.
market_sorted = teams.sort_values(by=['Market Size', 'Wins'])
market_10 = market_sorted.iloc[:10].reset_index()
market_10

Unnamed: 0,Team,Wins,Market Size,Revenue,Playoff Team
12,Miami Marlins,61,3,101802525,N
16,Oakland Athletics,74,4,161228804,N
17,Pittsburgh Pirates,94,4,174676873,N
18,Seattle Mariners,49,5,126196936,N
1,Arizona Diamondbacks,62,5,132698668,N
6,Cleveland Clevelands,71,5,137665267,N
5,Cincinnati Reds,78,5,126116052,N
20,Texas Rangers,58,6,127947959,N
13,Minnesota Twins,65,6,187578472,N
3,Baltimore Orioles,74,6,141867483,N


In [5]:
# Assign Market Size lottery balls, splitting balls evenly between tied teams.
#
# `market_10` is already ordered by Market Size, so row k occupies rank slot k.
# Teams sharing a Market Size pool the balls of all the slots they occupy and each
# receive the average. The tie groups are derived from the data rather than written
# out slot by slot, so this cell stays correct when the ties fall differently.

# Rank slots are numbered from 1 so they line up with the index of `balls_base`.
market_10.index = np.arange(1, len(market_10) + 1)

# Balls on offer for each occupied slot (only as many slots as there are teams).
slot_balls = balls_base['Balls'].reindex(market_10.index)

# Average the slot balls within each group of equal Market Size; `transform` keeps
# one value per row, in the original row order.
balls = (
    slot_balls
    .groupby(market_10['Market Size'])
    .transform('mean')
    .astype(float)
    .round(1)
    .to_frame('Market Balls')
)

# Code to determine Market Size Eligibility

market = pd.concat([market_10, balls], axis = 1)

# Explicit copy so the assignment below writes to its own frame, not a view of `market`.
market_final = market[['Team', 'Market Balls']].head(10).copy()

# Fractional averages are rounded up to a whole number of balls.
market_final['Market Balls'] = np.ceil(market_final['Market Balls']).astype(int)
market_final

Unnamed: 0,Team,Market Balls
1,Miami Marlins,250
2,Oakland Athletics,175
3,Pittsburgh Pirates,175
4,Seattle Mariners,78
5,Arizona Diamondbacks,78
6,Cleveland Clevelands,78
7,Cincinnati Reds,78
8,Texas Rangers,30
9,Minnesota Twins,30
10,Baltimore Orioles,30


## Revenue Eligibility

Below are the eligible teams sorted by Revenue, ascending. Given no ties exist, the lottery balls are assigned sequentially. 

In [6]:
# Revenue eligibility: rank the eligible teams from lowest to highest Revenue and
# hand out the base ball weights by rank.

by_revenue = teams.sort_values(by='Revenue', ascending=True)
by_revenue.index = np.arange(1, len(by_revenue) + 1)

# Joining on the 1-based rank attaches a weight to ranks 1-10; lower-ranked rows
# find no match in `balls_base` and are left as NaN.
revenue_elig = by_revenue.join(balls_base).rename(columns={'Balls': 'Revenue Balls'})

revenue_final = revenue_elig.loc[:, ['Team', 'Revenue Balls']].head(10)
revenue_final


Unnamed: 0,Team,Revenue Balls
1,Miami Marlins,250.0
2,Cincinnati Reds,200.0
3,Seattle Mariners,150.0
4,Texas Rangers,100.0
5,Arizona Diamondbacks,80.0
6,Cleveland Clevelands,70.0
7,Atlanta Braves,60.0
8,Baltimore Orioles,40.0
9,Colorado Rockies,30.0
10,Mexico City Diablos,20.0


## Final Draft Lottery Probabilities

For the Comp Balance Lottery, here are the chances provided for each team for the top selection. There's an important distinction to advise you on. This lottery is drawn randomly, without replacement, with a caveat. The process works as follows: 

1. The program draws a ball. The team on the ball is awarded the selection.
2. All corresponding balls are removed from the lottery pool.
3. The program then draws another ball. This only includes the remaining teams that have lottery balls.
4. Once the playoff teams come up, they are ranked from lowest win total, to highest.

This process repeats until the draft is over. So the probabilities you see below are exclusive to the first overall selection. Upon request, I can provide probabilities for each subsequent round if you are interested in seeing it. 

The draft lottery itself is not displayed below, simply the results. The code is provided upon request. 
> Side note: for those interested in the programmatic methodology: The process is done through a manual iteration of 10 steps (10 picks) using the `random` module in Python. The `random.choices()` function allows us to provide a list of teams, with assigned weights taken from vectorized operations on those columns in the dataset. From there, we draw a single random choice. I then reconstruct the dataset to exclude the team selected so as to properly remove them from the process. This process is then repeated until the draft is over.

In [7]:
# Build the lottery table: attach each team's Market and Revenue balls, total them,
# and express the total as a percentage chance at the first selection.

# Left merges keep every eligible team; a team missing from either allocation gets 0.
ball_counts = (
    teams
    .merge(market_final, how='left', on='Team')
    .merge(revenue_final, how='left', on='Team')
    .fillna(0)
)

# Total balls per team and the share of all balls, as a percentage to one decimal.
ball_counts['Total Balls'] = ball_counts['Market Balls'] + ball_counts['Revenue Balls']
ball_counts['% Pick Chance'] = (
    ball_counts['Total Balls'] / ball_counts['Total Balls'].sum() * 100
).round(1)

# Only teams holding at least one ball take part in the lottery.
has_balls = ball_counts['Total Balls'] > 0

# Final columns, ordered by Total Balls (descending) and then Wins (ascending).
final_cols = ['Team', 'Wins', 'Market Size', 'Revenue', 'Total Balls', '% Pick Chance']
final = (
    ball_counts.loc[has_balls, final_cols]
    .sort_values(by=['Total Balls', 'Wins'], ascending=[False, True])
    .set_index('Team')
    .astype({'Total Balls': 'int'})
)
final

Unnamed: 0_level_0,Wins,Market Size,Revenue,Total Balls,% Pick Chance
Team,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Miami Marlins,61,3,101802525,500,25.0
Cincinnati Reds,78,5,126116052,278,13.9
Seattle Mariners,49,5,126196936,228,11.4
Oakland Athletics,74,4,161228804,175,8.7
Pittsburgh Pirates,94,4,174676873,175,8.7
Arizona Diamondbacks,62,5,132698668,158,7.9
Cleveland Clevelands,71,5,137665267,148,7.4
Texas Rangers,58,6,127947959,130,6.5
Baltimore Orioles,74,6,141867483,70,3.5
Atlanta Braves,81,6,138163547,60,3.0


## Final Results

The following picks will be awarded as follows:

In [8]:
# Run the weighted lottery and report each pick.
#
# Each pick draws one team with probability proportional to its 'Total Balls'. The
# winner is then removed from the pool, so the draw is without replacement and the
# remaining teams' chances are recomputed before the next pick.
# `picks` maps a pick label to [team, % chance at that pick, team's balls, balls in pool].

# Fixed seed so the published results can be reproduced exactly.
# NOTE(review): the original comment says the seed varies with the date of the run -- confirm.
LOTTERY_SEED = 913

# Pick labels in draw order: six picks after Round 1, then seven after Round 2.
PICK_LABELS = (
    ['Round 1 - Pick C' + str(n) for n in range(1, 7)]
    + ['Round 2 - Pick C' + str(n) for n in range(1, 8)]
)

# From this pick onward the remaining pool is re-ordered by Wins (ascending) after each
# draw. Re-ordering does not change any team's probability, but it does change which
# team a given random number lands on, so it is kept to reproduce the seeded results.
WINS_SORT_FROM_PICK = 7


def _draw_lottery(pool, labels, sort_from_pick):
    """Draw one team per label from `pool`, weighted by 'Total Balls', without replacement.

    Parameters
    ----------
    pool : pandas.DataFrame
        Indexed by team name, with 'Total Balls', '% Pick Chance' and 'Wins' columns.
        The frame passed in is not modified.
    labels : list of str
        Pick labels, in the order the picks are drawn.
    sort_from_pick : int
        1-based pick number from which the remaining pool is sorted by 'Wins' after
        each draw.

    Returns
    -------
    dict
        label -> [team, % chance at the time of the draw, team's balls, balls in pool].
        Holds fewer entries than `labels` if the pool runs out of teams first.
    """
    results = {}
    remaining = pool.copy()

    for pick_number, label in enumerate(labels, start=1):
        # Stop cleanly when every team has been picked; drawing from an empty pool would raise.
        if remaining.empty:
            break

        winner = random.choices(
            remaining.index.tolist(), remaining['Total Balls'].tolist(), k = 1
        )[0]

        # .iloc[0] takes the first matching row by position; plain [0] on a
        # team-labelled Series relies on deprecated positional fallback.
        winner_row = remaining.loc[remaining.index == winner]
        results[label] = [
            winner,
            winner_row['% Pick Chance'].iloc[0],
            winner_row['Total Balls'].iloc[0],
            remaining['Total Balls'].sum(),
        ]

        # Remove the winner. Copy so the column assignment below writes to its own frame.
        remaining = remaining[remaining.index != winner].copy()
        if pick_number >= sort_from_pick:
            remaining = remaining.sort_values('Wins', ascending = True)
        if not remaining.empty:
            remaining['% Pick Chance'] = (
                remaining['Total Balls'] / remaining['Total Balls'].sum() * 100
            ).round(1)

    return results


random.seed(LOTTERY_SEED)
picks = _draw_lottery(final, PICK_LABELS, WINS_SORT_FROM_PICK)

for key, value in picks.items():
    print(str(key) + " : " + str(value[0]) + ' - Chance to win pick = ' + str(value[1]) + ' (' + str(value[2]) + '/' + str(value[3]) + ')')

Round 1 - Pick C1 : Miami Marlins - Chance to win pick = 25.0 (500/2002)
Round 1 - Pick C2 : Pittsburgh Pirates - Chance to win pick = 11.7 (175/1502)
Round 1 - Pick C3 : Cleveland Clevelands - Chance to win pick = 11.2 (148/1327)
Round 1 - Pick C4 : Mexico City Diablos - Chance to win pick = 1.7 (20/1179)
Round 1 - Pick C5 : Arizona Diamondbacks - Chance to win pick = 13.6 (158/1159)
Round 1 - Pick C6 : Seattle Mariners - Chance to win pick = 22.8 (228/1001)
Round 2 - Pick C1 : Colorado Rockies - Chance to win pick = 3.9 (30/773)
Round 2 - Pick C2 : Oakland Athletics - Chance to win pick = 23.6 (175/743)
Round 2 - Pick C3 : Cincinnati Reds - Chance to win pick = 48.9 (278/568)
Round 2 - Pick C4 : Baltimore Orioles - Chance to win pick = 24.1 (70/290)
Round 2 - Pick C5 : Texas Rangers - Chance to win pick = 59.1 (130/220)
Round 2 - Pick C6 : Atlanta Braves - Chance to win pick = 66.7 (60/90)
Round 2 - Pick C7 : Minnesota Twins - Chance to win pick = 100.0 (30/30)
