In [4]:
import math
import numpy as np
import itertools
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from collections import OrderedDict
import pickle

# Simulating the draw

There are two ways to simulate the draw : with or without order.

The first method is quicker and easier, but it ignores the effect that the drawing order has on the probability of each draw.
It is still very useful for enumerating the possible draws and the unique groups.

First we define what a team and a group are, and then we define the teams and pools for our example. I assume that UOL qualifies from play-ins along with the major-region teams.

In [5]:
class team:
    """Plain record describing one team taking part in the draw."""

    def __init__(self, name, region, rank, pool, tier=None):
        """
        Store the attributes of a team
        Args :
            - name (str) : team name
            - region (str) : team region
            - rank (int) : local rank in their region
            - pool (int) : pool the team is drawn from (1 to 4 in this notebook)
            - tier : optional extra label, defaults to None
        """
        self.name, self.region = name, region
        self.rank, self.pool = rank, pool
        self.tier = tier
        
class group():
    """A (possibly partial) group: an ordered list of team objects."""

    def __init__(self, teams):
        """
        Class representing a group
        Args :
            - teams : list of teams (objects exposing `.name` and `.region`)
        """
        self.teams = teams

    def names(self):
        """Return the team names as a tuple, in the order the teams were added."""
        return tuple(member.name for member in self.teams)

    def regions(self):
        """Return the sorted unique regions present in the group (numpy array)."""
        return np.unique([member.region for member in self.teams])

    def len(self):
        """Return the number of teams currently in the group."""
        return len(self.teams)

    def isvalid(self):
        """
        Asserts whether a group is valid
        Current rules:
            - no two teams from same region
        """
        regions = [member.region for member in self.teams]
        return len(regions) == len(set(regions))

    def contains(self, teams):
        """
        Asserts whether all the given teams are in this group (compared by name).
        An empty `teams` list is always contained.
        """
        # Build the lookup once per call instead of once per candidate team:
        # this method sits in the innermost loop of the ordered draw.
        member_names = {member.name for member in self.teams}
        return all(candidate.name in member_names for candidate in teams)
    


In [6]:
# TEAMS

# Arguments are: name, region, rank inside the region, pool (1-4).

# Pool 1
G2 = team('G2', 'LEC', 1, 1)
TES = team('TES', 'LPL', 1, 1)
DWG = team('DWG', 'LCK', 1, 1)
TSM = team('TSM', 'LCS', 1, 1)

# Pool 2
JDG = team('JDG', 'LPL', 2, 2)
SNG = team('SNG', 'LPL', 3, 2)
FNC = team('FNC', 'LEC', 2, 2)
DRX = team('DRX', 'LCK', 2, 2)

# Pool 3
ROG = team('ROG', 'LEC', 3, 3)
GEN = team('GEN', 'LCK', 3, 3)
# FLY was given rank 3, the same as TL, leaving no LCS team at rank 2.
FLY = team('FLY', 'LCS', 2, 3)
MCX = team('MCX', 'PCS', 1, 3)

# Pool 4 (teams coming from play-ins)
TL = team('TL', 'LCS', 3, 4)
LGD = team('LGD', 'LPL', 4, 4)
MAD = team('MAD', 'LEC', 4, 4)
UOL = team('UOL', 'CIS', 1, 4)
PSG = team('PSG', 'PCS', 2, 4)



In [7]:
# draw dimensions: number of pools, and number of teams inside each pool
npool = 4
team_per_pool = 4

pool1 = [TES, G2, DWG, TSM]
pool2 = [JDG, SNG, FNC, DRX]
pool3 = [ROG, GEN, FLY, MCX]
pool4 = [TL, LGD, MAD, UOL]
# alternative pool 4 kept from the original notebook:
# pool4 = [TL, JDG, MAD, PSG]

allteams = [*pool1, *pool2, *pool3, *pool4]

# every possible ordering of each pool (lazy iterators, consumed by the product below)
pool1_draws, pool2_draws, pool3_draws, pool4_draws = map(
    itertools.permutations, (pool1, pool2, pool3, pool4)
)

# a complete draw is one ordering per pool
all_draws = list(itertools.product(pool1_draws, pool2_draws, pool3_draws, pool4_draws))

## Simultaneous draw

We simulate all possible groups, and then pick out the valid ones.

With the current settings (4 pools, 4 teams per pool, LGD, MAD, TL, UOL are the ones getting out of playins), we have :
- 1440 valid group draws
- 40 unique groups

In [8]:
class unordered_draw():
    """A complete draw seen as a plain list of groups (drawing order ignored)."""

    def __init__(self, groups):
        """
        Class representing a draw
        Args :
            - groups : list of groups
        """
        self.groups = groups

    def isvalid(self):
        """
        A draw is valid when every one of its groups is valid
        """
        return all(candidate.isvalid() for candidate in self.groups)

    def show(self):
        """Print the team names of each group, one group per line."""
        for candidate in self.groups:
            print(candidate.names())

    def contains(self, teams):
        """
        True when at least one group holds all of the given teams
        """
        return any(candidate.contains(teams) for candidate in self.groups)

    def result(self):
        """
        Final groups as name tuples, sorted alphabetically on the first seed so that
        identical draws whose groups are merely swapped share the same id
        """
        name_tuples = [candidate.names() for candidate in self.groups]
        name_tuples.sort(key=lambda names: names[0])
        return name_tuples

In [9]:
valid_draws = []

for draw in tqdm(all_draws):
    # `draw` holds one ordering per pool; the j-th group is made of the j-th team of
    # every pool. zip(*draw) transposes pools into groups and stays correct when the
    # number of pools differs from the number of teams per pool (the previous indexing
    # had the two ranges swapped and only worked because both are 4).
    possible_draw = unordered_draw([group(list(members)) for members in zip(*draw)])

    if possible_draw.isvalid():
        valid_draws.append(possible_draw)

print('With pools of', team_per_pool, 'and', npool, 'pools:')
print(math.factorial(team_per_pool), 'different draws of a pool')
print(math.factorial(team_per_pool)**npool, 'different group draws')
print(len(valid_draws), 'valid group draws')

100%|██████████| 331776/331776 [00:07<00:00, 44461.04it/s]

With pools of 4 and 4 pools:
24 different draws of a pool
331776 different group draws
1440 valid group draws





In [10]:
# number of valid draws in which each individual group appears
group_counts = {}
for draw in valid_draws:
    for names in draw.result():
        group_counts[names] = group_counts.get(names, 0) + 1

# sort just for better visualisation
valid_groups = OrderedDict(sorted(group_counts.items()))
print(len(valid_groups), 'unique groups')

unordered_groups = pd.DataFrame(
    data=valid_groups.values(),
    columns=['count'],
    index=pd.MultiIndex.from_tuples(valid_groups.keys()),
)
unordered_groups.to_csv('groups_unordered.csv')

40 unique groups


## Ordered draw

We hypothesize that not all draws have the same probability, because of the order in which teams are drawn. 

This time we will draw pool by pool in a certain order, and add teams to the groups if it is possible (ie there exists a future valid draw). If not, we try to add them to the next group and repeat.

The question is how to calculate if a draw is valid : simply using regions is not enough as it might create conflict later (for example see this hilarious video of TSM Loco and Reginald making this mistake in 2015) https://youtu.be/6JRWRYeGomc

My first method was to simply scan through all valid draws (determined in the previous part) and check whether, after adding a team, the partial groups still match the beginning of a valid draw. With a simple comparison the runtime is 5+ hours; adding a region check cut it down to 1 hour.

We could probably go even faster because a lot of groups are repeated in the possible groups, and most of the time all the groups are decided before the final draws (so tree methods could speed things up a lot), but I was fine with this runtime.

We keep the results in a table of groups, so we know how likely each group is, plus a table of complete draws so we can get region stats (e.g. the LCK vs LCS probability).

In [8]:
class ordered_draw():
    """
    Draw performed team by team: each drawn team goes to the first group that
    can take it while keeping at least one fully valid draw reachable.
    """

    def __init__(self, valid_draws, n_groups=4):
        """
        Args :
            - valid_draws : list of complete valid draws (objects exposing `.groups`)
              used to check that a partial assignment can still be completed
            - n_groups (int) : number of groups to fill (default 4)
        """
        self.valid_draws = valid_draws
        self.groups = [group([]) for _ in range(n_groups)]

        # drawing round : all groups contains at max self.round teams
        self.round = 0

    def add_team(self, team):
        """
        Put `team` in the first group where it fits.

        A group is skipped when it already received a team this round, or when it
        already holds a team of the same region. Otherwise the team is accepted as
        soon as one draw of `valid_draws` matches all partial groups, position by
        position.

        Raises :
            RuntimeError if no group can take the team (should be impossible, since
            every earlier placement kept a valid draw reachable).
        """
        for group2add, current in enumerate(self.groups):

            # if we already added a team this round
            if current.len() == self.round:
                continue

            # fast check without looking at possibilities based on region
            if current.len() > 0 and team.region in current.regions():
                continue

            # we check if we can add this team and get a valid draw
            hypothetical_groups = self.groups.copy()
            hypothetical_groups[group2add] = group(current.teams + [team])

            # any/all short-circuit: stop at the first mismatching group of a draw,
            # and at the first draw that matches every partial group
            still_reachable = any(
                all(
                    draw.groups[i].contains(semigroup.teams)
                    for i, semigroup in enumerate(hypothetical_groups)
                )
                for draw in self.valid_draws
            )
            if still_reachable:
                self.groups = hypothetical_groups
                return None

            # if no valid draws are found we try with the next group

        # Fail loudly instead of printing and carrying on with an incomplete draw.
        raise RuntimeError(
            "you shouldnt be here: no group can take team %r in round %r" % (team.name, self.round)
        )

    def show(self):
        """Print the team names of each group, one group per line."""
        for current in self.groups:
            print(current.names())

    def result(self):
        """
        Find the final groups : we sort by alphabetical order of the first seed so same draws (but with team swapped)
        have the same id
        """
        # every group must have received exactly one team per round
        for current in self.groups:
            assert current.len() == self.round

        return sorted((current.names() for current in self.groups), key=lambda names: names[0])

In [15]:
# ordered_groups : one row per unique group, for individual counts
# groups_df : count per complete draw, to get higher level information
#             (ex: in how many cases we get all 3 lck with all 3 lcs)
ordered_groups = pd.DataFrame(columns=['count'], index=pd.MultiIndex.from_tuples(valid_groups.keys()))
ordered_groups[:] = 0

groups_df = {}

for pool_draws in tqdm(all_draws):

    t = ordered_draw(valid_draws)

    # rounds are 1-based: round k places the teams of pool k, in drawn order
    for round_number, pool_draw in enumerate(pool_draws, start=1):
        t.round = round_number
        for drawn_team in pool_draw:
            t.add_team(drawn_team)

    # add the count of groups to the df
    res = t.result()
    for names in res:
        ordered_groups.loc[names, 'count'] += 1

    draw_key = tuple(res)
    groups_df[draw_key] = groups_df.get(draw_key, 0) + 1

# context manager so the pickle file is flushed and closed
with open('draws_ordered.pkl', 'wb') as pkl_file:
    pickle.dump(groups_df, pkl_file)
ordered_groups.to_csv('groups_ordered.csv')


100%|██████████| 331776/331776 [4:28:15<00:00, 20.61it/s]  


# Observations

## Groups

Memes come true: the most likely group has TSM and Rogue in it. 

In [11]:
def _load_group_counts(csv_path):
    """Read a group-count CSV and index it by its four pool columns."""
    pool_columns = ['pool1', 'pool2', 'pool3', 'pool4']
    counts = pd.read_csv(csv_path)
    counts.columns = pool_columns + ['count']
    return counts.set_index(pool_columns)


ordered_groups = _load_group_counts('groups_ordered.csv')
unordered_groups = _load_group_counts('groups_unordered.csv')

In [15]:
# most likely groups: share of all ordered draws in which each group appears
n_draws = len(all_draws)
ordered_groups['probability'] = ordered_groups['count'].div(n_draws)
print('Most likely groups')
most_frequent_first = ordered_groups.sort_values(by='count', ascending=False)
display(most_frequent_first.head(10))

Most likely groups


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,probability
pool1,pool2,pool3,pool4,Unnamed: 4_level_1,Unnamed: 5_level_1
TSM,DRX,ROG,LGD,78336,0.236111
TES,FNC,GEN,TL,73848,0.222584
TES,DRX,ROG,TL,62664,0.188874
TSM,FNC,GEN,LGD,49104,0.148003
G2,DRX,FLY,LGD,48480,0.146123
DWG,FNC,FLY,LGD,48480,0.146123
TES,DRX,FLY,MAD,44352,0.133681
G2,SNG,GEN,TL,43536,0.131221
G2,JDG,GEN,TL,43536,0.131221
DWG,SNG,FLY,MAD,42144,0.127025


In [16]:
# least likely groups: same table, smallest counts first
print('Least likely groups')
least_frequent_first = ordered_groups.sort_values(by='count', ascending=True)
display(least_frequent_first.head(10))

Least likely groups


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,probability
pool1,pool2,pool3,pool4,Unnamed: 4_level_1,Unnamed: 5_level_1
DWG,JDG,ROG,UOL,14184,0.042752
DWG,SNG,ROG,UOL,14184,0.042752
G2,SNG,MCX,UOL,14412,0.043439
G2,SNG,MCX,TL,14412,0.043439
G2,JDG,MCX,UOL,14412,0.043439
G2,JDG,MCX,TL,14412,0.043439
TSM,SNG,MCX,MAD,15408,0.046441
TSM,JDG,MCX,MAD,15408,0.046441
G2,SNG,GEN,UOL,16056,0.048394
G2,JDG,GEN,UOL,16056,0.048394


## Individual matchups

For a pair of two teams, what is the probability they meet?

In [18]:
def n_matchup(team1, team2, groups, proba=False, n_groups=4):
    """
    Count how often two teams end up in the same group.

    Args :
        - team1, team2 : objects exposing `.name` and a 1-based `.pool`; the pool
          selects which index level of `groups` holds the team name
        - groups (DataFrame) : one row per group, indexed by one level per pool,
          with a 'count' column
        - proba (bool) : if True return the share of draws instead of the raw count
        - n_groups (int) : number of groups per draw; the counts sum to
          n_groups * number of draws, hence the rescaling (default 4)

    Returns :
        int count when `proba` is False, float share otherwise
    """
    in_team1_group = groups.index.get_level_values(team1.pool - 1) == team1.name
    in_team2_group = groups.index.get_level_values(team2.pool - 1) == team2.name
    shared_count = groups.loc[in_team1_group & in_team2_group, 'count'].sum()

    if proba:
        # Normalise by the 'count' column only: summing the whole frame returns one
        # value per column and breaks as soon as `groups` has extra columns.
        return float(shared_count / groups['count'].sum() * n_groups)
    return int(shared_count)

In [26]:
# Build the whole table in one go instead of growing the frame cell by cell.
matchup_labels = [member.name for member in allteams]
matchup_values = [
    [
        # a team cannot meet itself: leave the diagonal empty
        np.nan if team1 is team2 else 100 * n_matchup(team1, team2, unordered_groups, proba=True)
        for team2 in allteams
    ]
    for team1 in allteams
]
matchups_df = pd.DataFrame(matchup_values, index=matchup_labels, columns=matchup_labels)

# Use the full option key: the short 'precision' pattern is ambiguous on pandas
# versions that also define 'styler.format.precision'.
pd.set_option('display.precision', 1)
print('Matchup percentage')
# explicit format so the Styler shows one decimal whatever the pandas version
matchups_df.style.format('{:.1f}').highlight_max(color='darkorange', axis=1)

Matchup percentage


Unnamed: 0,TES,G2,DWG,TSM,JDG,SNG,FNC,DRX,ROG,GEN,FLY,MCX,TL,LGD,MAD,UOL
TES,,0.0,0.0,0.0,0.0,0.0,43.3,56.7,33.3,23.3,23.3,20.0,43.3,0.0,23.3,33.3
G2,0.0,,0.0,0.0,38.3,38.3,0.0,23.3,0.0,36.7,30.0,33.3,36.7,23.3,0.0,40.0
DWG,0.0,0.0,,0.0,38.3,38.3,23.3,0.0,33.3,0.0,46.7,20.0,20.0,23.3,43.3,13.3
TSM,0.0,0.0,0.0,,23.3,23.3,33.3,20.0,33.3,40.0,0.0,26.7,0.0,53.3,33.3,13.3
JDG,0.0,38.3,38.3,23.3,,0.0,0.0,0.0,23.3,28.3,21.7,26.7,28.3,0.0,38.3,33.3
SNG,0.0,38.3,38.3,23.3,0.0,,0.0,0.0,23.3,28.3,21.7,26.7,28.3,0.0,38.3,33.3
FNC,43.3,0.0,23.3,33.3,0.0,0.0,,0.0,0.0,43.3,23.3,33.3,23.3,56.7,0.0,20.0
DRX,56.7,23.3,0.0,20.0,0.0,0.0,0.0,,53.3,0.0,33.3,13.3,20.0,43.3,23.3,13.3
ROG,33.3,0.0,33.3,33.3,23.3,23.3,0.0,53.3,,0.0,0.0,0.0,40.0,20.0,0.0,40.0
GEN,23.3,36.7,0.0,40.0,28.3,28.3,43.3,0.0,0.0,,0.0,0.0,40.0,20.0,20.0,20.0


In [28]:
teamnames = [member.name for member in allteams]

# all (x, y) pairs of teams, flattened row by row
x, y = np.meshgrid(allteams, allteams)
x_flat, y_flat = x.ravel(), y.ravel()

# share of ordered draws in which each pair lands in the same group
n_draws = len(all_draws)
matchups = np.array(
    [n_matchup(team_x, team_y, ordered_groups) / n_draws for team_x, team_y in zip(x_flat, y_flat)],
    dtype=float,
)

source = pd.DataFrame({
    'x': [member.name for member in x_flat],
    'y': [member.name for member in y_flat],
    'Proba': matchups,
})

heatmap = alt.Chart(source).mark_rect().encode(x='x:O', y='y:O', color='Proba:Q')
heatmap

# Luck of the draw

Given a subjective tier list, which teams have the best chance to get lucky and have an easier ride to playoffs?


In [85]:
# subjective tier list: score -> teams with that score
strength_tiers = {
    20: [TES],
    15: [DWG, G2, JDG],
    10: [FNC, SNG, DRX, GEN],
    7: [ROG, LGD, MAD, TSM],
    5: [FLY, TL],
    # NOTE(review): MCX is listed under both 2 and 0; the later tier wins when the
    # mapping is reversed, so MCX is scored 0. Confirm which one is intended.
    2: [MCX],
    0: [UOL, MCX],
}

# reverse it: team name -> score (kept in a separate name so the cell can be re-read
# without `strength` changing meaning halfway through)
strength = {member.name: score for score, members in strength_tiers.items() for member in members}

# total strength of each group (the lookup used the misspelled, undefined name `strenght`)
for idx in ordered_groups.index:
    ordered_groups.loc[idx, 'group strengh'] = np.sum([strength[i] for i in idx])

display(ordered_groups.sort_values(by='group strengh', ascending=True).head(5))

display(ordered_groups.sort_values(by='group strengh', ascending=False).head(5))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,group strengh
pool1,pool2,pool3,pool4,Unnamed: 4_level_1,Unnamed: 5_level_1
TSM,SNG,ROG,UOL,36072,24.0
TSM,FNC,MCX,LGD,29232,24.0
TSM,SNG,MCX,MAD,15408,24.0
G2,SNG,MCX,UOL,14412,25.0
TSM,JDG,ROG,UOL,36072,29.0


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,count,group strengh
pool1,pool2,pool3,pool4,Unnamed: 4_level_1,Unnamed: 5_level_1
TES,FNC,GEN,TL,73848,45.0
G2,JDG,GEN,TL,43536,45.0
TES,DRX,FLY,MAD,44352,42.0
DWG,JDG,FLY,MAD,42144,42.0
TES,DRX,ROG,TL,62664,42.0


In [90]:
# For every team: average total strength of the three other teams in its group,
# weighted by how often each group occurs in the ordered draw.
rest_strength = {}

# The loop variable is deliberately not called `team`: at notebook level that would
# rebind the `team` class to an instance and break any later `team(...)` call.
for member in allteams:
    member_groups = ordered_groups[ordered_groups.index.get_level_values(member.pool - 1) == member.name]

    # calculate the strength of the remaining teams
    # we weight the average with the probability of groups happening
    weighted_total = 0
    for idx, count in zip(member_groups.index, member_groups['count']):
        remaining_team_strength = np.sum([strength[x] for x in idx if member.name != x])
        weighted_total += remaining_team_strength * count

    rest_strength[member.name] = weighted_total / member_groups['count'].sum()

# build the frame once instead of growing it row by row
avg_group_strenght = pd.DataFrame(
    {'strength of rest': list(rest_strength.values())},
    index=list(rest_strength.keys()),
)
avg_group_strenght.sort_values(by='strength of rest', ascending=True)

Unnamed: 0,strength of rest
TES,19.8
G2,20.8
DWG,21.8
JDG,22.7
SNG,22.7
TSM,23.6
LGD,25.8
FNC,26.0
DRX,26.6
ROG,27.5
