# UEFA Champions League group stage draw simulator

This notebook simulates the UEFA Champions League group stage draw procedure.

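This notebook relies on a simple constraint solver to decide on the possible groups that each club can be drawn into. In addition to the rule that clubs from the same pot are drawn into different groups, the solver enforces the following restrictions: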

1. Clubs from the same association need to be drawn into different groups;
2. Clubs that are paired based on TV audiences need to be drawn into groups that play on different days (groups A–D versus groups E–H);
3. Clubs from given associations need to be drawn into different groups (e.g., Ukraine and Belarus).

If you spot any mistakes or have suggestions to improve the approach, then please [get in touch](https://twitter.com/JanVanHaaren) with me.

**References**
* [UEFA Champions League group stage draw pots confirmation](https://www.uefa.com/uefachampionsleague/news/0278-15f3603078f7-909c4310d18c-1000--champions-league-group-stage-draw-pots-confirmed/)
* [UEFA Champions League group stage draw](https://www.uefa.com/uefachampionsleague/draws/2023/2001673/)
* [UEFA Champions League regulations](https://documents.uefa.com/r/Regulations-of-the-UEFA-Champions-League-2022/23-Online)

In [None]:
# Detect whether the notebook is running on Google Colab.
# `get_ipython` only exists inside an IPython/Jupyter session, so fall back to
# False when this code is executed as a plain Python script.
try:
    in_colab = 'google.colab' in str(get_ipython())
except NameError:
    in_colab = False

In [None]:
# On Google Colab, install the packages this notebook imports
# (numpy, pandas and python-constraint are pinned to fixed versions).
if in_colab:
    !pip install numpy==1.21.6 pandas==1.3.5 python-constraint==1.4.0 tqdm

In [None]:
import random

from collections import defaultdict
from collections import namedtuple
from itertools import product

import numpy as np
import pandas as pd

from constraint import Problem
from constraint import AllDifferentConstraint
from constraint import FunctionConstraint
from tqdm.notebook import tqdm

In [None]:
# Seed Python's random module so that the random group choices made by the
# simulation are repeatable.
random.seed(42)

In [None]:
# Always print NumPy arrays in full instead of summarizing large arrays.
np.set_printoptions(threshold=np.inf)

In [None]:
# Let pandas display up to 32 rows and 32 columns so that a table covering
# all 32 clubs is shown without truncation.
pd.options.display.min_rows = 32
pd.options.display.max_rows = 32
pd.options.display.max_columns = 32

# Provide participants and restrictions

In [None]:
# A club taking part in the draw: its name, its association and its draw pot.
Participant = namedtuple('Participant', 'club_name country_name draw_pot')

In [None]:
# The 32 participating clubs with their association and draw pot (eight clubs per pot).
# The order of this list determines the order in which clubs are processed and displayed.
participants = [
    # Pot 1
    Participant('Real Madrid', 'Spain', 1),  # Champions League winner
    Participant('Eintracht Frankfurt', 'Germany', 1),  # Europa League winner
    Participant('Manchester City', 'England', 1),
    Participant('Milan', 'Italy', 1),
    Participant('Bayern Munich', 'Germany', 1),
    Participant('Paris Saint-Germain', 'France', 1),
    Participant('Porto', 'Portugal', 1),
    Participant('Ajax', 'Netherlands', 1),

    # Pot 2
    Participant('Liverpool', 'England', 2),
    Participant('Chelsea', 'England', 2),
    Participant('Barcelona', 'Spain', 2),
    Participant('Juventus', 'Italy', 2),
    Participant('Atlético Madrid', 'Spain', 2),
    Participant('Sevilla', 'Spain', 2),
    Participant('RB Leipzig', 'Germany', 2),
    Participant('Tottenham Hotspur', 'England', 2),

    # Pot 3
    Participant('Borussia Dortmund', 'Germany', 3),
    Participant('Red Bull Salzburg', 'Austria', 3),
    Participant('Shakhtar Donetsk', 'Ukraine', 3),
    Participant('Inter Milan', 'Italy', 3),
    Participant('Napoli', 'Italy', 3),
    Participant('Benfica', 'Portugal', 3),
    Participant('Sporting CP', 'Portugal', 3),
    Participant('Bayer Leverkusen', 'Germany', 3),

    # Pot 4
    Participant('Rangers', 'Scotland', 4),
    Participant('Dinamo Zagreb', 'Croatia', 4),
    Participant('Marseille', 'France', 4),
    Participant('Copenhagen', 'Denmark', 4),
    Participant('Club Brugge', 'Belgium', 4),
    Participant('Celtic', 'Scotland', 4),
    Participant('Viktoria Plzeň', 'Czech Republic', 4),
    Participant('Maccabi Haifa', 'Israel', 4),
]

In [None]:
# Pairs of clubs from the same association that are paired based on TV audiences.
# The two clubs of a pair must be drawn into groups that play on different days.
club_pairings = [
    
    # Spain
    ('Real Madrid', 'Barcelona'),
    ('Atlético Madrid', 'Sevilla'),

    # Germany
    ('Bayern Munich', 'Borussia Dortmund'),
    ('Eintracht Frankfurt', 'RB Leipzig'),

    # England
    ('Manchester City', 'Liverpool'),
    ('Chelsea', 'Tottenham Hotspur'),
    
    # Italy
    ('Milan', 'Napoli'),
    ('Juventus', 'Inter Milan'),

    # France
    ('Paris Saint-Germain', 'Marseille'),
    
    # Portugal
    ('Porto', 'Benfica'),
    
    # Scotland
    ('Rangers', 'Celtic'),
]

In [None]:
# Pairs of associations whose clubs may not be drawn into the same group.
# A pair only has an effect when both associations have a club in `participants`;
# with the current participants, Belarus and Russia have none.
country_ban_pairings = [
    ('Ukraine', 'Belarus'),
    ('Ukraine', 'Russia'),
]

In [None]:
# Map each group letter to the half of the draw it belongs to:
# groups A-D form half 1 and groups E-H form half 2.
# Groups in the same half play on the same day.
group_half_assignment = {
    group: half
    for half, half_groups in enumerate(('ABCD', 'EFGH'), start=1)
    for group in half_groups
}

# Build auxiliary data structures

In [None]:
# Lookup tables derived from `participants`, all keyed or valued by club name.
club_pot_mapping = {p.club_name: p.draw_pot for p in participants}  # dict[club, pot]
club_country_mapping = {p.club_name: p.country_name for p in participants}  # dict[club, country]

pot_clubs_mapping = defaultdict(list)  # dict[pot, list[club]]
country_clubs_mapping = defaultdict(list)  # dict[country, list[club]]

# Group the club names by pot and by association, keeping the participants' order.
for participant in participants:
    pot_clubs_mapping[participant.draw_pot].append(participant.club_name)
    country_clubs_mapping[participant.country_name].append(participant.club_name)

In [None]:
# All club names, in the order in which they were inserted into `club_country_mapping`.
clubs = [*club_country_mapping]
num_clubs = len(clubs)

In [None]:
# Expand the banned association pairs into banned (club, club) pairs.
# Associations without participating clubs contribute no pairs.
country_ban_clubs = []
for country1, country2 in country_ban_pairings:
    country1_clubs = country_clubs_mapping.get(country1, [])
    country2_clubs = country_clubs_mapping.get(country2, [])
    country_ban_clubs.extend(product(country1_clubs, country2_clubs))

In [None]:
# Group letters, in the order in which they are listed in `group_half_assignment`.
groups = [*group_half_assignment]

In [None]:
# Draw pots, in the order in which they first appear among the participants.
pots = [*pot_clubs_mapping]

# Simulate one draw

Run the draw procedure once to obtain a possible allocation of clubs to groups.

In [None]:
def different_day(group1, group2):
    """Return True if the two groups belong to different halves of the draw.

    The half of each group is looked up in `group_half_assignment`; a group that
    is missing from that mapping is treated as having half `None`.
    """
    return group_half_assignment.get(group1) != group_half_assignment.get(group2)

In [None]:
def produce_draw(assignment=None):
    """Produce a possible draw, optionally given a partial assignment of clubs to groups.

    The problem consists of one variable per club (32 in the default setup), each of which
    can take one of the possible groups (8 in the default setup) as its value.

    The task is to find an assignment of groups to clubs such that all constraints are satisfied:

    * clubs in the same pot are drawn into different groups;
    * clubs from the same association are drawn into different groups;
    * clubs from associations that cannot face each other are drawn into different groups;
    * clubs that are paired based on TV audiences are drawn into groups that play on different days.

    Note: The problem can be defined in many different ways, each having advantages and disadvantages with
    respect to the ability to enforce the constraints and to break the inherent symmetries.

    Args:
        assignment: Optional mapping from club name to the group that the club has already
            been drawn into. Clubs that are missing from the mapping can take any group.

    Returns:
        The solution returned by the solver (a mapping from each club to a group), or
        ``None`` if the constraints cannot be satisfied.
    """
    assignment = assignment or {}

    problem = Problem()

    # Set possible groups for each club. If available, use the given assignment. Otherwise, use all groups.
    for club in clubs:
        if club in assignment:
            # The solver expects a sequence of candidate values. Wrap the single group in a
            # list so that it is used as one value; a bare string would be split into its
            # characters, which only works by accident for one-letter group names.
            problem.addVariable(club, [assignment[club]])
        else:
            problem.addVariable(club, groups)

    # Clubs in the same pot need to be drawn into different groups.
    for pot_clubs in pot_clubs_mapping.values():
        # The constraint is vacuous for fewer than two clubs, so skip it in that case.
        if len(pot_clubs) > 1:
            problem.addConstraint(AllDifferentConstraint(), pot_clubs)

    # Clubs from the same association need to be drawn into different groups.
    for country_clubs in country_clubs_mapping.values():
        if len(country_clubs) > 1:
            problem.addConstraint(AllDifferentConstraint(), country_clubs)

    # Clubs from associations that cannot face each other need to be drawn into different groups.
    for country_ban_club_pair in country_ban_clubs:
        problem.addConstraint(AllDifferentConstraint(), country_ban_club_pair)

    # Clubs that are paired based on TV audiences need to play on different days.
    # Groups A, B, C and D play on the same day. Groups E, F, G and H play on the same day.
    for club1, club2 in club_pairings:
        problem.addConstraint(FunctionConstraint(different_day), (club1, club2))

    return problem.getSolution()

In [None]:
# Store the hashes of partial assignments that can result in a valid draw
# to avoid solving the same constraint satisfaction problem multiple times.
# The cache is shared by all simulations and only ever grows. Re-run this cell
# after changing the participants or the restrictions, because the cached
# entries are only meaningful for the constraints they were computed under.
valid_assignment_hashes = set()

In [None]:
def run_simulation(partial_draw=None):
    """Simulate the draw procedure to produce a draw that satisfies the constraints.

    Pots are processed in the order of `pot_clubs_mapping` and clubs in the order in which
    they are listed within their pot. For each club that has not been drawn yet, every
    remaining group of the pot is checked with `produce_draw`, and one of the groups that
    still allows a complete valid draw is picked at random.

    Partial assignments that were found to be completable are remembered in
    `valid_assignment_hashes`, so this function adds entries to that module-level cache.

    Args:
        partial_draw: Optional mapping from club name to group for clubs that have already
            been drawn. The mapping itself is not modified.

    Returns:
        A mapping from club name to group containing the pre-assigned clubs and all
        clubs drawn by the simulation.

    Raises:
        IndexError: If no group is possible for a club, which means that the partial
            draw (or the constraint set itself) does not admit a valid complete draw.
    """
    draw = dict(partial_draw) if partial_draw else {}

    for pot_clubs in pot_clubs_mapping.values():
        # Groups already occupied by pre-assigned clubs from this pot can never be valid
        # for the remaining clubs of the pot (produce_draw requires clubs from the same pot
        # to be in different groups), so exclude them up front instead of spending a
        # solver call on each of them.
        taken_groups = {draw[club] for club in pot_clubs if club in draw}
        pot_groups = [group for group in groups if group not in taken_groups]

        # The pot_clubs list could be randomly shuffled to mimic the actual draw procedure.
        # NOTE(review): the original author assumes that a random shuffle does not affect
        # the resulting probabilities; this has not been verified here.
        for pot_club in pot_clubs:
            if pot_club in draw:
                continue

            possible_groups = []
            for group in pot_groups:
                assignment = {**draw, pot_club: group}

                assignment_hash = hash(frozenset(assignment.items()))
                if assignment_hash in valid_assignment_hashes:
                    possible_groups.append(group)
                elif produce_draw(assignment):
                    possible_groups.append(group)
                    valid_assignment_hashes.add(assignment_hash)

            if not possible_groups:
                # Same exception type that random.choice raises for an empty sequence,
                # but with a message that points at the actual cause.
                raise IndexError(
                    f'No valid group left for {pot_club!r}: the current allocation '
                    'cannot be completed into a draw that satisfies all constraints.'
                )

            selected_group = random.choice(possible_groups)
            draw[pot_club] = selected_group
            pot_groups.remove(selected_group)

    return draw

In [None]:
# Simulate a single complete draw without any pre-assigned clubs.
draw = run_simulation()

In [None]:
# Uncomment the following line to display the draw
# draw

# Display draw result

In [None]:
def print_draw(draw):
    """Print the given draw group by group, listing one club per pot.

    `draw` maps club names to groups. For every group in `groups`, a header is
    printed, followed by one line per pot in `pots` showing the club of that pot
    and its association. A pot without a club in the group is shown as
    'None (None)'.
    """
    separator = '-----------------------------------'
    placeholder = ('None', 'None')

    # Index the clubs by their (group, pot) slot.
    slots = {}
    for club, group in draw.items():
        slot = (group, club_pot_mapping.get(club))
        slots[slot] = (club, club_country_mapping.get(club))

    for group in groups:
        print(separator)
        print(f'-- Group {group}')
        print(separator)
        for pot in pots:
            club, country = slots.get((group, pot), placeholder)
            print(f'{pot}. {club} ({country})')
        print()

In [None]:
# Print the simulated draw group by group.
print_draw(draw)

# Simulate many draws

Run the draw procedure many times to obtain the probability for each pair of clubs to end up in the same group.

In [None]:
def initialize_grid():
    """Return a square array of zeros with one row and one column per club."""
    return np.zeros(shape=(num_clubs, num_clubs))

In [None]:
def update_grid(grid, draw):
    """Increment, in place, every grid cell whose two clubs share a group in the draw.

    Rows and columns follow the order of `clubs`. Each club is also compared with
    itself, so the diagonal is incremented for every draw.
    """
    club_groups = [draw[club] for club in clubs]

    for row, row_group in enumerate(club_groups):
        for col, col_group in enumerate(club_groups):
            if row_group == col_group:
                grid[row][col] += 1

In [None]:
def run_simulations(num_simulations, partial_draw=None):
    """Simulate many draws and return how often each pair of clubs shares a group.

    Runs `num_simulations` draws, each optionally starting from `partial_draw`,
    counts for every pair of clubs how often they end up in the same group, and
    divides the counts by the number of simulations.

    Returns a DataFrame whose rows and columns are both labelled with the club names.
    """
    counts = initialize_grid()

    for _ in tqdm(range(num_simulations)):
        update_grid(counts, run_simulation(partial_draw))

    # Turn the counts into relative frequencies.
    counts /= num_simulations

    return pd.DataFrame(counts, index=clubs, columns=clubs)

In [None]:
# Number of draws to simulate for the probability table.
num_simulations = 1_000

In [None]:
# Estimate the same-group frequencies for every pair of clubs, without any pre-assigned clubs.
table = run_simulations(num_simulations)

In [None]:
# Uncomment the following line to display the probability table
# table

# Simulate many draws given current draw

Run the draw procedure many times to obtain the probability for each pair of clubs to end up in the same group, given a partial allocation of clubs to groups, for instance, during the actual draw.

In [None]:
def select_assigned_clubs(partial_draw):
    """Return the entries of `partial_draw` whose value is one of the known groups.

    Clubs mapped to anything else (for example an empty string) are dropped.
    """
    assigned = {}
    for club, group in partial_draw.items():
        if group not in groups:
            continue
        assigned[club] = group

    return assigned

In [None]:
# Assign a group letter (A through H) to a club to allocate the club to the specified group.
# Leave the value as an empty string for clubs that have not been drawn yet; entries whose
# value is not a group letter are dropped by select_assigned_clubs.
current_draw = {
    'Real Madrid': 'A',
    'Eintracht Frankfurt': 'E',
    'Manchester City': 'B',
    'Milan': 'F',
    'Bayern Munich': 'C',
    'Paris Saint-Germain': 'G',
    'Porto': 'D',
    'Ajax': 'H',
    'Liverpool': 'H',
    'Chelsea': 'D',
    'Barcelona': 'G',
    'Juventus': 'C',
    'Atlético Madrid': 'F',
    'Sevilla': 'B',
    'RB Leipzig': 'A',
    'Tottenham Hotspur': 'E',
    'Borussia Dortmund': '',
    'Red Bull Salzburg': '',
    'Shakhtar Donetsk': '',
    'Inter Milan': '',
    'Napoli': '',
    'Benfica': '',
    'Sporting CP': '',
    'Bayer Leverkusen': '',
    'Rangers': '',
    'Dinamo Zagreb': '',
    'Marseille': '',
    'Copenhagen': '',
    'Club Brugge': '',
    'Celtic': '',
    'Viktoria Plzeň': '',
    'Maccabi Haifa': '', 
}

In [None]:
# Number of draws to simulate when starting from the current draw.
num_simulations = 1_000

In [None]:
# Keep only the clubs that have already been assigned a group letter.
partial_draw = select_assigned_clubs(current_draw)

In [None]:
# Estimate the same-group frequencies given the clubs that have already been drawn.
# This replaces the table computed earlier without pre-assigned clubs.
table = run_simulations(
    num_simulations,
    partial_draw,
)

In [None]:
# Uncomment the following line to display the probability table
# table

# Analyze draw results

In [None]:
def filter_table(table, club_names, pots):
    """Select a slice of the probability table.

    The rows are restricted to the clubs of the given pot or pots (a single pot
    number or an iterable of pot numbers) and the columns to `club_names`. When
    exactly one club name is given, the rows are sorted by that club's column in
    descending order.
    """
    pot_list = [pots] if isinstance(pots, int) else pots

    # Collect the clubs of the requested pots, pot by pot.
    row_clubs = []
    for pot in pot_list:
        row_clubs += pot_clubs_mapping.get(pot)

    rows = table.loc[row_clubs]

    if len(club_names) == 1:
        rows = rows.sort_values(by=club_names, ascending=False)

    return rows[club_names]

In [None]:
# Clubs to show as columns in the filtered tables below (here: the eight pot 1 clubs).
club_names = [
    'Real Madrid',
    'Eintracht Frankfurt',
    'Manchester City',
    'Milan',
    'Bayern Munich',
    'Paris Saint-Germain',
    'Porto',
    'Ajax',
]

In [None]:
# Rows: pot 1 clubs. Columns: the selected clubs.
filter_table(
    table,
    club_names,
    pots=1,
)

In [None]:
# Rows: pot 2 clubs. Columns: the selected clubs.
filter_table(
    table,
    club_names,
    pots=2,
)

In [None]:
# Rows: pot 3 clubs. Columns: the selected clubs.
filter_table(
    table,
    club_names,
    pots=3,
)

In [None]:
# Rows: pot 4 clubs. Columns: the selected clubs.
filter_table(
    table,
    club_names,
    pots=4,
)

In [None]:
# Rows: pot 3 clubs followed by pot 4 clubs. Columns: the selected clubs.
filter_table(
    table,
    club_names,
    pots=[3, 4],
)