# Grouping

Task: Evenly distribute a number of elements into a number of buckets

In [1]:
import numpy as np

In [2]:
# Element ids 0..12 that will be spread over the buckets.
s = np.arange(0, 13)
s

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [3]:
# Total number of elements to distribute (length of the 1-D array s).
l1 = s.shape[0]
l1

13

Buckets should contain a minimum of l2 and a maximum of l2+1 elements

In [4]:
# Minimum number of elements per bucket; a bucket may hold at most l2 + 1.
l2 = 2
l2

2

In [5]:
# m: how many buckets of size l2 fit into l1 elements; r: elements left over.
m = l1 // l2
r = l1 % l2
m, r

(6, 1)

In [6]:
def maxBucketLength(index, total=None, size=None):
    """Return the capacity of the bucket at position ``index``.

    The first ``total % size`` buckets get one extra slot (``size + 1``);
    every other bucket holds ``size`` elements.

    Parameters
    ----------
    index : int
        Zero-based bucket position.
    total : int, optional
        Number of elements to distribute. Defaults to the notebook-level
        ``l1``.
    size : int, optional
        Minimum bucket size. Defaults to the notebook-level ``l2``.

    Returns
    -------
    int
        ``size + 1`` if ``index < total % size``, otherwise ``size``.

    Notes
    -----
    With ``total // size`` buckets the capacities add up to ``total`` only
    when ``total % size <= total // size``; otherwise some elements do not
    fit into the buckets.
    """
    if total is None:
        total = l1
    if size is None:
        size = l2
    # Only the remainder decides the capacity; the quotient is not needed.
    remainder = total % size
    return size + 1 if index < remainder else size

In [7]:
# Capacity of each of the m buckets, in bucket order.
maxBucketLengths = list(map(maxBucketLength, range(m)))
maxBucketLengths

[3, 2, 2, 2, 2, 2]

## by distribution

In [8]:
# One independent empty list per bucket (m buckets in total).
buckets = [list() for _ in range(m)]
buckets

[[], [], [], [], [], []]

In [9]:
# Sanity check: the number of buckets should equal m.
len(buckets)

6

In [10]:
# Sanity check: a freshly created bucket holds no elements yet.
len(buckets[2])

0

In [11]:
# Reset the buckets so that re-running this cell starts from empty buckets.
buckets = [[] for b in range(m)]

for si in s:
    # Draw only among buckets that still have room: each element is placed
    # with a single draw, and the loop cannot spin forever on full buckets.
    open_buckets = [bi for bi in range(len(buckets))
                    if len(buckets[bi]) < maxBucketLength(bi)]
    if not open_buckets:
        # Total capacity is smaller than the number of elements.
        raise ValueError("all buckets are full; cannot place element " + str(si))
    bi = open_buckets[np.random.randint(len(open_buckets))]
    buckets[bi].append(si)

buckets

[[0, 4, 5], [1, 3], [8, 11], [2, 7], [9, 12], [6, 10]]

## by Permutation

In [12]:
# Random permutation of the element ids; consecutive slices of it form the groups.
p = np.random.permutation(s)
p

array([ 6,  1,  0,  4,  7,  9,  5,  2, 11,  8, 12, 10,  3])

In [13]:
# Split positions: running total of the bucket sizes. The last bucket is left
# out because its end coincides with the end of the array.
cuts = np.cumsum(maxBucketLengths[:-1])
cuts

array([ 3,  5,  7,  9, 11], dtype=int32)

In [14]:
# Cut the permutation at the split positions; one sub-array per bucket.
buckets2 = np.array_split(p, cuts)
buckets2

[array([6, 1, 0]),
 array([4, 7]),
 array([9, 5]),
 array([ 2, 11]),
 array([ 8, 12]),
 array([10,  3])]

## Resolving Attributes

In [15]:
from names import *

In [16]:
# Roster of [given name(s), single letter] pairs; presumably supplied by the
# star import of `names` -- confirm.
Names

[['Marius', 'B'],
 ['Jonatan', 'C'],
 ['Robin Benjamin', 'F'],
 ['Jascha', 'G'],
 ['Christian', 'J'],
 ['Lars', 'L'],
 ['Timo', 'N'],
 ['Nadine', 'N'],
 ['Julian', 'O'],
 ['Ruben Maximilian', 'S'],
 ['Ruben Rolf', 'S'],
 ['Yanick Julian', 'S'],
 ['Henning Alexander', 'U']]

In [17]:
# Entries are looked up by position; this is the entry at index 1.
Names[1]

['Jonatan', 'C']

In [18]:
# Recap of the index groups produced by splitting the permutation.
buckets2

[array([6, 1, 0]),
 array([4, 7]),
 array([9, 5]),
 array([ 2, 11]),
 array([ 8, 12]),
 array([10,  3])]

In [20]:
for index, bucket in enumerate(buckets2):
    # Use a dedicated name for the text: `s` holds the element array defined
    # at the top of the notebook and must not be overwritten with a string.
    line = "Group " + str(index) + ": "

    for member in bucket:
        # Each member is a position in Names.
        line += str(Names[member]) + ", "

    print(line)

Group 0: ['Timo', 'N'], ['Jonatan', 'C'], ['Marius', 'B'], 
Group 1: ['Christian', 'J'], ['Nadine', 'N'], 
Group 2: ['Ruben Maximilian', 'S'], ['Lars', 'L'], 
Group 3: ['Robin Benjamin', 'F'], ['Yanick Julian', 'S'], 
Group 4: ['Julian', 'O'], ['Henning Alexander', 'U'], 
Group 5: ['Ruben Rolf', 'S'], ['Jascha', 'G'], 
