## Generate Seed Data

In [2]:
from faker import Faker
from random_object_id import generate as generate_random_mongo_id
import json
from pymongo import MongoClient

def generate_seed_data(user_count = 100, output_path='../mock/users.json', domain_name="pwc.com"):
    """Create fake user records, write them to a JSON file, and return them.

    Args:
        user_count: Number of user records to generate.
        output_path: File the records are dumped to as a JSON array
            (opened for writing, so existing content is overwritten).
        domain_name: Domain appended after the "@" of every generated email.

    Returns:
        A list of dicts with the keys "_id", "firstName", "lastName" and
        "email", in generation order.
    """
    faker = Faker()

    def build_user():
        # One record: a random Mongo-style id plus a fake first/last name.
        record_id = generate_random_mongo_id()
        first = faker.first_name()
        last = faker.last_name()
        # The email is "<first>.<last>@<domain>", with the name lower-cased.
        local_part = f"{first.lower()}.{last.lower()}"
        return {
            "_id": record_id,
            "firstName": first,
            "lastName": last,
            "email": local_part + "@" + domain_name,
        }

    users = [build_user() for _ in range(user_count)]

    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(users, out_file)

    return users

def seed_user_data_to_mongodb(user_count):
    """Replace the contents of the `pwc.users` collection with fresh seed data.

    Args:
        user_count: Number of fake users to generate and insert.

    Side effects:
        Deletes every document in `pwc.users` on the MongoDB instance at
        localhost:23037, then inserts the generated users. Also writes the
        JSON file produced by `generate_seed_data`.
    """
    # Generate before touching the database, so a failure while building or
    # dumping the seed data does not leave the collection wiped.
    user_list = generate_seed_data(user_count)

    # The context manager closes the client (and its connection pool) on exit.
    with MongoClient("localhost", 23037) as client:
        users_collection = client.pwc.users
        users_collection.delete_many({})
        # insert_many rejects an empty document list, so skip it for 0 users.
        if user_list:
            users_collection.insert_many(user_list)

In [3]:
# Wipe the local `users` collection and reload it with 20 generated users.
seed_user_data_to_mongodb(user_count=20)

## Pairing Business Requirements

- Users are paired into groups of exactly 2
- A pair that has already occurred in a previous round must not be repeated

### Approach 1: Random Sampling

In [217]:
from random import sample

def already_grouped(group, historical_groups):
    """Return True if `group` matches a pair in `historical_groups`.

    The comparison is positional: `group[0]` is compared with the first id of
    each historical pair and `group[1]` with the second. Callers must
    therefore pass pairs whose two ids are sorted the same way as the
    historical pairs.

    Args:
        group: A two-element sequence `[id_1, id_2]`.
        historical_groups: Iterable of two-element sequences of earlier pairs.

    Returns:
        True when an identical pair exists in the history, otherwise False
        (always a bool, never None).
    """
    return any(
        past[0] == group[0] and past[1] == group[1]
        for past in historical_groups
    )

def generate_groups(user_ids, historical_groups, max_attempts=10000):
    """Randomly split the users into pairs that never occurred before.

    A round is built by repeatedly sampling two of the still-unpaired users.
    If a sampled pair is already in `historical_groups`, the whole round is
    thrown away and restarted from scratch.

    Args:
        user_ids: Ids of the users to pair. Ids must be hashable and
            mutually orderable; duplicates are ignored.
        historical_groups: List of earlier pairs, each `[id_1, id_2]` with the
            two ids in sorted order. On success the new pairs are appended to
            this list in place.
        max_attempts: Maximum number of rounds to try before giving up.

    Returns:
        A list of `[id_1, id_2]` pairs (each sorted) that covers every user
        exactly once. An empty list when there are no users.

    Raises:
        ValueError: If the number of distinct users is odd, so they cannot
            all be paired.
        RuntimeError: If no valid round was found within `max_attempts`
            tries, e.g. because every possible pair has already been used.
            `historical_groups` is left untouched in that case.
    """
    # Drop duplicate ids (keeping first-seen order) so nobody can be paired
    # with themselves.
    unique_ids = list(dict.fromkeys(user_ids))
    if not unique_ids:
        return []
    if len(unique_ids) % 2:
        raise ValueError(
            "cannot pair an odd number of users: %d" % len(unique_ids)
        )

    # Set of past pairs for O(1) lookups inside the retry loop.
    seen_pairs = {(past[0], past[1]) for past in historical_groups}

    for _ in range(max_attempts):
        remaining = list(unique_ids)
        groups = []
        while remaining:
            pair = sample(remaining, 2)
            pair.sort()
            if tuple(pair) in seen_pairs:
                # Pair was used before: abandon this round and start over.
                break
            groups.append(pair)
            # List filtering (not a set difference) keeps the order stable,
            # so results are reproducible under a fixed random seed.
            remaining = [uid for uid in remaining if uid not in pair]
        else:
            # The loop ended without a break: every user has been paired.
            historical_groups.extend(groups)
            return groups

    raise RuntimeError(
        "no valid pairing found after %d attempts" % max_attempts
    )

def get_user_ids_from_mongodb():
    """Return the `_id` of every document in the `pwc.users` collection.

    Connects to the MongoDB instance at localhost:23037 and closes the
    connection again before returning.

    Returns:
        A list of `_id` values, in the order the cursor yields them.
    """
    # The context manager closes the client (and its connection pool) on exit.
    with MongoClient("localhost", 23037) as client:
        # Project only `_id`: no other field of the user documents is needed.
        return [user["_id"] for user in client.pwc.users.find({}, {"_id": 1})]

def get_grouping_records_from_mongodb():
    """Load every stored pair from the `pwc.groupings` collection.

    Connects to the MongoDB instance at localhost:23037 and closes the
    connection again before returning.

    Returns:
        A list of `[id_1, id_2]` lists, one per stored grouping document.
    """
    # The context manager closes the client (and its connection pool) on exit.
    with MongoClient("localhost", 23037) as client:
        return [
            [grouping["id_1"], grouping["id_2"]]
            for grouping in client.pwc.groupings.find({})
        ]

def replace_grouping_collection_in_mongodb(groupings):
    """Overwrite the `pwc.groupings` collection with the given pairs.

    Args:
        groupings: Iterable of two-element sequences `[id_1, id_2]`.

    Side effects:
        Deletes every document in `pwc.groupings` on the MongoDB instance at
        localhost:23037, then inserts one document per pair with the fields
        "id_1", "id_2" and "group_id".
    """
    # NOTE(review): one freshly generated id is shared by ALL documents
    # written by this call, including pairs from earlier rounds, so
    # `group_id` does not identify the round a pair was created in.
    # Confirm whether that is intended.
    grouping_id = generate_random_mongo_id()

    # Build the documents before touching the database, so a malformed pair
    # fails here instead of after the collection has been wiped.
    documents_to_insert = [
        {"id_1": grouping[0], "id_2": grouping[1], "group_id": grouping_id}
        for grouping in groupings
    ]

    # The context manager closes the client (and its connection pool) on exit.
    with MongoClient("localhost", 23037) as client:
        groupings_collection = client.pwc.groupings
        groupings_collection.delete_many({})
        # insert_many rejects an empty document list, so skip it when there
        # is nothing to store.
        if documents_to_insert:
            groupings_collection.insert_many(documents_to_insert)

def generate_grouping_once(reset_grouping):
    """Run one pairing round and persist the updated pairing history.

    Args:
        reset_grouping: When exactly equal to False, the stored history is
            loaded from MongoDB and the new pairs must avoid it. For any
            other value the round starts from an empty history.

    Returns:
        The list of pairs produced by `generate_groups` for this round.
    """
    sorted_user_ids = sorted(get_user_ids_from_mongodb())

    # Kept as an `== False` comparison on purpose: a value such as None takes
    # the "start from scratch" branch, which `not reset_grouping` would not.
    if reset_grouping == False:
        pairing_history = get_grouping_records_from_mongodb()
    else:
        pairing_history = []

    # generate_groups appends the new pairs to pairing_history in place, so
    # the list written back below holds the old history plus this round.
    new_pairs = generate_groups(sorted_user_ids, pairing_history)

    replace_grouping_collection_in_mongodb(pairing_history)

    return new_pairs