# Stable Roommates Matching for Pair Research
This notebook analyzes the [Stable Roomates Matching](http://www.dcs.gla.ac.uk/~pat/jchoco/roommates/papers/Comp_sdarticle.pdf) algorithm with previous [Pair Research](http://pairresearch.io/). 

# Load in Libraries and Stable Roommates Matching Module

In [18]:
%load_ext autoreload
%autoreload 2

import os
import sys

module_path = os.path.abspath(os.path.join(".."))
if module_path not in sys.path:
    sys.path.append(module_path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
from tqdm.notebook import tqdm
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URI = os.getenv("MONGO_URI")

In [20]:
# analysis
import multiprocessing as mp
import random
import math
import pandas as pd

from copy import deepcopy
from pymongo import MongoClient

import warnings

warnings.filterwarnings("ignore")

In [21]:
# plotting
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [22]:
# load stable roommates and pair research modules
from stable_roommates import verify_stability
from stable_roommates import create_preference_matrix
from stable_roommates import compute_matching_cardinality

from pair_research import create_matching_output

# Analysis of Stable Roommates Matching on Pair Research Data
Below, we analyze the impact of using the Stable Roommates algorithm on previous pairings. 

We begin by seeing
1. How frequently can we find stable matchings? 
2. When stable matchings are not possible, for what reason do they fail?

## Fetch Pairing Data from [pairresearch.io](http://pairresearch.io/)

In [5]:
dbName = "pair-research"
client = MongoClient(MONGO_URI)
db = client[dbName]
db.list_collection_names()

['pairings',
 'affinities_history',
 'objectlabs-system',
 'affinities',
 'tasks_history',
 'groups',
 'pairs_history',
 'users',
 'tasks',
 'objectlabs-system.admin.collections',
 'meteor_accounts_loginServiceConfiguration']

In [6]:
users = pd.DataFrame(list(db.users.find({})))

print("Number of Users: {}".format(len(users)))
users.head()

Number of Users: 1910


Unnamed: 0,_id,createdAt,services,emails,profile,groups
0,3EuAwtcy65oC2v4ox,2018-05-02 15:34:32.218,{'password': {}},[{'address': 'GaryBland2014@U.NORTHWESTERN.EDU...,{'fullName': 'GaryBland2014@U.NORTHWESTERN.EDU'},"[{'groupId': 'TjwobnBm8xX9BTipw', 'role': {'_i..."
1,3Y7RFcwRG4fyDGJBM,2018-05-02 15:35:12.612,{'password': {}},[{'address': 'EricaMinor2015@U.NORTHWESTERN.ED...,{'fullName': 'EricaMinor2015@U.NORTHWESTERN.EDU'},"[{'groupId': 'TjwobnBm8xX9BTipw', 'role': {'_i..."
2,2Lefr8WhP7DMFSbgY,2017-09-27 17:06:02.503,{'password': {'bcrypt': '$2a$10$djvQf78jjBEAGs...,[{'address': 'allisonlu2018@u.northwestern.edu...,"{'fullName': 'Allison Lu', 'avatar': 'http://o...","[{'groupId': 'sM3z5FkZfsABqcj3g', 'role': {'ti..."
3,2HxiToJbYpPzjp22y,2016-12-09 01:55:40.646,{'password': {'bcrypt': '$2a$10$n5KHTA./KfKgM....,"[{'address': 'soya@kaist.ac.kr', 'verified': T...","{'fullName': 'Soya Park', 'avatar': 'http://or...",[]
4,328Ai3RCXvYfF7DPj,2019-06-17 01:54:34.941,{'password': {'bcrypt': '$2a$10$TSKpb1QRQKTVfl...,"[{'address': 'nikhil07prakash@gmail.com', 'ver...","{'fullName': 'Nikhil Prakash', 'avatar': 'http...","[{'groupId': 'BibLRuKtNNv7QEDqb', 'role': {'ti..."


In [7]:
groups = pd.DataFrame(list(db.groups.find({})))

# remove testing groups
group_creator_ignore_list = [
    "Demo Admin",
    "ykykykykykykykykykyk",
    "Stella",
    "Kevin Northwestern",
    "Kevin Chen",
    "Leesha",
    "Jennie",
    "Kapil Garg",
]
group_ignore_ids = groups[groups["creatorName"].isin(group_creator_ignore_list)][
    "_id"
].unique()

# subset groups by id
groups_orig_size = len(groups)
groups_new_size = 0

groups = groups[~groups["_id"].isin(group_ignore_ids)]
groups.reset_index(drop=True, inplace=True)

# print change in size
groups_new_size = len(groups)
print(
    "Number of Groups \nOriginal size: {} --> New size: {}".format(
        groups_orig_size, groups_new_size
    )
)

# display task history
groups.head()

Number of Groups 
Original size: 847 --> New size: 183


Unnamed: 0,_id,groupName,description,creatorId,creatorName,creationDate,roles,members,active,activePairing
0,8s3BgEkMgkBTmGRJF,Future Everyday_beta,testing,YcemXAvHqLB54i5b9,Yu-Ting,2019-01-30 12:56:25.646,"[{'title': 'Professor', '_id': 'qaAohZLygP6Eod...","[{'fullName': 'Yu-Ting', 'userId': 'YcemXAvHqL...",True,
1,2uDCYsTKttn4zuEY9,POEM Lab,Research group at NCSU,XXHPSMLDh75odCZQp,Chris Martens,2018-01-23 18:58:35.624,"[{'title': 'Professor', '_id': '6zcLDqjRofjDY5...","[{'fullName': 'Chris Martens', 'userId': 'XXHP...",True,
2,NDbRpfE8cnXMfRzHc,AY18 MSC 538-0,Graduate Class: Workplace Learning & Communiti...,QKt8uNR9BvNSGH3FT,Amy Hauenstein,2018-06-15 19:07:12.679,"[{'title': 'Graduate Student', '_id': 'hMnM78Y...","[{'fullName': 'Amy Hauenstein', 'userId': 'QKt...",True,csrJ2ihdeMwAQdFXG
3,jYf6GwPb6FiJdnxfk,CROMA Lab,UMich CROMA Lab,HRb63v7L3bR4MRMbk,Stephanie O'Keefe,2017-06-28 15:48:48.594,"[{'title': 'Professor', '_id': 'Dd6ronS9fnNhEG...","[{'fullName': 'Stephanie O'Keefe', 'userId': '...",True,
4,g65pw3MM5TD3qCE8T,Slalom NYC D&A Mentorship Group,Slalom NYC D&A Mentorship Group,Rwebd6jSzjFPzFvSk,Saurabh Rane,2020-07-21 21:02:45.072,"[{'title': 'Professor', '_id': 'AGY3fmpCuFG3mk...","[{'fullName': 'Saurabh Rane', 'userId': 'Rwebd...",True,girC7kwP87tRtoNR5


In [8]:
tasks_history = pd.DataFrame(list(db.tasks_history.find({})))

# remove bad groups
tasks_history_orig_size = len(tasks_history)
tasks_history_new_size = 0

tasks_history = tasks_history[~tasks_history["groupId"].isin(group_ignore_ids)]
tasks_history.reset_index(drop=True, inplace=True)

# add group_pairing_id
tasks_history["group_pairing_id"] = (
    tasks_history["groupId"] + "-" + tasks_history["pairingId"]
)

# print change in size
tasks_history_new_size = len(tasks_history)
print(
    "Number of Tasks\nOriginal size: {} --> New size: {}".format(
        tasks_history_orig_size, tasks_history_new_size
    )
)

# display task history
tasks_history.head()

Number of Tasks
Original size: 8349 --> New size: 8302


Unnamed: 0,_id,name,userId,groupId,task,pairingId,group_pairing_id
0,2AYpCGrankHaLTuTN,Yongsung Kim,EDEFWcagLwCfXP5Jg,9mdkMmj4pY8Q2TwqF,i need to help with quarterly/yearly plan,Hje6thvxpudhfy6C5,9mdkMmj4pY8Q2TwqF-Hje6thvxpudhfy6C5
1,2Gi7xcv8NDzQRGJdC,Yongsung Kim,6vpLhKvhfxn9vKP2f,5QXWCwAFBrdbLYGar,I need to recruit a lot of people (30-40) who ...,QEvCZCAQeE4WJuhAE,5QXWCwAFBrdbLYGar-QEvCZCAQeE4WJuhAE
2,23NBmRrQCWcyLWtFE,Kristine Lu,xQ4mPiD4TX9MJqiqj,9mdkMmj4pY8Q2TwqF,Hold me accountable for writing another iterat...,QLv7Kd3NFjstGYQ25,9mdkMmj4pY8Q2TwqF-QLv7Kd3NFjstGYQ25
3,2Cj9cgw74Xbj4ffEn,Bomani McClendon,u6DrqFSvdZnWDgjz8,sM3z5FkZfsABqcj3g,Meteor Tech Help: How to deal with async getti...,9cSpnw4oMBgvCf6gz,sM3z5FkZfsABqcj3g-9cSpnw4oMBgvCf6gz
4,2NwuggEdhbPHyt9a2,Stephanie Jones,QiiDySM5ehXjLoNRL,KaYHdgPnyHQx84CPL,Life,KDAzB3MBJfXPZRTTA,KaYHdgPnyHQx84CPL-KDAzB3MBJfXPZRTTA


In [9]:
pairings = pd.DataFrame(list(db.pairings.find({})))

# remove bad groups
pairings_orig_size = len(pairings)
pairings_new_size = 0

pairings = pairings[~pairings["groupId"].isin(group_ignore_ids)]

# add group_pair id
pairings["group_pairing_id"] = pairings["groupId"] + "-" + pairings["_id"]
pairings.reset_index(drop=True, inplace=True)

# print change in size
pairings_new_size = len(pairings)
print(
    "Number of Pairing Sessions\nOriginal size: {} --> New size: {}".format(
        pairings_orig_size, pairings_new_size
    )
)

# display current pairings
print(
    "Pairing count: {}, Unique group count: {}".format(
        len(pairings), len(pairings.groupId.unique())
    )
)
pairings.sort_values("timestamp", ascending=False, inplace=True)
pairings.head()

Number of Pairing Sessions
Original size: 1148 --> New size: 1055
Pairing count: 1055, Unique group count: 101


Unnamed: 0,_id,groupId,pairings,timestamp,group_pairing_id
1054,jEFzQKHgftXeHhsfu,3HSsYKw5m2CrAZtFs,"[{'firstUserId': 'f5Q6ebhramRfx7GQR', 'firstUs...",2025-09-25 08:32:51.549,3HSsYKw5m2CrAZtFs-jEFzQKHgftXeHhsfu
1053,jMBRdqYugAc5Y7NbG,3HSsYKw5m2CrAZtFs,"[{'firstUserId': 'd9T3ZrcKMRvi6kqih', 'firstUs...",2025-09-18 09:39:22.754,3HSsYKw5m2CrAZtFs-jMBRdqYugAc5Y7NbG
1052,LrvQ6xQfRzvjxSy9F,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'Tmp7YeTw5ohABj8uK', 'firstUs...",2025-01-28 03:29:44.408,9mdkMmj4pY8Q2TwqF-LrvQ6xQfRzvjxSy9F
1051,PFZGAFC2RL6X3Q7qQ,9mdkMmj4pY8Q2TwqF,"[{'firstUserId': 'z5uM74YSjK5KEXYGC', 'firstUs...",2025-01-28 03:29:42.907,9mdkMmj4pY8Q2TwqF-PFZGAFC2RL6X3Q7qQ
1050,GpkZoHZo6BrFuKSCg,zakxD2tAeS5JEFPKx,"[{'firstUserId': 't8KcY69e6ddKo7qRJ', 'firstUs...",2024-10-30 21:43:17.896,zakxD2tAeS5JEFPKx-GpkZoHZo6BrFuKSCg


In [10]:
pairs_history = pd.DataFrame(list(db.pairs_history.find({})))

# remove bad groups
pairs_history_orig_size = len(pairs_history)
pairs_history_new_size = 0

pairs_history = pairs_history[~pairs_history["groupId"].isin(group_ignore_ids)]

# add group_pairing_id column
pairs_history["group_pairing_id"] = (
    pairs_history["groupId"] + "-" + pairs_history["pairingId"]
)
pairs_history.reset_index(drop=True, inplace=True)

# print change in size
pairs_history_new_size = len(pairs_history)
print(
    "Number of Pairs\nOriginal size: {} --> New size: {}".format(
        pairs_history_orig_size, pairs_history_new_size
    )
)

# display current pairs_history
print(
    "Unique group count: {}, Unique pairing count: {}".format(
        len(pairs_history.groupId.unique()),
        len(pairs_history.group_pairing_id.unique()),
    )
)
pairs_history.sort_values("timestamp", ascending=False, inplace=True)
pairs_history.head()

Number of Pairs
Original size: 5319 --> New size: 5288
Unique group count: 101, Unique pairing count: 1055


Unnamed: 0,_id,groupId,pairingId,firstUserId,firstUserName,firstUserRole,secondUserId,secondUserName,secondUserRole,timestamp,group_pairing_id
5287,E22H5DgSJLMje4kgK,3HSsYKw5m2CrAZtFs,jEFzQKHgftXeHhsfu,d9T3ZrcKMRvi6kqih,Gus Umbelino,Post Doc,,,,2025-09-25 08:32:51.549,3HSsYKw5m2CrAZtFs-jEFzQKHgftXeHhsfu
5286,gDzBoHRZ2ZH9A7YtL,3HSsYKw5m2CrAZtFs,jEFzQKHgftXeHhsfu,f5Q6ebhramRfx7GQR,M. Piano,Graduate Student,8D2ax9LuSi39d3qyC,Irina Lehner,Graduate Student,2025-09-25 08:32:51.549,3HSsYKw5m2CrAZtFs-jEFzQKHgftXeHhsfu
5285,ZJnJ4y73xFNFKYAMz,3HSsYKw5m2CrAZtFs,jMBRdqYugAc5Y7NbG,d9T3ZrcKMRvi6kqih,Gus Umbelino,Post Doc,NuJFYbray3MW4jQoC,Stefan Kalberer,Graduate Student,2025-09-18 09:39:22.754,3HSsYKw5m2CrAZtFs-jMBRdqYugAc5Y7NbG
5284,KgEpi95JnzE29HxDe,9mdkMmj4pY8Q2TwqF,LrvQ6xQfRzvjxSy9F,Tmp7YeTw5ohABj8uK,Yinmiao Li,PhD Student,z5uM74YSjK5KEXYGC,Melissa Chen,PhD Student,2025-01-28 03:29:44.408,9mdkMmj4pY8Q2TwqF-LrvQ6xQfRzvjxSy9F
5283,RRbwvkSMAPwnAEAkn,9mdkMmj4pY8Q2TwqF,PFZGAFC2RL6X3Q7qQ,z5uM74YSjK5KEXYGC,Melissa Chen,PhD Student,Tmp7YeTw5ohABj8uK,Yinmiao Li,PhD Student,2025-01-28 03:29:42.907,9mdkMmj4pY8Q2TwqF-PFZGAFC2RL6X3Q7qQ


In [11]:
tasks = pd.DataFrame(list(db.tasks.find({})))

# remove bad groups
tasks_orig_size = len(tasks)
tasks_new_size = 0

tasks = tasks[~tasks["groupId"].isin(group_ignore_ids)]
tasks.reset_index(drop=True, inplace=True)

# print change in size
tasks_new_size = len(tasks)
print(
    "Number of Tasks\nOriginal size: {} --> New size: {}".format(
        tasks_orig_size, tasks_new_size
    )
)

# display current tasks
tasks.head()

Number of Tasks
Original size: 1638 --> New size: 1439


Unnamed: 0,_id,name,userId,groupId,task
0,2QsjYvvBF59JvBq3y,Haoqi,P7EHknAnmFM3c5iXC,awCE87hpPLm3Hpyvq,Abc
1,6SWWJDq2Ku4YCx2d4,moliri,5QiYq9AN8CEPgWMkp,9bBK7o9t2nAqNjc4W,hihi
2,6gBn9ej23BMBgrWSb,Kevin,pApKBgCknH8dspddZ,qe3ip9Zspigz8mWve,thing!
3,MDCMw8XTCb8DTk64H,Luke S Murray,ZFCNCYSqfXtGigZjZ,XcEPz3nJaEfHGhn6x,review my incomplete related works
4,Nszt5do4pqafRLSCZ,David Karger,W8RgxxQM5k2kKLNZD,Ywbg3AeBPfzbAB74z,


In [12]:
affinities = pd.DataFrame(list(db.affinities.find({})))

# remove bad groups
affinities_orig_size = len(affinities)
affinities_new_size = 0

affinities = affinities[~affinities["groupId"].isin(group_ignore_ids)]
affinities.reset_index(drop=True, inplace=True)

# print change in size
affinities_new_size = len(affinities)
print(
    "Number of Current Affinities\nOriginal size: {} --> New size: {}".format(
        affinities_orig_size, affinities_new_size
    )
)

# display current affinities
affinities.head()

Number of Current Affinities
Original size: 5178 --> New size: 5135


Unnamed: 0,_id,helperId,helpeeId,groupId,value
0,2ftfXQtMgAAiczLJZ,PavTL8zD9664wvtfB,y5D7YfEuTMh8WaFzx,qPnf2DHHihugATnxD,1.0
1,34CtrtqTv6MQ8vxaF,p9MttJyv3nigGjYqx,ZiE5NmwE3Q2LmiKmS,LXsBj4mnCKbkYJmAX,2.0
2,2rkCWfsxNvzauqLx9,PkuiEYaGwChXy3LL6,MEsDmc4AeaAQ44RQL,NDbRpfE8cnXMfRzHc,-1.0
3,245JFYNd6XNwk5nbd,acoFQ6xXo29zBPcPp,kMEKB25ouoW8nM5JP,u4kjJC55DPMLpR8bC,0.33
4,2mjBwESgcvCcLZKb9,BdMQjxPpYw7ELEf3B,Jfk5kDRyPjnLaWpjD,ko6giLvq9cE842rHY,5.0


In [13]:
affinities_history = pd.DataFrame(list(db.affinities_history.find({})))

# remove bad groups
affinities_history_orig_size = len(affinities_history)
affinities_history_new_size = 0

affinities_history = affinities_history[
    ~affinities_history["groupId"].isin(group_ignore_ids)
]

# add group_pairing_id column
affinities_history["group_pairing_id"] = (
    affinities_history["groupId"] + "-" + affinities_history["pairingId"]
)

# remove duplicate ratings
affinities_history.sort_values(
    ["group_pairing_id", "helpeeId", "helperId"], inplace=True
)
affinities_history.drop_duplicates(
    subset=["group_pairing_id", "helpeeId", "helperId"], keep="last", inplace=True
)
affinities_history.reset_index(drop=True, inplace=True)

# print change in size
affinities_history_new_size = len(affinities_history)
print(
    "Number of Past Affinities\nOriginal size: {} --> New size: {}".format(
        affinities_history_orig_size, affinities_history_new_size
    )
)

# display affinity data
print(
    "Unique Group Pairings: {}".format(
        len(affinities_history.group_pairing_id.unique())
    )
)
affinities_history.head()

Number of Past Affinities
Original size: 89249 --> New size: 88612
Unique Group Pairings: 1029


Unnamed: 0,_id,helperId,helpeeId,groupId,value,pairingId,group_pairing_id
0,t29KL7evKrKmAc68s,SRrbNxcTrirkSbowD,2y7tESxwQFwro8Jca,27syMcotb279YaP2u,-1.0,3G6iXFybGhQdsjDct,27syMcotb279YaP2u-3G6iXFybGhQdsjDct
1,gER5DYezCoLibfHdz,sanGBbAp7tXpTMfHN,2y7tESxwQFwro8Jca,27syMcotb279YaP2u,0.33,3G6iXFybGhQdsjDct,27syMcotb279YaP2u-3G6iXFybGhQdsjDct
2,aThjZHs8Y8zqkuDab,2y7tESxwQFwro8Jca,66PYzPiXdpro84gaR,27syMcotb279YaP2u,0.33,3G6iXFybGhQdsjDct,27syMcotb279YaP2u-3G6iXFybGhQdsjDct
3,uJBpNF6f2Tn8a5e9Y,SRrbNxcTrirkSbowD,66PYzPiXdpro84gaR,27syMcotb279YaP2u,-1.0,3G6iXFybGhQdsjDct,27syMcotb279YaP2u-3G6iXFybGhQdsjDct
4,Dr7Gh7q2tj4MhHmdF,sanGBbAp7tXpTMfHN,66PYzPiXdpro84gaR,27syMcotb279YaP2u,-1.0,3G6iXFybGhQdsjDct,27syMcotb279YaP2u-3G6iXFybGhQdsjDct


## Run Stable Matching with All Previous Pairs

### Construction helper functions

In [14]:
def create_affinity_matrix(input_affinities, tasks):
    """
    Creates an n^2 affinity matrix.

    Input:
        input_affinities (pandas DataFrame): dataframe with helpeeId, helperId, and value columns.
        tasks (pandas DataFrame): current tasks for pairing. used to create superset of users in event some don't rate any others.

    Output:
        (list of list of numbers): matrix of affinities. 0 if no affinity between users.
        (dict): dict where keys are numbers and values are userIds mapping matrix index to users.
    """
    # dont modify original dataframe
    affinities = deepcopy(input_affinities)
    affinities.drop_duplicates(keep="last", inplace=True)

    # create user superset and user:index mapping
    user_superset = list(
        set(
            affinities["helperId"].tolist()
            + affinities["helpeeId"].tolist()
            + tasks["userId"].tolist()
        )
    )
    user_count = len(user_superset)
    user_index_dict = {user_superset[x]: x for x in range(user_count)}

    # create empty n^2 matrix
    affinity_matrix = [[0 for y in range(user_count)] for x in range(user_count)]

    # loop through data and populate matrix
    for index, row in affinities.iterrows():
        curr_helper_index = user_index_dict[row["helperId"]]
        curr_helpee_index = user_index_dict[row["helpeeId"]]
        curr_value = row["value"]

        affinity_matrix[curr_helper_index][curr_helpee_index] = curr_value

    # flip user and index in dict
    index_user_dict = {str(v): k for (k, v) in user_index_dict.items()}

    return affinity_matrix, index_user_dict


def create_pairing_dict(user_index_dict, pairing):
    """
    Creates a pairing dictionary based on previous pairing, ignoring unmatched users.

    Input:
        user_index_dict (dict): mapping from users in data to indices (1-indexed)
        pairing (list): list of pairings

    Output:
        (dict): dict containing bi-directional pairings from the pairing list with keys and values being numbers
            ex. { '1': '2', '2': '1'}
    """
    pairing_dict = {}
    for pairs in pairing:
        # only include cases where users are matched to one another (i.e. ignore odd people paired with -1)
        if "secondUserId" in pairs:
            # bi-directional representation in dictionary
            pairing_dict[user_index_dict[pairs["firstUserId"]]] = user_index_dict[
                pairs["secondUserId"]
            ]
            pairing_dict[user_index_dict[pairs["secondUserId"]]] = user_index_dict[
                pairs["firstUserId"]
            ]

    return pairing_dict


def get_recent_pairings(group_pair_id, limit):
    """
    Retrieve the most recent pairings for a group_id, up to limit, before pairing instance is run
        and return as a dictionary.

    Input:
        group_pair_id (string): group-pairing instance to get recent pairing data for.
        limit (number): number of most recent pairings to get.

    Output:
        (dict): dict mapping, bi-directionally, each pairing
    """
    group_id = group_pair_id.split("-")[0]

    # get timestamp of current pairing instance
    curr_timestamp = pairings[pairings.group_pairing_id == group_pair_id].iat[
        0, pairings.columns.get_loc("timestamp")
    ]

    # get pairs for group_id that occurred before group_pair_id did and also ignore group_pair_id
    relevant_pairings = pairings[
        (pairings["groupId"] == group_id)
        & (pairings["timestamp"] <= curr_timestamp)
        & (pairings["group_pairing_id"] != group_pair_id)
    ]
    pairing_instance_list = relevant_pairings.sort_values("timestamp", ascending=False)[
        0:limit
    ]["pairings"].tolist()
    output_list = []

    # create dictionaries and add to output
    for pairing_instance in pairing_instance_list:
        pairing_dict = {}
        for pairing in pairing_instance:
            # check if the user is paired with someone
            if "secondUserId" in pairing:
                # bi-directional representation in dictionary
                pairing_dict[pairing["firstUserId"]] = pairing["secondUserId"]
                pairing_dict[pairing["secondUserId"]] = pairing["firstUserId"]
            else:
                pairing_dict[pairing["firstUserId"]] = ""

        # add to output list
        output_list.append(pairing_dict)

    return output_list


def create_directed_graph(affinity_matrix, index_user_mapping, recent_pairings):
    """
    Converts an affinity matrix into a weighted matrix that represented a directed graph.
        Weight is calculated based on previous recent pairings and some random perturbation.

    Input:
        affinity_matrix (list of list of numbers): matrix of affinities. 0 if no affinity between users.
        index_user_mapping (dict): dict where keys are numbers and values are userIds mapping matrix index to users.
        recent_pairings (list of dict): up to 3 pairing sessions, ordered by recency,
            with each dict containing helper-helpee pairs

    Output:
        (list of list of numbers): weighted matrix
    """
    # dont modify original dataframe
    directed_graph = deepcopy(affinity_matrix)

    # iterate over each element and compute weighted value
    matrix_iterator = range(len(affinity_matrix))
    for row in matrix_iterator:
        for col in matrix_iterator:
            # ignore diagonal
            if row == col:
                continue

            # scale weight to be between -100 to 100
            weight = 1.0 + 99.0 * affinity_matrix[row][col]

            # Penalize recent pairings by increasing weight of pairs that have NOT occurred recently for last 3 pairings
            # ex. If A and B have not paired last time, increase their weight by 80 * 0.5^1
            # ex. If they also didn't pair time before, further increase their weight by 80 * 0.5^2 and so on (up to 3)
            # only give extra weight if rating is not -1
            if affinity_matrix[row][col] != -1:
                for index, pairing in enumerate(recent_pairings):
                    helper = index_user_mapping[str(row)]
                    helpee = index_user_mapping[str(col)]

                    # helper-helpee pairing does not exist in the current pairing
                    if helper in pairing and pairing[helper] != helpee:
                        weight += 80.0 * (0.5 ** (index + 1))

            # add some random perturbation, between 0-20, to guarentee strict ordering
            weight += random.random() * 20.0

            # store new edge weight
            directed_graph[row][col] = math.floor(weight)

    return directed_graph


def create_undirected_graph(affinity_matrix, index_user_mapping, recent_pairings):
    """
    Converts an affinity matrix into a weighted matrix that represented a directed graph.
        Weight is calculated based on previous recent pairings and some random perturbation.

    Input:
        affinity_matrix (list of list of numbers): matrix of affinities. 0 if no affinity between users.
        index_user_mapping (dict): dict where keys are numbers and values are userIds mapping matrix index to users.
        recent_pairings (list of dict): up to 3 pairing sessions, ordered by recency,
            with each dict containing helper-helpee pairs

    Output:
        (list of list of numbers): weighted matrix
    """
    # dont modify original dataframe
    undirected_graph = []

    # iterate over each element and compute weighted value
    matrix_iterator = range(len(affinity_matrix))
    for row in matrix_iterator:
        for col in matrix_iterator:
            # ignore diagonal and lower triangular portion of matrix
            if col <= row:
                continue

            # ignore edge if either has rated each other a -1
            if affinity_matrix[row][col] != -1 and affinity_matrix[col][row] != -1:
                # scale weight to be between -100 to 100. average the edges between two users.
                weight = (
                    1.0
                    + 99.0
                    * (affinity_matrix[row][col] + affinity_matrix[col][row])
                    / 2.0
                )

                # Penalize recent pairings by increasing weight of pairs that have NOT occurred recently for last 3 pairings
                # ex. If A and B have not paired last time, increase their weight by 80 * 0.5^1
                # ex. If they also didn't pair time before, further increase their weight by 80 * 0.5^2 and so on (up to 3)
                for index, pairing in enumerate(recent_pairings):
                    helper = index_user_mapping[str(row)]
                    helpee = index_user_mapping[str(col)]

                    # helper-helpee pairing does not exist in the current pairing
                    if helper in pairing and pairing[helper] != helpee:
                        weight += 80.0 * 0.5 ** (index + 1)

                # add some random perturbation, between 0-20, to guarentee strict ordering
                weight += random.random() * 20

                # store new edge weight
                undirected_graph.append([row, col, math.floor(weight)])

    return undirected_graph


def compute_mwm_stability(group_pair_id, user_index_dict, preference_matrix):
    """
    Computes the stability of a previous MWM matching, given a preference_matrix.

    Input:
        group_pair_id (string): pairing to determine stability for.
        user_index_dict (dict): mapping of users to index where indices are 1-indexed strings.
        preferences (matrix, list of lists of numbers): n-by-m preference matrix containing preferences for each person.
            m = n - 1, so each person has rated all other people.
            Each row is a 1-indexed ordered ranking of others in the pool.
            Therefore max(preferences[person]) <= number people and min(preferences[person]) = 1.

    Output:
        (boolean): whether MWM matching was stable. None if cannot determine.
    """
    # create a preference lookup table
    # person_number : [list of preferences]
    curr_pref_dict = {
        str(x + 1): [str(y) for y in preference_matrix[x]]
        for x in range(len(preference_matrix))
    }

    # create a dict of dicts holding index of each person ranked
    # person number : {person : rank_index }
    curr_ranks = {
        index: dict(zip(value, range(len(value))))
        for (index, value) in curr_pref_dict.items()
    }

    # attempt to create pairing dict and determine stability
    try:
        # create pairing dict
        curr_pairings = pairings[pairings["group_pairing_id"] == group_pair_id][
            "pairings"
        ].tolist()[0]
        curr_pairing_dict = create_pairing_dict(user_index_dict, curr_pairings)

        # determine and return stability
        return verify_stability(curr_pairing_dict, curr_ranks)
    except KeyError:
        # matching could not be computed since some data is missing
        return None


def get_previous_matching(group_pair_id):
    """
    Fetches and returns a previous matching.

    Input:
        group_pair_id (string): pairing to fetch
        user_index_dict (dict): mapping of users to index where indices are 1-indexed strings.

    Output:
        (dict): dict containing bi-directional pairings from the pairing list with keys and values being user ids
                ex. { 'user-id-a': 'user-id-b', 'user-id-b': 'user-id-a'}
    """
    # get pairings for matching session
    curr_pairings = pairings[pairings["group_pairing_id"] == group_pair_id][
        "pairings"
    ].tolist()[0]

    # create and return output dict
    output_dict = {}
    for curr_pairing in curr_pairings:
        if "secondUserId" in curr_pairing:
            output_dict[curr_pairing["firstUserId"]] = curr_pairing["secondUserId"]
            output_dict[curr_pairing["secondUserId"]] = curr_pairing["firstUserId"]
        else:
            output_dict[curr_pairing["firstUserId"]] = ""

    return output_dict


def create_userid_matching_dict(matching, index_user_mapping):
    """
    Converts a index_user_mapping ({'1': '2', '2': '1'}) into a user_id mapping ({'user-id-a': 'user-id-b', 'user-id-b': 'user-id-a'}),
        given a matching.

    Input:
        matching (list): list of indicces
        index_user_mapping (dict): dict where keys are numbers and values are userIds mapping matrix index to users.

    Output:
        (dict): user id matching dict ({'user-id-a': 'user-id-b', 'user-id-b': 'user-id-a'})
    """
    output_dict = {}

    for index, value in enumerate(matching):
        if value == -1:
            output_dict[index_user_mapping[str(index)]] = ""
        else:
            output_dict[index_user_mapping[str(index)]] = index_user_mapping[str(value)]

    return output_dict


def get_affinities_for_matching(input_affinities, user_id_matching_dict, remap=True):
    """
    Creates a dictionary mapping user id to affinity value under a matching.

    Input:
        input_affinities (pandas DataFrame): dataframe with helpeeId, helperId, and value columns.
        user_id_matching_dict (dict): matching where keys and values are user ids ({'user-id-a': 'user-id-b', 'user-id-b': 'user-id-a'})
        remap (boolean): optional whether affinity values should be remapped to interface values.

    Output:
        (dict): keys are all user id with the affinity they reported for.
            ex. {'user-id-a': 1, 'user-id-b': 0.33}
    """
    # dont modify original dataframe
    affinities = deepcopy(input_affinities)
    affinities.drop_duplicates(keep="last", inplace=True)

    # remap data values to UI values
    if remap:
        value_mappings = {"-1.0": 1, "0.0": 2, "0.33": 3, "0.66": 4, "1.0": 5}
        affinities["value"] = affinities["value"].astype(str)
        affinities.replace({"value": value_mappings}, inplace=True)

    # create output dict
    matching_affinity_dict = {}
    for helper, match in user_id_matching_dict.items():
        # ignore unmatched
        if match == "":
            continue

        # get current affinity for helper with a match
        curr_affinity = affinities[
            (affinities["helperId"] == helper) & (affinities["helpeeId"] == match)
        ]["value"].tolist()
        if len(curr_affinity) > 0:
            matching_affinity_dict[helper] = curr_affinity[0]
        else:
            matching_affinity_dict[helper] = (
                0  # TODO: what should you put for no affinity
            )

    return matching_affinity_dict


# def compute_mwm_stability(mwm_matching, preference_matrix):
#     """
#     Computes the stability of a MWM matching, given the matching and a preference_matrix.

#     Input:
#         mwm_matching (list): list of numbers indicating matching.
#         preferences (matrix, list of lists of numbers): n-by-m preference matrix containing preferences for each person.
#             m = n - 1, so each person has rated all other people.
#             Each row is a 1-indexed ordered ranking of others in the pool.
#             Therefore max(preferences[person]) <= number people and min(preferences[person]) = 1.

#     Output:
#         (boolean): whether MWM matching was stable. None if cannot determine.
#     """
#     # create a preference lookup table
#     # person_number : [list of preferences]
#     curr_pref_dict = {
#         str(x + 1): [str(y) for y in preference_matrix[x]] for x in range(len(preference_matrix))
#     }

#     # create a dict of dicts holding index of each person ranked
#     # person number : {person : rank_index }
#     curr_ranks = {index: dict(zip(value, range(len(value)))) for (index, value) in curr_pref_dict.items()}

#     # create 1-indexed pairing dict without unmatched pairings
#     curr_pairing_dict = {str(index): str(value) for index, value in enumerate(temp_matching) if value != -1}

#     # determine and return stability
#     try:
#         return verify_stability(curr_pairing_dict, curr_ranks)
#     except KeyError:
#     # stability could not be computed since some data is missing
#         return None

# Compute matching helper functions

In [15]:
def sr_matching_pair_research(
    group_pair_id, handle_odd_method="remove", remove_all=True
):
    """
    Runs stable matching on pair research data, given a group_pair_id to run matching for.

    Input:
        group_pair_id (string): group pairing to run matching on
        handle_odd_method (string): handling odd cases by either adding ('add') or removing ('remove') user
        remove_all (boolean): whether to try again if randomly removing a person fails

    Output:
        (dict): output of matching, along with matching metadata
    """
    # create affinity matrix and index-user dict
    curr_affinities = deepcopy(
        affinities_history[affinities_history["group_pairing_id"] == group_pair_id]
    )
    curr_tasks = deepcopy(
        tasks_history[tasks_history["group_pairing_id"] == group_pair_id]
    )
    curr_affinity_matrix, curr_index_user_mapping = create_affinity_matrix(
        curr_affinities[["helperId", "helpeeId", "value"]], curr_tasks
    )

    # transform index-user dict into user-index dict where indices are 1-indexed
    curr_user_index_dict = {
        str(v): str(int(k) + 1) for (k, v) in curr_index_user_mapping.items()
    }

    # get recent pairings and create input graphs
    curr_recent_pairings = get_recent_pairings(group_pair_id, 3)

    curr_directed_graph = create_directed_graph(
        curr_affinity_matrix, curr_index_user_mapping, curr_recent_pairings
    )
    curr_undirected_graph = create_undirected_graph(
        curr_affinity_matrix, curr_index_user_mapping, curr_recent_pairings
    )

    # create current preference matrix given the directed graph
    curr_pref_matrix = create_preference_matrix(curr_directed_graph)

    # run pair research matching algorithm
    matching_output = create_matching_output(
        {
            "directed_graph": curr_directed_graph,
            "undirected_graph": curr_undirected_graph,
        },
        handle_odd_method=handle_odd_method,
        remove_all=remove_all,
        debug=True,
    )

    # create user id matching dict with affinities
    mwm_userid_matching_dict = get_previous_matching(
        group_pair_id
    )  # TODO: use matching output
    mwm_affinities = get_affinities_for_matching(
        curr_affinities[["helperId", "helpeeId", "value"]], mwm_userid_matching_dict
    )

    sr_userid_matching_dict = create_userid_matching_dict(
        matching_output["stable_result"], curr_index_user_mapping
    )
    sr_mwm_userid_matching_dict = create_userid_matching_dict(
        matching_output["matching"], curr_index_user_mapping
    )

    # create affinity dictionaries
    sr_affinities = get_affinities_for_matching(
        curr_affinities[["helperId", "helpeeId", "value"]], sr_userid_matching_dict
    )
    sr_mwm_affinities = get_affinities_for_matching(
        curr_affinities[["helperId", "helpeeId", "value"]], sr_mwm_userid_matching_dict
    )

    # determine stability of MWM matching
    # TODO: use created MWM matching to determine if stable
    mwm_stability = compute_mwm_stability(
        group_pair_id, curr_user_index_dict, curr_pref_matrix
    )
    #     mwm_stability = compute_mwm_stability(matching_output['mwm_result_full'], curr_pref_matrix)

    # create metadata about the current affinity and add data to pairing_data
    group_id, pairing_id = group_pair_id.split("-")
    user_count = len(curr_affinity_matrix)
    curr_timestamp = pairs_history[pairs_history.group_pairing_id == group_pair_id].iat[
        0, pairs_history.columns.get_loc("timestamp")
    ]

    mwm_stable_text = "NA"
    if mwm_stability is not None:
        mwm_stable_text = "stable" if mwm_stability else "unstable"

    # create and return matching data
    matching_data = {
        "group_pair_id": group_pair_id,
        "group_id": group_id,
        "pairing_id": pairing_id,
        "timestamp": curr_timestamp,
        "user_count": user_count,
        "user_parity": "even" if user_count % 2 == 0 else "odd",
        "odd_handling": handle_odd_method,
        "final_matching": matching_output["matching"],
        "final_affinities": sr_mwm_affinities,  # TODO
        "sr_result": matching_output["stable_result"],
        "sr_affinities": sr_affinities,  # TODO
        "sr_stability": "stable" if matching_output["fully_stable"] else "unstable",
        "sr_debug": matching_output["stable_debug"],
        "sr_cardinality": compute_matching_cardinality(
            matching_output["stable_result"]
        ),
        "mwm_result": matching_output["mwm_result_full"],
        "mwm_affinities": mwm_affinities,  # TODO
        "mwm_stability": mwm_stable_text,
        "affinity_matrix": curr_affinity_matrix,
        "directed_graph": curr_directed_graph,
        "undirected_graph": curr_undirected_graph,
        "preference_matrix": curr_pref_matrix,
        "index_user_mapping": curr_index_user_mapping,
    }
    return matching_data


def sr_matching_pair_research_wrapper(exec_dicts):
    """
    Wrapper for sr_matching_pair_research that allows for changing optional parameters.

    Input:
        exec_dicts (list of dicts): contains group_pair_id, handle_odd_method, and remove_all

    Output:
        (dict): output of matching, along with matching metadata
    """
    return sr_matching_pair_research(
        exec_dicts["group_pair_id"],
        exec_dicts["handle_odd_method"],
        exec_dicts["remove_all"],
    )


def execute_sr_matching(
    group_pairing_ids, handle_odd_method="remove", remove_all=True, parallel=False
):
    """
    Wrapper for computing pair research matchings that calls sr_matching_pair_research_wrapper.

    Input:
        group_pairing_ids (list of string): unique group pairing ids to conduct matching on.
        remove_all (boolean): whether to try again if randomly removing a person fails
        parallel (boolean): run matching in parallel across all group_pairing_ids

    Output:
        (DataFrame): matchings computed for pair research data
    """
    pairing_data = []
    exec_dicts = [
        {
            "group_pair_id": group_pair_id,
            "handle_odd_method": handle_odd_method,
            "remove_all": remove_all,
        }
        for group_pair_id in group_pairing_ids
    ]

    # compute pairings
    if parallel:
        pool = mp.Pool(processes=mp.cpu_count())
        pairing_data = pool.map(sr_matching_pair_research_wrapper, exec_dicts)
        pool.close()
        pool.join()
    else:
        pairing_data = []
        for exec_dict in tqdm(exec_dicts):
            pairing_data.append(sr_matching_pair_research_wrapper(exec_dict))
        # pairing_data = [sr_matching_pair_research_wrapper(exec_dict) for exec_dict in exec_dicts]

    return pd.DataFrame(pairing_data)

### Remove One User Only

In [16]:
# get all pairing instances
group_pairing_ids = affinities_history.group_pairing_id.unique()

# compute pairings and create DataFrame of results
remove_one_pairings_df = execute_sr_matching(
    group_pairing_ids, handle_odd_method="remove", remove_all=False, parallel=False
)

# print stable matching results
sr_stable_count = len(
    remove_one_pairings_df[remove_one_pairings_df["sr_stability"] == "stable"]
)
sr_unstable_count = len(
    remove_one_pairings_df[remove_one_pairings_df["sr_stability"] == "unstable"]
)
sr_total = sr_stable_count + sr_unstable_count

output_string = "Stable Roommates Matching Results\nStable: {} ({:1.2f}%)\nUnstable: {} ({:1.2f}%)\nTotal: {} (100.00%)\n\n"
print(
    output_string.format(
        sr_stable_count,
        100 * sr_stable_count / sr_total,
        sr_unstable_count,
        100 * sr_unstable_count / sr_total,
        sr_total,
    )
)

# print mwm results
mwm_stable_count = len(
    remove_one_pairings_df[remove_one_pairings_df["mwm_stability"] == "stable"]
)
mwm_unstable_count = len(
    remove_one_pairings_df[remove_one_pairings_df["mwm_stability"] == "unstable"]
)
mwm_none_count = len(
    remove_one_pairings_df[remove_one_pairings_df["mwm_stability"] == "NA"]
)
mwm_total = mwm_stable_count + mwm_unstable_count + mwm_none_count

output_string = "Maximum Weighted Matching Results\nStable: {} ({:1.2f}%)\nUnstable: {} ({:1.2f}%)\nNA (could not determine stability): {} ({:1.2f}%)\nTotal: {} (100.00%)"
print(
    output_string.format(
        mwm_stable_count,
        100 * mwm_stable_count / mwm_total,
        mwm_unstable_count,
        100 * mwm_unstable_count / mwm_total,
        mwm_none_count,
        100 * mwm_none_count / mwm_total,
        mwm_total,
    )
)
remove_one_pairings_df.head()

  0%|          | 0/1029 [00:00<?, ?it/s]

Combined matching not valid. [2, 4, 0, -1, 1, -1, 7, 6, -1]
Combined matching not valid. [8, -1, 7, 4, 3, 9, -1, 2, 0, 5]
Combined matching not valid. [1, 0, 7, 5, -1, 3, -1, 2]
Combined matching not valid. [4, 9, -1, 11, 0, 6, 5, 10, -1, 1, 7, 3]
Combined matching not valid. [5, 3, -1, 1, -1, 0]
Combined matching not valid. [6, 4, 9, -1, 1, 10, 0, -1, -1, 2, 5]
Combined matching not valid. [-1, -1, 6, 4, 3, 9, 2, 8, 7, 5]
Combined matching not valid. [7, 3, 4, 1, 2, -1, -1, 0]
Combined matching not valid. [2, -1, 0, 8, 6, -1, 4, 9, 3, 7]
Combined matching not valid. [2, 4, 0, 6, 1, -1, 3, -1]
Combined matching not valid. [7, -1, 5, 6, -1, 2, 3, 0]
Combined matching not valid. [8, 2, 1, 6, 7, 10, 3, 4, 0, -1, 5, 12, 11, -1]
Combined matching not valid. [7, 2, 1, 6, 10, -1, 3, 0, 13, -1, 4, 12, 11, 8]
Combined matching not valid. [1, 0, 9, 11, 6, 17, 4, 8, 7, 2, 12, 3, 10, -1, -1, 16, 15, 5]
Combined matching not valid. [16, 15, 9, 6, 5, 4, 3, 12, 13, 2, -1, 17, 7, 8, -1, 1, 0, 11]
Comb

Unnamed: 0,group_pair_id,group_id,pairing_id,timestamp,user_count,user_parity,odd_handling,final_matching,final_affinities,sr_result,...,sr_debug,sr_cardinality,mwm_result,mwm_affinities,mwm_stability,affinity_matrix,directed_graph,undirected_graph,preference_matrix,index_user_mapping
0,27syMcotb279YaP2u-3G6iXFybGhQdsjDct,27syMcotb279YaP2u,3G6iXFybGhQdsjDct,2023-04-13 08:43:23.516,7,odd,remove,"[6, -1, 4, 5, 2, 3, 0]","{'TFpfXZLxwfzEorfGD': 0, '7Bu7DRa9Js5B2SkP2': ...","[6, -1, 4, 5, 2, 3, 0]",...,Stable matching found after Phase 1.,6,"[6, 2, 1, -1, 5, 4, 0]","{'66PYzPiXdpro84gaR': 0, 'TFpfXZLxwfzEorfGD': ...",stable,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 65, 42, 63, 79, 72, 86], [64, 0, 82, 89, ...","[[0, 1, 79], [0, 2, 32], [0, 3, 59], [0, 5, 13...","[[7, 5, 6, 2, 4, 3], [4, 3, 6, 1, 5, 7], [2, 6...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."
1,27syMcotb279YaP2u-44wNFcfPQZ9PJGKFA,27syMcotb279YaP2u,44wNFcfPQZ9PJGKFA,2023-05-02 01:52:25.623,10,even,remove,"[3, 7, 6, 0, 5, 4, 2, 1, 9, 8]","{'TFpfXZLxwfzEorfGD': 1, '66PYzPiXdpro84gaR': ...","[3, 7, 6, 0, 5, 4, 2, 1, 9, 8]",...,Stable matching found after Phase 1.,10,"[4, 7, 6, 5, 0, 3, 2, 1, 9, 8]","{'sanGBbAp7tXpTMfHN': 3, '7Bu7DRa9Js5B2SkP2': ...",unstable,"[[0, -1.0, -1.0, -1.0, 0, 0.66, -1.0, -1.0, -1...","[[0, -96, -96, -84, 61, 108, -98, -97, -94, -8...","[[0, 4, 62], [0, 5, 71], [1, 2, 82], [1, 3, 93...","[[6, 5, 4, 10, 9, 2, 3, 8, 7], [8, 9, 3, 1, 5,...","{'0': 'TFpfXZLxwfzEorfGD', '1': '66PYzPiXdpro8..."
2,27syMcotb279YaP2u-A9peJrKgSA7qng696,27syMcotb279YaP2u,A9peJrKgSA7qng696,2023-04-24 03:47:02.227,10,even,remove,"[7, 9, 6, 5, 8, 3, 2, 0, 4, 1]","{'TFpfXZLxwfzEorfGD': 1, '66PYzPiXdpro84gaR': ...","[7, 9, 6, 5, 8, 3, 2, 0, 4, 1]",...,Stable matching found after Phase 1.,10,"[3, 9, 6, 0, 5, 4, 2, 8, 7, 1]","{'MP8Rx75gtnRvDZgFk': 0, 'wkEeKMNEDceiNrmnZ': ...",unstable,"[[0, -1.0, -1.0, 0.0, -1.0, -1.0, -1.0, -1.0, ...","[[0, -87, -98, 62, -94, -86, -87, -84, -84, -9...","[[0, 3, 115], [1, 2, 82], [1, 3, 89], [1, 4, 7...","[[4, 8, 9, 6, 2, 7, 10, 5, 3], [4, 10, 1, 7, 3...","{'0': 'TFpfXZLxwfzEorfGD', '1': '66PYzPiXdpro8..."
3,27syMcotb279YaP2u-GALL37sjanTe42yDq,27syMcotb279YaP2u,GALL37sjanTe42yDq,2023-04-13 08:42:46.621,7,odd,remove,"[5, -1, 6, 4, 3, 0, 2]","{'TFpfXZLxwfzEorfGD': 0, '7Bu7DRa9Js5B2SkP2': ...","[5, -1, 6, 4, 3, 0, 2]",...,Stable matching found after Phase 1.,6,"[5, 3, 6, 1, -1, 0, 2]","{'sanGBbAp7tXpTMfHN': 5, 'SKMaq7QzxYcNddSoT': ...",unstable,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 42, 71, 62, 75, 83, 62], [45, 0, 65, 79, ...","[[0, 1, 34], [0, 2, 78], [0, 3, 74], [0, 5, 13...","[[6, 5, 3, 4, 7, 2], [4, 5, 7, 6, 3, 1], [2, 1...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."
4,27syMcotb279YaP2u-KQeEk3nih5NnRFRvn,27syMcotb279YaP2u,KQeEk3nih5NnRFRvn,2023-04-23 15:44:15.341,8,even,remove,"[6, 2, 1, 7, 5, 4, 0, 3]","{'TFpfXZLxwfzEorfGD': 0, 'SKMaq7QzxYcNddSoT': ...","[6, 2, 1, 7, 5, 4, 0, 3]",...,Stable matching found after Phase 2.,8,"[6, 7, 3, 2, 5, 4, 0, 1]","{'2y7tESxwQFwro8Jca': 4, 'sanGBbAp7tXpTMfHN': ...",unstable,"[[0, 0, 0, -1.0, -1.0, 0, 0, -1.0], [0, 0, 0, ...","[[0, 53, 53, -84, -88, 22, 51, -97], [75, 0, 8...","[[0, 1, 56], [0, 2, 119], [0, 5, 74], [0, 6, 8...","[[2, 3, 7, 6, 4, 5, 8], [8, 6, 3, 4, 7, 1, 5],...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."


### Remove another user (and attempt for all users) if Stable Matching isn't Found

In [17]:
# get all pairing instances
group_pairing_ids = affinities_history.group_pairing_id.unique()

# compute pairings and create DataFrame of results
remove_all_pairings_df = execute_sr_matching(
    group_pairing_ids, handle_odd_method="remove", remove_all=True, parallel=False
)

# print stable matching results
sr_stable_count = len(
    remove_all_pairings_df[remove_all_pairings_df["sr_stability"] == "stable"]
)
sr_unstable_count = len(
    remove_all_pairings_df[remove_all_pairings_df["sr_stability"] == "unstable"]
)
sr_total = sr_stable_count + sr_unstable_count

output_string = "Stable Roommates Matching Results\nStable: {} ({:1.2f}%)\nUnstable: {} ({:1.2f}%)\nTotal: {} (100.00%)\n\n"
print(
    output_string.format(
        sr_stable_count,
        100 * sr_stable_count / sr_total,
        sr_unstable_count,
        100 * sr_unstable_count / sr_total,
        sr_total,
    )
)

# print mwm results
mwm_stable_count = len(
    remove_all_pairings_df[remove_all_pairings_df["mwm_stability"] == "stable"]
)
mwm_unstable_count = len(
    remove_all_pairings_df[remove_all_pairings_df["mwm_stability"] == "unstable"]
)
mwm_none_count = len(
    remove_all_pairings_df[remove_all_pairings_df["mwm_stability"] == "NA"]
)
mwm_total = mwm_stable_count + mwm_unstable_count + mwm_none_count

output_string = "Maximum Weighted Matching Results\nStable: {} ({:1.2f}%)\nUnstable: {} ({:1.2f}%)\nNA (could not determine stability): {} ({:1.2f}%)\nTotal: {} (100.00%)"
print(
    output_string.format(
        mwm_stable_count,
        100 * mwm_stable_count / mwm_total,
        mwm_unstable_count,
        100 * mwm_unstable_count / mwm_total,
        mwm_none_count,
        100 * mwm_none_count / mwm_total,
        mwm_total,
    )
)
remove_all_pairings_df.head()

  0%|          | 0/1029 [00:00<?, ?it/s]

Combined matching not valid. [8, -1, 7, 4, 3, 9, -1, 2, 0, 5]
Combined matching not valid. [1, 0, 7, 5, -1, 3, -1, 2]
Combined matching not valid. [4, 9, -1, 11, 0, 6, 5, 10, -1, 1, 7, 3]
Combined matching not valid. [5, 3, -1, 1, -1, 0]
Combined matching not valid. [-1, -1, 6, 4, 3, 9, 2, 8, 7, 5]
Combined matching not valid. [7, 3, 4, 1, 2, -1, -1, 0]
Combined matching not valid. [2, -1, 0, 8, 6, -1, 4, 9, 3, 7]
Combined matching not valid. [2, 4, 0, 6, 1, -1, 3, -1]
Combined matching not valid. [7, -1, 5, 6, -1, 2, 3, 0]
Combined matching not valid. [8, 2, 1, 6, 7, 10, 3, 4, 0, -1, 5, 12, 11, -1]
Combined matching not valid. [7, 2, 1, 6, 10, -1, 3, 0, 13, -1, 4, 12, 11, 8]
Combined matching not valid. [1, 0, 9, 11, 6, 17, 4, 8, 7, 2, 12, 3, 10, -1, -1, 16, 15, 5]
Combined matching not valid. [16, 15, 9, 6, 5, 4, 3, 12, 13, 2, -1, 17, 7, 8, -1, 1, 0, 11]
Combined matching not valid. [2, -1, 0, -1]
Combined matching not valid. [7, 9, -1, 11, 5, 4, -1, 0, 13, 1, 12, 3, 10, 8]
Combined 

Unnamed: 0,group_pair_id,group_id,pairing_id,timestamp,user_count,user_parity,odd_handling,final_matching,final_affinities,sr_result,...,sr_debug,sr_cardinality,mwm_result,mwm_affinities,mwm_stability,affinity_matrix,directed_graph,undirected_graph,preference_matrix,index_user_mapping
0,27syMcotb279YaP2u-3G6iXFybGhQdsjDct,27syMcotb279YaP2u,3G6iXFybGhQdsjDct,2023-04-13 08:43:23.516,7,odd,remove,"[2, 5, 0, -1, 6, 1, 4]","{'TFpfXZLxwfzEorfGD': 0, 'SKMaq7QzxYcNddSoT': ...","[2, 5, 0, -1, 6, 1, 4]",...,Stable matching found after Phase 1.,6,"[6, 3, -1, 1, 5, 4, 0]","{'66PYzPiXdpro84gaR': 0, 'TFpfXZLxwfzEorfGD': ...",unstable,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 65, 33, 53, 85, 81, 82], [61, 0, 72, 75, ...","[[0, 1, 78], [0, 2, 48], [0, 3, 58], [0, 5, 12...","[[5, 7, 6, 2, 4, 3], [6, 4, 3, 5, 1, 7], [2, 6...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."
1,27syMcotb279YaP2u-44wNFcfPQZ9PJGKFA,27syMcotb279YaP2u,44wNFcfPQZ9PJGKFA,2023-05-02 01:52:25.623,10,even,remove,"[3, 7, 6, 0, 5, 4, 2, 1, 9, 8]","{'TFpfXZLxwfzEorfGD': 1, '66PYzPiXdpro84gaR': ...","[3, 7, 6, 0, 5, 4, 2, 1, 9, 8]",...,Stable matching found after Phase 1.,10,"[4, 7, 6, 5, 0, 3, 2, 1, 9, 8]","{'sanGBbAp7tXpTMfHN': 3, '7Bu7DRa9Js5B2SkP2': ...",unstable,"[[0, -1.0, -1.0, -1.0, 0, 0.66, -1.0, -1.0, -1...","[[0, -96, -96, -84, 61, 108, -98, -97, -94, -8...","[[0, 4, 62], [0, 5, 71], [1, 2, 82], [1, 3, 93...","[[6, 5, 4, 10, 9, 2, 3, 8, 7], [8, 9, 3, 1, 5,...","{'0': 'TFpfXZLxwfzEorfGD', '1': '66PYzPiXdpro8..."
2,27syMcotb279YaP2u-A9peJrKgSA7qng696,27syMcotb279YaP2u,A9peJrKgSA7qng696,2023-04-24 03:47:02.227,10,even,remove,"[7, 9, 6, 5, 8, 3, 2, 0, 4, 1]","{'TFpfXZLxwfzEorfGD': 1, '66PYzPiXdpro84gaR': ...","[7, 9, 6, 5, 8, 3, 2, 0, 4, 1]",...,Stable matching found after Phase 1.,10,"[3, 9, 6, 0, 5, 4, 2, 8, 7, 1]","{'MP8Rx75gtnRvDZgFk': 0, 'wkEeKMNEDceiNrmnZ': ...",unstable,"[[0, -1.0, -1.0, 0.0, -1.0, -1.0, -1.0, -1.0, ...","[[0, -87, -98, 62, -94, -86, -87, -84, -84, -9...","[[0, 3, 115], [1, 2, 82], [1, 3, 89], [1, 4, 7...","[[4, 8, 9, 6, 2, 7, 10, 5, 3], [4, 10, 1, 7, 3...","{'0': 'TFpfXZLxwfzEorfGD', '1': '66PYzPiXdpro8..."
3,27syMcotb279YaP2u-GALL37sjanTe42yDq,27syMcotb279YaP2u,GALL37sjanTe42yDq,2023-04-13 08:42:46.621,7,odd,remove,"[5, -1, 6, 4, 3, 0, 2]","{'TFpfXZLxwfzEorfGD': 0, '7Bu7DRa9Js5B2SkP2': ...","[5, -1, 6, 4, 3, 0, 2]",...,Stable matching found after Phase 1.,6,"[5, 3, 6, 1, -1, 0, 2]","{'sanGBbAp7tXpTMfHN': 5, 'SKMaq7QzxYcNddSoT': ...",unstable,"[[0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0],...","[[0, 42, 71, 62, 75, 83, 62], [45, 0, 65, 79, ...","[[0, 1, 34], [0, 2, 78], [0, 3, 74], [0, 5, 13...","[[6, 5, 3, 4, 7, 2], [4, 5, 7, 6, 3, 1], [2, 1...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."
4,27syMcotb279YaP2u-KQeEk3nih5NnRFRvn,27syMcotb279YaP2u,KQeEk3nih5NnRFRvn,2023-04-23 15:44:15.341,8,even,remove,"[6, 2, 1, 7, 5, 4, 0, 3]","{'TFpfXZLxwfzEorfGD': 0, 'SKMaq7QzxYcNddSoT': ...","[6, 2, 1, 7, 5, 4, 0, 3]",...,Stable matching found after Phase 2.,8,"[6, 7, 3, 2, 5, 4, 0, 1]","{'2y7tESxwQFwro8Jca': 4, 'sanGBbAp7tXpTMfHN': ...",unstable,"[[0, 0, 0, -1.0, -1.0, 0, 0, -1.0], [0, 0, 0, ...","[[0, 53, 53, -84, -88, 22, 51, -97], [75, 0, 8...","[[0, 1, 56], [0, 2, 119], [0, 5, 74], [0, 6, 8...","[[2, 3, 7, 6, 4, 5, 8], [8, 6, 3, 4, 7, 1, 5],...","{'0': 'TFpfXZLxwfzEorfGD', '1': 'SKMaq7QzxYcNd..."


## Analyzing Instability

### TODO
- why wasnt a person proposed to? --> see this

In [None]:
remove_all_pairings_df[["sr_stability", "sr_debug", "group_pair_id"]].groupby(
    ["sr_stability", "sr_debug"]
).count()

In [None]:
remove_all_pairings_df[
    ["sr_stability", "sr_debug", "user_parity", "group_pair_id"]
].groupby(["sr_stability", "sr_debug", "user_parity"]).count()

### Unstable Case 1--Failed at Phase 1: not everyone was proposed to.	

In [None]:
unstable_cases_1 = remove_all_pairings_df[
    remove_all_pairings_df["sr_debug"]
    == "Failed at Phase 1: not everyone was proposed to."
]
unstable_cases_1.head()

In [None]:
# compute proportion of return matching that was stable
unstable_cases_1["partial_stability_proportion"] = unstable_cases_1.apply(
    lambda x: 100.0 * (x["sr_cardinality"] / x["user_count"]), axis=1
)
unstable_cases_1["partial_stability_proportion"].describe()

### Unstable Case 2--Failed at Phase 2: could not find an all-or-nothing cycle len > 3.

In [None]:
unstable_cases_2 = remove_all_pairings_df[
    remove_all_pairings_df["sr_debug"]
    == "Failed at Phase 2: could not find an all-or-nothing cycle len > 3."
]
unstable_cases_2.head()

In [None]:
# compute proportion of return matching that was stable
unstable_cases_2["partial_stability_proportion"] = unstable_cases_2.apply(
    lambda x: 100.0 * (x["sr_cardinality"] / x["user_count"]), axis=1
)
unstable_cases_2["partial_stability_proportion"].describe()

### Unstable Case 3--Failed at Verification after Phase 2: matching computed, but not valid.

In [None]:
unstable_cases_3 = remove_all_pairings_df[
    remove_all_pairings_df["sr_debug"]
    == "Failed at Verification after Phase 2: matching computed, but not valid."
]
unstable_cases_3

In [None]:
# compute proportion of return matching that was stable
unstable_cases_3["partial_stability_proportion"] = unstable_cases_3.apply(
    lambda x: 100.0 * (x["sr_cardinality"] / x["user_count"]), axis=1
)
unstable_cases_3["partial_stability_proportion"].describe()

## Analyzing Matching Affinities

In [None]:
# get affinities
sr_affinties = [
    inner
    for outer in remove_one_pairings_df["sr_affinities"].tolist()
    for inner in outer.values()
]
sr_affinities_df = (
    pd.DataFrame({"affinities": sr_affinties})
    .groupby("affinities")["affinities"]
    .count()
    .reset_index(name="count")
)
sr_affinities_df["proportion"] = sr_affinities_df["count"] / sum(
    sr_affinities_df["count"]
)
sr_affinities_df["percentage"] = 100 * sr_affinities_df["proportion"]
sr_affinities_df["algorithm"] = "stable only"

mwm_affinities = [
    inner
    for outer in remove_one_pairings_df["mwm_affinities"].tolist()
    for inner in outer.values()
]
mwm_affinities_df = (
    pd.DataFrame({"affinities": mwm_affinities})
    .groupby("affinities")["affinities"]
    .count()
    .reset_index(name="count")
)
mwm_affinities_df["proportion"] = mwm_affinities_df["count"] / sum(
    mwm_affinities_df["count"]
)
mwm_affinities_df["percentage"] = 100 * mwm_affinities_df["proportion"]
mwm_affinities_df["algorithm"] = "mwm only"

sr_mwm_affinities = [
    inner
    for outer in remove_one_pairings_df["final_affinities"].tolist()
    for inner in outer.values()
]
sr_mwm_affinities_df = (
    pd.DataFrame({"affinities": sr_mwm_affinities})
    .groupby("affinities")["affinities"]
    .count()
    .reset_index(name="count")
)
sr_mwm_affinities_df["proportion"] = sr_mwm_affinities_df["count"] / sum(
    sr_mwm_affinities_df["count"]
)
sr_mwm_affinities_df["percentage"] = 100 * sr_mwm_affinities_df["proportion"]
sr_mwm_affinities_df["algorithm"] = "stable + mwm"

In [None]:
fig = plt.subplots(figsize=(20, 8))
ax = sns.barplot(
    data=pd.concat([mwm_affinities_df, sr_affinities_df, sr_mwm_affinities_df]),
    x="affinities",
    y="percentage",
    hue="algorithm",
)

ax.set_title("Affinity Given to Matched Partner")
ax.set_xlabel("Affinity Value")
ax.set_ylabel("Percentage")
ax.set_ylim(0, 41)
ax.set_yticks(range(0, 41, 2))

for p in ax.patches:
    height = p.get_height()
    if math.isnan(height):
        height = 0
        ax.text(p.get_x() + p.get_width() / 2, height + 0.5, "", ha="center")
    else:
        ax.text(
            p.get_x() + p.get_width() / 2,
            height + 1,
            "{:1.2f}%".format(height),
            ha="center",
        )

## Analyze Specific Pairing Instances

In [None]:
prefs = [
    [11, 2, 7, 4, 10, 12, 9, 5, 8, 6, 3],
    [4, 1, 7, 11, 9, 12, 5, 3, 6, 8, 10],
    [8, 1, 7, 5, 9, 6, 4, 2, 12, 11, 10],
    [7, 1, 2, 11, 6, 12, 8, 5, 10, 9, 3],
    [1, 2, 9, 4, 7, 6, 11, 12, 3, 10, 8],
    [12, 10, 4, 1, 2, 7, 9, 5, 11, 8, 3],
    [4, 2, 5, 11, 12, 1, 8, 9, 3, 10, 6],
    [3, 10, 12, 5, 4, 11, 6, 9, 1, 7, 2],
    [11, 10, 5, 12, 1, 3, 4, 7, 6, 8, 2],
    [2, 4, 6, 1, 9, 5, 12, 8, 3, 7, 11],
    [9, 10, 1, 4, 3, 7, 2, 12, 6, 8, 5],
    [11, 7, 10, 1, 6, 9, 8, 3, 2, 4, 5],
]

preferences_dict = {str(x + 1): [str(y) for y in prefs[x]] for x in range(len(prefs))}
ranks = {
    index: dict(zip(value, range(len(value))))
    for (index, value) in preferences_dict.items()
}

In [None]:
tester_group_pair_id = "2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD"
temp_affinities = affinities_history[
    affinities_history["group_pairing_id"] == tester_group_pair_id
]
temp_affinities

In [None]:
temp_prev_matching = get_previous_matching(tester_group_pair_id)
temp_prev_matching

In [None]:
output_dict = {}
for helper, match in temp_prev_matching.items():
    curr_affinity = temp_affinities[
        (temp_affinities["helperId"] == helper) & (temp_affinities["helpeeId"] == match)
    ]["value"].tolist()

    if len(curr_affinity) > 0:
        output_dict[helper] = curr_affinity[0]
    else:
        print("no affinity")
        output_dict[helper] = 0

output_dict

In [None]:
# create affinity matrix and index-user dict
curr_affinities = deepcopy(
    affinities_history[affinities_history["group_pairing_id"] == tester_group_pair_id]
)
curr_tasks = deepcopy(
    tasks_history[tasks_history["group_pairing_id"] == tester_group_pair_id]
)
curr_affinity_matrix, curr_index_user_mapping = create_affinity_matrix(
    curr_affinities[["helperId", "helpeeId", "value"]], curr_tasks
)

In [None]:
temp = sr_matching_pair_research("2rFoGTfRa9LFdpQNA-2EPbA6HkydPTdxCWD")
temp

In [None]:
temp["final_matching"]

In [None]:
naming_dict = {
    k: users[users._id == v]["profile"].tolist()[0]["fullName"]
    for k, v in temp["index_user_mapping"].items()
}
naming_dict["-1"] = ""
naming_dict

In [None]:
{
    naming_dict[str(index)]: naming_dict[str(value)]
    for index, value in enumerate(temp["final_matching"])
}

In [None]:
[
    (pairing["firstUserName"], pairing["secondUserName"])
    for pairing in pairings[
        pairings["group_pairing_id"] == "9mdkMmj4pY8Q2TwqF-KPcJQn2ximvZmjEhf"
    ]["pairings"].tolist()[0]
]

In [None]:
[
    (pairing["firstUserName"], pairing["secondUserName"])
    for pairing in pairings[
        pairings["group_pairing_id"] == "9mdkMmj4pY8Q2TwqF-7DyTNMHyXyHsSPTBY"
    ]["pairings"].tolist()[0]
]