# Implementing a Baseline Model

In [161]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random

Goal: create a baseline date-matching model to compare our optimization approach against

In [86]:
# preferences of gender 0 for individuals of gender 1 (mean-imputed)
preferences_0 = pd.read_csv("preferences_0.csv")
# preferences of gender 1 for individuals of gender 0 (mean-imputed)
preferences_1 = pd.read_csv("preferences_1.csv")

In [87]:
# creating arrays of iids for people in each gender
iids_gender0 = np.array(preferences_0["iid"])
iids_gender1 = np.array(preferences_1["iid"])

Idea: have baseline be random assignment of individuals from opposite genders

In [107]:
def random_match(iids_gender0, iids_gender1, seed):
    """Inputs:
       - iids_gender0: list of iids for gender 0
       - iids_gender1: list of iids for gender 1
       - seed: random seed for reproducible random results
       Outputs:
       - nxm df of binary variables.
         n = number of iids for gender 0
         m = number of iids for gender 1
         entry z_ij = 1 if person i is matched with person j, 0 otherwise
    """
    # set seed
    np.random.seed(seed)
    
    # note the gender with more people
    gender0_count = len(iids_gender0)
    gender1_count = len(iids_gender1)
    # make that gender the one we loop over
    if gender0_count >= gender1_count:
        iids_a = iids_gender0
        iids_b = iids_gender1
    else:
        iids_a = iids_gender1
        iids_b = iids_gender0
        
    # initialize Z matrix
    Z = np.zeros((gender0_count, gender1_count))
    Z = pd.DataFrame(Z)
    Z.index = iids_gender0
    Z.columns = iids_gender1.astype(int)
    # note the people from the other gender who are available
    available = iids_b
    # for each person in the looping gender,
    for person in iids_a:
        # randomly match to someone of the other gender who is available
        match = np.random.choice(available)
        # make that entry in the Z matrix 1
        Z.at[person, match] = 1
        # remove that person from the list of available people in the other gender
        np.delete(available, np.where(available==match))
    # return the binary matrix
    return Z

In [108]:
baseline_matches = random_match(iids_gender0, iids_gender1, 42)

Computing the Objective:

In [158]:
def compute_objective(match_df, rho):
    """Inputs:
       - rho: scalar, penalty term for unmatched single
       - nxm df of binary variables.
         n = number of iids for gender 0
         m = number of iids for gender 1
         entry z_ij = 1 if person i is matched with person j, 0 otherwise
       Outputs:
       - objective value of this matching scheme (scalar)"""
    # convert match_df to matrix
    Z = match_df.values
    # recall preference data
    pref0 = preferences_0
    pref1 = preferences_1
    # store gender iid data
    iids_gender0 = preferences_0["iid"]
    iids_gender1 = preferences_1["iid"]
    # calculate whether anyone is unmatched.
    # Given how the baseline matches people (as many as possible), 
    # num_unmatched can be calculated like this:
    greater_n = max(Z.shape)
    smaller_n = min(Z.shape)
    num_unmatched = greater_n - smaller_n
    
    # sum everything up!
    objective = 0
    for i in iids_gender0:
        for j in iids_gender1:
            z_ij = match_df.loc[i,j]
            l_ij = float(pref0[pref0["iid"]==i][str(j)+".0"])
            l_ji = float(pref1[pref1["iid"]==j][str(i)+".0"])
            term = z_ij*(l_ij + l_ji)
            objective -= term
    objective += rho*num_unmatched
    return objective

# Beat This vvv

In [160]:
print("baseline objective value:")
compute_objective(baseline_matches,0.5)

baseline objective value:


2.1727820975251566