In [None]:
import distributions
import fair_secretary as fs
import unfair_secretary as ufs 
import synthetic_data as syn_data
import numpy as np
import secretary_eval as se
from utils import SecretaryInstance, GetThreshold, ComputeThreshold
from secretary_data import GetSecretaryInputBank, GetSecretaryInputPokec, GetSecretaryInputUfrgs

from tqdm import tqdm
import matplotlib.pyplot as plt
import pandas as pd
import pickle 


def SecretaryExperiment(sizes, prob, num_rep, mode, data_mode):
    """Run one secretary algorithm repeatedly on a dataset and evaluate it.

    The instance is built (or loaded) once, then shuffled in place before
    each of the ``num_rep`` runs of the selected algorithm.

    Args:
        sizes: One entry per color; ``len(sizes)`` is used as the number of
            colors and ``sum(sizes)`` scales the thresholds.
        prob: Per-color probabilities, passed to ``GetThreshold``, to the
            synthetic data generator and to the single-color algorithm.
        num_rep: Number of shuffle-and-solve repetitions.
        mode: ``"F"`` (fair), ``"U"`` (unfair SA) or ``"S"`` (unfair SCSA).
        data_mode: ``"synth"``, ``"bank"``, ``"pokec"`` or ``"ufrgs"``.

    Returns:
        Whatever ``se.Eval(instance, answer, num_colors)`` returns.

    Raises:
        ValueError: If ``mode`` or ``data_mode`` is not one of the
            supported values.
    """
    # Fail fast: otherwise an unknown mode would silently evaluate an empty
    # answer list and an unknown data_mode would leave `instance` unbound.
    if mode not in ("F", "U", "S"):
        raise ValueError(
            "Unknown mode %r; expected 'F', 'U' or 'S'" % (mode,))
    if data_mode not in ("synth", "bank", "pokec", "ufrgs"):
        raise ValueError(
            "Unknown data_mode %r; expected 'synth', 'bank', 'pokec' "
            "or 'ufrgs'" % (data_mode,))

    if data_mode == "synth":
        instance = syn_data.GetSecretaryInput(sizes, prob)
    elif data_mode == "bank":
        instance = GetSecretaryInputBank(100000)
    elif data_mode == "pokec":
        # Load the instance from file (faster). To create a new instance
        # instead (slower), use: instance = GetSecretaryInputPokec(40000)
        # NOTE: pickle can execute arbitrary code while loading; only use
        # this with a locally generated, trusted instance file.
        with open('data/pokec_instance.dat', 'rb') as instance_file:
            instance = pickle.load(instance_file)
    else:  # data_mode == "ufrgs"
        instance = GetSecretaryInputUfrgs(100000)

    num_colors = len(sizes)
    total_size = sum(sizes)
    threshold = [single_prob * total_size for single_prob in GetThreshold(prob)]

    answer = []

    for _ in tqdm(range(num_rep)):
        # Each repetition sees the same elements in a fresh random order.
        np.random.shuffle(instance)

        if mode == "F":
            # Fair
            answer.append(fs.ComputeSolution(instance, num_colors, threshold))
        elif mode == "U":
            # Unfair SA
            answer.append(ufs.ComputeSolution(instance))
        else:
            # Unfair SCSA
            answer.append(ufs.ComputeSolutionSingleColor(instance, prob))

    return se.Eval(instance, answer, num_colors)


def PlotSecretary(num_rep, sizes, prob, file_name, data_mode):
    """Run all three algorithms on a dataset and save a grouped bar plot.

    For each mode in ``'F'``, ``'U'``, ``'S'`` this calls
    ``SecretaryExperiment`` and stores the second and third elements of its
    result under ``"<mode>-Pick"`` and ``"<mode>-Max"``. Together with the
    ``"Input"`` row (``sizes``) these become the rows of the plotted
    DataFrame, with one column per color.

    Args:
        num_rep: Number of repetitions passed to each experiment.
        sizes: One entry per color; plotted as the ``"Input"`` row.
        prob: Per-color probabilities passed to each experiment.
        file_name: Path the figure is saved to.
        data_mode: ``'synth'``, ``'bank'``, ``'pokec'`` or ``'ufrgs'``;
            selects the dataset as well as the column labels and bar colors.

    Raises:
        ValueError: If ``data_mode`` is not one of the supported values.
    """
    num_colors = len(sizes)

    # Column labels and bar colors for each supported dataset.
    plot_styles = {
        'synth': (
            ['Color ' + str(i + 1) for i in range(num_colors)],
            ['cornflowerblue', 'salmon', 'wheat', 'dimgrey'],
        ),
        'bank': (
            ['Before 30', '31-40', '41-50', '51-60', 'After 60'],
            ['cornflowerblue', 'salmon', 'wheat', 'dimgrey', 'rebeccapurple'],
        ),
        'pokec': (
            ['Under', 'Normal', 'Over', 'Obese 1', 'Obese 2'],
            ['cornflowerblue', 'salmon', 'wheat', 'dimgrey', 'rebeccapurple'],
        ),
        'ufrgs': (
            ['Female', 'Male'],
            ['cornflowerblue', 'salmon'],
        ),
    }

    # Validate before running the (long) experiments rather than failing
    # afterwards on an unbound axes object.
    if data_mode not in plot_styles:
        raise ValueError(
            "Unknown data_mode %r; expected one of %s"
            % (data_mode, sorted(plot_styles)))
    columns, bar_colors = plot_styles[data_mode]

    data = {"Input": sizes}

    # Generate data for plot
    for mode in ['F', 'U', 'S']:
        _, num_answer, correct_answer = SecretaryExperiment(
            sizes, prob, num_rep, mode, data_mode)

        # Add results to data
        data[mode + "-Pick"] = num_answer
        data[mode + "-Max"] = correct_answer

    df = pd.DataFrame.from_dict(data, orient='index', columns=columns)

    # Draw onto an explicitly created axes: without `ax=`, pandas opens a
    # new figure of its own, so the requested figsize would be ignored and
    # an empty figure would be left behind.
    fig, ax = plt.subplots(figsize=(15, 8))
    df.plot.bar(ax=ax, color=bar_colors)

    ax.set_ylabel('Number of Occurrences')
    fig.savefig(file_name, bbox_inches='tight', dpi=400)
    plt.show()

# How many times each experiment is repeated
num_rep = 20000

# Synthetic experiments: four colors, run with equal p and with general p
sizes = [10, 100, 1000, 10000]
equal_prob = [0.25] * 4
general_prob = [0.3, 0.25, 0.25, 0.2]

# Maximization dataset experiments (bank, pokec): five colors, equal p
max_prob = [0.2] * 5
sizes_bank = [7383, 16385, 10240, 6270, 910]
sizes_pokec = [8278, 26629, 4409, 382, 302]

# Research extension dataset experiment (ufrgs): two colors, equal p
ufrgs_prob = [0.5] * 2
sizes_ufrgs = [20968, 22335]

# Uncomment the experiment(s) to run
# PlotSecretary(num_rep, sizes, equal_prob, 'plots/Secretaryplot_equal.png', 'synth')
# PlotSecretary(num_rep, sizes, general_prob, 'plots/Secretaryplot_general.png', 'synth')
# PlotSecretary(num_rep, sizes_bank, max_prob, 'plots/Secretaryplot_bank.png', 'bank')
PlotSecretary(
    num_rep,
    sizes_pokec,
    max_prob,
    'plots/Secretaryplot_pokec.png',
    'pokec',
)
# PlotSecretary(num_rep, sizes_ufrgs, ufrgs_prob, 'plots/Secretaryplot_ufrgs.png', 'ufrgs')

Nodes and ids constructed.
Degrees constructed.
5 [129266, 363597, 91263, 10334, 4861]
  1%|          | 165/20000 [01:11<2:22:59,  2.31it/s]


KeyboardInterrupt: 

In [None]:
# INDIVIDUAL EXPERIMENTS

# _____________________ FAIR ________________________
# Synthetic dataset, equal p values (F-Pick / F-Max equal p values)
# SecretaryExperiment(sizes, equal_prob, num_rep, mode="F", data_mode="synth")

# Synthetic dataset, general p values (F-Pick / F-Max general p values)
# SecretaryExperiment(sizes, general_prob, num_rep, mode="F", data_mode="synth")

# _____________________ UNFAIR SA ____________________ 
# Synthetic dataset, equal p values (U-Pick / U-Max equal p values)
# SecretaryExperiment(sizes, equal_prob, num_rep, mode="U", data_mode="synth")

# Synthetic dataset, general p values (U-Pick / U-Max general p values)
# SecretaryExperiment(sizes, general_prob, num_rep, mode="U", data_mode="synth")

# _____________________ UNFAIR SCSA _____________________
# Synthetic dataset, equal p values (S-Pick / S-Max equal p values)
# SecretaryExperiment(sizes, equal_prob, num_rep, mode="S", data_mode="synth")

# Synthetic dataset, general p values (S-Pick / S-Max general p values)
# SecretaryExperiment(sizes, general_prob, num_rep, mode="S", data_mode="synth")

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=aa043478-e839-47cf-996f-1ea34897d0a7' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>