In [1]:
import pandas as pd
import numpy as np
np.random.seed(0)
from scipy.stats import beta as sp_beta
from collections import Counter
from math import factorial

import matplotlib.pyplot as plt
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases removed the old names. Use whichever spelling this installation provides;
# if neither is listed, fall back to the original name so the original error surfaces.
_DARKGRID_STYLE = next(
    (
        style_name
        for style_name in ('seaborn-darkgrid', 'seaborn-v0_8-darkgrid')
        if style_name in plt.style.available
    ),
    'seaborn-darkgrid',
)
plt.style.use(_DARKGRID_STYLE)

# Notebook-wide figure defaults.
plt.rcParams['figure.figsize'] = 20, 13
plt.rcParams['font.size'] = 20
plt.rcParams['lines.linewidth'] = 2
import seaborn as sns

import sys
sys.path.insert(0, '../../..')
import assignment
import leximin_assignment
import matrix_gen
import kde_utils

from tqdm import tqdm

import warnings

In [2]:
# Columns whose values are pooled to fit the KDE below.
types = ['ES', 'TH', 'RRH', 'PREV']

# Read the CSV, keep only rows whose 'Real' value is not 2, and remove the 'PSH' column.
df = (
    pd.read_csv('../../../data/data.csv', index_col=0)
    .loc[lambda frame: frame['Real'] != 2]
    .drop(columns='PSH')
)

# Pool every value of the selected columns into one flat array.
data = df[types].to_numpy().flatten()

# Fit the KDE helper on the logit of the pooled values, log(p) - log(1 - p).
# NOTE(review): assumes every value lies strictly inside (0, 1); a 0 or 1 would
# produce +/-inf here - TODO confirm against the data file.
logit_data = np.log(data) - np.log(1 - data)
kde = kde_utils.KDEHelper(logit_data)

In [None]:
# --- Experiment configuration ---------------------------------------------
# NOTE(review): N and N_INTVS are handed to MatrixGenerator, presumably as the
# number of rows and columns of each generated matrix - TODO confirm.
N = 100
N_INTVS = 5
# Number of *successful* experiments collected for every entry of ORDER_MODE_LEVELS.
N_EXPERIMENTS = 500
# Values passed as `n_rows_to_sort` to the matrix generator, one sweep per value.
ORDER_MODE_LEVELS = [10, 20, 30, 40, 60, 80, 100]
# Factor applied to the matrix handed to the leximin solver.
# NOTE(review): the reason for the scaling is not visible in this notebook - TODO confirm.
LEXIMIN_COST_SCALE = 1000

# Every column gets the same capacity, N // N_INTVS.
CAPACITIES = np.ones((N_INTVS,), dtype=int) * (N // N_INTVS)

matrix_generator = matrix_gen.MatrixGenerator(
    np.random.uniform, (0, 1), N, N_INTVS
)


def _leximin_cost(increase_matrix, cost_matrix):
    """Solve the leximin assignment on `increase_matrix` and price it with `cost_matrix`.

    Returns the value of `get_cost`, or None when `solve()` signals failure by
    returning False.
    """
    assigner = leximin_assignment.LeximinAssignmentHelperV3(
        increase_matrix * LEXIMIN_COST_SCALE, CAPACITIES
    )
    # The solver's warnings are deliberately silenced, as in the original cell.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        assignments = assigner.solve()

    if assignments is False:
        return None
    return assigner.get_cost(assignments, cost_matrix=cost_matrix)


def _efficient_cost(increase_matrix, cost_matrix):
    """Solve the IP assignment on `increase_matrix` and price it with `cost_matrix`."""
    assigner = assignment.AssignmentHelperV2(increase_matrix, CAPACITIES)
    assignments = assigner.ip_solve()
    return assigner.get_cost(assignments, cost_matrix=cost_matrix)


def _most_common_order_count(matrix):
    """Return how many rows of `matrix` share the most frequent column ordering."""
    row_orders = map(tuple, np.argsort(matrix, axis=1))
    return Counter(row_orders).most_common(1)[0][1]


# Rows are accumulated in a plain list and turned into a DataFrame once, instead of
# enlarging the frame one `.loc` assignment at a time.
pof_records = []

try:
    for level in ORDER_MODE_LEVELS:
        success_count = 0
        n_skipped = 0

        with tqdm(total=N_EXPERIMENTS, desc=f'n_rows_to_sort={level}') as progress:
            while success_count < N_EXPERIMENTS:
                (matrix, sorted_matrix), (increase_matrix, sorted_increase_matrix) = (
                    matrix_generator.partially_sorted_matrix_pair(n_rows_to_sort=level)
                )
                # (cost matrix, matrix fed to the solvers) for the original and
                # the row-sorted variant, in that order.
                variants = [
                    (matrix, increase_matrix),
                    (sorted_matrix, sorted_increase_matrix),
                ]

                # Leximin first, for both variants; abandon the draw as soon as
                # one solve fails so no further solver work is spent on it.
                lex_costs = []
                for cost_matrix, solver_matrix in variants:
                    lex_cost = _leximin_cost(solver_matrix, cost_matrix)
                    if lex_cost is None:
                        break
                    lex_costs.append(lex_cost)

                if len(lex_costs) < len(variants):
                    # Known solver failure: skip this draw, but keep a visible
                    # count because skipped draws are excluded from the sample.
                    n_skipped += 1
                    progress.set_postfix(skipped=n_skipped)
                    continue

                # Efficient (IP) assignment for both variants.
                eff_costs = [
                    _efficient_cost(solver_matrix, cost_matrix)
                    for cost_matrix, solver_matrix in variants
                ]

                # One record per variant: PoF = leximin cost / efficient cost, plus
                # the count of rows sharing the most common column ordering.
                for (cost_matrix, _), lex_cost, eff_cost in zip(variants, lex_costs, eff_costs):
                    pof_records.append(
                        [lex_cost / eff_cost, _most_common_order_count(cost_matrix)]
                    )

                success_count += 1
                progress.update(1)
finally:
    # Built in `finally` so that interrupting the (long) sweep still leaves the
    # results gathered so far available in `change_order_pof_df`.
    change_order_pof_df = pd.DataFrame(
        pof_records, columns=['PoF', 'Order proportion'], dtype=float
    )

10


In [5]:
# Show the collected results: each successful experiment contributes two rows,
# first for the original matrix and then for the row-sorted one.
change_order_pof_df

Unnamed: 0,PoF,Order proportion
0,1.0,4.0
1,1.0,14.0
2,1.0,4.0
3,1.002277,11.0
4,1.007693,4.0
5,1.00262,11.0
6,1.0,4.0
7,1.000999,10.0
8,1.0,4.0
9,1.001475,10.0


In [6]:
def get_range_str(number, levels=None):
    """Return the label of the bucket that `number` falls into.

    Buckets are delimited by consecutive entries of `levels` and are half-open:
    the label '{lower} - {upper}' is returned for the first `upper` in `levels`
    that is strictly greater than `number`, so a value equal to a level belongs
    to the bucket that starts at that level. The first bucket is labelled with
    a lower bound of 1.

    Parameters
    ----------
    number : int or float
        Value to classify.
    levels : sequence of numbers, optional
        Bucket boundaries, scanned in the given order. Defaults to the
        notebook-level ORDER_MODE_LEVELS.

    Returns
    -------
    str
        '{lower} - {upper}' for the matching bucket, or 'All sorted' when
        `number` is not smaller than any entry of `levels`.
    """
    if levels is None:
        levels = ORDER_MODE_LEVELS

    lower = 1
    for upper in levels:
        if number < upper:
            return f'{lower} - {upper}'
        lower = upper

    return 'All sorted'

In [7]:
# Label every row with the bucket returned by get_range_str for its
# 'Order proportion' value, then display the augmented frame.
order_counts = change_order_pof_df['Order proportion']
change_order_pof_df['Range'] = order_counts.map(get_range_str)
change_order_pof_df

Unnamed: 0,PoF,Order proportion,Range
0,1.0,4.0,1 - 10
1,1.0,14.0,10 - 20
2,1.0,4.0,1 - 10
3,1.002277,11.0,10 - 20
4,1.007693,4.0,1 - 10
5,1.00262,11.0,10 - 20
6,1.0,4.0,1 - 10
7,1.000999,10.0,10 - 20
8,1.0,4.0,1 - 10
9,1.001475,10.0,10 - 20
