In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import os
import sys
from pathlib import Path
import json

import numpy as np
import pandas as pd

home = str(Path.home())

RPLIB_DATA_PREFIX = os.environ.get("RPLIB_DATA_PREFIX")

if RPLIB_DATA_PREFIX is None: # Set default
    RPLIB_DATA_PREFIX=f'{home}/RPLib/data'
    
try:
    import pyrankability as pyrankability
    import pyrplib as pyrplib
except:
    print('Looking for packages in home directory')
    sys.path.insert(0,f"{home}") # Add the home directory relevant paths to the PYTHONPATH
    sys.path.insert(0,f"{home}/ranking_toolbox") # Add the home directory relevant paths to the PYTHONPATH
    sys.path.insert(0,f"{home}/RPLib") # Add the home directory relevant paths to the PYTHONPATH
    import pyrankability
    import pyrplib

import pandas as pd

Looking for packages in home directory


In [4]:
def banded_matrix(N):
  arr = np.zeros((N,N))
  for d in range(-N, N):
    arr += np.diag(np.repeat(abs(d), N - abs(d)), d)
  return np.matrix(arr)

def weighted_matrix(N):
  return np.matrix([[1 / i for _ in range(1, N + 1)] for i in range(1, N + 1)])

def beta(Xstar_r_r, normalize = True):
  Xstar_r_r = Xstar_r_r.copy()
  #Xstar_r_r.values[:,:] = np.ceil(Xstar_r_r.values)
  Xstar_r_r.values[:,:] = ((Xstar_r_r.values > 0) & (Xstar_r_r.values < 1)).astype(int)
  n = len(Xstar_r_r)
  worst_case_Xstar_r_r = np.ones(Xstar_r_r.shape)
  def _beta(Xstar_r_r,n):
    return (Xstar_r_r * banded_matrix(n) * weighted_matrix(n)).sum().sum()
  if normalize == True:
    return _beta(Xstar_r_r,n)/_beta(worst_case_Xstar_r_r,n)
  else:
    return _beta(Xstar_r_r,n)

In [5]:
data = pyrplib.data.Data(RPLIB_DATA_PREFIX)

In [6]:
input_dir = f"lop_cards" 

from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir(input_dir) if isfile(join(input_dir, f))]

info_df = pd.DataFrame(columns=["Percentage of Noise",'Dataset ID']).set_index('Dataset ID')

cards_df = data.lop_cards_df.set_index('Dataset ID')
processed_df = data.processed_datasets_df.set_index('Dataset ID')

unprocessed = None
cards = []
for file in onlyfiles:
    card = pyrplib.card.LOP.from_json(f"{input_dir}/{file}")
    cards.append(card)
    
    processed_dataset_id = card.source_dataset_id
    processed_dataset = processed_df.loc[processed_dataset_id]
    collection = processed_dataset['Collection']
    
    processed = pyrplib.dataset.ProcessedD.from_json(f"{RPLIB_DATA_PREFIX}/{collection}/{processed_dataset_id}.json").load()
    if unprocessed is None:
        unprocessed = pyrplib.dataset.load_unprocessed(processed.source_dataset_id,data.datasets_df)
    item = unprocessed.data().loc[processed_dataset['Index']]
    info_df = info_df.append(pd.Series([item['percentage']],index=info_df.columns,name=card.dataset_id))

In [7]:
info_df['Percentage of Noise'] = info_df['Percentage of Noise'].astype(int)

In [8]:
for card in cards:
    Xstar = pd.DataFrame(card.centroid_x)
    perm = card.solutions[0] # select one permutation
    Xstar_r_r = Xstar.iloc[perm,perm]
    info_df.loc[card.dataset_id,'number_of_solutions'] = len(card.solutions)
    info_df.loc[card.dataset_id,'beta'] = beta(Xstar_r_r)
    info_df.loc[card.dataset_id,'k'] = card.obj #beta(Xstar_r_r)
    info_df.loc[card.dataset_id,'num_solutions'] = len(card.solutions) #beta(Xstar_r_r)
#    info_df.loc[card.dataset_id,'dist_between_two'] = len(card.solutions) #beta(Xstar_r_r)

In [9]:
info_df

Unnamed: 0_level_0,Percentage of Noise,number_of_solutions,beta,k,num_solutions
Dataset ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
436,20,2.0,2.673264e-06,800.0,2.0
445,20,6.0,1.336632e-05,784.0,6.0
450,20,54.0,2.097235e-05,781.0,54.0
368,10,2.0,7.985075e-07,813.0,2.0
452,20,2.0,3.818949e-06,778.0,2.0
...,...,...,...,...,...
426,20,6.0,7.847459e-06,770.0,6.0
405,60,2.0,4.703337e-07,692.0,2.0
618,50,5.0,4.439239e-06,707.0,5.0
491,30,4.0,1.609300e-06,729.0,4.0


In [10]:
info_df.corr()

Unnamed: 0,Percentage of Noise,number_of_solutions,beta,k,num_solutions
Percentage of Noise,1.0,0.105755,0.353758,-0.946183,0.105755
number_of_solutions,0.105755,1.0,0.28698,-0.129338,1.0
beta,0.353758,0.28698,1.0,-0.350188,0.28698
k,-0.946183,-0.129338,-0.350188,1.0,-0.129338
num_solutions,0.105755,1.0,0.28698,-0.129338,1.0


In [36]:
import altair as alt

means = alt.Chart(info_df).mark_circle(size=60).encode(
    x='Percentage of Noise',
    y=alt.Y('mean(beta)')#, scale=alt.Scale(domain=[0.00206, 0.00210]))
)
means