In [112]:
import pandas as pd 
import numpy as np

# 1. The example with place of work 

Let's generate some fake data about salary, duration of vacation in days and time spent to get to work

In [133]:
data = np.array([[12000,18,60],
        [15000,14,30],
         [10000,7,15],
         [30000,21,180],
         [5000,7,10],
         [9000,17,5],
         [30000,16,8],
         [16000,8,5]])

Let's now cast everything to a pandas data frame for the sake of simplicity

In [134]:
df = pd.DataFrame(data,columns=['Salary','Vacation','Time'],
             index=['A','B','C','D','E','F','G','H'])

In [135]:
df

Unnamed: 0,Salary,Vacation,Time
A,12000,18,60
B,15000,14,30
C,10000,7,15
D,30000,21,180
E,5000,7,10
F,9000,17,5
G,30000,16,8
H,16000,8,5


dict_opt contains names of columns and a way we want to optimize with respect to this columns

In [136]:
dict_opt = {'Salary' : 'max','Vacation' : 'max',
           'Time' : 'min'}

The function pareto_comparison compares the variants between each other and returns the list of those,
that are dominated by others.

In [137]:
def pareto_comparison(df,dict_opt):
    to_delete = set()
    for i in df.iterrows():
        for z in df.iterrows():
            first = dict(zip(list(i[1].index),i[1].values))
            second = dict(zip(list(z[1].index),z[1].values))
            holder = []
            for column,mode in dict_opt.items():
                if mode=='max':
                    if first[column]>second[column]:
                        holder.append('strong')
                    else:
                        holder.append('broken')
                else:
                    if first[column]<second[column]:
                        holder.append('strong')
                    else:
                        holder.append('broken')
            if 'strong' in holder and not 'broken' in holder:
                to_delete.add(z[0])
    print("Companies that shouldn't be included due to Pareto algorithm : {}".format(to_delete))
    return to_delete

In [138]:
to_delete = pareto_comparison(df,dict_opt)

Companies that shouldn't be included due to Pareto algorithm : {'E', 'C', 'B'}


In [139]:
df = df[~df.index.isin(to_delete)]

In [140]:
df

Unnamed: 0,Salary,Vacation,Time
A,12000,18,60
D,30000,21,180
F,9000,17,5
G,30000,16,8
H,16000,8,5


The function ideal_dot implements the ideal dot algorithm for finding the best variant or set of variants. Also applying additional weights is possible.

In [141]:
def ideal_dot(df,weights_dict=None):
    if not weights_dict:
        weights_dict = dict(zip(list(df.columns),np.repeat(1,len(df.columns))))
    for i in df.columns:
        df[i] = df[i].apply(lambda x: weights_dict[i]*(df[i].max()-x)/(df[i].max()-df[i].min()))
    min_list = []
    max_list = []
    for i in df.index:
        min_list.append(df.loc[i].mean())
        max_list.append(df.loc[i].max())
    df['ideal_dot1'] = np.array(min_list).reshape(-1,1)
    df['ideal_dot2'] = np.array(max_list).reshape(-1,1)
    ideal_variant = df[(df['ideal_dot1']==df['ideal_dot1'].min()) & (df['ideal_dot2']==df['ideal_dot2'].min())]
    if ideal_variant.values.size>0:
        return ideal_variant.index[0]
    else:
        ideal_variants = df[(df['ideal_dot1']==df['ideal_dot1'].min()) | (df['ideal_dot2']==df['ideal_dot2'].min())]
        return list(ideal_variants.index)

In [142]:
ideal_variant = ideal_dot(df.copy())

In [143]:
print('Ideal variant is a company : {0}'.format(ideal_variant))

Ideal variant is a company : D


# 2. The example with choosing cellphones

In [152]:
phones = np.array([[5,4,5,1,3],
        [5,3,5,1,3],
        [2,5,2,1,2],
        [1,5,5,3,5],
        [5,2,1,3,4],
        [1,5,2,3,5]])

In [154]:
columns = ['Cost-effectiveness','Reliability','Economical use','Resistance','Design']

In [155]:
df = pd.DataFrame(phones,columns=columns,
                 index=['Nokia','Xiaomi','Samsung','Google Pixel','Honor','Iphone'])

In [156]:
df

Unnamed: 0,Cost-effectiveness,Reliability,Economical use,Resistance,Design
Nokia,5,4,5,1,3
Xiaomi,5,3,5,1,3
Samsung,2,5,2,1,2
Google Pixel,1,5,5,3,5
Honor,5,2,1,3,4
Iphone,1,5,2,3,5


In [157]:
dict_opt = dict(zip(columns,np.repeat('max',len(columns))))

In [158]:
to_delete = pareto_comparison(df,dict_opt)

Companies that shouldn't be included due to Pareto algorithm : set()


In [159]:
ideal_variant = ideal_dot(df.copy())

In [160]:
print('Ideal variant is a phone : {0}'.format(ideal_variant))

Ideal variant is a phone : Google Pixel
