In [1]:
import pandas as pd

In [122]:
# Raw decision matrix: one entry per criterion (K1..K5), each listing that
# criterion's value for the ten alternatives, in order.
input_data_dict = dict(
    K1=[10, 5, 3, 2, 1, 2, 3, 4, 6, 5],
    K2=['good', 'good', 'medium', 'medium', 'very good', 'good', 'bad', 'medium', 'medium', 'medium'],
    K3=['yes', 'no', 'no', 'no', 'yes', 'no', 'no', 'no', 'no', 'yes'],
    K4=[24, 15, 16, 18, 20, 22, 16, 30, 18, 19],
    K5=[3, 3, 5, 1, 2, 1, 1, 2, 3, 4],
)

# orient='index' turns every dict key into a row, so criteria are rows and
# the alternatives X1..X10 are columns.
input_df = pd.DataFrame.from_dict(
    input_data_dict,
    orient='index',
    columns=['X{}'.format(number) for number in range(1, 11)],
)

In [156]:
def normalize(input_df, criteria_to_minimize=(3,), criteria_to_maximize=(0, 4),
              quality_rules=None):
    """Return a normalized copy of a criteria-by-alternatives table.

    Rows are addressed by position. Three passes are applied in order:

    1. Rows listed in ``quality_rules`` have their labels replaced by the
       numeric score the rule assigns to each label.
    2. Rows listed in ``criteria_to_minimize`` become ``row_min / value``.
    3. Rows listed in ``criteria_to_maximize`` become ``value / row_max``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Table with one row per criterion and one column per alternative.
        It is not modified.
    criteria_to_minimize : iterable of int, optional
        Positional row indices scaled as ``row_min / value``. Defaults to
        ``(3,)``.
    criteria_to_maximize : iterable of int, optional
        Positional row indices scaled as ``value / row_max``. Defaults to
        ``(0, 4)``.
    quality_rules : dict, optional
        Maps a positional row index to a ``{label: score}`` dict. When
        omitted, row 1 uses the five-step 'very good'..'very bad' scale and
        row 2 uses the 'yes'/'no' scale defined below.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same labels as ``input_df``.

    Raises
    ------
    KeyError
        If a quality row contains a label missing from its rule.
    """
    df = input_df.copy()

    if quality_rules is None:
        quality_rules = {
            1: {
                'very good': 0.9,
                'good': 0.7,
                'medium': 0.5,
                'bad': 0.3,
                'very bad': 0.1
            },
            2: {
                'yes': 0.67,
                'no': 0.33
            }
        }

    def _normalize_quality_criteria():
        for crit_index, scores in quality_rules.items():
            # Plain indexing (not dict.get / Series.map with a dict) so that
            # an unexpected label fails loudly instead of turning into NaN.
            df.iloc[crit_index] = df.iloc[crit_index].apply(lambda label: scores[label])

    def _minimize_selected_criteria():
        for crit_index in criteria_to_minimize:
            row = df.iloc[crit_index]
            # The row minimum is loop-invariant: compute it once instead of
            # once per element.
            smallest = min(row)
            df.iloc[crit_index] = row.apply(lambda x: smallest / x)

    def _maximize_selected_criteria():
        for crit_index in criteria_to_maximize:
            row = df.iloc[crit_index]
            # Same hoisting as above, for the row maximum.
            largest = max(row)
            df.iloc[crit_index] = row.apply(lambda x: x / largest)

    _normalize_quality_criteria()
    _minimize_selected_criteria()
    _maximize_selected_criteria()

    return df

In [427]:
def get_paretto(input_df):
    """Return only the Pareto-optimal alternatives (columns) of a table.

    Every column is one alternative and every row one criterion, with larger
    values treated as better. A column is dropped when some other column
    dominates it: at least as good on every criterion and strictly better on
    at least one. For each dropped column a line
    ``'<dropped> is worse than <dominating>'`` is printed.

    Requiring one strict improvement matters for duplicates: two columns
    with identical values do not dominate each other, so both are kept
    instead of eliminating one another.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Criteria-by-alternatives table. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The non-dominated columns, in their original order.
    """
    df = input_df.copy()

    def variant_is_better_than(col1, col2):
        # Pareto dominance: never worse AND strictly better somewhere.
        return all(col1 >= col2) and any(col1 > col2)

    def is_variant_optimal(df, current_variant):
        for variant in df:
            if current_variant == variant:
                continue
            if variant_is_better_than(df[variant], df[current_variant]):
                print('{} is worse than {}'.format(current_variant, variant))
                return False
        return True

    optim_variants = [variant for variant in df if is_variant_optimal(df, variant)]

    return df[optim_variants]

In [452]:
def express_analisys(df, count_to_return):
    """Keep the alternatives whose weakest criterion value is highest.

    Each column is scored by its minimum value; the columns are ranked by
    that score in descending order and the first ``count_to_return`` of them
    are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one column per alternative.
    count_to_return : int
        How many top-ranked columns to keep.

    Returns
    -------
    pandas.DataFrame
        The selected columns of ``df``, ordered from best to worst score.
    """
    weakest_value_per_column = df.apply(min, axis=0)
    ranked = weakest_value_per_column.sort_values(ascending=False)
    selected_labels = ranked[:count_to_return].index
    return df[selected_labels]

In [408]:
def vector_scalar(input_df):
    """Print a weighted-sum report for a criteria-by-alternatives table.

    For every row (criterion) the mean absolute deviation from the row mean
    is divided by the row mean; the report labels this value 'Std'. Each
    row's share of the summed 'Std' values is its weight. Every cell is then
    replaced by ``weight / value`` and the per-column totals are printed as
    the complex estimations.

    The function only prints; it returns ``None``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Table with one row per criterion and one column per alternative.
        It is not modified.
    """
    df = input_df.copy()

    row_means = df.mean(axis=1)
    deviation_totals = df.sub(row_means, axis=0).abs().sum(axis=1)
    relative_deviation = deviation_totals.div(df.shape[1] * row_means)
    # Built-in sum keeps the same left-to-right float accumulation.
    deviation_sum = sum(relative_deviation)
    criteria_weights = relative_deviation / deviation_sum
    weighted_estimations = (1 / df).mul(criteria_weights, axis=0)

    sections = [
        ('Mean:', row_means),
        ('Std:', relative_deviation),
        ('Std sum:', deviation_sum),
        ('Criteria weights:', criteria_weights),
        ('Weighted estimations:', weighted_estimations),
        ('Complex estimations:', weighted_estimations.sum()),
    ]
    for position, (title, value) in enumerate(sections):
        # A blank line separates sections; none before the first or after
        # the last.
        if position:
            print()
        print(title)
        print(value)

In [439]:
def comparsion(input_df, criteria_price=None):
    """Print a pairwise comparison of the first two columns of a table.

    For every row ``i`` the larger of the two cell values is divided by the
    smaller one (printed as ``S<i>``) and raised to the power
    ``criteria_price[i]`` (printed as ``C<i>``). The ``C`` values of rows
    where the first column is larger are multiplied into ``D1``, those where
    the second column is larger into ``D2``; rows with equal values
    contribute to neither. Finally ``D1`` and ``D2`` are printed together
    with a verdict line.

    The function only prints; it returns ``None``.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Table whose first two columns are compared row by row. It is not
        modified.
    criteria_price : sequence of numbers, optional
        One exponent per row. Defaults to ``[4, 1, 1, 2, 3]``.

    Raises
    ------
    IndexError
        If the table has more rows than ``criteria_price`` has entries.
    """
    df = input_df.copy()
    if criteria_price is None:
        criteria_price = [4, 1, 1, 2, 3]

    max_weight = max(criteria_price)
    crits_weight = [max_weight + 1 - price for price in criteria_price]
    print('Criteria weights:')
    print(crits_weight)

    # NOTE(review): the inverted weights printed above are never used in the
    # computation; the exponent below is the raw criteria_price entry.
    # Confirm which of the two is intended before relying on the verdict.
    D1 = 1
    D2 = 1
    for i in range(df.shape[0]):
        first, second = df.iloc[i, 0], df.iloc[i, 1]
        # Computed once per row instead of three times.
        ratio = max(first, second) / min(first, second)
        estimate = ratio ** criteria_price[i]
        print('S{} = {}'.format(i + 1, ratio))
        print('C{} = {}'.format(i + 1, estimate))

        if first > second:
            D1 *= estimate
        elif first < second:
            D2 *= estimate

    print('D1: {}'.format(D1))
    print('D2: {}'.format(D2))
    if D1 > D2:
        print('First criteria is better')
    elif D2 > D1:
        print('Second criteria is better')
    else:
        print('Nothing')

In [425]:
# Normalize the raw criteria table with the default settings of normalize();
# the bare name on the last line displays the resulting frame.
normalized_df = normalize(input_df)
normalized_df

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
K1,1.0,0.5,0.3,0.2,0.1,0.2,0.3,0.4,0.6,0.5
K2,0.7,0.7,0.5,0.5,0.9,0.7,0.3,0.5,0.5,0.5
K3,0.67,0.33,0.33,0.33,0.67,0.33,0.33,0.33,0.33,0.67
K4,0.625,1.0,0.9375,0.833333,0.75,0.681818,0.9375,0.5,0.833333,0.789474
K5,0.6,0.6,1.0,0.2,0.4,0.2,0.2,0.4,0.6,0.8


In [428]:
# Filter the normalized table with get_paretto(); it prints one line per
# discarded alternative and the bare name then displays what is left.
optim_pareto_df = get_paretto(normalized_df)
optim_pareto_df

X4 is worse than X2
X6 is worse than X2
X7 is worse than X2
X8 is worse than X1


Unnamed: 0,X1,X2,X3,X5,X9,X10
K1,1.0,0.5,0.3,0.1,0.6,0.5
K2,0.7,0.7,0.5,0.9,0.5,0.5
K3,0.67,0.33,0.33,0.67,0.33,0.67
K4,0.625,1.0,0.9375,0.75,0.833333,0.789474
K5,0.6,0.6,1.0,0.4,0.6,0.8


In [445]:
# Keep 4 alternatives from the Pareto-filtered table, as ranked by
# express_analisys(), and display them.
express_analized = express_analisys(optim_pareto_df, 4)
express_analized

X1     0.60
X2     0.33
X3     0.30
X5     0.10
X9     0.33
X10    0.50
dtype: float64


Unnamed: 0,X1,X10,X9,X2
K1,1.0,0.5,0.6,0.5
K2,0.7,0.5,0.5,0.7
K3,0.67,0.67,0.33,0.33
K4,0.625,0.789474,0.833333,1.0
K5,0.6,0.8,0.6,0.6


In [446]:
# Print the vector_scalar() report for the four alternatives kept above.
vector_scalar(express_analized)

Mean:
K1    0.650000
K2    0.600000
K3    0.500000
K4    0.811952
K5    0.650000
dtype: float64

Std:
K1    0.269231
K2    0.166667
K3    0.340000
K4    0.128967
K5    0.115385
dtype: float64

Std sum:
1.0202489655291818

Criteria weights:
K1    0.263887
K2    0.163359
K3    0.333252
K4    0.126407
K5    0.113095
dtype: float64

Weighted estimations:
          X1       X10        X9        X2
K1  0.263887  0.527775  0.439812  0.527775
K2   0.23337  0.326718  0.326718   0.23337
K3  0.497391  0.497391   1.00985   1.00985
K4  0.202252  0.160116  0.151689  0.126407
K5  0.188491  0.141368  0.188491  0.188491

Complex estimations:
X1     1.385391
X10    1.653367
X9     2.116564
X2     2.085897
dtype: float64


In [450]:
# Pick 2 alternatives from the full normalized table (not the Pareto-filtered
# one) for the pairwise comparison.
best_two = express_analisys(normalized_df, 2)

X1     0.60
X2     0.33
X3     0.30
X4     0.20
X5     0.10
X6     0.20
X7     0.20
X8     0.33
X9     0.33
X10    0.50
dtype: float64


In [448]:
# Pairwise comparison of the two columns stored in best_two.
comparsion(best_two)

Criteria weights:
[1, 4, 4, 3, 2]
S1 = 2.0
C1 = 16.0
S2 = 1.4
C2 = 1.4
S3 = 1.0
C3 = 1.0
S4 = 1.263157894736842
C4 = 1.595567867036011
S5 = 1.3333333333333335
C5 = 2.370370370370371
D1: 22.4
D2: 3.782086795937212
First criteria is better


In [451]:
# Same pairwise comparison, with the two alternatives X1 and X2 chosen
# explicitly from the normalized table.
comparsion(normalized_df[['X1', 'X2']])

Criteria weights:
[1, 4, 4, 3, 2]
S1 = 2.0
C1 = 16.0
S2 = 1.0
C2 = 1.0
S3 = 2.0303030303030303
C3 = 2.0303030303030303
S4 = 1.6
C4 = 2.5600000000000005
S5 = 1.0
C5 = 1.0
D1: 32.484848484848484
D2: 2.5600000000000005
First criteria is better


In [444]:
# Display the raw input table for reference.
input_df

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10
K1,10,5,3,2,1,2,3,4,6,5
K2,good,good,medium,medium,very good,good,bad,medium,medium,medium
K3,yes,no,no,no,yes,no,no,no,no,yes
K4,24,15,16,18,20,22,16,30,18,19
K5,3,3,5,1,2,1,1,2,3,4
