# Porządkowanie liniowe 

Import potrzebnych bibliotek

In [1]:
import math
import numpy as np
import pandas as p
from sklearn.preprocessing import scale

---

### Wagi 

System wag to wektor $$ w = (w_1, w_2, \ldots, w_n) $$

który spełnia warunki:

$$ w_i \geq 0 $$
$$ \sum_{i=1}^{n} w_i = 1 $$

In [2]:
def verify_weights(weights):
    """Check that ``weights`` forms a valid weight system.

    A valid system contains only non-negative weights that sum to 1.

    Args:
        weights: Mapping of column name to numeric weight.

    Returns:
        True when the weights are valid.

    Raises:
        ValueError: If the weights do not sum to 1 or any weight is negative.
    """
    values = list(weights.values())

    # Compare with a tolerance: exact float equality rejects valid systems
    # such as ten weights of 0.1, whose sum is 0.9999999999999999.
    if not math.isclose(sum(values), 1.0, rel_tol=1e-9, abs_tol=1e-9):
        raise ValueError('Weights has to sum to 1')
    if any(w < 0 for w in values):
        raise ValueError('Weights have to non negative')
    return True

---

### Zamiana nominanty i destymulanty na stymulantę

$$ \text{stymulanta} = -1 \cdot \text{destymulanta} $$

![](Nom.png)

In [3]:
def nominant_to_stimulant(N):
    """Build a function that maps a nominant value onto a stimulant scale.

    The returned function yields 1 when its argument equals the nominal
    value ``N`` and ``1 / (|x - N| + 1)`` otherwise, so the score falls
    towards 0 as ``x`` moves away from ``N`` in either direction.

    Args:
        N: Nominal (most desirable) value of the variable.

    Returns:
        A one-argument callable converting a single observation.
    """

    def calculate(x):
        # One expression covers x > N, x < N and x == N. Unlike a chain of
        # ``>`` / ``<`` comparisons it also propagates NaN, instead of
        # scoring a missing observation as a perfect 1.
        return 1 / (abs(x - N) + 1)

    return calculate

def change_to_stimulants(df, columns):
    """Return a copy of ``df`` in which every column is a stimulant.

    Args:
        df: DataFrame with the raw values of each variable.
        columns: Mapping of column name to its kind: ``'min'`` (destimulant,
            the column is negated), ``'max'`` (stimulant, left unchanged) or
            a number (nominant with that nominal value, converted with
            ``nominant_to_stimulant``).

    Returns:
        A new DataFrame; the input frame is not modified.

    Raises:
        ValueError: If a column kind is not ``'min'``, ``'max'`` or a number.
        KeyError: If a column of ``df`` has no entry in ``columns``.
    """
    df = df.copy()

    for col in df:
        kind = columns[col]
        # Any real number is a valid nominal value (e.g. the integer 6, not
        # only 6.0); bool is excluded because it is a subclass of int.
        is_number = (
            isinstance(kind, (int, float, np.integer, np.floating))
            and not isinstance(kind, bool)
        )

        if is_number:
            df[col] = df[col].apply(nominant_to_stimulant(kind))
        elif kind == 'min':
            df[col] = df[col] * -1
        elif kind == 'max':
            pass
        else:
            raise ValueError('Column type has to be "min" or "max" or float')

    return df

---

## Metoda sum rang 

![](Sum_of_ranks.jpg)

1) Zamiana nominanty i destymulanty na stymulanty

2) Rangujemy obserwacje dla każdej zmiennej

In [4]:
def generate_ranks(df):
    """Replace the values of every column with their ranks.

    Within each column the largest value receives rank 1 and missing values
    are ranked last. The input frame is left untouched.

    Args:
        df: DataFrame whose columns are ranked independently.

    Returns:
        A new DataFrame of the same shape holding the ranks.
    """
    ranked = df.copy()
    for name in ranked.columns:
        column_ranks = ranked[name].rank(ascending=False, na_option='bottom')
        ranked[name] = column_ranks
    return ranked

3) Wyliczamy miernik syntetyczny - średnia ważona rang

In [5]:
def calculate_weighted_average(df, weights=None):
    """Combine the columns of ``df`` into one weighted total per row.

    Args:
        df: DataFrame whose columns are the values to combine.
        weights: Mapping of column name to weight. When ``None``, every
            column receives the same weight ``1 / number_of_columns``.

    Returns:
        Series holding, for each row, the sum of its weighted values. The
        input frame is left untouched.
    """
    if weights is None:
        column_count = len(df.columns)
        weights = {name: 1 / column_count for name in df.columns}

    weighted = df.copy()
    for name in weighted.columns:
        weighted[name] = weighted[name] * weights[name]

    row_totals = weighted.sum(axis=1)
    return row_totals

4) Pełna funkcja metody sum rang

In [41]:
def sum_of_ranks(df, columns, weights):
    """Order the rows of ``df`` with the sum-of-ranks method.

    Args:
        df: DataFrame with the raw values of each variable.
        columns: Mapping of column name to its kind, as accepted by
            ``change_to_stimulants``.
        weights: Mapping of column name to weight, passed on to
            ``calculate_weighted_average``.

    Returns:
        Series named ``'Sum of ranks'`` with the weighted rank of every row,
        sorted in ascending order.
    """
    # Steps 1-2: turn every variable into a stimulant, then rank each column.
    ranks = generate_ranks(change_to_stimulants(df, columns))

    # Step 3: the synthetic measure is the weighted combination of the ranks.
    scores = calculate_weighted_average(ranks, weights)

    ordered = scores.sort_values(ascending=True)
    return ordered.rename('Sum of ranks')

---

## Metoda standaryzowanych sum

![](Standardized_sum_method.jpg)

1) Zamiana nominanty i destymulanty na stymulantę - istniejąca funkcja change_to_stimulants

2) Standaryzacja wszystkich danych 

In [7]:
def standardize(df):
    """Standardize every column of ``df`` while keeping its labels.

    The values are passed through ``sklearn.preprocessing.scale`` with its
    default settings and wrapped back into a DataFrame that reuses the index
    and column labels of ``df``.

    Args:
        df: DataFrame of numeric values.

    Returns:
        A new DataFrame with the scaled values.
    """
    scaled_values = scale(df)
    return p.DataFrame(scaled_values, columns=df.columns, index=df.index)

3) Sumujemy oszacowania uzyskane w ramach obiektów - średnia ważona - istniejąca funkcja calculate_weighted_average

4) Finalna standaryzacja uzyskanych wartości według wzoru 

$$ s_i' = \frac{ s_i - \min(s) }{ \max_i\{ s_i - \min(s) \} } $$

In [8]:
def final_standardize(df_weights):
    """Rescale the synthetic scores to the range [0, 1].

    Every score is shifted by the smallest score and divided by the largest
    shifted score, so the worst row gets 0 and the best row gets 1.

    Args:
        df_weights: Series of scores, one per row of the data set.

    Returns:
        A new Series with the rescaled scores. Missing scores stay missing.
        When all scores are equal (or the input is empty) every available
        score becomes 0 instead of triggering a division by zero.
    """
    # Series.min()/max() skip NaN. The built-in min()/max() are order
    # dependent with NaN: a leading NaN would turn every result into NaN.
    shifted = df_weights - df_weights.min()
    spread = shifted.max()

    # ``not spread > 0`` is true for a zero spread (all scores equal) and for
    # NaN (empty or all-missing input); multiplying by 0.0 keeps NaN as NaN.
    if not spread > 0:
        return shifted * 0.0

    return shifted / spread

5) Pełna funkcja metody standaryzowanych sum

In [42]:
def standardized_sums(df, columns, weights):
    """Order the rows of ``df`` with the standardized-sums method.

    Args:
        df: DataFrame with the raw values of each variable.
        columns: Mapping of column name to its kind, as accepted by
            ``change_to_stimulants``.
        weights: Mapping of column name to weight, passed on to
            ``calculate_weighted_average``.

    Returns:
        Series named ``'Standardized sums'`` with the score of every row,
        sorted in descending order.
    """
    # Steps 1-2: stimulants only, then standardized columns.
    standardized = standardize(change_to_stimulants(df, columns))

    # Step 3: weighted combination of the standardized values per row.
    row_scores = calculate_weighted_average(standardized, weights)

    # Step 4: final rescaling of the scores.
    rescaled = final_standardize(row_scores)

    ordered = rescaled.sort_values(ascending=False)
    return ordered.rename('Standardized sums')

## Metoda Hellwiga 

1) Zamiana nominanty i destymulanty na stymulanty

2) Standaryzacja wszystkich danych - funkcja standardize

3.1) Uwzględnienie wag

In [10]:
def apply_weights(df, weights):
    """Multiply every column of ``df`` by its weight.

    Args:
        df: DataFrame whose columns are scaled.
        weights: Mapping of column name to the factor for that column.

    Returns:
        A new DataFrame with the weighted columns; the input frame is left
        untouched.
    """
    weighted = df.copy()
    for name in weighted.columns:
        factor = weights[name]
        weighted[name] = weighted[name] * factor
    return weighted

3.2) Wyznaczanie wzorca  

In [11]:
def calculate_pattern(df):
    """Return the pattern object: the largest value of every column of ``df``."""
    column_maxima = df.max(axis=0)
    return column_maxima

4) Wyznaczanie odległości od wzorca 

$$ d_i = \sqrt{\sum_j{(x_{ij}-x_j^{+})^2}} $$

gdzie $x_j^{+}$ to wartość wzorca dla zmiennej $j$.

In [12]:
def calculate_distances(df, pattern):
    """Compute the Euclidean distance of every row of ``df`` from ``pattern``.

    Args:
        df: DataFrame with one object per row.
        pattern: Reference values, one per column of ``df``.

    Returns:
        Series with one distance per row of ``df``.
    """
    deviations = df - pattern
    squared_totals = (deviations ** 2).sum(axis=1)
    return squared_totals ** 0.5

5) Wyznaczenie maksymalnej odległości od wzorca

$$ d_0 = \bar{d} + 2 s_d $$

In [13]:
def calculate_max_distance(distances):
    """Return the reference distance: mean plus two standard deviations.

    Both statistics are computed with NumPy (``np.mean`` and ``np.std`` with
    their default arguments).

    Args:
        distances: Distances of the objects from the pattern.

    Returns:
        ``mean(distances) + 2 * std(distances)``.
    """
    centre = np.mean(distances)
    spread = np.std(distances)
    return centre + 2 * spread

6) Wyliczanie wartości miary dla obiektu 

In [43]:
def calculate_hellwig(df, columns, weights):
    """Order the rows of ``df`` with Hellwig's method.

    Args:
        df: DataFrame with the raw values of each variable.
        columns: Mapping of column name to its kind, as accepted by
            ``change_to_stimulants``.
        weights: Mapping of column name to weight, as used by
            ``apply_weights``.

    Returns:
        Series named ``'Hellwig'`` holding ``1 - distance / max_distance`` for
        every row, sorted in descending order.
    """
    # Steps 1-2: stimulants only, then standardized columns.
    standardized = standardize(change_to_stimulants(df, columns))

    # Step 3: weight the columns and take the pattern from the weighted data.
    weighted = apply_weights(standardized, weights)
    ideal = calculate_pattern(weighted)

    # Steps 4-5: distance of each row from the pattern and the reference
    # ("maximum") distance derived from those distances.
    to_ideal = calculate_distances(weighted, ideal)
    reference = calculate_max_distance(to_ideal)

    # Step 6: the measure itself.
    scores = 1 - to_ideal / reference

    ordered = scores.sort_values(ascending=False)
    return ordered.rename('Hellwig')

---

## Metoda TOPSIS

1) Zamiana nominanty i destymulanty na stymulanty

2) Normalizacja danych oraz wartości nominant

$$ z_{ij} = \frac{ x_{ij} }{ \sqrt{ \sum_{i=1}^{m}{x_{ij}^2} } } $$

In [15]:
def normalize(df):
    """Divide every column of ``df`` by its Euclidean norm.

    Args:
        df: DataFrame of numeric values.

    Returns:
        A new DataFrame in which each column has been divided by the square
        root of the sum of its squared values. The input frame is not
        modified. Missing values stay missing, and a column made only of
        zeros stays zero.
    """
    # DataFrame.sum() skips NaN, so one missing observation no longer turns
    # the norm - and with it the whole column - into NaN, which the built-in
    # sum() did.
    norms = (df ** 2).sum() ** 0.5

    # An all-zero column has norm 0; dividing by 1 instead keeps it at zero
    # rather than producing 0 / 0 = NaN.
    norms = norms.replace(0, 1)

    return df / norms

3) Uwzględnienie wag

4) Wyznaczenie wzorca i antywzorca

In [16]:
def calculate_antipattern(df):
    """Return the anti-pattern object: the smallest value of every column of ``df``."""
    column_minima = df.min(axis=0)
    return column_minima

5) Wyliczenie odległości od wzorca i antywzorca

6) Wyznaczenie współczynnika rangowego
$$ R_i = \frac{d_i^-}{d_i^- + d_i^+} $$

In [17]:
def calculate_rank_coefficient(distance_pattern, distance_antipattern):
    """Compute the TOPSIS rank coefficient.

    Args:
        distance_pattern: Distance(s) from the pattern.
        distance_antipattern: Distance(s) from the anti-pattern.

    Returns:
        The share of the anti-pattern distance in the total of both
        distances, ``d_minus / (d_minus + d_plus)``.
    """
    total_distance = distance_antipattern + distance_pattern
    return distance_antipattern / total_distance

In [44]:
def calculate_TOPSIS(df, columns, weights):
    """Order the rows of ``df`` with the TOPSIS method.

    Args:
        df: DataFrame with the raw values of each variable.
        columns: Mapping of column name to its kind, as accepted by
            ``change_to_stimulants``.
        weights: Mapping of column name to weight, as used by
            ``apply_weights``.

    Returns:
        Series named ``'TOPSIS'`` with the rank coefficient of every row,
        sorted in descending order.
    """
    # Steps 1-3: stimulants only, normalized columns, then weighted columns.
    normalized = normalize(change_to_stimulants(df, columns))
    weighted = apply_weights(normalized, weights)

    # Step 4: best and worst reference objects taken from the weighted data.
    ideal = calculate_pattern(weighted)
    anti_ideal = calculate_antipattern(weighted)

    # Step 5: distance of each row from both reference objects.
    to_ideal = calculate_distances(weighted, ideal)
    to_anti_ideal = calculate_distances(weighted, anti_ideal)

    # Step 6: rank coefficient.
    scores = calculate_rank_coefficient(to_ideal, to_anti_ideal)

    ordered = scores.sort_values(ascending=False)
    return ordered.rename('TOPSIS')

---

# Przykłady 

In [45]:
# Kind of every variable: 'min' marks a destimulant, 'max' a stimulant and a
# float the nominal value of a nominant.
columns = {
    'Zawartość alk.': 6.0,
    'Cena': 'min',
    'Dostępność': 'max',
    'Znajomość': 'max',
    'Preferencje': 'max',
}

# Example data set; the first CSV column becomes the row index.
df = p.read_csv("beer.csv", sep=';', index_col=0)

# Every variable gets the same weight of 0.2.
weights = {variable: 0.2 for variable in columns}

In [46]:
# Order the example data with the sum-of-ranks method (results sorted ascending).
sum_of_ranks(df, columns, weights)

Marka
Tyskie           5.2
Kasztelan        5.9
Zywiec           6.0
Perła            6.7
Harnas           6.8
Żubr             8.0
Warka            8.5
Lech             8.6
Carlsberg        8.6
Desperados       9.9
Heineken        10.0
Tatra Pils      10.5
Łomża           11.9
Specjal         13.1
Okocim mocne    13.8
Redds           14.3
Wojak           14.7
Somersby        15.2
Tatra mocne     15.9
Debowe mocne    16.4
Name: Sum of ranks, dtype: float64

In [47]:
# Order the example data with the standardized-sums method (results sorted descending).
standardized_sums(df, columns, weights)

Marka
Zywiec          1.000000
Kasztelan       0.945604
Perła           0.864830
Tyskie          0.846348
Harnas          0.819757
Żubr            0.739338
Carlsberg       0.715129
Warka           0.627205
Lech            0.618597
Łomża           0.559007
Tatra Pils      0.554094
Desperados      0.528954
Heineken        0.433429
Okocim mocne    0.392488
Redds           0.199985
Tatra mocne     0.175540
Debowe mocne    0.140617
Somersby        0.115532
Wojak           0.110556
Specjal         0.000000
Name: Standardized sums, dtype: float64

In [48]:
# Order the example data with Hellwig's method (results sorted descending).
calculate_hellwig(df, columns, weights)

Marka
Kasztelan       0.768087
Zywiec          0.719130
Perła           0.677431
Tyskie          0.628033
Harnas          0.546528
Żubr            0.499118
Warka           0.492610
Lech            0.463128
Carlsberg       0.452628
Łomża           0.420213
Heineken        0.358702
Okocim mocne    0.353301
Tatra Pils      0.338975
Desperados      0.310622
Redds           0.245092
Tatra mocne     0.217864
Debowe mocne    0.206264
Somersby        0.204260
Wojak           0.139016
Specjal        -0.017852
Name: Hellwig, dtype: float64

In [49]:
# Order the example data with the TOPSIS method (results sorted descending).
calculate_TOPSIS(df, columns, weights)

Marka
Zywiec          0.836912
Kasztelan       0.800064
Perła           0.669891
Tyskie          0.628023
Harnas          0.543614
Carlsberg       0.502245
Lech            0.501382
Żubr            0.491145
Desperados      0.486037
Warka           0.478746
Heineken        0.471256
Tatra Pils      0.437687
Łomża           0.437408
Redds           0.393043
Okocim mocne    0.390117
Somersby        0.374240
Tatra mocne     0.302149
Specjal         0.296703
Debowe mocne    0.295903
Wojak           0.265948
Name: TOPSIS, dtype: float64

## Podsumowanie

In [57]:
# All ordering methods defined above; each takes (df, columns, weights) and
# returns a named Series.
methods = [sum_of_ranks, standardized_sums, calculate_hellwig, calculate_TOPSIS]
# Function names of the methods (not used further in this cell).
names = [f.__name__ for f in methods]

# Start from an empty frame that carries only the row index of the data set.
df_summary = p.DataFrame(index=df.index)

# Add one column per method. Each result is sorted differently, so it is
# matched to the summary through the row index.
for f in methods:
    df_summary = df_summary.merge(f(df, columns, weights), left_index=True, right_index=True)

# Display the combined table.
df_summary    

Unnamed: 0_level_0,Sum of ranks,Standardized sums,Hellwig,TOPSIS
Marka,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Zywiec,6.0,1.0,0.71913,0.836912
Desperados,9.9,0.528954,0.310622,0.486037
Kasztelan,5.9,0.945604,0.768087,0.800064
Wojak,14.7,0.110556,0.139016,0.265948
Tyskie,5.2,0.846348,0.628033,0.628023
Heineken,10.0,0.433429,0.358702,0.471256
Warka,8.5,0.627205,0.49261,0.478746
Łomża,11.9,0.559007,0.420213,0.437408
Lech,8.6,0.618597,0.463128,0.501382
Perła,6.7,0.86483,0.677431,0.669891
