In [1]:
import pandas as pd
import numpy as np
import seaborn as sns

# Entropy Method

The entropy weight method (EWM) is an important information-weight model that has been extensively studied and applied. Compared with subjective weighting models, its biggest advantage is that it avoids the interference of human factors in the criteria weights, which enhances the objectivity of the comprehensive evaluation results. In this study, the criteria are weighted with the entropy method and the alternatives are ranked with TOPSIS.

In [2]:
# Load the "mpg" example dataset through seaborn and preview its first rows.
df = sns.load_dataset(name='mpg')
df.head(n=5)

Unnamed: 0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year,origin,name
0,18.0,8,307.0,130.0,3504,12.0,70,usa,chevrolet chevelle malibu
1,15.0,8,350.0,165.0,3693,11.5,70,usa,buick skylark 320
2,18.0,8,318.0,150.0,3436,11.0,70,usa,plymouth satellite
3,16.0,8,304.0,150.0,3433,12.0,70,usa,amc rebel sst
4,17.0,8,302.0,140.0,3449,10.5,70,usa,ford torino


In [3]:
# Decision Matrix
# Alternatives (rows) are labelled by car name; note that this assigns the
# index on `df` itself, so `df` is modified as well.
df.index = df['name']
# Only the numeric criteria stay as columns: the two text columns are dropped.
new_df = df.drop(columns=['origin', 'name'])
new_df.head(n=5)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70
buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70
plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70
amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70
ford torino,17.0,8,302.0,140.0,3449,10.5,70


## Step 1 

$$p_{ij}=\frac{x_{ij}}{{\sum_{i = 1}^{m} x_{ij}}}$$

In [4]:
#Normalize Decision matrix

def norm(X):
    """Sum-normalise ``X``: divide every entry by the total of ``X``.

    Implements p_ij = x_ij / sum_i x_ij for a single criterion column.
    """
    column_total = X.sum()
    return X / column_total


# Apply the sum normalisation column by column (one column per criterion).
norm_df = new_df.apply(norm, axis=0)
norm_df.head(n=5)



Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,0.001923,0.003685,0.003988,0.003174,0.002964,0.001937,0.002314
buick skylark 320,0.001603,0.003685,0.004546,0.004029,0.003124,0.001856,0.002314
plymouth satellite,0.001923,0.003685,0.004131,0.003663,0.002906,0.001775,0.002314
amc rebel sst,0.00171,0.003685,0.003949,0.003663,0.002904,0.001937,0.002314
ford torino,0.001816,0.003685,0.003923,0.003419,0.002917,0.001695,0.002314


## Step 2 

$$e_{j}= - k \sum_{i = 1}^{m} p_{ij} \ln(p_{ij})$$

m = number of alternatives 

$$k=\frac{1}{{\ln{(m)}}}$$

In [6]:
#Entropy Values

# k = -1/ln(m), where m is the number of alternatives (rows of norm_df).
# The leading minus sign of the entropy formula is folded into k here.
k = -(1/np.log(norm_df.shape[0]))

def column_entropy(X):
    """Return the entropy value e_j of one column of proportions.

    Parameters
    ----------
    X : pandas.Series
        Proportions p_ij of a single criterion.

    Returns
    -------
    float
        ``k * sum(p * ln(p))`` computed with the module-level ``k``.
    """
    # Only strictly positive proportions contribute: p*ln(p) tends to 0 as
    # p -> 0, and filtering avoids evaluating log(0), which would otherwise
    # produce NaN terms and a RuntimeWarning. Missing values are excluded too,
    # exactly as the NaN-skipping sum did before.
    positive = X[X > 0]
    return (positive*np.log(positive)).sum()*k

# The helper has its own name so that binding the result to `entropy` does not
# overwrite the function; the cell can therefore be re-run safely.
entropy = norm_df.apply(column_entropy)

#degree of differentiation

dod = 1 - entropy

# Entropy weights: degrees of differentiation rescaled to sum to one.
w = dod/dod.sum()
w.sort_values(ascending = False)

displacement    0.369241
horsepower      0.208400
mpg             0.145895
cylinders       0.125612
weight          0.105799
acceleration    0.041895
model_year      0.003158
dtype: float64

# TOPSIS

TOPSIS is a multi-criteria decision analysis method based on the concept that the chosen alternative should have the shortest geometric distance to the Positive Ideal Solution (PIS) and the longest geometric distance from the Negative Ideal Solution (NIS).

In [7]:
# Reminder of the raw decision matrix that TOPSIS starts from.
new_df.head(n=5)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,18.0,8,307.0,130.0,3504,12.0,70
buick skylark 320,15.0,8,350.0,165.0,3693,11.5,70
plymouth satellite,18.0,8,318.0,150.0,3436,11.0,70
amc rebel sst,16.0,8,304.0,150.0,3433,12.0,70
ford torino,17.0,8,302.0,140.0,3449,10.5,70


## Step 1 

$$r_{ij}=\frac{x_{ij}}{\sqrt{\sum_{i = 1}^{m} x_{ij}^2}}$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

In [8]:
def norm(X):
    """Vector-normalise ``X``: divide every entry by the Euclidean length of ``X``.

    Implements r_ij = x_ij / sqrt(sum_i x_ij^2) for a single criterion column.
    """
    squared = X ** 2
    euclidean_length = np.sqrt(squared.sum())
    return X / euclidean_length

# Apply the vector normalisation column by column (one column per criterion).
norm_matrix = new_df.apply(norm, axis=0)
norm_matrix.head(n=5)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,0.036416,0.070189,0.07005,0.058984,0.056869,0.038046,0.046108
buick skylark 320,0.030347,0.070189,0.079862,0.074865,0.059937,0.036461,0.046108
plymouth satellite,0.036416,0.070189,0.07256,0.068059,0.055766,0.034876,0.046108
amc rebel sst,0.03237,0.070189,0.069366,0.068059,0.055717,0.038046,0.046108
ford torino,0.034393,0.070189,0.068909,0.063521,0.055976,0.03329,0.046108


## Step 2

$$v_{ij} = w_j r_{ij}$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

In [9]:
# Weighted normalised matrix v_ij = w_j * r_ij: every criterion column is
# scaled by its weight; `w` is matched to the columns by label.
w_norm_matrix = norm_matrix.mul(w, axis='columns')
w_norm_matrix.head(n=5)

Unnamed: 0_level_0,mpg,cylinders,displacement,horsepower,weight,acceleration,model_year
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
chevrolet chevelle malibu,0.005313,0.008817,0.025865,0.012292,0.006017,0.001594,0.000146
buick skylark 320,0.004427,0.008817,0.029488,0.015602,0.006341,0.001528,0.000146
plymouth satellite,0.005313,0.008817,0.026792,0.014183,0.0059,0.001461,0.000146
amc rebel sst,0.004723,0.008817,0.025613,0.014183,0.005895,0.001594,0.000146
ford torino,0.005018,0.008817,0.025444,0.013238,0.005922,0.001395,0.000146


## Step 3

$$
\begin{align}
v_j^* &=
\begin{cases}
\max_i{(v_{ij})}, & \text{if } j \in J_1 \\
\min_i{(v_{ij})}, & \text{if } j \in J_2
\end{cases}
\\
v_j^- &=
\begin{cases}
\min_i{(v_{ij})}, & \text{if } j \in J_1 \\
\max_i{(v_{ij})}, & \text{if } j \in J_2
\end{cases}
\\
\end{align}
$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$; $J_1$ is the set of benefit criteria (larger values are better) and $J_2$ is the set of cost criteria (smaller values are better).

In [10]:
# Criteria that belong to J_2 (cost criteria, where a smaller value is better).
# While this list is empty every criterion is treated as a benefit criterion
# (J_1), which is what this cell has always computed.
# NOTE(review): confirm whether any criterion (e.g. weight) should be listed here.
cost_criteria = []

is_cost = w_norm_matrix.columns.isin(cost_criteria)

# Vectorised column extremes; these skip missing values, whereas Python's
# builtin max/min give a result that depends on where a NaN sits in the column.
column_max = w_norm_matrix.max()
column_min = w_norm_matrix.min()

# Ideal solution: best value per criterion (max for benefit, min for cost).
V_plus = column_max.where(~is_cost, column_min)
# Anti-ideal solution: worst value per criterion (min for benefit, max for cost).
V_minus = column_min.where(~is_cost, column_max)
V_plus

mpg             0.013755
cylinders       0.008817
displacement    0.038335
horsepower      0.021748
weight          0.008826
acceleration    0.003294
model_year      0.000171
dtype: float64

## Step 4

$$
\begin{align}
S_i^* &= \sqrt{\sum_{j = 1}^n \left(v_{ij} - v^*_j\right)^2} \\
S_i^- &= \sqrt{\sum_{j = 1}^n \left(v_{ij} - v^-_j\right)^2} \\
\end{align}
$$

where $i = 1, 2, \ldots, m$ and $j = 1, 2, \ldots, n$.

We also calculate

$$
C^*_i = \frac{S_i^-}{S_i^* + S_i^-},\text{ where }i = 1, 2, \ldots, m
$$

In [11]:
# Euclidean distance of every alternative (row) to the ideal solution V_plus
# and to the anti-ideal solution V_minus.
# The row sums are vectorised instead of calling Python's builtin sum once per
# row. skipna=False keeps the distance NaN when a row has a missing criterion
# value, so such a row is left unranked rather than scored on fewer criteria.
# NOTE(review): confirm whether the decision matrix contains missing values and
# whether those rows should be dropped up front.
S_plus = np.sqrt(((w_norm_matrix - V_plus)**2).sum(axis = 1, skipna = False))
S_minus = np.sqrt(((w_norm_matrix - V_minus)**2).sum(axis = 1, skipna = False))

In [13]:
# Relative closeness C_i = S_i^- / (S_i^* + S_i^-); values nearer to 1 mean the
# alternative lies closer to the ideal solution. Show the 20 highest scores.
p_score = S_minus.div(S_plus + S_minus)
p_score.sort_values(ascending=False).head(n=20)

name
pontiac catalina                0.790950
chevrolet impala                0.788325
buick electra 225 custom        0.783470
buick estate wagon (sw)         0.781055
plymouth fury iii               0.779876
chrysler new yorker brougham    0.777923
ford galaxie 500                0.769354
pontiac grand prix              0.763569
mercury marquis                 0.758324
mercury marquis brougham        0.758314
chrysler cordoba                0.741217
pontiac grand prix lj           0.733877
chrysler newport royal          0.731166
pontiac catalina                0.726640
amc ambassador dpl              0.724021
pontiac catalina brougham       0.722612
pontiac catalina                0.722415
pontiac safari (sw)             0.720975
ford country squire (sw)        0.715314
ford country                    0.709291
dtype: float64

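The highest-ranked alternative is the pontiac catalina, with a closeness score of about 0.791. Note that several rows share this name, so the name alone does not identify a single row of the data.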