In [1]:
import numpy as np
import pandas as pd
from scipy.spatial import distance

# Sample data: ten observations, each described by five numeric features.
data = {
    'Price': [100000, 800000, 650000, 700000,
              860000, 730000, 400000, 870000,
              780000, 400000],
    'Distance': [16000, 60000, 300000, 10000,
                 252000, 350000, 260000, 510000,
                 2000, 5000],
    'Emission': [300, 400, 1230, 300, 400, 104,
                 632, 221, 142, 267],
    'Performance': [60, 88, 90, 87, 83, 81, 72,
                     91, 90, 93],
    'Mileage': [76, 89, 89, 57, 79, 84, 78, 99,
                 97, 99]
}

# Creating dataset
df = pd.DataFrame(data, columns=['Price', 'Distance',
                                 'Emission', 'Performance',
                                 'Mileage'])

# Snapshot the feature matrix once, *before* the distance column is added,
# so the mean and covariance are computed from the five input features only.
features = df.to_numpy()

# Loop-invariant quantities: compute them a single time instead of per row.
mean_vector = features.mean(axis=0)
# rowvar=False: every column is a variable, every row an observation.
cov_matrix = np.cov(features, rowvar=False)
# The columns differ in scale by several orders of magnitude (Price ~1e5,
# Performance ~1e2), so the covariance matrix is poorly scaled; the plain
# inverse is kept here to reproduce the displayed results.
inv_cov_matrix = np.linalg.inv(cov_matrix)

# Mahalanobis distance of each observation from the sample mean.
mahalanobis_distances = [
    distance.mahalanobis(row, mean_vector, inv_cov_matrix)
    for row in features
]

df['MahalanobisDistance'] = mahalanobis_distances

# Display the dataframe
print(df)


    Price  Distance  Emission  Performance  Mileage  MahalanobisDistance
0  100000     16000       300           60       76             2.460846
1  800000     60000       400           88       89             1.605946
2  650000    300000      1230           90       89             2.600486
3  700000     10000       300           87       57             2.735447
4  860000    252000       400           83       79             1.424043
5  730000    350000       104           81       84             1.542937
6  400000    260000       632           72       78             1.465424
7  870000    510000       221           91       99             2.217056
8  780000      2000       142           90       97             2.006377
9  400000      5000       267           93       99             2.573532


In [2]:
import numpy as np
import pandas as pd
from scipy.spatial import distance


# Sample data: twenty observations, each described by four numeric features.
data = {'score': [91, 93, 72, 87, 86, 73, 68, 87, 78, 99, 95, 76, 84, 96, 76, 80, 83, 84, 73, 74],
        'hours': [16, 6, 3, 1, 2, 3, 2, 5, 2, 5, 2, 3, 4, 3, 3, 3, 4, 3, 4, 4],
        'prep': [3, 4, 0, 3, 4, 0, 1, 2, 1, 2, 3, 3, 3, 2, 2, 2, 3, 3, 2, 2],
        'grade': [70, 88, 80, 83, 88, 84, 78, 94, 90, 93, 89, 82, 95, 94, 81, 93, 93, 90, 89, 89]
        }

df = pd.DataFrame(data, columns=['score', 'hours', 'prep', 'grade'])

# Snapshot the feature matrix once, *before* the distance column is added,
# so the mean and covariance are computed from the four input features only.
features = df.to_numpy()

# Loop-invariant quantities: compute them a single time instead of per row.
mean_vector = features.mean(axis=0)
# rowvar=False: every column is a variable, every row an observation.
cov_matrix = np.cov(features, rowvar=False)
inv_cov_matrix = np.linalg.inv(cov_matrix)

# Mahalanobis distance of each observation from the sample mean.
mahalanobis_distances = [
    distance.mahalanobis(row, mean_vector, inv_cov_matrix)
    for row in features
]

df['MahalanobisDistance'] = mahalanobis_distances

# Display the dataframe
print(df)


    score  hours  prep  grade  MahalanobisDistance
0      91     16     3     70             4.062261
1      93      6     4     88             1.624588
2      72      3     0     80             2.202453
3      87      1     3     83             2.280627
4      86      2     4     88             1.956715
5      73      3     0     84             2.022514
6      68      2     1     78             2.069693
7      87      5     2     94             1.555594
8      78      2     1     90             1.285285
9      99      5     2     93             2.378618
10     95      2     3     89             1.991451
11     76      3     3     82             1.713189
12     84      4     3     95             1.676368
13     96      3     2     94             2.090047
14     76      3     2     81             1.249406
15     80      3     2     93             1.208101
16     83      4     3     93             1.422876
17     84      3     3     90             0.866172
18     73      4     2     89  

In [5]:
import numpy as np
import pandas as pd

# Sample data: ten observations, five numeric features each.
data = {
    'Price': [100000, 800000, 650000, 700000, 860000,
              730000, 400000, 870000, 780000, 400000],
    'Distance': [16000, 60000, 300000, 10000, 252000,
                 350000, 260000, 510000, 2000, 5000],
    'Emission': [300, 400, 1230, 300, 400,
                 104, 632, 221, 142, 267],
    'Performance': [60, 88, 90, 87, 83,
                    81, 72, 91, 90, 93],
    'Mileage': [76, 89, 89, 57, 79,
                84, 78, 99, 97, 99],
}

# Build the frame with an explicit column order.
feature_names = ['Price', 'Distance', 'Emission', 'Performance', 'Mileage']
df = pd.DataFrame(data, columns=feature_names)

# Raw observation matrix (rows = observations, columns = features).
observations = df.values
print("df.values", observations)

# Step 1: column-wise mean of the observations.
mean_vector = observations.mean(axis=0)
print("mean_vector", mean_vector)

# Step 2: covariance between the columns (rowvar=False: columns are variables).
cov_matrix = np.cov(observations, rowvar=False)
print("covariance_mat", cov_matrix)

# Step 3: inverse of the covariance matrix.
inv_cov_matrix = np.linalg.inv(cov_matrix)
print("inv_cov", inv_cov_matrix)

# Step 4: for each observation x, compute sqrt((x - mean) . inv_cov . (x - mean)).
mahalanobis_distances = []
for observation in observations:
    print("row", observation)
    centered = observation - mean_vector
    squared_distance = centered.dot(inv_cov_matrix).dot(centered)
    mahalanobis_distances.append(np.sqrt(squared_distance))

df['MahalanobisDistance'] = mahalanobis_distances

# Display the dataframe
print(df)


df.values [[100000  16000    300     60     76]
 [800000  60000    400     88     89]
 [650000 300000   1230     90     89]
 [700000  10000    300     87     57]
 [860000 252000    400     83     79]
 [730000 350000    104     81     84]
 [400000 260000    632     72     78]
 [870000 510000    221     91     99]
 [780000   2000    142     90     97]
 [400000   5000    267     93     99]]
mean_vector [6.290e+05 1.765e+05 3.996e+02 8.350e+01 8.470e+01]
covariance_mat [[ 6.26544444e+10  1.83216667e+10 -7.93711111e+06  1.82944444e+06
   6.26333333e+05]
 [ 1.83216667e+10  3.29073889e+10  1.23727778e+07  1.97055556e+05
   5.66388889e+05]
 [-7.93711111e+06  1.23727778e+07  1.07403600e+05  1.31555556e+02
  -1.97022222e+02]
 [ 1.82944444e+06  1.97055556e+05  1.31555556e+02  1.06055556e+02
   6.21666667e+01]
 [ 6.26333333e+05  5.66388889e+05 -1.97022222e+02  6.21666667e+01
   1.68677778e+02]]
inv_cov [[ 5.66645493e-11 -3.43145245e-11  1.01150138e-08 -1.11915226e-06
   3.29096803e-07]
 [-3.431452