### Importing libraries and 'glass.csv' to a numpy array

In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
import math

# Importing Heart.csv to a numpy array
data = pd.read_csv("glass.csv").to_numpy()
X, y = data[:,:9], data[:,9]

### Defining custom functions

In [2]:
def mahalanobis(inv_cov, X1, X2):
    diff = X1 - X2
    return np.sqrt(np.dot(np.dot(diff, inv_cov), diff.T))

def lrd(reach_matrix, neigh_dist):
    lrd_matrix = np.zeros((214))
    for i in range(214):
        sum = 0
        for j in range(1,3):
            sum += reach_matrix[i, neigh_dist[1][i,j]]
        lrd_matrix[i] = 2/sum
    return lrd_matrix
  
def lof(lrd_matrix, neigh_dist):
    lof_matrix = np.zeros((214))
    for i in range(214):
        sum = 0
        for j in range(1,3):
            sum += lrd_matrix[neigh_dist[1][i,j]]
        sum = sum/lrd_matrix[i]
        sum /= 2
        lof_matrix[i] = sum
    return lof_matrix

In [3]:
cov = np.cov(X.T)
inv_cov = np.linalg.inv(cov)
mu = np.mean(X, axis=0)
mahalanobis_dist = np.zeros((214))

for i in range(214):
    mahalanobis_dist[i] = mahalanobis(inv_cov, X[i], mu)

In [4]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X, y)

neigh_dist = knn.kneighbors(X)

reach_matrix = np.zeros((214,214))

for i in range(214):
    for j in range(214):
        reach_matrix[i,j] = max(neigh_dist[0][i,2], np.linalg.norm(X[i]-X[j]))
        
lrd_matrix = lrd(reach_matrix, neigh_dist)
lof_matrix = lof(lrd_matrix, neigh_dist)

lof_matrix_sorted = sorted(lof_matrix)
mahalanobis_sorted = sorted(mahalanobis_dist)

In [5]:
val1 = 0
val2 = np.sum(lof_matrix_sorted)

sigma_w_lof = []
sigma_w_mahalanobis = []

for i in range(213):
    w0 = i/214
    w1 = 1 - w0
    
    val1 += lof_matrix_sorted[i]
    val2 -= lof_matrix_sorted[i]
    
    avg1 = val1/(i+1)
    avg2 = val2/(213-i)
    
    # Otsu thresholding using LOF
    
    var1 = 0
    var2 = 0
    
    for j in range(i+1):
        var1 += (lof_matrix_sorted[j]-avg1)*(lof_matrix_sorted[j]-avg1)
    var1 /= (i+1)
    
    for j in range(i+1, 214):
        var2 += (lof_matrix_sorted[j]-avg2)*(lof_matrix_sorted[j]-avg2)
    var2 /= (213-i)
    
    sigma_w_lof.append(w0*var1 + w1*var2)
    
    # Otsu thresholding using Mahalanobis distance
    
    var1 = 0
    var2 = 0
    
    for j in range(i+1):
        var1 += (mahalanobis_sorted[j]-avg1)*(mahalanobis_sorted[j]-avg1)
    var1 /= (i+1)
    
    for j in range(i+1, 214):
        var2 += (mahalanobis_sorted[j]-avg2)*(mahalanobis_sorted[j]-avg2)
    var2 /= (213-i)
    
    sigma_w_mahalanobis.append(w0*var1 + w1*var2)


### Number of outliers when Otsu Thresholding is performed on LOF values

In [6]:
outliers_lof = 214 - sigma_w_lof.index(sorted(sigma_w_lof)[0])
outliers_lof
# comes out to be 2

2

### Number of outliers when Otsu Thresholding is performed on Mahalanobis values

In [7]:
outliers_mahalanobis = 214 - sigma_w_mahalanobis.index(sorted(sigma_w_mahalanobis)[0])
outliers_mahalanobis
# comes out to be 18

18