# Project 2 Health Anomaly Detection

In [2]:
import warnings
warnings.simplefilter('ignore')

In [4]:
# Importing libraries

import os 
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from scipy.io import loadmat

In [76]:
# Import libraries from pyod.models

from pyod.models.pca import PCA
from pyod.models.mcd import MCD
from pyod.models.ocsvm import OCSVM
from pyod.models.lof import LOF
from pyod.models.cblof import CBLOF
from pyod.models.knn import KNN
from pyod.models.hbos import HBOS
from pyod.models.abod import ABOD
from pyod.models.iforest import IForest
from pyod.models.feature_bagging import FeatureBagging

In [8]:
#For evaluation

from pyod.utils.utility import standardizer
from pyod.utils.utility import precision_n_scores
from sklearn.metrics import roc_auc_score

In [36]:
# Load the cardio dataset from its MATLAB .mat file.
# The path is a raw string so the backslashes of the Windows path are kept
# literally instead of being parsed as (invalid) escape sequences.
data = loadmat(r'C:\Sharath\DATA SETS\data\cardio.mat')
# Show the top-level keys of the loaded dict as the cell output.
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X', 'y'])

# Exploring Mat file

In [135]:
from time import time

# Feature matrix and flattened label vector taken from the loaded .mat dict.
x, y = data['X'], data['y'].ravel()

# Share of samples with a non-zero label, as a fraction and as a percentage.
n_flagged = np.count_nonzero(y)
outliers_fraction = n_flagged / len(y)
outliers_percentage = round(100 * outliers_fraction, 4)

In [68]:
# 70% of the data for training and 30% for testing (test_size=0.3).
# The seed is defined once and reused so the split and the seeded detectors
# share the same value.
random_state = 42
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.3, random_state=random_state)

In [65]:
# Pass the train and test feature matrices through pyod's `standardizer`
# and keep the two returned arrays for model fitting and scoring.
# NOTE(review): presumably the scaling statistics come from the training
# split only — confirm against the pyod documentation.
(x_train_norm, x_test_norm) = standardizer(xtrain, xtest)

In [78]:
# Keyword arguments shared by the detectors: every model gets the observed
# outlier fraction as its contamination; the seeded variant additionally
# passes the notebook-wide random_state.
base_kwargs = {'contamination': outliers_fraction}
seeded_kwargs = {'contamination': outliers_fraction,
                 'random_state': random_state}

# Display name -> unfitted pyod detector, in the order they are evaluated.
classifiers = {
    'Angle-based Outlier Detector (ABOD)': ABOD(**base_kwargs),
    'Cluster-based Local Outlier Factor': CBLOF(check_estimator=False,
                                                **seeded_kwargs),
    'Feature Bagging': FeatureBagging(**seeded_kwargs),
    'Histogram-base Outlier Detection (HBOS)': HBOS(**base_kwargs),
    'Isolation Forest': IForest(**seeded_kwargs),
    'K Nearest Neighbors (KNN)': KNN(**base_kwargs),
    'Local Outlier Factor (LOF)': LOF(**base_kwargs),
    'Minimum Covariance Determinant (MCD)': MCD(**seeded_kwargs),
    'One-class SVM (OCSVM)': OCSVM(**base_kwargs),
    'Principal Component Analysis (PCA)': PCA(**seeded_kwargs),
}

In [136]:
# Fit every detector on the standardized training split, score the test split
# and report ROC AUC and precision @ rank n against the true labels.
for clf_name, clf in classifiers.items():
    # Time the fit; the detectors are fitted on features only (no labels).
    time0 = time()
    clf.fit(x_train_norm)
    time1 = time()
    # Previously `t1 - t0`, names that are never defined in this notebook and
    # therefore raise NameError on a fresh kernel.
    duration = time1 - time0

    # Scores for the held-out samples, compared against ytest below.
    test_scores = clf.decision_function(x_test_norm)

    roc = round(roc_auc_score(ytest, test_scores), ndigits=4)
    prn = round(precision_n_scores(ytest, test_scores), ndigits=4)

    print('{clf_name} : \n ROC : {roc}\n precision @ rank n : {prn}'.format(
        clf_name=clf_name, roc=roc, prn=prn))
    print()

Angle-based Outlier Detector (ABOD) : 
 ROC : 0.5939
 precision @ rank n : 0.25

Cluster-based Local Outlier Factor : 
 ROC : 0.8673
 precision @ rank n : 0.6

Feature Bagging : 
 ROC : 0.6308
 precision @ rank n : 0.1833

Histogram-base Outlier Detection (HBOS) : 
 ROC : 0.8721
 precision @ rank n : 0.55

Isolation Forest : 
 ROC : 0.953
 precision @ rank n : 0.6167

K Nearest Neighbors (KNN) : 
 ROC : 0.7713
 precision @ rank n : 0.45

Local Outlier Factor (LOF) : 
 ROC : 0.6363
 precision @ rank n : 0.1667

Minimum Covariance Determinant (MCD) : 
 ROC : 0.8457
 precision @ rank n : 0.5167

One-class SVM (OCSVM) : 
 ROC : 0.9512
 precision @ rank n : 0.6167

Principal Component Analysis (PCA) : 
 ROC : 0.9658
 precision @ rank n : 0.6833

