In [6]:
import random
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from pyod.models import abod

In [2]:
n = 284807 - 1 #number of records in file (excludes header)
s = 10000 #desired sample size
skip = sorted(random.sample(range(1,n+1),n-s)) #the 0-indexed header will not be included in the skip list

data = pd.read_csv('creditcard.csv', skiprows=skip)
X = data.drop('Class', axis=1)
print(str(X.shape) + '\n')
print(np.unique(data['Class'], return_counts=True))
print('\nInbalance ratio (class0/class1) : ' + str(492/284315)+'\n')

data.head(2)

(10001, 30)

(array([0, 1], dtype=int64), array([9986,   15], dtype=int64))

Inbalance ratio (class0/class1) : 0.0017304750013189597



Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,7,-0.644269,1.417964,1.07438,-0.492199,0.948934,0.428118,1.120631,-3.807864,0.615375,...,1.943465,-1.015455,0.057504,-0.649709,-0.415267,-0.051634,-1.206921,-1.085339,40.8,0
1,22,-2.074295,-0.121482,1.322021,0.410008,0.295198,-0.959537,0.543985,-0.104627,0.475664,...,-0.403639,-0.227404,0.742435,0.398535,0.249212,0.274404,0.359969,0.243232,26.43,0


# LocalOutlierFactor

In [3]:
lof = LocalOutlierFactor(n_neighbors=20, contamination=0.002, n_jobs=-1)
y_pred = lof.fit_predict(X)

# convert label
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1] = 1

print(np.unique(y_pred, return_counts=True))

print(metrics.classification_report(y_true=data['Class'], y_pred=y_pred))

(array([0, 1]), array([9981,   20], dtype=int64))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9986
           1       0.00      0.00      0.00        15

    accuracy                           1.00     10001
   macro avg       0.50      0.50      0.50     10001
weighted avg       1.00      1.00      1.00     10001



# IsolationForest

In [4]:
IF = IsolationForest(contamination=0.005)
y_pred = IF.fit_predict(X)

# convert label
y_pred[y_pred == 1] = 0
y_pred[y_pred == -1] = 1

print(np.unique(y_pred, return_counts=True))

print(metrics.classification_report(y_true=data['Class'], y_pred=y_pred))



(array([0, 1]), array([9951,   50], dtype=int64))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9986
           1       0.12      0.40      0.18        15

    accuracy                           0.99     10001
   macro avg       0.56      0.70      0.59     10001
weighted avg       1.00      0.99      1.00     10001



# ABOD

In [5]:
abod_od = abod.ABOD(n_neighbors=20, contamination=0.002)
y_pred = abod_od.fit_predict(X)

print(np.unique(y_pred, return_counts=True))

print(metrics.classification_report(y_true=data['Class'], y_pred=y_pred))



(array([0, 1]), array([9981,   20], dtype=int64))
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      9986
           1       0.05      0.07      0.06        15

    accuracy                           1.00     10001
   macro avg       0.52      0.53      0.53     10001
weighted avg       1.00      1.00      1.00     10001

