In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the transactions table from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer="creditcard.csv")

In [None]:
# Preview the first rows only; the full frame's dimensions are shown by
# data.shape in a later cell.
data.head()

In [None]:
# Reproducible random subset: 20% of the rows, drawn with a fixed seed.
sample = data.sample(random_state=0, frac=0.2)

In [None]:
# (rows, columns) of the full dataset.
data.shape

In [None]:
# (rows, columns) of the 20% random sample.
sample.shape

In [None]:
# Partition the full dataset by label: Class == 1 rows are treated as fraud,
# Class == 0 rows as normal transactions.
fraud = data.loc[data['Class'].eq(1)]
normal = data.loc[data['Class'].eq(0)]

In [None]:
# Fraction of ALL transactions that are fraudulent. This value is passed as
# the `contamination` argument of the outlier detectors, which expects the
# proportion of outliers in the data set, so the denominator is the total
# number of rows rather than only the normal ones.
outlier = len(fraud) / len(data)
outlier

In [None]:
# Bar chart of how many rows carry each Class label.
data['Class'].value_counts().plot.bar()

In [None]:
# Class is a binary label, so plot per-label counts. sns.distplot is
# deprecated in seaborn and its kernel-density overlay has no meaning for a
# 0/1 column; countplot is available in both old and current releases.
sns.countplot(x='Class', data=data)

In [None]:
# Bar chart of transaction counts per class, with title and axis labels.
data['Class'].value_counts().plot(kind='bar')
plt.title('Distinction b/w fraud and normal')
# Fraud rows are labelled 1 in the Class column (normal rows are 0).
plt.xlabel('class-fraud(1)/normal(0)')
plt.ylabel('number of transactions')

In [None]:
# Summary statistics of the Amount column for fraud rows only.
fraud['Amount'].describe()

In [None]:
# Print column dtypes, non-null counts and memory usage of the full dataset.
data.info()

In [None]:
# Pairwise correlation between all columns, computed once.
matrix = data.corr()
# Row labels of the correlation matrix, i.e. the columns that were correlated.
top_feature = matrix.index
plt.figure(figsize=[30,30])
# Reuse `matrix` instead of recomputing data[top_feature].corr(): it is the
# same set of columns, so the second pass was redundant work.
sns.heatmap(matrix, annot=True, cbar=False, cmap='RdYlGn')


In [None]:
# Names of every column except the 'Class' label, in their original order.
independenFeature = data.columns[data.columns != 'Class'].tolist()

In [None]:
# List holding only the 'Class' label column name (empty if it is absent).
dependentFeature = data.columns[data.columns == 'Class'].tolist()

In [None]:
# Display the list of non-label column names.
independenFeature

In [None]:
# Display the list containing the label column name.
dependentFeature

In [None]:
# Feature matrix: every non-label column of the sample.
X = sample[independenFeature]
# Selecting with a list returns a one-column DataFrame; squeeze it to a 1-D
# Series so that y lines up element-wise with the 1-D prediction arrays it is
# compared against, matching the y built in the following cell.
y = sample[dependentFeature].squeeze(axis=1)

In [None]:
# Features: the sample without its label column.
X = sample.drop(columns=['Class'])
# Target: the label column as a 1-D Series.
y = sample.loc[:, 'Class']

In [None]:
# Outlier detectors to compare, keyed by the display name used when reporting
# results. `outlier` is supplied as the expected contamination for the two
# detectors that accept it.
classifiers = {
    "IsolationForest": IsolationForest(
        n_estimators=100,
        max_samples=len(X),
        contamination=outlier,
        random_state=0,
        verbose=0,
    ),
    "LocalOutlierFactor": LocalOutlierFactor(
        n_neighbors=20,
        algorithm='auto',
        leaf_size=30,
        metric='minkowski',
        p=2,
        metric_params=None,
        contamination=outlier,
    ),
    # OneClassSVM no longer accepts `random_state` (the parameter was removed
    # from scikit-learn); passing it raises TypeError, so it is omitted here.
    "SupportVectorMachine": OneClassSVM(
        kernel='rbf',
        degree=3,
        gamma=0.1,
        nu=0.05,
        max_iter=-1,
    ),
}

In [None]:
from sklearn.metrics import accuracy_score, classification_report

In [None]:
# Number of known fraud rows in the full dataset (informational only; the
# loop below does not read it).
n_outliers = len(fraud)

for clf_name, clf in classifiers.items():
    # Fit the detector on X and label those same rows in a single call.
    # All three detectors expose fit_predict, which returns +1 for inliers
    # and -1 for outliers, so no per-model branching is needed.
    raw_pred = clf.fit_predict(X)

    # Map detector labels onto the dataset's Class encoding:
    # inlier (+1) -> 0 (valid transaction), outlier (-1) -> 1 (fraud).
    y_pred = np.where(raw_pred == -1, 1, 0)

    # Count rows whose predicted label differs from the true label.
    n_errors = (y_pred != y).sum()

    # Run classification metrics against the true labels.
    print("{}: {}".format(clf_name, n_errors))
    print("Accuracy Score :")
    print(accuracy_score(y, y_pred))
    print("Classification Report :")
    print(classification_report(y, y_pred))