# In [None]:
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 14 18:39:53 2023

@author: tarandeep singh gujral 
"""
''' IMPORTING MODULES '''

import numpy as np 
import pandas as pd 
import sklearn 
import scipy 
import matplotlib.pyplot as plt 
import seaborn as sns
import seaborn as sns
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM
from pylab import rcParams
# Default figure size for every plot, and the display names used as tick
# labels for the two values of the `Class` column.
rcParams["figure.figsize"] = (14, 8)
LABELS = ["Normal", "Fraud"]

# --- Load the credit-card transactions from the local CSV file ---
csv_path = r"f:\Projects\Credit Card Fraud Detection\creditcard.csv"
df = pd.read_csv(csv_path, sep=',')

# Blank line, a heading, then the first five rows of the dataset.
print()
print("Top 5 lines of creditcard.csv dataset\n")
print(df.head())

# --- Basic information about the dataset ---
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
df.info()

# --- Exploratory data analysis ---
# Report whether any cell in the dataset is missing.
print(df.isnull().values.any())

# Number of transactions per class. The counts are ordered by class value
# (0, then 1) rather than by frequency so the bars always line up with LABELS,
# whichever class happens to be more common.
count = df['Class'].value_counts().sort_index()
count.plot(kind="bar", rot=0)
plt.title("Transaction Class Details")
plt.xticks(range(2), LABELS)
plt.ylabel("Frequency")
plt.xlabel("Class")
# Show the bar chart on its own instead of leaving it pending until some
# later plt.show() call.
plt.show()

# --- Split the transactions by class: rows with Class == 1 are fraud,
# --- rows with Class == 0 are normal.
class_col = df['Class']
fraud = df[class_col == 1]
normal = df[class_col == 0]

# (rows, columns) of each subset.
print(fraud.shape, normal.shape)

# Summary statistics of the transaction amount, fraud first, then normal.
for subset in (fraud, normal):
    print(subset.Amount.describe())

# --- Distribution of transaction amounts, one histogram per class ---
# Two stacked panels that share the x-axis: fraud on top, normal below.
bins = 50
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle("AMOUNT PER TRANSCATION PER CLASS")

ax1.hist(fraud.Amount, bins=bins)
ax1.set_title("Fraud")

ax2.hist(normal.Amount, bins=bins)
ax2.set_title("Normal")

# Labels, x-limits and the log scale are set on the lower panel, which is the
# current axes right after plt.subplots(). sharex=True carries the x-limits
# over to the upper panel; the log y-scale applies to the lower panel only.
ax2.set_xlabel("Amount $")
ax2.set_ylabel("Number of Transactions")
ax2.set_xlim((0, 20000))
ax2.set_yscale("log")
plt.show()

# --- Transaction amount against transaction time, one scatter per class ---
# Two stacked panels that share the x-axis: fraud on top, normal below.
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, sharex=True)
f.suptitle("NUMBER OF TRANSACTIONS v/s AMOUNT BY CLASS")

for axis, subset, title in ((ax1, fraud, "Fraud"), (ax2, normal, "Normal")):
    axis.scatter(subset.Time, subset.Amount)
    axis.set_title(title)

# Axis labels go on the lower panel (the current axes after plt.subplots()).
ax2.set_xlabel("Time (seconds)")
ax2.set_ylabel("Amount")
plt.show()

# --- Work on a reproducible 15% random sample of the data ---
df1 = df.sample(frac=0.15, random_state=1)
print(df1.shape)

fraud1 = df1[df1["Class"] == 1]
normal1 = df1[df1["Class"] == 0]

# Share of fraud rows among ALL sampled rows. This value is handed to the
# outlier detectors as `contamination`, which scikit-learn defines as the
# proportion of outliers in the data set -- so the denominator is the whole
# sample, not just the normal rows.
outlier_frac = len(fraud1) / len(df1)

print("OUTLIER FRACTION::", outlier_frac)
print("Fraud Cases:", len(fraud1))
print("Normal Cases:", len(normal1))

# --- Correlation between every pair of columns in the dataset ---
# The matrix is computed once, on the full dataset, and reused for the plot,
# so no second .corr() pass is needed.
corrmat = df.corr()
top_corr = corrmat.index
plt.figure(figsize=(20, 20))
g = sns.heatmap(corrmat, annot=True, cmap='RdYlGn')
# Display the heatmap explicitly so it also appears when the file is run as a
# plain script rather than in a notebook.
plt.show()

# --- Build the feature matrix X and the label vector Y from the sample ---
# Every column except the target is used as a feature.
target = 'Class'
columns = [name for name in df1.columns.tolist() if name != target]
X = df1[columns]
Y = df1[target]

# Random generator with a fixed seed.
state = np.random.RandomState(42)
# Uniform noise in [0, 1) with the same shape as X.
# NOTE(review): X_out is not read anywhere in the visible code, but drawing it
# advances `state`; presumably that changes the results of any estimator that
# is seeded with `state` afterwards -- confirm before removing it.
X_out = state.uniform(low=0, high=1, size=X.shape)

print(X.shape)
print(Y.shape)

# --- The three unsupervised outlier detectors to compare ---
# The two detectors that take a `contamination` argument receive the fraud
# share measured on the sample; the one-class SVM uses fixed gamma / nu values.
isolation_forest = IsolationForest(
    n_estimators=100,
    max_samples=len(X),
    contamination=outlier_frac,
    random_state=state,
    verbose=0,
)
local_outlier_factor = LocalOutlierFactor(
    n_neighbors=20,
    algorithm='auto',
    leaf_size=30,
    metric='minkowski',
    p=2,
    metric_params=None,
    contamination=outlier_frac,
)
one_class_svm = OneClassSVM(
    kernel='rbf',
    degree=3,
    gamma=0.1,
    nu=0.05,
    max_iter=-1,
)

# Detectors keyed by their display name, in the order they are evaluated.
classifiers = {
    "Isolation Forest": isolation_forest,
    "Local Outlier Factor": local_outlier_factor,
    "Support Vector Machine": one_class_svm,
}

print(type(classifiers))

# Number of fraud rows in the sample (kept for reference; the loop below does
# not read it).
n_outliers = len(fraud1)

# --- Fit each detector on the sample and compare its labels with Y ---
for clf_name, clf in classifiers.items():
    # Fit and label every row in one step. Each scikit-learn outlier detector
    # provides fit_predict(X), which returns 1 for inliers and -1 for
    # outliers, so no per-model branching is needed.
    y_pred = clf.fit_predict(X)

    # Re-code the predictions to match the `Class` column:
    # 0 for valid transactions, 1 for fraud. The order of the two assignments
    # matters: inliers (1) must become 0 before outliers (-1) become 1.
    y_pred[y_pred == 1] = 0
    y_pred[y_pred == -1] = 1

    # Count of rows whose predicted label differs from the true label.
    n_errors = (y_pred != Y).sum()

    # Run classification metrics
    print(f"{clf_name}: {n_errors}")
    print("Accuracy Score :")
    print(accuracy_score(Y, y_pred))
    print("Classification Report :")
    print(classification_report(Y, y_pred))