# Credit Card Fraud Detection

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the credit-card transactions CSV into a DataFrame and preview it
csv_path = '../input/creditcardfraud/creditcard.csv'
df = pd.read_csv(csv_path)
df

In [None]:
# Column names, dtypes, non-null counts and memory usage of the DataFrame
df.info()

In [None]:
# Number of missing values in each column
df.isna().sum()

In [None]:
# Summary statistics, transposed so that each row describes one column
df.describe().transpose()

In [None]:
# Bar chart of how many rows fall into each value of the target column
sns.countplot(x='Class', data=df, palette="Greens_d")

In [None]:
# (rows, columns) of the subset where Class == 1
df.loc[df['Class'] == 1].shape

In [None]:
# (rows, columns) of the subset where Class == 0
df.loc[df['Class'] == 0].shape

In [None]:
# Plot one histogram per DataFrame column on a single 20x20 inch figure
df.hist(figsize=(20,20))
plt.show()

In [None]:
# Pairwise correlation between all columns, drawn as a square-celled heatmap.
# The colour scale is capped at 0.8 (vmax).
correlation_matrix = df.corr()
fig, heat_ax = plt.subplots(figsize=(12, 9))
sns.heatmap(correlation_matrix, vmax=0.8, square=True, ax=heat_ax)
plt.show()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column
df.describe()

In [None]:
# List the column labels of the DataFrame
df.columns

In [None]:
# Feature matrix: Time, the 28 components V1..V28, and Amount (in that order).
# Target vector: the Class column.
feature_cols = ['Time'] + ['V{}'.format(i) for i in range(1, 29)] + ['Amount']
X = df[feature_cols]
y = df['Class']

In [None]:
# (rows, columns) of the feature matrix
X.shape

In [None]:
# Length of the target vector; should match the row count of X
y.shape

In [None]:
# Split the data into a 90% training set and a 10% test set.
# stratify=y keeps the class ratio identical in both parts; with a very rare
# positive class, an unstratified split can leave the test set with a
# noticeably different (or tiny) share of positive samples.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y)

Oversampling

In [None]:
# Balance the classes by oversampling the minority class with SMOTE.
# Only the training split is resampled, so the test set keeps its original
# class distribution.
from imblearn.over_sampling import SMOTE
ros = SMOTE(random_state=0)
# fit_resample is the supported method name; the older fit_sample alias was
# deprecated and later removed from imbalanced-learn.
X_res, y_res = ros.fit_resample(X_train, y_train)

In [None]:
# Shapes of the resampled features and labels
X_res.shape, y_res.shape

In [None]:
# Number of positive (Class == 1) labels after resampling
(y_res[y_res==1]).shape

# Random Forest

In [None]:
# Defining Classifier
# random_state is fixed so that repeated runs build the same forest, in line
# with the seeded split and oversampling steps. n_jobs=-1 uses all CPU cores;
# verbose=1 prints progress while fitting and predicting.
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(verbose=1, n_jobs=-1, random_state=42)

In [None]:
# Training: fit the random forest on the oversampled training data
clf.fit(X_res,y_res)

In [None]:
# Prediction: hard class labels for the held-out test set
y_pred = clf.predict(X_test)

In [None]:
# CV Score
# SMOTE runs inside the pipeline, so in every fold the synthetic samples are
# generated from that fold's training part only and validation happens on
# real, untouched rows. Cross-validating directly on the already resampled
# X_res / y_res would put synthetic neighbours of training points into the
# validation folds and inflate the score.
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
cv_pipeline = Pipeline([('smote', SMOTE(random_state=0)), ('forest', clf)])
# cross_val_score clones the estimator, so the fitted clf is left unchanged
training_score = cross_val_score(cv_pipeline, X_train, y_train, cv=5)

In [None]:
# Mean of the cross-validation scores as a percentage, rounded to an integer
round(training_score.mean()*100)

Model Evaluation

In [None]:
# Model performances
# scikit-learn metrics take (y_true, y_pred) in that order. Passing them the
# other way round swaps precision with recall in the report and transposes
# the confusion matrix.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Printed explicitly: a bare expression that is not the last line of a cell
# is evaluated but never shown.
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))

In [None]:
# Confusion matrix as a heatmap: rows are true classes, columns are predicted
# classes (y_true first, y_pred second). fmt='d' prints the cell counts as
# plain integers instead of the default scientific-style rounding.
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d')

In [None]:
# Plotting the ROC curve
from sklearn.metrics import roc_curve

# roc_curve sweeps a decision threshold over a continuous score. Feeding it
# the hard 0/1 labels from predict() yields a curve with a single interior
# point, so the predicted probability of the positive class is used instead.
fraud_scores = clf.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, fraud_scores)

# Create the figure before drawing: calling plt.figure() after the plot
# commands opens a second, empty figure rather than resizing this one.
plt.figure(figsize=(20, 20))
plt.plot(fpr, tpr, linewidth=2)
plt.plot([0, 1], [0, 1], "k--")  # diagonal = random-guess baseline
plt.axis([-0.01, 1, 0, 1])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.show()

# Anomaly Detection

Detecting outliers with Isolation Forest and LocalOutlierFactor

In [None]:
# Importing libraries
# IsolationForest is imported from the public sklearn.ensemble package; the
# private sklearn.ensemble.iforest module path was deprecated and has since
# been removed from scikit-learn.
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
    

In [None]:
# Defining Model
# contamination is the expected proportion of outliers and sets the decision
# threshold used by predict(). The original 0.45 flags 45% of all rows as
# anomalies; the observed share of positive labels in y is used instead
# (y holds 0/1 labels, so its mean is that share).
# NOTE(review): max_samples=len(X) makes every tree use the full data set;
# the library default ('auto') subsamples - confirm this is intended.
model = IsolationForest(n_estimators=100, max_samples=len(X),
                        contamination=float(y.mean()),
                        random_state=42, verbose=1)
model.fit(X)

In [None]:
# Predictions: +1 for inliers and -1 for outliers (remapped in the next cell)
y_p = model.predict(X)

In [None]:
# Convert the detector's labels to the target's convention:
# -1 (outlier) becomes 1, +1 (inlier) becomes 0.
y_p = np.where(y_p == -1, 1, 0)
# Count how many remapped predictions disagree with the true labels
n_errors = (y_p != y).sum()

Performances

In [None]:
# Report the mismatch count, accuracy and per-class metrics for the
# Isolation Forest predictions
print(f"Anamolies by ISOLATION FOREST: {n_errors}")
print("Accuracy Score :", accuracy_score(y, y_p), sep="\n")
print("Classification Report :", classification_report(y, y_p), sep="\n")

In [None]:
# Model
# LOF is used in its default outlier-detection mode (novelty=False). With
# novelty=True, predict() is intended for new, unseen data only and gives
# unreliable labels for the very samples the model was fitted on.
# contamination is set to the observed share of positive labels in y instead
# of 0.45, which would flag 45% of all rows as outliers.
model = LocalOutlierFactor(n_neighbors=20, algorithm='auto',
                           leaf_size=30, metric='minkowski',
                           p=2, metric_params=None,
                           contamination=float(y.mean()))

# Training and prediction in one step: fit_predict fits the model and labels
# the training samples themselves (+1 = inlier, -1 = outlier).
lof_labels = model.fit_predict(X)

# Map to the target's convention (0 = normal, 1 = anomaly) and count how many
# predictions disagree with the true labels.
y_p = np.where(lof_labels == -1, 1, 0)
n_errors = (y_p != y).sum()

In [None]:
# Performances: mismatch count, accuracy and per-class metrics for the
# Local Outlier Factor predictions
print(f"anamolies by Local Outlier factor: {n_errors}")
print("Accuracy Score :", accuracy_score(y, y_p), sep="\n")
print("Classification Report :", classification_report(y, y_p), sep="\n")

In [None]:
# IPython.display is the public import location; importing display from
# IPython.core.display is deprecated.
from IPython.display import Image, display

# Show the screenshots 'abc (3).jpg' through 'abc (12).jpg' in order.
# One loop replaces ten copy-pasted cells that differed only in the number.
for shot_number in range(3, 13):
    shot_path = '../input/ppvvpp/New ss/abc ({}).jpg'.format(shot_number)
    display(Image(shot_path, width=1200, unconfined=True))
    