In [1]:
import numpy as np
import pandas as pd

card = pd.read_csv('creditcard.csv')

In [2]:
# Import various visulization packages

import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
from sklearn.preprocessing import RobustScaler, StandardScaler

rob_scaler = RobustScaler()

card['Scaled Amount'] = rob_scaler.fit_transform(card['Amount'].values.reshape(-1, 1))
card['Scaled Time'] = rob_scaler.fit_transform(card['Time'].values.reshape(-1, 1))

card.drop(['Amount', 'Time'], axis = 1, inplace = True)

In [29]:
duplicate = card.sample(frac = 0.1, random_state = 0, replace = True)

In [30]:
y = duplicate['Class']


In [31]:
X = duplicate.drop(['Class'], axis = 1)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [13]:
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score, accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score

In [14]:
from sklearn.model_selection import GridSearchCV

In [33]:
log_reg_params = {"penalty": ['l1', 'l2'], 'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000]}
dr = LogisticRegression()
lr = GridSearchCV(dr, log_reg_params)
lr.fit(X, y)
log_reg = lr.best_estimator_





In [36]:
knears_params = {"n_neighbors": list(range(2,5,1)), 'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute']}

grid_knears = GridSearchCV(KNeighborsClassifier(), knears_params)
grid_knears.fit(X, y)
# KNears best estimator
knears_neighbors = grid_knears.best_estimator_

# Support Vector Classifier
svc_params = {'C': [0.5, 0.7, 0.9, 1], 'kernel': ['rbf', 'poly', 'sigmoid', 'linear']}
grid_svc = GridSearchCV(SVC(), svc_params)
grid_svc.fit(X, y)

# SVC best estimator
svc = grid_svc.best_estimator_

# DecisionTree Classifier
tree_params = {"criterion": ["gini", "entropy"], "max_depth": list(range(2,4,1)), 
              "min_samples_leaf": list(range(5,7,1))}
grid_tree = GridSearchCV(DecisionTreeClassifier(), tree_params)
grid_tree.fit(X, y)

# tree best estimator
tree_clf = grid_tree.best_estimator_





In [37]:
y_new = card['Class']
X_new = card.drop(['Class'], axis = 1)

In [38]:
y_predict = log_reg.predict(X_new)

print(classification_report(y_new, y_predict))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.83      0.58      0.68       492

   micro avg       1.00      1.00      1.00    284807
   macro avg       0.91      0.79      0.84    284807
weighted avg       1.00      1.00      1.00    284807



In [39]:
confusion_matrix(y_new,  y_predict)

array([[284256,     59],
       [   205,    287]], dtype=int64)

In [41]:
y_predict1 = knears_neighbors.predict(X_new)

print(classification_report(y_new, y_predict1))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.86      0.66      0.75       492

   micro avg       1.00      1.00      1.00    284807
   macro avg       0.93      0.83      0.87    284807
weighted avg       1.00      1.00      1.00    284807



In [42]:
confusion_matrix(y_new, y_predict1)

array([[284260,     55],
       [   167,    325]], dtype=int64)

In [43]:
y_predict2 = svc.predict(X_new)

print(classification_report(y_new, y_predict2))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.84      0.80      0.82       492

   micro avg       1.00      1.00      1.00    284807
   macro avg       0.92      0.90      0.91    284807
weighted avg       1.00      1.00      1.00    284807



In [44]:
confusion_matrix(y_new, y_predict2)

array([[284239,     76],
       [    99,    393]], dtype=int64)

In [45]:
y_predict3 = tree_clf.predict(X_new)

print(classification_report(y_new, y_predict3))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00    284315
           1       0.80      0.68      0.74       492

   micro avg       1.00      1.00      1.00    284807
   macro avg       0.90      0.84      0.87    284807
weighted avg       1.00      1.00      1.00    284807



In [46]:
confusion_matrix(y_new, y_predict3)

array([[284231,     84],
       [   156,    336]], dtype=int64)