## Credit Card Fraud Detection (Unbalanced Data)

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [3]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D,MaxPool1D,Flatten,Dense,Dropout,BatchNormalization

In [4]:
# Load the transactions CSV from the current working directory.
data = pd.read_csv(filepath_or_buffer='creditcard.csv')

In [5]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [6]:
# Count the missing values in each column.
data.isna().sum()
# Every column reports 0, so there are no nulls to handle.

Time      0
V1        0
V2        0
V3        0
V4        0
V5        0
V6        0
V7        0
V8        0
V9        0
V10       0
V11       0
V12       0
V13       0
V14       0
V15       0
V16       0
V17       0
V18       0
V19       0
V20       0
V21       0
V22       0
V23       0
V24       0
V25       0
V26       0
V27       0
V28       0
Amount    0
Class     0
dtype: int64

In [7]:
# (rows, columns) of the frame read from the CSV.
data.shape

(284807, 31)

In [8]:
# Number of rows per class label (0 = non-fraud, 1 = fraud).

data.loc[:, 'Class'].value_counts()

# Output => non-fraud cases = 284315
#               fraud cases = 492
# The data is heavily unbalanced.

0    284315
1       492
Name: Class, dtype: int64

### Conversion into Balanced Data

In [9]:
# Separate the rows by class label so the majority class can be undersampled.
fraud = data.loc[data['Class'].eq(1)]
nonfraud = data.loc[data['Class'].eq(0)]

In [10]:
# Shapes of the two per-class frames, majority class first.
(nonfraud.shape, fraud.shape)

((284315, 31), (492, 31))

In [11]:
# Undersample the non-fraud rows down to the number of fraud rows.
# A fixed random_state keeps the drawn subset (and therefore every
# downstream split and metric) identical across "Restart & Run All".
nonfraud = nonfraud.sample(n=fraud.shape[0], random_state=0)
nonfraud.shape

(492, 31)

In [12]:
# Stack the fraud rows on top of the sampled non-fraud rows and renumber
# the index from 0. pd.concat replaces DataFrame.append, which is deprecated
# and no longer available in pandas 2.x.
data = pd.concat([fraud, nonfraud], ignore_index=True)

In [13]:
# (rows, columns) of the combined fraud + sampled non-fraud frame.
data.shape

(984, 31)

In [14]:
# Display the combined frame: the fraud rows come first, followed by the
# sampled non-fraud rows.
data

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,406.0,-2.312227,1.951992,-1.609851,3.997906,-0.522188,-1.426545,-2.537387,1.391657,-2.770089,...,0.517232,-0.035049,-0.465211,0.320198,0.044519,0.177840,0.261145,-0.143276,0.00,1
1,472.0,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,529.00,1
2,4462.0,-2.303350,1.759247,-0.359745,2.330243,-0.821628,-0.075788,0.562320,-0.399147,-0.238253,...,-0.294166,-0.932391,0.172726,-0.087330,-0.156114,-0.542628,0.039566,-0.153029,239.93,1
3,6986.0,-4.397974,1.358367,-2.592844,2.679787,-1.128131,-1.706536,-3.496197,-0.248778,-0.247768,...,0.573574,0.176968,-0.436207,-0.053502,0.252405,-0.657488,-0.827136,0.849573,59.00,1
4,7519.0,1.234235,3.019740,-4.304597,4.732795,3.624201,-1.357746,1.713445,-0.496358,-1.282858,...,-0.379068,-0.704181,-0.656805,-1.632653,1.488901,0.566797,-0.010016,0.146793,1.00,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
979,127516.0,-0.611220,1.385288,1.130830,3.042945,1.426461,-0.117286,1.206899,0.008731,-2.417095,...,0.030497,-0.042989,-0.442153,0.054156,1.008005,0.206303,-0.124749,-0.077904,3.79,0
980,80561.0,0.894258,-0.327235,-0.166174,1.135400,-0.030057,0.119024,0.240464,0.094359,0.039812,...,0.099504,0.059479,-0.289118,-0.303268,0.692327,-0.256227,-0.012894,0.020037,151.51,0
981,126965.0,1.869709,-0.115020,-1.790085,1.232976,0.513432,-0.680475,0.573279,-0.314851,0.198695,...,0.141929,0.275398,-0.045358,0.565499,0.344915,-0.551500,-0.037231,-0.034268,106.40,0
982,154182.0,1.365868,-1.794915,-2.958424,-0.229897,-0.033721,-1.077060,1.087094,-0.649714,-1.495915,...,0.205736,0.216725,-0.533729,-0.289763,0.349186,0.976984,-0.205007,-0.027694,454.96,0


In [15]:
# Features: every column except the label. Target: the 'Class' label column.
X = data.drop(columns=['Class'])
y = data['Class']

In [16]:
# Shapes of the feature matrix and the label vector.
(X.shape, y.shape)

((984, 30), (984,))

In [17]:
# Hold out 20% of the rows for testing; stratifying on y keeps the class
# ratio the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
    stratify=y,
)

In [18]:
# Shapes of the train and test feature matrices after the split.
(X_train.shape, X_test.shape)

((787, 30), (197, 30))

### Standardization [to bring all feature columns onto a comparable scale]


In [19]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so the test data never influences the statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [20]:
# Convert the label Series to plain NumPy arrays.
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

In [21]:
# Shapes of the scaled train and test arrays.
(X_train.shape, X_test.shape)

((787, 30), (197, 30))

In [22]:
# Append a trailing axis of length 1 so each row becomes a
# (n_features, 1) sequence, which is the input shape the Conv1D layers expect.
# The sizes are taken from the arrays themselves rather than hard-coded, so
# the cell keeps working if the dataset size or test_size changes.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

### Build CNN Model

In [23]:
# 1D CNN binary classifier: two Conv1D stages (each followed by batch
# normalisation and dropout), then a dense head ending in a single sigmoid
# unit that outputs a value between 0 and 1.
model = Sequential([
    # Convolutional stage 1
    Conv1D(filters=32, kernel_size=2, activation='relu', input_shape=(30, 1)),
    BatchNormalization(),
    Dropout(0.30),

    # Convolutional stage 2
    Conv1D(filters=64, kernel_size=2, activation='relu'),
    BatchNormalization(),
    Dropout(0.50),

    # Dense head
    Flatten(),
    Dense(64, activation='relu'),
    Dropout(0.50),

    Dense(1, activation='sigmoid'),
])



In [24]:
# Print the layer-by-layer output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 29, 32)            96        
_________________________________________________________________
batch_normalization (BatchNo (None, 29, 32)            128       
_________________________________________________________________
dropout (Dropout)            (None, 29, 32)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 28, 64)            4160      
_________________________________________________________________
batch_normalization_1 (Batch (None, 28, 64)            256       
_________________________________________________________________
dropout_1 (Dropout)          (None, 28, 64)            0         
_________________________________________________________________
flatten (Flatten)            (None, 1792)              0

In [25]:
from tensorflow.keras.optimizers import Adam

In [27]:
# Binary cross-entropy matches the single sigmoid output unit.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [28]:
# Train for 20 epochs and keep the per-epoch metrics in `history`.
# NOTE: the held-out test split is passed as validation data here, so the
# validation metrics reported during training are computed on the test set.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=20,
    validation_data=(X_test, y_test),
)

Train on 787 samples, validate on 197 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Here we achieve an accuracy of 91.3%