In [16]:
# main libraries
import pandas as pd
import numpy as np
import time

# sklearn libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix,accuracy_score,precision_score,recall_score,f1_score,classification_report
import joblib
from sklearn.preprocessing import StandardScaler

#Keras libraries
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM

In [2]:
df = pd.read_csv('D:\myProject\project\creditcard.csv')
df.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [3]:
df.shape

(284807, 31)

In [5]:
#0 = non fraud and 1 = fraud; calculate the proportion of each type.
All = df.shape[0]
fraud = df[df['Class'] == 1]
nonFraud = df[df['Class'] == 0]

frauds : 0.1727485630620034 %
non frauds : 99.82725143693798 %


In [6]:
# Standardizing the features
df['Vamount'] = StandardScaler().fit_transform(df['Amount'].values.reshape(-1,1))
df['Vtime'] = StandardScaler().fit_transform(df['Time'].values.reshape(-1,1))

df = df.drop(['Time','Amount'], axis = 1)
df.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V22,V23,V24,V25,V26,V27,V28,Class,Vamount,Vtime
0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,0.090794,...,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,0,0.244964,-1.996583
1,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,-0.166974,...,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,0,-0.342475,-1.996583
2,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,0.207643,...,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,0,1.160686,-1.996562
3,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,-0.054952,...,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,0,0.140534,-1.996562
4,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,0.753074,...,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,0,-0.073403,-1.996541


In [7]:
X = df.drop(['Class'], axis = 1)
y = df['Class']

# Prepare the data

In [9]:
# Shuffle the data before creating the subsamples
df = df.sample(frac=1)

frauds = df[df['Class'] == 1]
non_frauds = df[df['Class'] == 0]

new_df = pd.concat([non_frauds, frauds])
# Shuffle dataframe rows
new_df = new_df.sample(frac=1, random_state=42)

new_df.head()

Unnamed: 0,V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,...,V22,V23,V24,V25,V26,V27,V28,Class,Vamount,Vtime
57662,0.975173,-0.399043,1.097372,1.226934,-0.33057,1.859953,-1.020192,0.831408,1.327835,-0.386771,...,0.076821,0.176523,-1.020961,0.052343,-0.319662,0.1312,0.015249,0,-0.349231,-0.985697
177323,-2.101217,-0.943018,0.149028,-2.993337,-0.805064,-0.344694,1.253764,0.15101,1.062363,-2.476512,...,0.760655,0.082654,0.64728,0.383357,-0.345808,-0.022971,-0.036708,0,1.054217,0.5963
253291,-4.249949,4.452832,-2.384361,-2.22105,0.610058,-1.849599,2.110387,-0.797302,3.548006,6.052674,...,0.79786,-0.177738,-0.081775,0.505965,-0.046863,0.601072,-0.613889,0,-0.347392,1.292222
172727,1.894274,-0.485439,-1.761262,0.462008,2.0798,4.163423,-0.868749,1.079294,1.021814,0.00095,...,-1.342503,0.465605,0.617168,-0.287021,-1.085852,0.071345,-0.034373,0,-0.26747,0.555995
1246,-0.648844,1.172894,0.940058,-0.487177,0.726667,0.356874,0.329655,-0.670178,-0.635306,-0.091041,...,-0.916087,-0.111763,-1.035125,0.009053,0.141971,0.311876,0.102662,0,-0.335278,-1.976304


In [10]:
# prepare the data
features = new_df.drop(['Class'], axis = 1)
labels = pd.DataFrame(new_df['Class'])

feature_array = features.values
label_array = labels.values

In [67]:
# splitting the faeture array and label array keeping 80% for the trainnig sets
X_train,X_test,y_train,y_test = train_test_split(feature_array,label_array,test_size=0.20)

# normalize: Scale input vectors individually to unit norm (vector length).
X_train=normalize(X_train)
X_test=normalize(X_test)

# LSTM

In [68]:
import tensorflow as tf
model = Sequential()

In [69]:
print(X_train,y_train)
X_train = np.expand_dims(X_train, 1)
print(X_train.shape,y_train.shape)

[[ 0.45941944 -0.22558894 -0.44957494 ... -0.01354423  0.0863179
   0.29446379]
 [ 0.00562537  0.22786346  0.03804224 ...  0.02614854 -0.10974677
   0.48793587]
 [-0.38743517  0.3153059   0.01103683 ... -0.11097851 -0.03312617
  -0.10487478]
 ...
 [ 0.4219756   0.12511409  0.10391058 ...  0.01091565 -0.11152117
  -0.46203994]
 [-0.30511563  0.25123746  0.4813758  ... -0.10090071 -0.06067632
  -0.21904617]
 [ 0.46727931  0.08354431 -0.54867266 ... -0.0131828  -0.07593817
   0.31955539]] [[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]
(227845, 1, 30) (227845, 1)


In [75]:
#Set up a 5 layer network with last layer being the output layer
#First layer has input shape as number of columns in X_train;Each layer has dropout at 0.2 (20% of data used at each layer)
#Last layer is with softmax activation
model.add(LSTM(50,return_sequences=True,input_shape=(X_train.shape[1:])))
model.add(Dropout(0.2))

model.add(LSTM(50,return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(50,return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(50,return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(50,return_sequences=True))
model.add(Dropout(0.2))

model.add(Dense(256))
model.add(Dropout(0.2))

model.add(Dense(128))
model.add(Dropout(0.2))

model.add(Dense(64))
model.add(Dropout(0.2))

model.add(Dense(16))
model.add(Dropout(0.2))

model.add(Dense(1, activation='softmax'))

In [76]:
model.summary()

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_30 (LSTM)              (None, 1, 50)             16200     
                                                                 
 dropout_36 (Dropout)        (None, 1, 50)             0         
                                                                 
 lstm_31 (LSTM)              (None, 1, 50)             20200     
                                                                 
 dropout_37 (Dropout)        (None, 1, 50)             0         
                                                                 
 lstm_32 (LSTM)              (None, 1, 50)             20200     
                                                                 
 dropout_38 (Dropout)        (None, 1, 50)             0         
                                                                 
 lstm_33 (LSTM)              (None, 1, 50)            

In [None]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])#Fit the compiled model on training data.
print(X_train.shape,y_train.shape)
model.fit(X_train,y_train,batch_size=30,epochs=30)

(227845, 1, 30) (227845, 1)
Epoch 1/30
Epoch 2/30
 862/7595 [==>...........................] - ETA: 1:48 - loss: 15.2239 - accuracy: 0.0017

In [None]:
X_test = np.expand_dims(X_test, 1)
y_pred=model.predict(X_test)

In [None]:
print(confusion_matrix(y_test,y_predict))
print(recall_score(y_test, new_y_pred))
print(accuracy_score(y_test, new_y_pred))
print(precision_score(y_test, new_y_pred))
print(f1_score(y_test, new_y_pred))