In [198]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten,Dense,BatchNormalization,Dropout
from tensorflow.keras.layers import Conv1D,MaxPool1D
from tensorflow.keras.optimizers import Adam

In [199]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [200]:
print(pd.__version__)

1.1.3


In [201]:
df=pd.read_csv("E:/Fraud.csv")

In [202]:
df.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,PAYMENT,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,0,0
1,1,PAYMENT,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,0,0
2,1,TRANSFER,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,1,0
3,1,CASH_OUT,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,1,0
4,1,PAYMENT,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,0,0


In [203]:
df.shape

(6362620, 11)

In [204]:
#checking if the data has any null value
df.isnull().sum()

step              0
type              0
amount            0
nameOrig          0
oldbalanceOrg     0
newbalanceOrig    0
nameDest          0
oldbalanceDest    0
newbalanceDest    0
isFraud           0
isFlaggedFraud    0
dtype: int64

In [205]:
df["isFraud"].value_counts()

0    6354407
1       8213
Name: isFraud, dtype: int64

In [206]:
#Balancing the data
valid=df[df["isFraud"]==0]
fraud=df[df["isFraud"]==1]


In [207]:
#Sampling the data
valid=valid.sample(fraud.shape[0])
valid.shape

(8213, 11)

In [208]:
#Creating a balanced dataset
df1=fraud.append(valid,ignore_index=True)

In [209]:
df1.head

<bound method NDFrame.head of        step      type    amount     nameOrig  oldbalanceOrg  newbalanceOrig  \
0         1  TRANSFER    181.00  C1305486145         181.00            0.00   
1         1  CASH_OUT    181.00   C840083671         181.00            0.00   
2         1  TRANSFER   2806.00  C1420196421        2806.00            0.00   
3         1  CASH_OUT   2806.00  C2101527076        2806.00            0.00   
4         1  TRANSFER  20128.00   C137533655       20128.00            0.00   
...     ...       ...       ...          ...            ...             ...   
16421   214   PAYMENT   4279.39   C109385579         618.00            0.00   
16422   140   CASH_IN  28400.13   C837829566      318567.00       346967.13   
16423    19   CASH_IN  88590.50  C1564572877     7110169.94      7198760.44   
16424   324   PAYMENT  41758.82  C1334411921       20504.00            0.00   
16425   177   PAYMENT  19708.90   C782017708       17365.00            0.00   

          nameDest  o

In [210]:
X=df1.drop("isFraud",axis=1)
y=df1["isFraud"]
print(X.shape)
print(y.shape)

(16426, 10)
(16426,)


In [211]:
#Dropping the nameOrig column
X=X.drop("nameOrig",axis=1)
X.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFlaggedFraud
0,1,TRANSFER,181.0,181.0,0.0,C553264065,0.0,0.0,0
1,1,CASH_OUT,181.0,181.0,0.0,C38997010,21182.0,0.0,0
2,1,TRANSFER,2806.0,2806.0,0.0,C972765878,0.0,0.0,0
3,1,CASH_OUT,2806.0,2806.0,0.0,C1007251739,26202.0,0.0,0
4,1,TRANSFER,20128.0,20128.0,0.0,C1848415041,0.0,0.0,0


In [212]:
#Converting the names with M to 1 and with C to 0
X["nameDest"]=[1 if x[0]=="M" else 0 for x in X["nameDest"]]
X["type"]=[x.replace("CASH_OUT","1") for x in X["type"]]
X.head()
print(X.shape)


(16426, 9)


In [213]:
X["type"]=[x.replace("TRANSFER","2") for x in X["type"]]
X.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFlaggedFraud
0,1,2,181.0,181.0,0.0,0,0.0,0.0,0
1,1,1,181.0,181.0,0.0,0,21182.0,0.0,0
2,1,2,2806.0,2806.0,0.0,0,0.0,0.0,0
3,1,1,2806.0,2806.0,0.0,0,26202.0,0.0,0
4,1,2,20128.0,20128.0,0.0,0,0.0,0.0,0


In [214]:
X["type"]=[x.replace("PAYMENT","3") for x in X["type"]]
X.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFlaggedFraud
0,1,2,181.0,181.0,0.0,0,0.0,0.0,0
1,1,1,181.0,181.0,0.0,0,21182.0,0.0,0
2,1,2,2806.0,2806.0,0.0,0,0.0,0.0,0
3,1,1,2806.0,2806.0,0.0,0,26202.0,0.0,0
4,1,2,20128.0,20128.0,0.0,0,0.0,0.0,0


In [215]:
X["type"]=[x.replace("CASH_IN","4") for x in X["type"]]
X.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFlaggedFraud
0,1,2,181.0,181.0,0.0,0,0.0,0.0,0
1,1,1,181.0,181.0,0.0,0,21182.0,0.0,0
2,1,2,2806.0,2806.0,0.0,0,0.0,0.0,0
3,1,1,2806.0,2806.0,0.0,0,26202.0,0.0,0
4,1,2,20128.0,20128.0,0.0,0,0.0,0.0,0


In [216]:
X["type"]=[x.replace("DEBIT","5") for x in X["type"]]
X.head()

Unnamed: 0,step,type,amount,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFlaggedFraud
0,1,2,181.0,181.0,0.0,0,0.0,0.0,0
1,1,1,181.0,181.0,0.0,0,21182.0,0.0,0
2,1,2,2806.0,2806.0,0.0,0,0.0,0.0,0
3,1,1,2806.0,2806.0,0.0,0,26202.0,0.0,0
4,1,2,20128.0,20128.0,0.0,0,0.0,0.0,0


In [217]:
X["type"]=X["type"].astype(int)
X.head()
print(X.shape)


(16426, 9)


In [218]:
#Splitting the data into training and testing sets
X_train,X_test,Y_train,Y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [219]:
X_train.shape, X_test.shape,Y_train.shape,Y_test.shape

((13140, 9), (3286, 9), (13140,), (3286,))

In [220]:
X["type"].value_counts()

1    6966
2    4761
3    2794
4    1851
5      54
Name: type, dtype: int64

In [221]:
#Scaling the data
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)

In [222]:
X_train.shape

(13140, 9)

In [223]:
Y_train=np.array(Y_train)
Y_test=np.array(Y_test)
X_train=np.array(X_train)
X_test=np.array(X_test)

In [224]:
X_train=X_train.reshape(X_train.shape[0],X_train.shape[1],1)
X_test=X_test.reshape(X_test.shape[0],X_test.shape[1],1)

In [225]:
X_train.shape, X_test.shape,Y_train.shape,Y_test.shape

((13140, 9, 1), (3286, 9, 1), (13140,), (3286,))

Building the model

In [226]:
epochs=20
model=Sequential()
model.add(Conv1D(32,2,activation='relu',input_shape=X_train[0].shape))
model.add(BatchNormalization())
model.add(Dropout(0.2))

model.add(Conv1D(64,2,activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Flatten())
model.add(Dense(64,activation='relu'))
model.add(Dropout(0.2))

model.add(Dense(1,activation="sigmoid"))

In [227]:
#Compiling the model
model.compile(optimizer=Adam(lr=0.001),loss='binary_crossentropy',metrics=["accuracy"])

In [228]:
#training the model
history=model.fit(X_train,Y_train,epochs=epochs,validation_data=(X_test,Y_test),verbose=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
