In [66]:
import numpy as np
import pandas as pd
from keras.models import load_model
from keras.optimizers import Adam
from keras.layers import Dense,Input
from keras import backend as K
from keras import losses,metrics
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn import preprocessing


In [67]:
# Load the raw dataset from a CSV file given by a path relative to the
# working directory.
data = pd.read_csv("creditcard.csv")

In [68]:
# Seed NumPy's global random number generator so NumPy-driven randomness is
# repeatable between runs.
np.random.seed(42)

In [69]:
# Stratified shuffle split of the dataset.
# Features: every column except the first and the last one.
# Target:   the 'Class' column.
X = data.iloc[:, 1:-1]
Y = data['Class']

# 50 stratified shuffles, each one holding out 20% of the rows for testing.
sss = StratifiedShuffleSplit(n_splits=50, test_size=0.2, random_state=42)

# Every iteration overwrites the frames produced by the previous one, so once
# the loop finishes only the LAST of the 50 generated splits is kept.
for train_index, test_index in sss.split(X, Y):
    x_train = X.iloc[train_index]
    x_test = X.iloc[test_index]
    y_train = Y.iloc[train_index]
    y_test = Y.iloc[test_index]

In [80]:
# Standardise the features with statistics learned from the training split.
scaler = preprocessing.StandardScaler()
# fit_transform fits `scaler` in place; the transformed array it returns is
# not stored anywhere -- it only appears as this cell's displayed output.
scaler.fit_transform(x_train)

array([[ 0.58541873, -0.15944589, -0.23730703, ...,  0.00337683,
         0.06338125, -0.03706768],
       [-0.45852227, -0.20459649,  2.09675774, ...,  1.06079977,
         0.50687099, -0.22302591],
       [-0.77414488,  0.01826225, -0.28219557, ..., -2.45284616,
         0.36068451, -0.3346455 ],
       ...,
       [-1.91753722,  2.46493627, -1.58980655, ...,  3.2231603 ,
         1.97856056, -0.33133412],
       [-0.30002061,  0.20814354,  0.89977904, ...,  0.25112369,
         0.19206192, -0.28590494],
       [ 1.01532061, -0.21081474, -0.23366075, ...,  0.12373851,
        -0.10480629, -0.33345489]])

In [81]:

hidden_size = 16  # size of the hidden layer
latent_dim = 2    # number of latent variables to learn

input_dim = 29    # number of input features fed to the network

# input layer
input_layer = Input(shape=(input_dim,))

# encoder: one hidden layer feeding two parallel heads that output the mean
# and the log-variance of the latent distribution
encoder_hidden = Dense(hidden_size, activation='relu', name='encoder_hidden')(input_layer)
z_mean = Dense(latent_dim, name='z_mean')(encoder_hidden)
z_log_var = Dense(latent_dim, name='z_log_var')(encoder_hidden)

# NOTE(review): kl_loss below closes over the z_mean / z_log_var tensors
# created in this cell from `input_layer`. Confirm these are the tensors that
# the model being compiled with these losses actually computes.


def rec_loss(y_true, y_pred):
    """Reconstruction term: squared error summed over the last axis.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of reconstructed values.

    Returns:
        Tensor with the last axis reduced by summation.
    """
    return K.sum(K.square(y_true - y_pred), axis=-1)


def kl_loss(y_true, y_pred):
    """KL-divergence term computed from `z_mean` and `z_log_var`.

    Both arguments are ignored; they exist only so the function matches the
    (y_true, y_pred) signature expected of a Keras loss/metric.

    Returns:
        -0.5 * sum(1 + z_log_var - z_mean**2 - exp(z_log_var)) over the last
        axis.
    """
    return -0.5 * K.sum(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)


def vae_loss(y_true, y_pred):
    """Combined loss: mean of (reconstruction term + KL term) / 2.

    Delegates to `rec_loss` and `kl_loss` so the two formulas are defined in
    exactly one place and the helper names are not shadowed by locals.

    Args:
        y_true: tensor of target values.
        y_pred: tensor of reconstructed values.

    Returns:
        Scalar tensor.
    """
    return K.mean((rec_loss(y_true, y_pred) + kl_loss(y_true, y_pred)) / 2)



In [82]:
# Custom objects handed to Keras so that names referenced by the saved model
# file can be resolved while loading.
customized_dict = {
    'vae_loss': vae_loss,
    'z_log_var': z_log_var,
    'rec_loss': rec_loss,
    'kl_loss': kl_loss,
}
model = load_model('best_vae.h5', custom_objects=customized_dict)

# Re-compile with a fresh Adam optimizer, the combined VAE loss as the
# objective, and the two individual loss terms reported as metrics.
model.compile(
    optimizer=Adam(),
    loss=vae_loss,
    metrics=[rec_loss, kl_loss],
)

In [83]:
# Print the layer-by-layer structure of the loaded model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 29)           0                                            
__________________________________________________________________________________________________
encoder_hidden (Dense)          (None, 16)           480         input_1[0][0]                    
__________________________________________________________________________________________________
z_mean (Dense)                  (None, 2)            34          encoder_hidden[0][0]             
__________________________________________________________________________________________________
z_log_var (Dense)               (None, 2)            34          encoder_hidden[0][0]             
__________________________________________________________________________________________________
z_sampled 

In [75]:
# Parse 'my_file.txt' into a list of rows, one row per line, each row being
# the line's comma-separated values converted to float.
#  - The file is opened read-only: nothing is written, so 'r+' would only add
#    a needless write-permission requirement.
#  - `with` guarantees the handle is closed even if a value fails to parse.
#  - Blank lines (e.g. trailing newlines at the end of the file) are skipped
#    instead of crashing on float('').
with open('my_file.txt', 'r') as f:
    inputs = [
        [float(x) for x in line.split(',')]
        for line in f
        if line.strip()
    ]

In [87]:
# Scale the rows parsed from the input file with the previously fitted
# `scaler` (transform only -- the scaler is not re-fitted here).
inputs_scaled=scaler.transform(inputs)

In [88]:
# Label each scaled input row by how badly the model reconstructs it.
# A row whose mean squared reconstruction error exceeds the threshold is
# labelled 'FRAUD', every other row 'NON-FRAUD'.
FRAUD_THRESHOLD = 2  # cut-off on the per-row mean squared error

# Run the model once on the whole batch instead of once per row.
# Assumes, as the per-row version did, that the model returns one
# reconstruction per input row with the same number of features.
inputs_array = np.asarray(inputs_scaled)
ypred = model.predict(inputs_array)

# One error value per row (mean over the feature axis).
reconstruction_error = np.mean(np.power(inputs_array - ypred, 2), axis=1)

predictions = [
    'FRAUD' if error > FRAUD_THRESHOLD else 'NON-FRAUD'
    for error in reconstruction_error
]

In [89]:
# Display the label assigned to each input row.
predictions

['FRAUD', 'FRAUD']