In [1]:
import numpy as np
import pandas as pd
import tensorflow

from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.layers import Input, Conv1D, Dense, Flatten, Activation, UpSampling1D, MaxPooling1D, ZeroPadding1D
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
import seaborn as sns
import matplotlib

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

In [3]:
class Visualization:
    labels = ["Normal", "Anomaly"]

    def draw_confusion_matrix(self, y, ypred):
        matrix = confusion_matrix(y, ypred)

        plt.figure(figsize=(10, 8))
        colors=[ "orange","green"]
        sns.heatmap(matrix, xticklabels=self.labels, yticklabels=self.labels, cmap=colors, annot=True, fmt="d")
        plt.title("Confusion Matrix")
        plt.ylabel('Actual')
        plt.xlabel('Predicted')
        plt.show()


    def draw_anomaly(self, y, error, threshold):
        groupsDF = pd.DataFrame({'error': error,
                                 'true': y}).groupby('true')

        figure, axes = plt.subplots(figsize=(12, 8))

        for name, group in groupsDF:
            axes.plot(group.index, group.error, marker='x' if name == 1 else 'o', linestyle='',
                    color='r' if name == 1 else 'g', label="Anomaly" if name == 1 else "Normal")

        axes.hlines(threshold, axes.get_xlim()[0], axes.get_xlim()[1], colors="b", zorder=100, label='Threshold')
        axes.legend()
        
        plt.title("Anomalies")
        plt.ylabel("Error")
        plt.xlabel("Data")
        plt.show()

    def draw_error(self, error, threshold):
            plt.plot(error, marker='o', ms=3.5, linestyle='',
                     label='Point')

            plt.hlines(threshold, xmin=0, xmax=len(error)-1, colors="b", zorder=100, label='Threshold')
            plt.legend()
            plt.title("Reconstruction error")
            plt.ylabel("Error")
            plt.xlabel("Data")
            plt.show()

In [4]:
df = pd.read_csv('data/creditcard.csv', sep=',', index_col=None)
print(df.shape)
df.head()

(284807, 31)


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [5]:
df['Amount'] = StandardScaler().fit_transform(df['Amount'].values.reshape(-1, 1))

df['Time'] = StandardScaler().fit_transform(df['Time'].values.reshape(-1, 1))

df.tail()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
284802,1.641931,-11.881118,10.071785,-9.834783,-2.066656,-5.364473,-2.606837,-4.918215,7.305334,1.914428,...,0.213454,0.111864,1.01448,-0.509348,1.436807,0.250034,0.943651,0.823731,-0.350151,0
284803,1.641952,-0.732789,-0.05508,2.03503,-0.738589,0.868229,1.058415,0.02433,0.294869,0.5848,...,0.214205,0.924384,0.012463,-1.016226,-0.606624,-0.395255,0.068472,-0.053527,-0.254117,0
284804,1.641974,1.919565,-0.301254,-3.24964,-0.557828,2.630515,3.03126,-0.296827,0.708417,0.432454,...,0.232045,0.578229,-0.037501,0.640134,0.265745,-0.087371,0.004455,-0.026561,-0.081839,0
284805,1.641974,-0.24044,0.530483,0.70251,0.689799,-0.377961,0.623708,-0.68618,0.679145,0.392087,...,0.265245,0.800049,-0.163298,0.123205,-0.569159,0.546668,0.108821,0.104533,-0.313249,0
284806,1.642058,-0.533413,-0.189733,0.703337,-0.506271,-0.012546,-0.649617,1.577006,-0.41465,0.48618,...,0.261057,0.643078,0.376777,0.008797,-0.473649,-0.818267,-0.002415,0.013649,0.514355,0


In [6]:
anomalies = df[df['Class'] == 1]
normal = df[df['Class'] == 0]

anomalies.shape, normal.shape

((492, 31), (284315, 31))

In [7]:
for f in range(0, 20):
    normal = normal.iloc[np.random.permutation(len(normal))]

data_set = pd.concat([normal[:2000], anomalies])

In [8]:
from sklearn.model_selection import train_test_split

x_train, x_test = train_test_split(data_set, test_size=0.4, random_state=42)

x_train = x_train.sort_values(by=['Time'])
x_test = x_test.sort_values(by=['Time'])
y_train = x_train['Class']
y_test = x_test['Class']

x_train.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
55,-1.995783,0.158332,0.872687,0.965525,1.802127,0.139022,0.110863,0.559079,-0.029071,-0.394912,...,0.11627,0.691685,0.121113,0.116142,-1.112725,-0.303369,0.327533,0.255621,-0.266231,0
570,-1.987591,-0.847312,0.854261,0.338816,0.890137,0.804751,1.165501,-0.081408,0.879014,-0.394737,...,-0.04669,-0.075301,-0.308479,-1.733137,0.087036,-0.129209,0.294334,0.071198,-0.307811,0
623,-1.986644,-3.043541,-3.157307,1.088463,2.288644,1.359805,-1.064823,0.325574,-0.067794,-0.270953,...,0.661696,0.435477,1.375966,-0.293803,0.279798,-0.145362,-0.252773,0.035764,1.761758,1
659,-1.986096,-0.753769,1.098287,1.835453,0.614571,-0.317859,-0.101055,0.039454,-0.656824,-0.048761,...,1.101142,0.686059,0.005954,0.361744,-0.457852,-0.352576,0.094602,0.17048,-0.333239,0
698,-1.985464,-0.378417,0.751515,1.772256,0.31102,-0.32913,-0.746206,0.719034,-0.081805,-0.152417,...,-0.120891,-0.240394,-0.057803,0.733812,-0.049448,0.207357,0.023386,0.057469,-0.251638,0


In [9]:
x_train = np.array(x_train).reshape(x_train.shape[0], x_train.shape[1], 1)
x_test = np.array(x_test).reshape(x_test.shape[0], x_test.shape[1], 1)

input_shape = (x_train.shape[1], 1)
input_shape

y_train = tensorflow.keras.utils.to_categorical(y_train, 2)
y_test = tensorflow.keras.utils.to_categorical(y_test, 2)

In [10]:
input_shape = (x_train.shape[1], 1)
input_shape

(31, 1)

In [11]:
input_layer = Input(shape=(input_shape))

In [12]:
conv_1 = Conv1D(filters = int(input_shape[0]), kernel_size = 2, dilation_rate = 1,
               padding = 'causal', strides = 1, input_shape = input_shape, kernel_regularizer = regularizers.l2(0.01),
               activation = 'relu')(input_layer)

pool_1 = MaxPooling1D(pool_size = 2, strides = 2)(conv_1)

conv_2 = Conv1D(filters = int(input_shape[0] / 2), kernel_size = 2, dilation_rate = 1,
               padding = 'causal', strides = 1, kernel_regularizer = regularizers.l2(0.01),
               activation = 'relu')(pool_1)

pool_2 = MaxPooling1D(pool_size = 2, strides = 3)(conv_2)

conv_3 = Conv1D(filters = int(input_shape[0] / 3), kernel_size = 2, dilation_rate = 1,
               padding = 'causal', strides = 1, kernel_regularizer = regularizers.l2(0.01),
               activation = 'relu')(pool_2)

encoder = Dense(int(input_shape[0] / 6), activation = 'relu')(conv_3)

In [13]:
upsample_1 = UpSampling1D(size=3)(encoder)

conv_4 = Conv1D(filters = int(input_shape[0]/3), kernel_size = 2, dilation_rate = 1,
               padding = 'causal', strides = 1, kernel_regularizer=regularizers.l2(0.01),
               activation = 'relu')(upsample_1)

upsample_2 = UpSampling1D(size = 2)(conv_4)

conv_5 = Conv1D(filters = int(input_shape[0]/2), kernel_size = 2, dilation_rate = 1,
               padding = 'causal', strides = 1, kernel_regularizer = regularizers.l2(0.05),
               activation='relu')(upsample_2)

zero_pad_1 = ZeroPadding1D(padding=(0,1))(conv_5)

conv_6 = Conv1D(filters=int(input_shape[0]), kernel_size = 2, dilation_rate = 1,
               padding='causal', strides=1, kernel_regularizer = regularizers.l2(0.05),
               activation='relu')(zero_pad_1)

flat = Flatten()(conv_6)

output_layer = Dense(2, activation = 'softmax')(flat)

TCN = Model(input_layer, output_layer)

In [14]:
from tensorflow.keras.losses import categorical_crossentropy
from tensorflow.keras.optimizers import Adam

TCN.compile(loss = categorical_crossentropy, optimizer = Adam(learning_rate=0.002),
           metrics=['accuracy'])

In [15]:
from tensorflow.keras.callbacks import ModelCheckpoint
checkpointer = ModelCheckpoint(filepath='model_ED-TCN_creditcard.h5', save_best_only=True)

In [16]:
TCN.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 31, 1)]           0         
                                                                 
 conv1d (Conv1D)             (None, 31, 31)            93        
                                                                 
 max_pooling1d (MaxPooling1D  (None, 15, 31)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 15, 15)            945       
                                                                 
 max_pooling1d_1 (MaxPooling  (None, 5, 15)            0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, 5, 10)             310   

In [17]:
TCN.fit(x_train, y_train, batch_size=128, epochs=25, validation_data=(x_test, y_test),
       callbacks=[checkpointer])

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


<keras.callbacks.History at 0x1d360621df0>

In [18]:
score = TCN.evaluate(x_test, y_test)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.1878034919500351
Test accuracy: 0.9628886580467224


In [19]:
preds = TCN.predict(x_test)
from sklearn.metrics import roc_auc_score
auc = roc_auc_score(np.round(preds), y_test)

print('AUC: {:.2%}'.format(auc))

AUC: 97.05%
