In [1]:
%pylab inline
import keras
import keras.backend as K
from keras import Input, Model, Sequential
from keras.layers import Lambda, LSTM, RepeatVector, Dense, TimeDistributed, Bidirectional, concatenate,\
Conv1D, MaxPooling1D, UpSampling1D, BatchNormalization, Activation, Flatten, Reshape
from keras.optimizers import Adam, RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler, QuantileTransformer
from matplotlib import pyplot as plt
import os, shutil

Populating the interactive namespace from numpy and matplotlib


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def split_data(dataset, timesteps):
    """Cut an array into consecutive column windows and stack them as rows.

    The columns (axis 1) of ``dataset`` are divided into non-overlapping
    windows of ``timesteps`` columns each. The windows are stacked along
    axis 0 in left-to-right order, so an ``(R, D)`` input yields an
    ``(R * (D // timesteps), timesteps)`` output. Trailing columns that do
    not fill a complete window are dropped.

    The work is done iteratively with a single ``np.vstack`` call, so very
    wide inputs neither hit Python's recursion limit nor re-copy the data
    once per window.

    Args:
        dataset: NumPy array with at least two dimensions.
        timesteps: Positive window width, in columns.

    Returns:
        ``None`` if ``dataset`` has fewer than ``timesteps`` columns,
        ``dataset`` itself (not a copy) if it has exactly ``timesteps``
        columns, otherwise the stacked windows.

    Raises:
        ValueError: If ``timesteps`` is not positive.
    """
    if timesteps <= 0:
        raise ValueError(
            "timesteps must be a positive integer, got %r" % (timesteps, ))

    D = dataset.shape[1]
    if D < timesteps:
        return None
    if D == timesteps:
        return dataset

    n_windows = D // timesteps
    windows = [
        dataset[:, start:start + timesteps]
        for start in range(0, n_windows * timesteps, timesteps)
    ]
    if n_windows == 1:
        # Only one complete window fits: return the slice directly, as a
        # view of the input rather than a stacked copy.
        return windows[0]
    return np.vstack(windows)

In [3]:
# Window length (in columns) of every training sequence.
timesteps = 50

# Location of the pre-normalised transactions array saved as .npy.
normalized_transactions_filepath = "../../datasets/berka_dataset/usable/normalized_transactions.npy"

# Load the array, then cut it into rows of `timesteps` columns.
transactions = np.load(normalized_transactions_filepath)
transactions = split_data(transactions, timesteps)

# Shuffle the rows in place so batches are not drawn in file order.
np.random.shuffle(transactions)

# N = number of training sequences, D = length of each sequence.
N, D = transactions.shape
print(N, D)

193500 50


In [17]:
class GAN:
    """GAN over fixed-length 1-D sequences with selectable architectures.

    The generator maps a ``latent_dim``-sized noise vector to a sequence of
    ``timesteps`` values (the last layer of every architecture is ``tanh``).
    The discriminator maps a sequence of ``timesteps`` values to a single
    sigmoid score. Both networks can independently be one of ``'dense'``,
    ``'conv'``, ``'lstm'`` or ``'blstm'``.

    All configuration is read from the constructor arguments; no notebook
    globals are consulted.
    """

    # Architecture names accepted for both the generator and discriminator.
    _SUPPORTED_TYPES = ('dense', 'conv', 'lstm', 'blstm')

    def __init__(self, timesteps, latent_dim, generator_type,
                 discriminator_type):
        """Store the configuration; no Keras objects are created here.

        Args:
            timesteps: Length of the generated / discriminated sequences.
            latent_dim: Size of the generator's noise input.
            generator_type: One of ``_SUPPORTED_TYPES``.
            discriminator_type: One of ``_SUPPORTED_TYPES``.
        """
        self._timesteps = timesteps
        self._latent_dim = latent_dim
        self._generator_type = generator_type
        self._discriminator_type = discriminator_type

    def build_model(self, lr):
        """Build and compile the generator, discriminator and stacked GAN.

        Args:
            lr: Learning rate passed to ``RMSprop`` (with ``clipnorm=1.0``).

        Returns:
            Tuple ``(gan, generator, discriminator)`` of Keras models.

        Raises:
            ValueError: If either configured architecture name is unknown.
        """
        optimizer = RMSprop(lr, clipnorm=1.0)

        self._generator = self._get_generator(
            self._latent_dim, self._timesteps, self._generator_type)
        self._generator.compile(
            loss='binary_crossentropy', optimizer=optimizer)

        self._discriminator = self._get_discriminator(
            self._timesteps, self._discriminator_type)
        self._discriminator.compile(
            loss='binary_crossentropy', optimizer=optimizer)

        z = Input(shape=(self._latent_dim, ))
        fake = self._generator(z)

        # The discriminator is compiled above while still trainable and is
        # frozen only afterwards, before the stacked model is compiled, so
        # that training the stacked model updates the generator only.
        # NOTE(review): Keras may print a "Discrepancy between trainable
        # weights and collected trainable" warning because of this ordering.
        self._discriminator.trainable = False
        valid = self._discriminator(fake)

        # `name` is passed by keyword rather than relying on its position.
        self._gan = Model(z, valid, name='GAN')
        self._gan.compile(
            loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'])

        return self._gan, self._generator, self._discriminator

    @classmethod
    def _check_type(cls, model_type, role):
        """Raise ``ValueError`` if ``model_type`` is not a supported name."""
        if model_type not in cls._SUPPORTED_TYPES:
            raise ValueError('Unknown %s type %r; expected one of %s' %
                             (role, model_type, ', '.join(
                                 cls._SUPPORTED_TYPES)))

    def _sample_noise(self, n_samples):
        """Draw ``n_samples`` standard-normal vectors of size ``latent_dim``."""
        return np.random.normal(0, 1, (n_samples, self._latent_dim))

    def _get_generator(self, noise_dim, timesteps, generator_type):
        """Build the (uncompiled) generator network.

        Args:
            noise_dim: Size of the noise input vector.
            timesteps: Length of the produced sequence.
            generator_type: One of ``_SUPPORTED_TYPES``.

        Returns:
            Keras ``Model`` named ``'generator'`` mapping ``(noise_dim,)``
            inputs to ``(timesteps,)`` outputs.

        Raises:
            ValueError: If ``generator_type`` is not supported.
        """
        # Validate first so a typo fails with a clear message instead of an
        # unbound-variable error after every branch is skipped.
        self._check_type(generator_type, 'generator')

        generator_inputs = Input((noise_dim, ))

        if generator_type == 'dense':
            generated = Dense(timesteps, activation='relu')(generator_inputs)
            generated = Dense(timesteps, activation='tanh')(generated)

        elif generator_type == 'conv':
            # Start from a short 4-step, 3-channel sequence.
            generated = Dense(12, activation='tanh')(generator_inputs)
            generated = Reshape((4, 3))(generated)
            # Each pass doubles the length; stop once it reaches timesteps.
            while generated.shape[1] < timesteps:
                generated = Conv1D(
                    32, 3, activation=None, padding='same')(generated)
                generated = BatchNormalization()(generated)
                generated = Activation('tanh')(generated)
                generated = UpSampling1D(2)(generated)
            generated = Conv1D(
                1, 3, activation='tanh', padding='same')(generated)
            generated = Lambda(lambda x: K.squeeze(x, -1))(generated)
            # The upsampled length may overshoot; this layer maps it to
            # exactly `timesteps` outputs.
            generated = Dense(timesteps, activation='tanh')(generated)

        elif generator_type == 'lstm':
            generated = RepeatVector(timesteps)(generator_inputs)
            generated = LSTM(32, return_sequences=True)(generated)
            generated = TimeDistributed(Dense(1, activation='tanh'))(generated)
            generated = Lambda(lambda x: K.squeeze(x, -1))(generated)

        else:  # 'blstm'
            generated = RepeatVector(timesteps)(generator_inputs)
            generated = Bidirectional(LSTM(32,
                                           return_sequences=True))(generated)
            generated = TimeDistributed(Dense(1, activation='tanh'))(generated)
            generated = Lambda(lambda x: K.squeeze(x, -1))(generated)

        return Model(generator_inputs, generated, name='generator')

    def _get_discriminator(self, timesteps, discriminator_type):
        """Build the (uncompiled) discriminator network.

        Args:
            timesteps: Length of the input sequence.
            discriminator_type: One of ``_SUPPORTED_TYPES``.

        Returns:
            Keras ``Model`` named ``'discriminator'`` mapping
            ``(timesteps,)`` inputs to a single sigmoid output.

        Raises:
            ValueError: If ``discriminator_type`` is not supported.
        """
        self._check_type(discriminator_type, 'discriminator')

        discriminator_inputs = Input((timesteps, ))

        if discriminator_type == 'dense':
            discriminated = Dense(
                timesteps, activation='relu')(discriminator_inputs)
            discriminated = Dense(timesteps, activation='relu')(discriminated)
            discriminated = Dense(1, activation='sigmoid')(discriminated)

        elif discriminator_type == 'conv':
            # Add a trailing channel axis so Conv1D can be applied.
            discriminated = Lambda(lambda x: K.expand_dims(x))(
                discriminator_inputs)
            # Each pass pools the length by 2; stop once it is 3 or less.
            while discriminated.shape[1] > 3:
                discriminated = Conv1D(
                    32, 3, activation=None, padding='same')(discriminated)
                discriminated = BatchNormalization()(discriminated)
                discriminated = Activation('tanh')(discriminated)
                discriminated = MaxPooling1D(2, padding='same')(discriminated)
            discriminated = Flatten()(discriminated)
            discriminated = Dense(1, activation='sigmoid')(discriminated)

        elif discriminator_type == 'lstm':
            discriminated = Lambda(lambda x: K.expand_dims(x))(
                discriminator_inputs)
            discriminated = LSTM(32, return_sequences=False)(discriminated)
            discriminated = Dense(1, activation='sigmoid')(discriminated)

        else:  # 'blstm'
            discriminated = Lambda(lambda x: K.expand_dims(x))(
                discriminator_inputs)
            discriminated = Bidirectional(LSTM(
                32, return_sequences=False))(discriminated)
            discriminated = Dense(1, activation='sigmoid')(discriminated)

        return Model(discriminator_inputs, discriminated,
                     name='discriminator')

    def train(self, batch_size, epochs, n_generator, n_discriminator, dataset,
              img_frequency):
        """Alternate discriminator and generator updates for ``epochs`` rounds.

        Every round performs ``n_discriminator`` discriminator updates (each
        on half a batch of real rows and half a batch of generated rows)
        followed by ``n_generator`` generator updates through the stacked
        model. The losses of the last update of each kind are recorded and
        printed. Every ``img_frequency`` rounds a sample grid and the loss
        curves are written to the ``gan/`` directory, which must exist.

        Args:
            batch_size: Rows per generator update; discriminator updates use
                ``int(batch_size / 2)`` real and as many generated rows.
            epochs: Number of rounds.
            n_generator: Generator updates per round (at least 1).
            n_discriminator: Discriminator updates per round (at least 1).
            dataset: Array of real sequences, indexed by row.
            img_frequency: Save figures when ``epoch % img_frequency == 0``.

        Raises:
            ValueError: If ``n_generator`` or ``n_discriminator`` is below 1
                (the per-round losses would otherwise be undefined).
        """
        if n_generator < 1 or n_discriminator < 1:
            raise ValueError(
                'n_generator and n_discriminator must both be >= 1, got '
                '%r and %r' % (n_generator, n_discriminator))

        half_batch = int(batch_size / 2)

        # Target labels are constant, so build them once.
        real_labels = np.ones((half_batch, 1))
        fake_labels = np.zeros((half_batch, 1))
        generator_targets = np.ones((batch_size, 1))

        losses = [[], []]  # [generator losses, discriminator losses]
        for epoch in range(epochs):
            for _ in range(n_discriminator):
                indexes = np.random.randint(0, dataset.shape[0], half_batch)
                batch_transactions = dataset[indexes]

                noise = self._sample_noise(half_batch)
                generated_transactions = self._generator.predict(noise)

                discriminator_loss_real = self._discriminator.train_on_batch(
                    batch_transactions, real_labels)
                discriminator_loss_fake = self._discriminator.train_on_batch(
                    generated_transactions, fake_labels)
                discriminator_loss = 0.5 * np.add(discriminator_loss_real,
                                                  discriminator_loss_fake)

            for _ in range(n_generator):
                noise = self._sample_noise(batch_size)
                # The stacked model is compiled with a metric, so
                # train_on_batch returns [loss, accuracy]; keep the loss.
                generator_loss = self._gan.train_on_batch(
                    noise, generator_targets)[0]

            losses[0].append(generator_loss)
            losses[1].append(discriminator_loss)

            print("%d [D loss: %f] [G loss: %f]" % (epoch, discriminator_loss,
                                                    generator_loss))

            if epoch % img_frequency == 0:
                self._save_imgs(epoch)
                self._save_losses(losses)

    def _save_imgs(self, epoch):
        """Plot a 5x5 grid of generated sequences and save it under ``gan/``.

        Writes ``gan/<epoch, zero-padded to 5 digits>.png`` and overwrites
        ``gan/last.png`` with the same figure.
        """
        rows, columns = 5, 5
        noise = self._sample_noise(rows * columns)
        generated_transactions = self._generator.predict(noise)

        fig, axes = plt.subplots(rows, columns, figsize=(15, 5))
        for axis, sample in zip(axes.flat, generated_transactions):
            axis.plot(sample)
            axis.set_xticks([])
            axis.set_yticks([])
            # NOTE(review): the generator ends in tanh, whose range is
            # [-1, 1]; this y-range hides negative values - confirm intended.
            axis.set_ylim(0, 1)
        fig.tight_layout()
        fig.savefig('gan/%05d.png' % epoch)
        fig.savefig('gan/last.png')
        plt.close(fig)

    @staticmethod
    def _save_losses(losses):
        """Plot both loss histories and save them to ``gan/losses.png``.

        Args:
            losses: Pair ``[generator_losses, discriminator_losses]``.
        """
        fig, axis = plt.subplots()
        axis.plot(losses[0])
        axis.plot(losses[1])
        axis.legend(['generator', 'discriminator'])
        fig.savefig('gan/losses.png')
        plt.close(fig)

In [20]:
# --- Training schedule -------------------------------------------------
epochs = 100000        # outer training rounds
batch_size = 64        # rows per generator update
n_generator = 5        # generator updates per round
n_discriminator = 1    # discriminator updates per round
img_frequency = 100    # save figures every this many rounds

# --- Optimiser ---------------------------------------------------------
lr = 5e-05

# --- Model shape and architecture --------------------------------------
latent_dim = 10
timesteps = timesteps  # unchanged; must already be defined by an earlier cell
generator_type = 'conv'
discriminator_type = 'conv'

In [21]:
# Start every run from an empty 'gan' output directory: remove whatever a
# previous run left behind, then recreate it.
if os.path.exists('gan'):
    shutil.rmtree('gan')
os.makedirs('gan')

# Build the models from the hyper-parameters defined above and train on the
# windowed transactions.
gan = GAN(
    timesteps=timesteps,
    latent_dim=latent_dim,
    generator_type=generator_type,
    discriminator_type=discriminator_type)
gan.build_model(lr=lr)
gan.train(
    batch_size=batch_size,
    epochs=epochs,
    n_generator=n_generator,
    n_discriminator=n_discriminator,
    dataset=transactions,
    img_frequency=img_frequency)

  'Discrepancy between trainable weights and collected trainable'


0 [D loss: 0.781228] [G loss: 0.610225]
1 [D loss: 0.705468] [G loss: 0.645783]
2 [D loss: 0.754780] [G loss: 0.636253]
3 [D loss: 0.680573] [G loss: 0.621176]
4 [D loss: 0.727733] [G loss: 0.694229]
5 [D loss: 0.682014] [G loss: 0.686220]
6 [D loss: 0.688449] [G loss: 0.651591]
7 [D loss: 0.590919] [G loss: 0.682804]
8 [D loss: 0.696846] [G loss: 0.642654]
9 [D loss: 0.673813] [G loss: 0.640989]
10 [D loss: 0.676983] [G loss: 0.677521]
11 [D loss: 0.676589] [G loss: 0.644284]
12 [D loss: 0.678639] [G loss: 0.657079]
13 [D loss: 0.697872] [G loss: 0.665781]
14 [D loss: 0.642257] [G loss: 0.675854]
15 [D loss: 0.779292] [G loss: 0.618375]
16 [D loss: 0.595859] [G loss: 0.714812]
17 [D loss: 0.672955] [G loss: 0.655860]
18 [D loss: 0.588093] [G loss: 0.699263]
19 [D loss: 0.642453] [G loss: 0.632001]
20 [D loss: 0.601557] [G loss: 0.623174]
21 [D loss: 0.674933] [G loss: 0.641624]
22 [D loss: 0.587216] [G loss: 0.640304]
23 [D loss: 0.674911] [G loss: 0.648670]
24 [D loss: 0.664989] [G l

198 [D loss: 0.415226] [G loss: 0.833044]
199 [D loss: 0.453705] [G loss: 0.924012]
200 [D loss: 0.335197] [G loss: 0.819304]
201 [D loss: 0.344700] [G loss: 0.892154]
202 [D loss: 0.334815] [G loss: 0.861370]
203 [D loss: 0.348250] [G loss: 0.930943]
204 [D loss: 0.348828] [G loss: 0.873121]
205 [D loss: 0.352869] [G loss: 0.873692]
206 [D loss: 0.340974] [G loss: 0.958266]
207 [D loss: 0.350065] [G loss: 0.940705]
208 [D loss: 0.375177] [G loss: 0.863186]
209 [D loss: 0.333482] [G loss: 0.978227]
210 [D loss: 0.357901] [G loss: 0.909515]
211 [D loss: 0.410041] [G loss: 0.887642]
212 [D loss: 0.312153] [G loss: 0.899383]
213 [D loss: 0.311687] [G loss: 0.905995]
214 [D loss: 0.322605] [G loss: 1.015714]
215 [D loss: 0.376870] [G loss: 0.911102]
216 [D loss: 0.408094] [G loss: 0.956954]
217 [D loss: 0.404608] [G loss: 0.951929]
218 [D loss: 0.377350] [G loss: 0.886644]
219 [D loss: 0.401885] [G loss: 0.891377]
220 [D loss: 0.344300] [G loss: 0.953464]
221 [D loss: 0.304903] [G loss: 0.

394 [D loss: 0.153214] [G loss: 1.872989]
395 [D loss: 0.145466] [G loss: 1.814703]
396 [D loss: 0.149923] [G loss: 1.726815]
397 [D loss: 0.147406] [G loss: 1.741858]
398 [D loss: 0.135158] [G loss: 1.709247]
399 [D loss: 0.171099] [G loss: 1.839386]
400 [D loss: 0.158703] [G loss: 1.694318]
401 [D loss: 0.133735] [G loss: 1.913823]
402 [D loss: 0.137716] [G loss: 1.734319]
403 [D loss: 0.147918] [G loss: 1.856893]
404 [D loss: 0.139814] [G loss: 1.904143]
405 [D loss: 0.144415] [G loss: 1.895223]
406 [D loss: 0.150034] [G loss: 1.760907]
407 [D loss: 0.257670] [G loss: 1.821511]
408 [D loss: 0.131784] [G loss: 1.939548]
409 [D loss: 0.150273] [G loss: 1.720747]
410 [D loss: 0.108371] [G loss: 1.806991]
411 [D loss: 0.184211] [G loss: 1.878728]
412 [D loss: 0.099380] [G loss: 1.850675]
413 [D loss: 0.158137] [G loss: 1.846521]
414 [D loss: 0.161967] [G loss: 1.902648]
415 [D loss: 0.172744] [G loss: 1.917065]
416 [D loss: 0.120145] [G loss: 1.872469]
417 [D loss: 0.123637] [G loss: 1.

KeyboardInterrupt: 