In [1]:
import os
import sys

import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, LeakyReLU, Dense, Bidirectional, Dropout, TimeDistributed, RepeatVector

import numpy as np
import random

In [2]:
root = os.path.dirname(os.getcwd())
sys.path.append(root)

df = pd.read_csv(root + os.sep + 'data'+ os.sep + 'BASE.csv')

In [3]:
from src.utils.models import CharacterPreprocessor

In [4]:
cp = CharacterPreprocessor(df)
cp.preprocess_type2(maxlen=20)

Corpus length: 630844
Total chars: 52
Number of sequences: 210268


In [5]:
#baseline = keras.models.load_model(root + os.sep + 'models'+ os.sep + 'Base_Quote_Generator.h5')

In [5]:
from tensorflow.keras.layers import InputLayer, Dense, LSTM
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.optimizers import RMSprop

In [12]:
class GAN(CharacterPreprocessor):

    def __init__(self, df, maxlen=40, step=3):
        CharacterPreprocessor.__init__(self, df)
        self.disc_loss = []
        self.gen_loss =[]
        self.maxlen = maxlen
        self.step = step

        self.preprocess_type2(maxlen=self.maxlen, step=self.step)
        
        optimizer = RMSprop()

        # Build and compile the discriminator
        self.discriminator = self.build_discriminator()
        self.discriminator.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])

        # Build the generator
        self.generator = self.build_generator()

        # For the combined model we will only train the generator
        self.discriminator.trainable = False

        # Build the GAN Model
        self.gan = self.build_gan(self.generator, self.discriminator)
        self.gan.compile(loss='binary_crossentropy', optimizer=optimizer)

    def build_discriminator(self):
        discriminator = keras.Sequential([
            keras.layers.InputLayer(input_shape=(self.maxlen, len(self.chars))),
            layers.LSTM(128),
            keras.layers.Dense(1, activation='sigmoid')
        ])
        discriminator.summary()

        return discriminator

    def build_generator(self):
        generator = keras.Sequential([
            keras.layers.InputLayer(input_shape=(self.maxlen, len(self.chars))),
            layers.LSTM(128, return_sequences=True),
            layers.Dense(len(self.chars), activation='softmax'),
        ])
        generator.summary()

        return generator

    def build_gan(self, g_model, d_model):

        gan = keras.Sequential([
            layers.InputLayer(input_shape=(self.maxlen, len(self.chars))),
            g_model,
            d_model
        ])
        gan.summary()

        return gan


    def train(self, epochs, batch_size=128, sample_interval=50, verbose=True):

        half_batch = batch_size//2
        # Training the model
        for epoch in range(epochs):

            # Training the discriminator
            # Select a random batch of character sequences
            if verbose:
                print('Generating real samples')
            X_real, y_real = self.generate_real_samples(half_batch)

            # Generate a batch of fake character sequences
            if verbose:
                print('Generating fake samples')
            X_fake, y_fake = self.generate_fake_samples(self.generator, half_batch)

            # Train the discriminator
            if verbose:
                print('Training Discriminator')
            d_loss_real = self.discriminator.train_on_batch(X_real, y_real)
            d_loss_fake = self.discriminator.train_on_batch(X_fake, y_fake)
            d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

            #  Training the Generator
            if verbose:
                print('Generating GAN samples')
            X_gan, y_gan = self.generate_gan_samples(batch_size)

            # Train the generator (to have the discriminator label samples as real)
            if verbose:
                print('Training Generator')
            g_loss = self.gan.train_on_batch(X_gan, y_gan)

            # Print the progress and save into loss lists
            if epoch % sample_interval == 0:
              print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
              self.disc_loss.append(d_loss[0])
              self.gen_loss.append(g_loss)

In [13]:
gan = GAN(df)

Corpus length: 630844
Total chars: 52
Number of sequences: 210255
Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 128)               92672     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 129       
Total params: 92,801
Trainable params: 92,801
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_5 (LSTM)                (None, 40, 128)           92672     
_________________________________________________________________
dense_5 (Dense)              (None, 40, 52)            6708      
Total params: 99,380
Trainable params: 99,380
Non-trainable params: 0
________________________

In [14]:
gan.train(epochs=5, sample_interval=1, verbose=True)

Generating real samples
Generating fake samples
Training Discriminator
Generating GAN samples
Training Generator
0 [D loss: 0.726377, acc.: 0.78%] [G loss: 0.698529]
Generating real samples
Generating fake samples
Training Discriminator
Generating GAN samples
Training Generator
1 [D loss: 0.694949, acc.: 54.69%] [G loss: 0.710837]
Generating real samples
Generating fake samples
Training Discriminator
Generating GAN samples
Training Generator
2 [D loss: 0.677116, acc.: 67.97%] [G loss: 0.737378]
Generating real samples
Generating fake samples
Training Discriminator
Generating GAN samples
Training Generator
3 [D loss: 0.655305, acc.: 84.38%] [G loss: 0.870384]
Generating real samples
Generating fake samples
Training Discriminator
Generating GAN samples
Training Generator
4 [D loss: 0.766217, acc.: 70.31%] [G loss: 1.205061]


In [7]:
generator = keras.Sequential([
    keras.layers.InputLayer(input_shape=(cp.maxlen, len(cp.chars))),
    layers.LSTM(128, return_sequences=True),
    layers.Dense(len(cp.chars), activation='softmax'),
])

generator.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 20, 128)           92672     
_________________________________________________________________
dense_1 (Dense)              (None, 20, 52)            6708      
Total params: 99,380
Trainable params: 99,380
Non-trainable params: 0
_________________________________________________________________


In [9]:
discriminator = keras.Sequential([
    keras.layers.InputLayer(input_shape=(cp.maxlen, len(cp.chars))),
    layers.LSTM(128),
    keras.layers.Dense(1, activation='sigmoid')
])
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

discriminator.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 128)               92672     
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 129       
Total params: 92,801
Trainable params: 92,801
Non-trainable params: 0
_________________________________________________________________


In [21]:
batch_size = 50
discriminator.trainable = False

combined = keras.Sequential([
    generator,
    discriminator
])

combined.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [65]:
X_gan, _ = cp.generate_gan_samples(20)
y_gan = np.ones(len(X_gan))
y = np.ones(len(cp.X))

In [69]:
prueba.fit(cp.X, y, epochs=10)

Epoch 1/10

KeyboardInterrupt: 

In [7]:
def train_gan(generator, discriminator, cp, epochs, batch_size=128, sample_interval=5, verbose=True):

    half_batch = int(batch_size / 2)
    disc_loss, gen_loss = [], []
    
    # Training the model
    for epoch in range(epochs):

        if verbose:
            print(f'Training epoch {epoch+1}/{epochs}')
            print('Generating real and fake batches')
        X_real, y_real = cp.generate_real_samples(half_batch)
        X_fake, y_fake = cp.generate_fake_samples(generator, half_batch)

        # Train the discriminator
        if verbose:
            print('Discriminator training for real samples')
        d_loss_real = discriminator.train_on_batch(X_real, y_real)
        if verbose:
            print('Discriminator training for fake samples')
        d_loss_fake = discriminator.train_on_batch(X_fake, y_fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        #  Training the Generator
        if verbose:
            print('Training generator')
        X_gan, y_gan = cp.generate_gan_samples(batch_size)

        # Train the generator (to have the discriminator label samples as real)
        g_loss = generator.train_on_batch(X_gan, y_gan)

        # Print the progress and save into loss lists
        if epoch % sample_interval == 0:
            print ("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % (epoch, d_loss[0], 100*d_loss[1], g_loss))
            disc_loss.append(d_loss[0])
            gen_loss.append(g_loss)

    return generator, (disc_loss, gen_loss)

In [8]:
gen, losses = train_gan(generator, discriminator, cp, 10, sample_interval=1, verbose=True)

Training epoch 0/10
Generating real and fake batches
Discriminator training for real samples
Discriminator training for fake samples
Training generator
0 [D loss: 1.111697, acc.: 29.69%] [G loss: 3.950763]
Training epoch 1/10
Generating real and fake batches
Discriminator training for real samples
Discriminator training for fake samples
Training generator
1 [D loss: 0.654581, acc.: 50.00%] [G loss: 3.559381]
Training epoch 2/10
Generating real and fake batches
Discriminator training for real samples
Discriminator training for fake samples
Training generator
2 [D loss: 0.741457, acc.: 50.00%] [G loss: 4.466999]
Training epoch 3/10
Generating real and fake batches
Discriminator training for real samples
Discriminator training for fake samples
Training generator
3 [D loss: 0.834218, acc.: 50.00%] [G loss: 3.439421]
Training epoch 4/10
Generating real and fake batches
Discriminator training for real samples
Discriminator training for fake samples
Training generator
4 [D loss: 0.708010, acc

KeyboardInterrupt: 

In [55]:
def build_generator():

    model = Sequential()
    model.add(Bidirectional(LSTM(128, return_sequences=True), input_shape=(cp.maxlen, len(cp.chars))))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Bidirectional(LSTM(128, return_sequences=True)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Bidirectional(LSTM(128)))
    model.add(LeakyReLU(alpha=0.2))
    #specifying output to have 40 timesteps
    model.add(RepeatVector(16))
    #specifying 1 feature as the output
    model.add(Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Bidirectional(LSTM(128, return_sequences=True, dropout = 0.2)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.3))   
    model.add(TimeDistributed(Dense(128)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(128)))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dropout(0.4))
    model.add(TimeDistributed(Dense(1)))
    model.add(LeakyReLU(alpha=0.2))
    model.summary()

    noise = Input(shape=(cp.maxlen,len(cp.chars)))
    img = model(noise)

    return Model(noise, img)

In [56]:
g_model = build_generator()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_3 (Bidirection (None, 40, 256)           185344    
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 40, 256)           0         
_________________________________________________________________
bidirectional_4 (Bidirection (None, 40, 256)           394240    
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 40, 256)           0         
_________________________________________________________________
bidirectional_5 (Bidirection (None, 256)               394240    
_________________________________________________________________
leaky_re_lu_8 (LeakyReLU)    (None, 256)               0         
_________________________________________________________________
repeat_vector (RepeatVector) (None, 16, 256)         

NameError: name 'Input' is not defined