In [3]:
# Lab 7 - Text generation with LSTM
#
# Step 1 (not assessed): build and train a model to generate text in the style of a corpus.
#
# Based on the Keras text generation example (https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py)
#
# Step 2: build a model to distinguish genuine from fake sentences.

In [4]:
# Import essential modules
import pickle
import random
import sys
import time
import keras

import numpy as np
from sklearn.model_selection import train_test_split

from keras.layers import Input, LSTM, GRU, Dense, Activation, Conv1D, Dropout, Flatten
from keras.layers import CuDNNGRU, CuDNNLSTM, GaussianNoise, BatchNormalization
from keras.regularizers import l1, l2
from keras.optimizers import RMSprop, Adam, Nadam, SGD
from keras.models import Model, Sequential
from keras.models import save_model, load_model
from keras.utils.data_utils import get_file
from keras.layers.advanced_activations import LeakyReLU
from keras import initializers

from IPython.display import clear_output

import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.ticker import MaxNLocator
from sklearn import datasets, linear_model
from sklearn.model_selection import cross_val_score

Using TensorFlow backend.


In [5]:
# Helper function to sample an index from an array of predictions.
#
# The input array 'preds' should be the output of a text generation model.
# The elements contain the values of the units in the final layer.
# Each unit corresponds to a character in the text alphabet.
# The final layer should have SoftMax activation, and thus the
# value corresponds to the 'strength of prediction' of that character
# as the next output value---so the maximum value indicates which character
# is most strongly predicted (considered most likely) as the next one.
#
def sample(preds, temperature=1.0):
    """Draw one symbol index from a temperature-adjusted prediction vector.

    Each prediction value is raised to the power 1/T and the result is
    renormalised to sum to 1; a single index is then drawn from that
    distribution.

    Example: with a two-symbol alphabet and probabilities [0.2, 0.8]:
      * T = 1.0 leaves the distribution unchanged: symbol 1 is 4x more likely
        than symbol 0.
      * T = 0.5 raises the values to the power 2, giving [0.04, 0.64] before
        normalisation: symbol 1 is now 16x (the square of 4x) more likely.
      * T = 2.0 takes square roots, giving [0.4472, 0.8944] before
        normalisation: symbol 1 is only 2x (sqrt of 4x) more likely.

    So low temperatures make the distribution peakier (the sample is more
    conservative, i.e. more likely to pick the highest-probability symbol),
    while high temperatures flatten it (the sample is more likely to pick a
    lower-likelihood symbol).

    Args:
        preds: 1-D array-like of non-negative prediction strengths, one per
            symbol of the alphabet (e.g. the softmax output of a generator).
        temperature: positive scaling factor T described above.

    Returns:
        The index (as returned by np.argmax) of the sampled symbol.

    Raises:
        ValueError: if temperature is not positive, or if preds has no
            positive finite entry (or contains negative / NaN values).
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got {!r}'.format(temperature))

    # Convert to a high-precision datatype (we are going to be manipulating
    # some very small values in this function).
    preds = np.asarray(preds).astype('float64')

    # Work with log-probabilities: log(p) / T is the log of p ** (1 / T).
    # Zero-probability symbols map to -inf (and back to exactly 0 below), so
    # the divide-by-zero warning from log(0) is expected and silenced here.
    with np.errstate(divide='ignore', invalid='ignore'):
        scaled = np.log(preds) / temperature

    peak = np.max(scaled)
    if not np.isfinite(peak):
        raise ValueError('preds must contain at least one positive, finite value '
                         'and no negative or NaN entries')

    # Subtracting the largest log-weight before exponentiating keeps the most
    # likely symbol at exp(0) == 1.  Without this shift a small temperature
    # underflows every weight to 0 and the normalisation below becomes 0 / 0.
    weights = np.exp(scaled - peak)

    weights = weights / np.sum(weights)  # ensure that probs sum to 1
    probas = np.random.multinomial(1, weights, 1)  # take 1 sample from the distribution
    return np.argmax(probas)

In [6]:
# Decide how much data to use for training.
# You might want to reduce this to ~100k for faster experimentation, and then bring it back
# to 600k when you're happy with your network architecture.
# IMPORTANT: make sure you end up with a 57-symbol alphabet after reducing the corpus size!
# If the number of symbols (shown in the next cell) gets smaller than it was with the full
# corpus, bring your sample size back up.  This is necessary because the encoding used for
# training must match that used for assessment.
#desired_num_chars = 600*1000  # Max: 600893
desired_num_chars = 480139  # Max: 600893

random.seed(43)  # Fix random seed for repeatable results.

# Slurp down all of Nietzsche from Amazon.
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')

# NOTE(review): open() uses the platform default encoding here.  The recorded run
# reported 600901 characters (not the 600893 quoted above) and an alphabet containing
# symbols such as 'ã' and '¦', which suggests the file was decoded with a single-byte
# codec rather than UTF-8.  Passing encoding='utf-8' would change the alphabet, so
# confirm which encoding the assessment expects before changing it.
with open(path) as corpus_file:  # context manager closes the handle after reading
    text = corpus_file.read().lower()
print('original corpus length:', len(text))

# Keep one contiguous slice of the corpus, starting at a (seeded) random offset.
start_index = random.randint(0, len(text) - desired_num_chars - 1)
text = text[start_index:start_index + desired_num_chars]
print('length for training:', len(text))
num_chars = len(set(text))  # number of distinct symbols in the slice
print('char count', num_chars)

#for i in range (-100000, 100000):
#    random.seed(i)
#    path_i = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
#    text_i = open(path_i).read().lower()
#    start_index_i = random.randint(0, len(text_i) - desired_num_chars - 1)
#    text_i = text_i[start_index_i:start_index_i + desired_num_chars]
#    num_chars_i = len(sorted(list(set(text_i))))
#    if num_chars_i == 57: break

#print('i', i)

original corpus length: 600901
length for training: 480139
char count 57


In [7]:
# Peek at a randomly chosen excerpt of the training text.
#
# Caution: Nietzsche might drive you mad: dare you behold more than 1000 of his terrible chars..?
sample_length = 1000

# Re-seed the `random` module without a fixed value so that every evaluation of this
# cell shows a different excerpt.  Note that this replaces the fixed seed set earlier.
random.seed(None)

start_index = random.randint(0, len(text) - sample_length - 1)
excerpt = text[start_index:start_index + sample_length]
print(excerpt)

n himself, and hence
affords a clue to the understanding of the universe in general. the
whole teleology is so planned that man during the last four thousand
years shall be spoken of as a being existing from all eternity, and
with reference to whom everything in the cosmos from its very inception
is naturally ordered. yet everything evolved: there are no eternal facts
as there are no absolute truths. accordingly, historical philosophising
is henceforth indispensable, and with it honesty of judgment.

[4] geworden.


3

=appreciation of simple truths.=--it is the characteristic of an
advanced civilization to set a higher value upon little, simple truths,
ascertained by scientific method, than upon the pleasing and magnificent
errors originating in metaphysical and ã¦sthetical epochs and peoples. to
begin with, the former are spoken of with contempt as if there could be
no question of comparison respecting them, so rigid, homely, prosaic and
even discouraging is the aspect of the first, 

In [8]:
# Build the alphabet: every distinct character of the training text, in sorted order.
chars = sorted(set(text))
print('total chars:', len(chars))
print(chars)

# Lookup tables between a character and its position in the alphabet.
char_indices = {char: index for index, char in enumerate(chars)}  # character -> index
indices_char = dict(enumerate(chars))                             # index -> character

total chars: 57
['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '=', '?', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '¤', '¦', '«', 'ã']


In [9]:
# Cut the corpus into overlapping windows of `maxlen` characters, taking a new
# window every `step` characters, and remember the character that follows each one.
maxlen = 40
step = 3
window_starts = range(0, len(text) - maxlen, step)

# Not syntactic sentences, just fixed-length character sequences from the corpus.
sentences = [text[start: start + maxlen] for start in window_starts]
# next_chars[n] is the character that followed sentences[n] in the corpus.
next_chars = [text[start + maxlen] for start in window_starts]
print('nb sequences:', len(sentences))

nb sequences: 160033


In [10]:
# Convert the data to one-hot encoding.
# 'x' will contain the one-hot encoding of the training 'sentences'.
# 'y' will contain the one-hot encoding of the 'next char' for each sentence.
#
# 
# Let's consider that we have N sentences of length L:
#
# The 'native' encoding is an NxL matrix where element [n][l]
# is the symbol index for character at index (l) of sentence (n)
# (e.g., say, 5, corresponding to 'e').
#
# The one-hot encoding is an NxLxS matrix, where S is the 
# number of symbols in the alphabet, such that element [n][l][s]
# is 1 if the character at index (l) in sentence (n) has the
# symbol index (s), and 0 otherwise.
def onehot_encode(sentence, maxlen):
    """One-hot encode a string against the notebook's alphabet.

    Args:
        sentence: string whose characters are all keys of `char_indices`.
        maxlen: number of rows in the result; must be at least len(sentence).

    Returns:
        A boolean array of shape (maxlen, len(chars)).  Row t has a single True
        in the column given by char_indices[sentence[t]]; rows beyond the end
        of `sentence` stay all False.

    Raises:
        KeyError: if a character is not in `char_indices`.
        IndexError: if `sentence` is longer than `maxlen`.
    """
    # The builtin `bool` is used because the `np.bool` alias has been removed
    # from recent NumPy releases.
    x = np.zeros((maxlen, len(chars)), dtype=bool)
    for t, char in enumerate(sentence):
        x[t, char_indices[char]] = True
    return x

# x[n] is the one-hot encoding of sentences[n]; y[n] is the one-hot encoding of
# next_chars[n].  The builtin `bool` dtype is used because the `np.bool` alias
# has been removed from recent NumPy releases.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    x[i, :, :] = onehot_encode(sentence, maxlen)
    # A single target character needs just one True in its alphabet column.
    y[i, char_indices[next_chars[i]]] = True

print(x.shape)
print(y.shape)

(160033, 40, 57)
(160033, 57)


In [11]:
# Build the generator model: a single GRU layer with 128 cells.
#generator_model = Sequential()
#generator_model.add(GRU(128, input_shape=(maxlen, len(chars))))
#generator_model.add(Dense(len(chars)))
#generator_model.add(Activation('softmax'))

# You could experiment with NAdam instead of RMSProp.
#optimizer = RMSprop(lr=0.01)
#generator_model.compile(loss='categorical_crossentropy', optimizer=optimizer)
#trained_epochs = 0

In [12]:
# GRU generator: two stacked CuDNN GRU layers with a Gaussian-noise layer between
# them, followed by a dense layer and a softmax over the alphabet.
gru_generator_model = Sequential([
    CuDNNGRU(64, return_sequences=True, input_shape=(maxlen, len(chars))),
    GaussianNoise(1),
    CuDNNGRU(128, return_sequences=False, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Compiled with the 'adam' optimizer; accuracy is tracked alongside the loss.
gru_generator_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
gru_trained_epochs = 0
gru_generator_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnngru_1 (CuDNNGRU)       (None, 40, 64)            23616     
_________________________________________________________________
gaussian_noise_1 (GaussianNo (None, 40, 64)            0         
_________________________________________________________________
cu_dnngru_2 (CuDNNGRU)       (None, 128)               74496     
_________________________________________________________________
dense_1 (Dense)              (None, 57)                7353      
_________________________________________________________________
activation_1 (Activation)    (None, 57)                0         
Total params: 105,465
Trainable params: 105,465
Non-trainable params: 0
_________________________________________________________________


In [13]:
# LSTM generator: one CuDNN LSTM layer of 128 units, then a dense layer and a
# softmax over the alphabet.
lstm_generator_model = Sequential([
    CuDNNLSTM(128, return_sequences=False, input_shape=(maxlen, len(chars))),
    Dense(len(chars)),
    Activation('softmax'),
])

# Compiled with the 'adam' optimizer; the commented line is an alternative to try.
#optimizer = Nadam(lr=0.005)
lstm_generator_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
lstm_trained_epochs = 0
lstm_generator_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_1 (CuDNNLSTM)     (None, 128)               95744     
_________________________________________________________________
dense_2 (Dense)              (None, 57)                7353      
_________________________________________________________________
activation_2 (Activation)    (None, 57)                0         
Total params: 103,097
Trainable params: 103,097
Non-trainable params: 0
_________________________________________________________________


In [14]:
# "Keras" generator: a single (non-CuDNN) LSTM layer of 128 units feeding a
# softmax dense layer, trained with RMSprop at learning rate 0.01.
keras_model = Sequential([
    LSTM(128, input_shape=(maxlen, len(chars))),
    Dense(len(chars), activation='softmax'),
])

opt = RMSprop(lr=0.01)
keras_model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)
keras_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               95232     
_________________________________________________________________
dense_3 (Dense)              (None, 57)                7353      
Total params: 102,585
Trainable params: 102,585
Non-trainable params: 0
_________________________________________________________________


In [15]:
# "ML" generator: two stacked CuDNN LSTM layers of 256 units, each followed by
# 20% dropout, then a softmax dense layer over the alphabet.
# (A single-layer variant, CuDNNLSTM(256) -> Dropout(0.2) -> softmax Dense, is the
# simpler alternative to this stack.)
ml_model = Sequential([
    CuDNNLSTM(256, input_shape=(maxlen, len(chars)), return_sequences=True),
    Dropout(0.2),
    CuDNNLSTM(256),
    Dropout(0.2),
    Dense(len(chars), activation='softmax'),
])

ml_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
ml_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_2 (CuDNNLSTM)     (None, 40, 256)           322560    
_________________________________________________________________
dropout_1 (Dropout)          (None, 40, 256)           0         
_________________________________________________________________
cu_dnnlstm_3 (CuDNNLSTM)     (None, 256)               526336    
_________________________________________________________________
dropout_2 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 57)                14649     
Total params: 863,545
Trainable params: 863,545
Non-trainable params: 0
_________________________________________________________________


In [16]:
def generate_sentence_list(seed_list, length=400, temperature=0.25, model_type='gru'):
    """Extend every seed by `length` characters sampled from a generator model.

    All seeds are advanced in lock-step: at each step the current window of
    every seed is one-hot encoded, the whole batch is passed to the model, and
    one character per seed is drawn with `sample` at the given temperature.
    The window then slides forward by one character.

    Args:
        seed_list: list of seed strings; each must be at most `maxlen`
            characters long and use only characters in `char_indices`.
        length: number of characters to append to each seed.
        temperature: sampling temperature forwarded to `sample`.
        model_type: 'lstm', 'keras' or 'ml' select `lstm_generator_model`,
            `keras_model` or `ml_model`; any other value selects
            `gru_generator_model`.

    Returns:
        A new list holding, for each seed, the seed followed by its generated
        characters.  `seed_list` itself is not modified.
    """
    # Resolve the generator once, outside the sampling loop.  The 'keras' entry
    # uses `keras_model`, which is the name the notebook builds and reloads.
    if model_type == 'lstm':
        model = lstm_generator_model
    elif model_type == 'keras':
        model = keras_model
    elif model_type == 'ml':
        model = ml_model
    else:
        model = gru_generator_model

    n = len(seed_list)
    windows = [seed[:] for seed in seed_list]    # sliding model inputs
    generated = [seed[:] for seed in seed_list]  # seed + everything generated so far

    for i in range(length):
        # Text progress bar, redrawn in place via the leading carriage return.
        workdone = (i + 1) * 1.0 / length
        sys.stdout.write("\rgenerating {0:} sentences: [{1:20s}] {2:.1f}%".format(
            model_type, '#' * int(workdone * 20), workdone * 100))
        sys.stdout.flush()

        # One-hot encode the current window of every seed into a single batch.
        x_pred_list = np.zeros((n, maxlen, len(chars)))
        for j, window in enumerate(windows):
            for t, char in enumerate(window):
                x_pred_list[j, t, char_indices[char]] = 1.

        pred_list = model.predict(x_pred_list, verbose=0)

        for j in range(n):
            next_char = indices_char[sample(pred_list[j, :], temperature)]
            generated[j] += next_char
            windows[j] = windows[j][1:] + next_char  # drop oldest char, append newest

    sys.stdout.write(' - done\n')
    sys.stdout.flush()

    return generated

In [31]:
def generate_sentence_list_n(seed_list, length=400, temperature=0.25, model_type='gru'):
    """Extend every seed three times, once per sampling temperature.

    Three independent generation streams are run over the same seeds, using
    temperatures `temperature`, `temperature + 0.08` and `temperature + 0.15`.
    Each stream keeps its own sliding window and is predicted from that window,
    so the characters it produces are conditioned on its own history.

    Args:
        seed_list: list of seed strings; each must be at most `maxlen`
            characters long and use only characters in `char_indices`.
        length: number of characters to append to each seed in each stream.
        temperature: base sampling temperature (see above for the offsets).
        model_type: 'lstm', 'keras' or 'ml' select `lstm_generator_model`,
            `keras_model` or `ml_model`; any other value selects
            `gru_generator_model`.

    Returns:
        A list of 3 * len(seed_list) strings: all sentences of the first
        stream (in seed order), then the second stream, then the third.
        `seed_list` itself is not modified.
    """
    # Resolve the generator once, outside the sampling loop.  The 'keras' entry
    # uses `keras_model`, which is the name the notebook builds and reloads.
    if model_type == 'lstm':
        model = lstm_generator_model
    elif model_type == 'keras':
        model = keras_model
    elif model_type == 'ml':
        model = ml_model
    else:
        model = gru_generator_model

    # (An alternative third temperature is max(0.01, temperature - 0.15).)
    temperatures = (temperature, temperature + 0.08, temperature + 0.15)

    n = len(seed_list)
    # One set of sliding windows / accumulated outputs per temperature stream.
    windows = [[seed[:] for seed in seed_list] for _ in temperatures]
    generated = [[seed[:] for seed in seed_list] for _ in temperatures]

    for i in range(length):
        # Text progress bar, redrawn in place via the leading carriage return.
        workdone = (i + 1) * 1.0 / length
        sys.stdout.write("\rgenerating {0:} sentences: [{1:20s}] {2:.1f}%"
                         .format(model_type, '#' * int(workdone * 20), workdone * 100))
        sys.stdout.flush()

        for k, stream_temperature in enumerate(temperatures):
            # Encode this stream's own windows.  Predicting every stream from the
            # first stream's windows would make the other streams ignore the
            # characters they have generated themselves.
            x_pred_list = np.zeros((n, maxlen, len(chars)))
            for j, window in enumerate(windows[k]):
                for t, char in enumerate(window):
                    x_pred_list[j, t, char_indices[char]] = 1.

            pred_list = model.predict(x_pred_list, verbose=0)

            for j in range(n):
                next_char = indices_char[sample(pred_list[j, :], stream_temperature)]
                generated[k][j] += next_char
                windows[k][j] = windows[k][j][1:] + next_char

    sys.stdout.write(' - done\n')
    sys.stdout.flush()

    # Concatenate the streams in temperature order.
    return [sentence for stream in generated for sentence in stream]

In [18]:
def print_sentences(seeds, sentences):
    """Print each sentence with its seed prefix and generated suffix in different colours.

    Seeds and sentences are paired positionally; `sentences[k]` is expected to
    start with `seeds[k]`.  The first len(seed) characters are written in green
    and the remainder in blue, using ANSI escape sequences, with a '-----'
    separator line before each sentence.

    Args:
        seeds: list of seed strings.
        sentences: list of strings, each a seed followed by generated text.
    """
    for seed, sentence in zip(seeds, sentences):
        print('-' * 5)
        sys.stdout.write('\x1b[32m')              # green: the genuine seed
        sys.stdout.write(sentence[:len(seed)])
        sys.stdout.write('\x1b[34m')              # blue: the generated continuation
        # Slice to the end of the string so the final generated character is shown too.
        sys.stdout.write(sentence[len(seed):])
        sys.stdout.write('\x1b[m')                # reset colours
        sys.stdout.write('\n')
        sys.stdout.flush()
        
def pick_sentences(n, maxlen):
    """Return `n` substrings of `maxlen` characters cut from random places in `text`.

    Start offsets are drawn with np.random.randint from the half-open range
    [0, len(text) - maxlen - 1), so results depend on NumPy's global random
    state.  Offsets may repeat.
    """
    upper_bound = len(text) - maxlen - 1
    offsets = np.random.randint(upper_bound, size=(1, n)).ravel().tolist()
    return [text[offset: offset + maxlen] for offset in offsets]

In [209]:
# Train the four generator models one after another on the one-hot (x, y) pairs.
#
# NOTE: `x` and `y` must still be the generator training arrays here; the
# discriminator cells further down rebind both names.
#
# Optional preview seeds for inspecting progress between iterations:
#preview_seeds = pick_sentences(3, maxlen=40)

# (label printed before training, model, batch size, epochs)
training_plan = [
    ('GRU', gru_generator_model, 1024, 60),
    ('LSTM', lstm_generator_model, 1024, 60),
    ('Keras', keras_model, 128, 60),
    ('ML', ml_model, 64, 100),
]

for iteration in range(1):
    print()
    print('-' * 50)
    print('Iteration', iteration)
    for label, model, batch_size, epochs in training_plan:
        print(label)
        model.fit(x, y, batch_size=batch_size, epochs=epochs)
    # Optional: generate and print text from the preview seeds after each iteration.
    #generated_sentences = generate_sentence_list(preview_seeds)
    #print_sentences(preview_seeds, generated_sentences)


--------------------------------------------------
Iteration 0
GRU
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
LSTM
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/

Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
ML
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Ep

In [210]:
# For a more complete inspection, generate and print a batch of sentences.
num_sentences = 100             # how many seeds to draw and extend
sentence_length = 40            # number of characters generated after each seed
sample_temperature = 0.25       # see the discussion of temperature in sample()
#sample_temperature = 0.1

# Draw the seeds from random positions in the training text.
start_index_list = np.random.randint(len(text) - maxlen - 1, size=(1, num_sentences)).flatten().tolist()
preview_seeds = [text[start_index: start_index + maxlen] for start_index in start_index_list]

# Preview the 'ml' generator.  Pass a different model_type (e.g. 'lstm', or omit
# it for the GRU default) to inspect one of the other generators instead.
ml_generated_sentences = generate_sentence_list(
    preview_seeds,
    length=sentence_length,
    temperature=sample_temperature,
    model_type='ml',
)
print_sentences(preview_seeds, ml_generated_sentences)

generating ml sentences: [####################] 100.0% - done
-----
[32m to which sense and heart prompt them--a[34mnd who developed in the great spirit kn[m
-----
[32m complaisant and wanton surrender to the[34m suffering man indestingly was for the [m
-----
[32ma prejudice, perhaps a
prematureness or [34mexperiences, the great reasons of some
[m
-----
[32m our
knowledge can permit only pleasure [34min something desires to spirituality an[m
-----
[32mery seductive atmosphere--of the moral
m[34maxurs of the world, phologically discov[m
-----
[32mined to command, in whom the
judgment an[34md scholar, when in dream in the most en[m
-----
[32m been a long tragedy in its origin.

26.[34m what they all werragle of the world, p[m
-----
[32mon of many kinds of morality. in every "[34mnew for the post concernated in the pos[m
-----
[32mif we please, become sensible, even in o[34mur grammatical from the posters of his [m
-----
[32mand again and again,
the one of whom

-----
[32m he overturns whatever he
finds veiled o[34mf a god, and in a conduct concerns the [m
-----
[32malk
and laugh, and whom one may send to [34madvact the same advancement and of some[m
-----
[32m gives up everything for him, does not p[34mresent the most different from the posi[m
-----
[32m
conclusions with the artificial, as do [34mnot believe that in its own science.


[m
-----
[32mman has allowed himself to be
persuaded [34mthe distrustful and artists of mankind,[m
-----
[32mly have?) that which i always stood most[34m indifeetence, for the sake of latest g[m
-----
[32mnsters should be careful lest he thereby[34m alone who have done as a man of lates [m
-----
[32myoung, in the penal laws (which have an [34meldient and possestion of the powerful [m
-----
[32m, and because scientificness leads to
th[34me whole or all has the position of the [m
-----
[32mg similarities, yet in the same
mental c[34monscience to express in the promach of [m
-----
[32

In [211]:
# Checkpoint: write every generator to an .h5 file so it can be downloaded,
# re-uploaded or added to git.
for model, checkpoint_path in (
    (gru_generator_model, './gru_generator_model.h5'),
    (lstm_generator_model, './lstm_generator_model.h5'),
    (keras_model, './keras_generator_model.h5'),
    (ml_model, './ml_generator_model.h5'),
):
    save_model(model, checkpoint_path)

In [30]:
# Restore the four generators from their checkpoint files, in the same order
# in which they are saved.
(gru_generator_model,
 lstm_generator_model,
 keras_model,
 ml_model) = [
    load_model(checkpoint_path)
    for checkpoint_path in (
        './gru_generator_model.h5',
        './lstm_generator_model.h5',
        './keras_generator_model.h5',
        './ml_generator_model.h5',
    )
]
# Alternative checkpoint for the 'ml' generator:
#ml_model = load_model('./cluster_generator_model.h5')

In [32]:
# Generating the training fake sentences for the Discriminator network
#
# These are saved to the file 'fake.pkl' -- you could download this to your
# user drive and re-upload it in a subsequent session, to save regenerating
# it again (in which case you don't need to evaluate this cell).

#training_seeds = pick_sentences(3000, maxlen=40)
training_seeds = pick_sentences(5000, maxlen=40)

# generate_sentence_list_n returns three sentences per seed (one per sampling
# temperature), so this list is three times as long as training_seeds.
# Alternatives: generate_sentence_list(...) for one sentence per seed, or
# model_type='lstm' / the GRU default for a different generator.
training_generated_sentences = generate_sentence_list_n(training_seeds, length=40, temperature=0.25, model_type='ml')

# Strip out the initial 40 chars (the seed sequence, which is genuine data from the corpus)
# and keep the 40 generated characters that follow.
training_generated_sentences = [sentence[40:40+40] for sentence in training_generated_sentences]

# The context manager closes the file even if one of the dumps raises.
with open('fake.pkl', 'wb') as output:
    pickle.dump(training_seeds, output)
    pickle.dump(training_generated_sentences, output)

generating ml sentences: [                    ] 2.5%



generating ml sentences: [####################] 100.0% - done


In [33]:
# Load the training set from the file.  The two objects are read back in the
# order in which they were dumped: the seeds first, then the generated sentences.
#
# NOTE: pickle.load can execute arbitrary code, so only load a 'fake.pkl' that
# you produced yourself.
with open('fake.pkl', 'rb') as pkl_file:  # closed automatically, even on error
    training_seeds = pickle.load(pkl_file)
    training_generated_sentences = pickle.load(pkl_file)

In [34]:
# Build a 50:50 discriminator dataset: every generated ('fake') sentence is
# matched by one genuine sentence drawn from the corpus.
#
# NOTE: this rebinds `x` and `y`, which until now held the generator training data.
num_generated = len(training_generated_sentences)
print('Num generated: ', num_generated)
training_real_sentences = pick_sentences(num_generated, maxlen=40)

# Fake sentences first, genuine ones after them.
all_training_sentences = training_generated_sentences + training_real_sentences
n = len(all_training_sentences)
print('All training sequences: ', n)

# Inputs: one-hot encoding of every sentence.
x = np.zeros((n, 40, len(chars)))
for row, sentence in enumerate(all_training_sentences):
    x[row] = onehot_encode(sentence, maxlen=40)

# Labels: 0 for the first num_generated rows (fake), 1 for the rest (real).
y = np.vstack([np.zeros((num_generated, 1)), np.ones((n - num_generated, 1))])

Num generated:  15000
All training sequences:  30000


In [23]:
class PlotLossAccuracy(keras.callbacks.Callback):
    """Keras callback that redraws loss and accuracy curves after every epoch.

    The per-epoch training/validation loss and accuracy are accumulated on the
    instance, and at the end of each epoch the cell output is cleared and a
    two-panel figure (loss on the left, accuracy on the right) is drawn.
    """

    def on_train_begin(self, logs=None):
        """Reset all accumulated history at the start of a training run."""
        self.i = 0            # number of epochs recorded so far
        self.x = []           # epoch indices used as the x axis
        self.acc = []
        self.losses = []
        self.val_losses = []
        self.val_acc = []
        self.logs = []        # raw logs dict of every epoch

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and redraw both plots."""
        # Avoid a shared mutable default argument; treat a missing dict as empty.
        logs = logs or {}

        self.logs.append(logs)
        self.x.append(int(self.i))
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        # The accuracy metric is logged as 'acc' by older Keras releases and as
        # 'accuracy' by newer ones; accept either key.
        self.acc.append(logs.get('acc', logs.get('accuracy')))
        self.val_acc.append(logs.get('val_acc', logs.get('val_accuracy')))

        self.i += 1

        # Replace the previous figure instead of stacking a new one per epoch.
        clear_output(wait=True)
        fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(16, 6))

        loss_ax.plot(self.x, self.losses, label="train loss")
        loss_ax.plot(self.x, self.val_losses, label="validation loss")
        loss_ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        loss_ax.set_ylabel('loss')
        loss_ax.set_xlabel('epoch')
        loss_ax.set_title('Model Loss')
        loss_ax.legend()

        acc_ax.plot(self.x, self.acc, label="training accuracy")
        acc_ax.plot(self.x, self.val_acc, label="validation accuracy")
        acc_ax.legend()
        acc_ax.set_ylabel('accuracy')
        acc_ax.set_xlabel('epoch')
        acc_ax.set_title('Model Accuracy')
        acc_ax.xaxis.set_major_locator(MaxNLocator(integer=True))

        plt.show()

In [53]:
print('Build model...')

# Discriminator: a single GRU layer of 64 units (with input and recurrent
# dropout of 0.2) reads the one-hot encoded sentence, and a sigmoid unit
# outputs the probability that the sentence is genuine (label 1) rather than
# generated (label 0).
#
# Variants explored earlier and no longer part of this cell: stacked LSTM/GRU
# layers, Conv1D front ends, wider dense heads with LeakyReLU, batch
# normalisation, and heavier dropout / L1-L2 regularisation.
discriminator_model = Sequential()
discriminator_model.add(GRU(64, use_bias=True, dropout=0.2, recurrent_dropout=0.2,
                            input_shape=(maxlen, len(chars))))
discriminator_model.add(Dense(1, activation='sigmoid'))

# Setup the optimisation strategy (RMSprop(lr=0.001) is an alternative to try).
opt = Nadam(lr=0.001)
discriminator_model.compile(optimizer=opt,
                            loss='binary_crossentropy',
                            metrics=['accuracy'])

print('compiled.')
discriminator_model.summary()

Build model...
compiled.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
gru_9 (GRU)                  (None, 64)                23424     
_________________________________________________________________
dense_13 (Dense)             (None, 1)                 65        
Total params: 23,489
Trainable params: 23,489
Non-trainable params: 0
_________________________________________________________________


In [54]:
# Hold out 35% of the discriminator dataset for validation (fixed random_state
# for a repeatable split), then train the discriminator.
#
# To watch live loss/accuracy curves, pass callbacks=[PlotLossAccuracy()] to fit
# (e.g. with epochs=3, batch_size=64).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.35, random_state=42)
discriminator_model.fit(
    x_train, y_train,
    validation_data=(x_test, y_test),
    epochs=5,
    batch_size=4,
)

Train on 19500 samples, validate on 10500 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x266eafc3208>

In [55]:
# Once you're happy with your discriminator model, evaluate this cell to save it:
save_model(discriminator_model, './discriminator_model.h5')
# Run these commands in the terminal to submit your model for assessment.
# git add lab-07/discriminator_model.h5
# git commit -m "Add/update discriminator model."
# git push
# submit-lab 7

# evaluate() returns [loss, accuracy] for this model.  The reported "Score" is the
# loss on the held-out split; the training loss is computed but not printed.
train_loss, train = discriminator_model.evaluate(x_train, y_train, batch_size=64, verbose=0)
score, acc = discriminator_model.evaluate(x_test, y_test, batch_size=64, verbose=0)

print("Score: %.2f" % (score))
print("Training Accuracy: %.2f%%" % (train * 100))
print("Validation Accuracy: %.2f%%" % (acc * 100))

Score: 0.30
Training Accuracy: 89.44%
Validation Accuracy: 86.53%
