In [1]:
from keras.models import Sequential, Model, load_model
from keras.layers import Dense, Activation, Dropout, LSTM, Input

import numpy as np

Using TensorFlow backend.


In [2]:
SEQUENCE_LENGTH = 50
latent_dim = 256

In [3]:
with open("lirik_lp_woendl.txt", 'r', encoding='utf8') as f:
    lirik = f.read().lower().split("\n")
    
chars = set()
for index, line in enumerate(lirik):
    lirik[index] = "\t{}\n".format(lirik[index])
    for char in lirik[index]:
        if char not in chars:
            chars.add(char)
    
lirik

['\twhy does it feel like night today?\n',
 "\tsomething in here's not right today.\n",
 '\twhy am i so uptight today?\n',
 "\tparanoia's all i got left\n",
 "\ti don't know what stressed me first\n",
 '\tor how the pressure was fed\n',
 '\tbut i know just what it feels like\n',
 '\tto have a voice in the back of my head\n',
 '\tlike a face that i hold inside\n',
 '\ta face that awakes when i close my eyes\n',
 '\ta face that watches every time i lie\n',
 '\ta face that laughs every time i fall\n',
 '\t(and watches everything)\n',
 "\tso i know that when it's time to sink or swim\n",
 '\tthat the face inside is here in me\n',
 '\tright underneath my skin\n',
 "\tit's like i'm paranoid lookin' over my back\n",
 "\tit's like a whirlwind inside of my head\n",
 "\tit's like i can't stop what i'm hearing within\n",
 "\tit's like the face inside is right beneath my skin\n",
 "\ti know i've got a face in me\n",
 '\tpoints out all my mistakes to me\n',
 "\tyou've got a face on the inside too\n

In [4]:
chars = sorted(list(chars))
print(chars)

['\t', '\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', ':', ';', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'á', 'ç', '—', '’', '…']


In [5]:
char_to_index = dict((c, i) for i, c in enumerate(chars)) 
index_to_char = dict((i, c) for i, c in enumerate(chars))

In [6]:
sequences = []
next_sequences = []
for i, line in enumerate(lirik):
    if i != len(lirik) - 1:
        sequences.append(line)
        next_sequences.append(lirik[i + 1])

In [7]:
max_sequences_length = max([len(line) for line in sequences])
max_next_sequences_length = max([len(line) for line in next_sequences])

In [8]:
nb_samples = len(sequences)
nb_chars = len(chars)

tokenized_sequences = np.zeros((nb_samples, max_sequences_length, nb_chars), dtype='float32')
tokenized_next_sequences = np.zeros((nb_samples, max_next_sequences_length, nb_chars), dtype='float32')
target_data = np.zeros((nb_samples, max_next_sequences_length, nb_chars), dtype='float32')

for i in range(nb_samples):
    for j, char in enumerate(sequences[i]):
        tokenized_sequences[i, j, char_to_index[char]] = 1
        
    for j, char in enumerate(next_sequences[i]):
        tokenized_next_sequences[i, j, char_to_index[char]] = 1

        if j > 0:
            target_data[i, j - 1, char_to_index[char]] = 1

In [9]:
encoder_input = Input((None, nb_chars))
encoder_LSTM = LSTM(latent_dim, return_state = True)
encoder_outputs, encoder_h, encoder_c = encoder_LSTM (encoder_input)
encoder_states = [encoder_h, encoder_c]

decoder_input = Input(shape=(None, nb_chars))
decoder_LSTM = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_out, _ , _ = decoder_LSTM(decoder_input, initial_state=encoder_states)
decoder_dense = Dense(nb_chars, activation='softmax')
decoder_out = decoder_dense(decoder_out)

model = Model(inputs=[encoder_input, decoder_input], outputs=[decoder_out])

model.compile(optimizer='rmsprop', loss='categorical_crossentropy')








In [None]:
model.fit(
    x = [tokenized_sequences, tokenized_next_sequences], 
    y = target_data,
    batch_size = 64,
    epochs = 30,
    validation_split = 0.2)

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where



Train on 2789 samples, validate on 698 samples
Epoch 1/30





Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30

In [None]:
model.save('seq2seq_model.h5')

In [57]:
model = load_model("seq2seq_model.h5")
encoder_inputs = model.input[0]
encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output
encoder_states = [state_h_enc, state_c_enc]
encoder_model = Model(encoder_inputs, encoder_states)

decoder_inputs = model.input[1]
decoder_state_input_h = Input(shape=(latent_dim,))
decoder_state_input_c = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
decoder_lstm = model.layers[3]
decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state = decoder_states_inputs)
decoder_states = [state_h, state_c]
decoder_dense = model.layers[4]
decoder_outputs = decoder_dense(decoder_outputs)
decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)

def decode_sequence(input_seq):
    states_value = encoder_model.predict(input_seq)

    target_seq = np.zeros((1, 1, nb_chars))
    target_seq[0, 0, char_to_index['\t']] = 1.

    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h, c = decoder_model.predict([target_seq] + states_value)

        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = index_to_char[sampled_token_index]
        decoded_sentence += sampled_char

        if (sampled_char == '\n' or
           len(decoded_sentence) > max_decoder_seq_length):
            stop_condition = True

        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.

        states_value = [h, c]

    return decoded_sentence

for seq_index in range(100):
    input_seq = tokenized_sequences[seq_index: seq_index + 1]
    decoded_sentence = decode_sequence(input_seq)
    print('-')
    print('Input sentence:', input_texts[seq_index])
    print('Decoded sentence:', decoded_sentence)

KeyError: '\t'