In [1]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, LSTM
from keras.optimizers import RMSprop

import numpy as np

import io

Using TensorFlow backend.


In [2]:
# Number of consecutive characters in one input window; the character that
# follows each window is the prediction target.
SEQUENCE_LENGTH: int = 50

In [3]:
# Read the whole lyrics corpus into memory and lower-case it, so that upper-
# and lower-case letters share one vocabulary entry.
with io.open("lirik_lp.txt", mode='r', encoding='utf8') as lyrics_file:
    raw_text = lyrics_file.read()
lirik = raw_text.lower()

In [4]:
# Vocabulary: every distinct character of the corpus, sorted so each character
# gets a deterministic position.
chars = sorted(set(lirik))
print(chars)

['\n', ' ', '!', '"', "'", '(', ')', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', ':', ';', '?', '[', ']', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'á', 'ç', '—', '’', '…']


In [5]:
# Slide a SEQUENCE_LENGTH-wide window over the corpus one character at a time.
# sequences[k] is the window starting at offset k and next_chars[k] is the
# single character immediately after that window.
window_starts = range(len(lirik) - SEQUENCE_LENGTH)
sequences = [lirik[start:start + SEQUENCE_LENGTH] for start in window_starts]
next_chars = [lirik[start + SEQUENCE_LENGTH] for start in window_starts]

In [6]:
# Two-way lookup between a vocabulary character and its position in `chars`.
char_to_index = {char: index for index, char in enumerate(chars)}
index_to_char = dict(enumerate(chars))

In [7]:
# One-hot encode the training data.
#   X[i, t, k] is True when character t of window i is vocabulary entry k.
#   y[i, k]    is True when the character following window i is entry k.
# `np.bool_` is used rather than the `np.bool` alias: the alias was deprecated
# in NumPy 1.20 and removed in 1.24, where it raises AttributeError.
X = np.zeros((len(sequences), SEQUENCE_LENGTH, len(chars)), dtype=np.bool_)
y = np.zeros((len(sequences), len(chars)), dtype=np.bool_)
for i, sentence in enumerate(sequences):
    for t, char in enumerate(sentence):
        X[i, t, char_to_index[char]] = 1
    y[i, char_to_index[next_chars[i]]] = 1

In [8]:
# Character-level model: one LSTM layer over the one-hot window, dropout, a
# ReLU hidden layer, and a softmax over the vocabulary for the next character.
model = Sequential()
model.add(LSTM(128, input_shape=(SEQUENCE_LENGTH, len(chars))))
model.add(Dropout(0.2))
model.add(Dense(128, activation='relu'))
model.add(Dense(len(chars), activation='softmax'))

# The learning rate is passed positionally on purpose: it is the first
# parameter of RMSprop both in older Keras (where it is named `lr`) and in
# newer releases (where it was renamed `learning_rate`), so this line works
# unchanged across versions.
optimizer = RMSprop(0.01)
model.compile(loss='categorical_crossentropy', optimizer=optimizer)
model.summary()

W0720 16:26:17.634644 140251119593280 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0720 16:26:18.015065 140251119593280 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0720 16:26:18.125256 140251119593280 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0720 16:26:19.584431 140251119593280 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:133: The n

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               94720     
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_2 (Dense)              (None, 56)                7224      
Total params: 118,456
Trainable params: 118,456
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Train on the one-hot windows: 20 epochs in mini-batches of 128 samples.
# Left as the cell's trailing expression so the returned History is displayed.
model.fit(
    x=X,
    y=y,
    epochs=20,
    batch_size=128,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1340a92c7b8>

In [10]:
# Write the trained model to disk so it can be reloaded without retraining.
model.save(filepath='lirik_model.h5')

In [8]:
# Rebind `model` to the network stored on disk and print its layer summary.
model = load_model(filepath="lirik_model.h5")
model.summary()

W0720 20:55:11.222376 140340736927552 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0720 20:55:13.233971 140340736927552 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0720 20:55:13.379240 140340736927552 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0720 20:55:15.057075 140340736927552 deprecation_wrapper.py:119] From /home/golok/anaconda3/envs/machinelearning/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:174: The n

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 128)               94720     
_________________________________________________________________
dense_1 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_2 (Dense)              (None, 56)                7224      
Total params: 118,456
Trainable params: 118,456
Non-trainable params: 0
_________________________________________________________________


In [9]:
def get_highest_index(preds):
    """Return the index of the largest value in ``preds``.

    ``preds`` is converted to a float64 NumPy array first. The result is the
    index ``argmax`` reports: a position in the flattened array, and the
    first occurrence when several entries tie for the maximum.
    """
    scores = np.asarray(preds).astype('float64')
    return scores.argmax()

In [11]:
# Generate 400 characters greedily: at every step the model sees the most
# recent window of text, the highest-scoring next character is appended, and
# the window slides forward by one character.
generated = ''
sentence = "Blame me for my past, but I just want to see you\nI"
sentence = sentence.lower()

# Reject seed characters that the vocabulary does not contain up front, naming
# them, instead of failing with a bare KeyError inside the generation loop.
unknown_chars = sorted(set(sentence) - set(char_to_index))
if unknown_chars:
    raise ValueError(
        'Seed contains characters missing from the vocabulary: %r' % unknown_chars
    )

generated += sentence

print('----- Generating with seed: "' + sentence + '"')
print(generated, end='')

# The model input holds exactly SEQUENCE_LENGTH time steps. Keep only the tail
# of a longer seed so the index into `x` can never run past the window.
sentence = sentence[-SEQUENCE_LENGTH:]

for i in range(400):
    x = np.zeros((1, SEQUENCE_LENGTH, len(chars)))
    # Right-align the window: a seed shorter than SEQUENCE_LENGTH leaves the
    # leading steps empty so the real characters sit directly before the
    # prediction. For a full-length window the offset is 0.
    offset = SEQUENCE_LENGTH - len(sentence)
    for t, char in enumerate(sentence):
        x[0, offset + t, char_to_index[char]] = 1.

    predictions = model.predict(x, verbose=0)[0]
    next_index = get_highest_index(predictions)
    next_char = index_to_char[next_index]

    generated += next_char
    # Append, then clip to the window size. This equals dropping the oldest
    # character once the window is full, and lets a short seed grow until it is.
    sentence = (sentence + next_char)[-SEQUENCE_LENGTH:]

    print(next_char, end='')

----- Generating with seed: "blame me for my past, but i just want to see you
i"
blame me for my past, but i just want to see you
i    t  t   t                   t t         t t   t     t t  t  t      t  t t    t      t t t     t t  t t    t      t    t  t    t         t    t t   t    t      t     to          t  t t t       t      t  t     t t  t   t                       t  t   t      t   tt                 t              t  t  t    t    t       t             t          t   t t tt     th   t   t t  t           t t  t        