In [12]:
# Read the whole CSV as a single string; the model is trained on raw characters,
# so the file is deliberately not parsed into rows/columns.
with open("airport_reviews_short.csv", encoding="utf-8") as reviews_file:
    reviews_text = reviews_file.read()

In [13]:
# Vocabulary: every distinct character in the corpus, in sorted order so that
# the index assignment is deterministic across runs.
chars_list = sorted(set(reviews_text))
# Map each character to its position in chars_list. enumerate() yields the
# position directly instead of re-searching the list for every character.
char_to_index_dict = {
    character: index for index, character in enumerate(chars_list)
}

In [14]:
# Show the character -> index mapping to sanity-check the vocabulary.
char_to_index_dict

{'\n': 0,
 ' ': 1,
 '&': 2,
 "'": 3,
 '(': 4,
 ')': 5,
 '-': 6,
 '.': 7,
 '/': 8,
 '0': 9,
 '1': 10,
 '2': 11,
 '3': 12,
 '5': 13,
 'A': 14,
 'B': 15,
 'D': 16,
 'E': 17,
 'F': 18,
 'I': 19,
 'J': 20,
 'K': 21,
 'L': 22,
 'M': 23,
 'N': 24,
 'O': 25,
 'S': 26,
 'T': 27,
 'U': 28,
 'W': 29,
 'X': 30,
 'a': 31,
 'b': 32,
 'c': 33,
 'd': 34,
 'e': 35,
 'f': 36,
 'g': 37,
 'h': 38,
 'i': 39,
 'j': 40,
 'k': 41,
 'l': 42,
 'm': 43,
 'n': 44,
 'o': 45,
 'p': 46,
 'q': 47,
 'r': 48,
 's': 49,
 't': 50,
 'u': 51,
 'v': 52,
 'w': 53,
 'x': 54,
 'y': 55}

In [15]:
import keras
from keras import layers

# Number of characters in each input window fed to the network.
max_length = 40

# Two stacked LSTM layers followed by a softmax over the vocabulary, so the
# network outputs a probability for each possible next character.
rnn = keras.models.Sequential()
# First LSTM consumes one-hot windows of shape (max_length, vocabulary size).
# return_sequences=True makes it emit an output per timestep, which the second
# LSTM needs as its input sequence.
rnn.add(
    layers.LSTM(1024, input_shape=(max_length, len(chars_list)), return_sequences=True)
)
# Only the first layer of a Sequential model needs input_shape; this layer's
# input is the previous LSTM's output sequence, so its shape is inferred.
rnn.add(layers.LSTM(1024))
rnn.add(layers.Dense(len(chars_list), activation="softmax"))

In [16]:
# Nesterov acceleration only has an effect when momentum is non-zero; with the
# default momentum of 0 the update is identical to plain SGD, so momentum is
# set explicitly here.
optimizer = keras.optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
# Targets are one-hot vectors over the vocabulary, hence categorical cross-entropy.
rnn.compile(loss="categorical_crossentropy", optimizer=optimizer)

In [17]:
import numpy as np


def text_to_vector(input_txt, max_length, char_to_index=None):
    """One-hot encode a text into (window, next character) training pairs.

    A window of ``max_length`` consecutive characters is taken at every
    position of ``input_txt`` (stride 1); the character immediately after the
    window is its target.

    Parameters
    ----------
    input_txt : str
        Text to encode. Every character must be a key of the mapping.
    max_length : int
        Number of characters in each input window.
    char_to_index : dict, optional
        Mapping from character to one-hot column index. Defaults to the
        notebook-level ``char_to_index_dict``.

    Returns
    -------
    list
        ``[X, y]`` where ``X`` has shape
        ``(n_samples, max_length, vocabulary size)`` and ``y`` has shape
        ``(n_samples, vocabulary size)``, with
        ``n_samples = max(len(input_txt) - max_length, 0)``.

    Raises
    ------
    KeyError
        If a character of ``input_txt`` is missing from the mapping.
    """
    if char_to_index is None:
        char_to_index = char_to_index_dict
    vocab_size = len(char_to_index)
    # A text no longer than the window yields no samples (empty arrays).
    n_samples = max(len(input_txt) - max_length, 0)

    X = np.zeros((n_samples, max_length, vocab_size))
    y = np.zeros((n_samples, vocab_size))
    for i in range(n_samples):
        # Slice the window directly from the text instead of first storing
        # every window in an intermediate list.
        for t, char in enumerate(input_txt[i : i + max_length]):
            X[i, t, char_to_index[char]] = 1
        # The target is set once per sample, outside the per-character loop,
        # so it is also filled in when the window is empty.
        y[i, char_to_index[input_txt[i + max_length]]] = 1
    return [X, y]

In [18]:
# One-hot encode the corpus into (window, next character) training pairs.
X, y = text_to_vector(input_txt=reviews_text, max_length=max_length)
# Train for a single pass over the data in mini-batches of 256 samples.
rnn.fit(x=X, y=y, batch_size=256, epochs=1)

Epoch 1/1


<keras.callbacks.callbacks.History at 0x2263dc149b0>

In [19]:
# Write the model's weights to "weights.hdf5" (path relative to the working directory).
rnn.save_weights("weights.hdf5")