# Long Short-Term Memory

A Long Short-Term Memory (LSTM) network is a Recurent Neural Network (RNN) that has the capacity to remember.

Let's build a LSTM network "from scratch" to have a better understanding!

## Hand Made LSTM Network

In [1]:
import os
import numpy as np
import pandas as pd
import pickle
import re

import tensorflow as tf
from keras.preprocessing.text import Tokenizer
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Dense, Dropout
from keras.optimizers import RMSprop
from keras.callbacks import Callback, LearningRateScheduler
from keras.callbacks import ModelCheckpoint
from keras.utils import plot_model

# Clear TensorFlow session
tf.keras.backend.clear_session()

import matplotlib.pyplot as plt

2023-12-09 21:40:43.993920: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-09 21:40:44.075001: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-09 21:40:44.445728: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-09 21:40:44.445834: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-09 21:40:44.534255: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

### Download Data

In [2]:
# !bash data_script.sh 11339 aesop

### Load Data

In [2]:
def preprocess(text, start_story):
    text = text.lower()
    text = start_story + text
    text = text.replace("\n\n\n\n\n", start_story)
    text = text.replace("\n", " ")   # Replace newline character by a space
    text = re.sub("  +", ". ", text) # Replace space followed by oen or more space by a point and a space
    text = text.strip()              # Remove leading and trailing spaces
    text = text.replace("..", ".")   # Replace double point by a single point
    text = re.sub('([!"#$%&()*+,-./:;<=>?@[\]^_`{|}~])', r' \1', text)
    text = re.sub("\s{2,}", " ", text) # Replace two whitespace character by one space
    return text

In [3]:
FILE_PATH = "./data/aesop/data.txt"

SEQ_LENGTH = 20
START_STORY = "|" * SEQ_LENGTH

with open(FILE_PATH, encoding="utf-8-sig") as f:
    text = f.read()

# Preprocessing
text = preprocess(text, START_STORY)

# Tokenization
tokenizer = Tokenizer(char_level=False,
                      filters="")
tokenizer.fit_on_texts([text])
NB_TOTAL_WORDS = len(tokenizer.word_index) + 1
token_list = tokenizer.texts_to_sequences([text])[0]

print(f"Total words = {NB_TOTAL_WORDS}")
print(f"Tokenizer word index = {tokenizer.word_index}")
print(f"Tokenizer token list = {tokenizer.texts_to_sequences([text])}")

Total words = 5084
Tokenizer word index = {'|': 1, 'the': 2, ',': 3, '.': 4, 'and': 5, 'a': 6, 'to': 7, 'of': 8, 'he': 9, 'his': 10, 'in': 11, '"': 12, 'you': 13, 'was': 14, 'him': 15, 'for': 16, 'it': 17, 'that': 18, 'with': 19, 'but': 20, 'at': 21, 'they': 22, 'said': 23, 'as': 24, 'i': 25, '|the': 26, ':': 27, 'by': 28, 'on': 29, 'when': 30, 'be': 31, ';': 32, 'so': 33, 'one': 34, 'had': 35, 'all': 36, 'is': 37, 'them': 38, 'not': 39, 'up': 40, 'who': 41, 'lion': 42, 'her': 43, 'fox': 44, 'out': 45, 'have': 46, 'your': 47, 'are': 48, 'were': 49, 'me': 50, 'which': 51, 'no': 52, 'their': 53, 'this': 54, 'man': 55, 'ass': 56, 'my': 57, 'if': 58, 'wolf': 59, 'or': 60, 'an': 61, 'from': 62, 'she': 63, 'into': 64, 'do': 65, '!': 66, 'about': 67, 'time': 68, 'came': 69, '?': 70, 'there': 71, 'will': 72, 'upon': 73, 'then': 74, 'what': 75, 'himself': 76, 'any': 77, 'some': 78, 'once': 79, 'day': 80, 'project': 81, 'would': 82, 'other': 83, 'than': 84, 'very': 85, 'work': 86, 'went': 87, 'c

In [4]:
print(f'Tokenizer token list = {tokenizer.texts_to_sequences(["man the lion"])}')
print(f'Pre-processed text sample = {preprocess("man the lion", START_STORY)}')
print(f'Tokenizer token list = {tokenizer.texts_to_sequences([preprocess("man the lion", START_STORY)])}')

Tokenizer token list = [[55, 2, 42]]
Pre-processed text sample =  | | | | | | | | | | | | | | | | | | | |man the lion
Tokenizer token list = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 42]]


In [5]:
def generate_sequences(token_list, sequence_length, step, nb_classes):
    X = []
    y = []
    for i in range(0, len(token_list) - sequence_length, step):
        X.append(token_list[i:i+sequence_length])
        y.append(token_list[i+sequence_length])

    y = tf.keras.utils.to_categorical(y, num_classes=nb_classes)

    nb_seq = len(X)
    print(f"Number of sequences = {nb_seq}")

    return X, y, nb_seq

In [6]:
# Build dataset
STEP = 1

X, y, NB_SEQ = generate_sequences(token_list, SEQ_LENGTH, STEP, NB_TOTAL_WORDS)

X = np.array(X)
y = np.array(y)

Number of sequences = 57231


### LSTM Network

In [8]:
class LSTM_RNN():

    def __init__(self, nb_units, embedding_size, total_words, dropout_rate=0.2,learning_rate=0.001):
        self.nb_units = nb_units
        self.embedding_size = embedding_size
        self.total_words = total_words
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        
        # Input
        self.input = Input(shape=(None,))

        # Layers
        x = Embedding(input_dim=self.total_words,
                      output_dim=self.embedding_size,
                      name="embedding")(self.input)
        x = LSTM(units=self.nb_units,
                 name="lstm")(x)
        x = Dropout(rate=dropout_rate,
                    name="dropout")(x)
        
        # Output
        self.output = Dense(units=self.total_words,
                            activation="softmax",
                            name="output")(x)
        
        # Model
        self.model = Model(self.input, self.output)

        # Compile
        self.optimizer = RMSprop(learning_rate=self.learning_rate)
        self.model.compile(optimizer=self.optimizer,
                           loss="categorical_crossentropy")
        
    def summary(self):
        self.model.summary()

    def plot_model(self, file_path="model/viz/model.png"):
        plot_model(self.model, to_file=file_path, show_shapes=True, show_layer_names=True)

    def load_weights(self, file_path="./model/weights/save"):
        self.model.load_weights(file_path)
        
    def fit(self, X_train, y_train, batch_size=32, epochs=100):
        self.model.fit(x=X_train,
                       y=y_train,
                       batch_size=batch_size,
                       epochs=epochs,
                       shuffle=True)
        
    def predict(self, x):
        return self.model.predict(x)
    
    def save_weights(self, file_path="./model/weights/save"):
        self.model.save_weights(file_path)

In [9]:
NB_UNITS = 256
EMBEDDING_SIZE = 100
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001

lstm = LSTM_RNN(nb_units=NB_UNITS,
                embedding_size=EMBEDDING_SIZE,
                total_words=NB_TOTAL_WORDS,
                dropout_rate=DROPOUT_RATE,
                learning_rate=LEARNING_RATE)

In [10]:
lstm.plot_model()

In [11]:
lstm.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         508400    
                                                                 
 lstm (LSTM)                 (None, 256)               365568    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 output (Dense)              (None, 5084)              1306588   
                                                                 
Total params: 2180556 (8.32 MB)
Trainable params: 2180556 (8.32 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


### Stacked-LSTM Network

In [12]:
class Stacked_LSTM_RNN():

    def __init__(self, nb_units, embedding_size, total_words, dropout_rate=0.2,learning_rate=0.001):
        self.nb_units = nb_units
        self.embedding_size = embedding_size
        self.total_words = total_words
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        
        # Input
        self.input = Input(shape=(None,))

        # Layers
        x = Embedding(input_dim=self.total_words,
                      output_dim=self.embedding_size,
                      name="embedding")(self.input)
        x = LSTM(units=self.nb_units,
                 return_sequences=True,
                 name="lstm_1")(x)
        x = LSTM(units=self.nb_units,
                 name="lstm_2")(x)
        x = Dropout(rate=dropout_rate,
                    name="dropout")(x)
        
        # Output
        self.output = Dense(units=self.total_words,
                            activation="softmax",
                            name="output")(x)
        
        # Model
        self.model = Model(self.input, self.output)

        # Compile
        self.optimizer = RMSprop(learning_rate=self.learning_rate)
        self.model.compile(optimizer=self.optimizer,
                           loss="categorical_crossentropy")
        
    def summary(self):
        self.model.summary()

    def plot_model(self, file_path="model/viz/model.png"):
        plot_model(self.model, to_file=file_path, show_shapes=True, show_layer_names=True)

    def load_weights(self, file_path="./model/weights/save"):
        self.model.load_weights(file_path)
        
    def fit(self, X_train, y_train, batch_size=32, epochs=100):
        self.model.fit(x=X_train,
                       y=y_train,
                       batch_size=batch_size,
                       epochs=epochs,
                       shuffle=True)
        
    def predict(self, x):
        return self.model.predict(x)
    
    def save_weights(self, file_path="./model/weights/save"):
        self.model.save_weights(file_path)

In [13]:
NB_UNITS = 256
EMBEDDING_SIZE = 100
DROPOUT_RATE = 0.2
LEARNING_RATE = 0.001

stacked_lstm = Stacked_LSTM_RNN(nb_units=NB_UNITS,
                                embedding_size=EMBEDDING_SIZE,
                                total_words=NB_TOTAL_WORDS,
                                dropout_rate=DROPOUT_RATE,
                                learning_rate=LEARNING_RATE)

In [14]:
stacked_lstm.plot_model()

In [15]:
stacked_lstm.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, None)]            0         
                                                                 
 embedding (Embedding)       (None, None, 100)         508400    
                                                                 
 lstm_1 (LSTM)               (None, None, 256)         365568    
                                                                 
 lstm_2 (LSTM)               (None, 256)               525312    
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 output (Dense)              (None, 5084)              1306588   
                                                                 
Total params: 2705868 (10.32 MB)
Trainable params: 2705868 

### Train

In [12]:
BATCH_SIZE = 32
EPOCHS = 100

lstm.fit(X, y, BATCH_SIZE, EPOCHS)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100

KeyboardInterrupt: 

In [17]:
BATCH_SIZE = 32
EPOCHS = 4

stacked_lstm.fit(X, y, BATCH_SIZE, EPOCHS)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


### Evaluate LSTM Network

### Save Model

In [13]:
lstm.save_weights()

In [None]:
stacked_lstm.save_weights()

### Load Pre-Trained Model

In [None]:
lstm.load_weights()

In [None]:
stacked_lstm.load_weights()

### Predictions

In [18]:
def sample_with_temp(preds, temperature=1.0):
    """
    Sample an index from a probability array.
    """
    preds = np.asarray(preds).astype("float64")
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probs = np.random.multinomial(1, preds, 1)
    return np.argmax(probs)

In [19]:
def generate_text(tokenizer, model, seed_text, nb_words, max_sequence_length, temperature):
    output_text = seed_text
    seed_text = preprocess(seed_text, START_STORY)
    seed_text = seed_text
    print(f"seed_text = {seed_text}")

    for i in range(nb_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = token_list[-max_sequence_length:] # take the last max_sequence_length tokens (cap the input sequence to improve performance)
        token_list = np.reshape(token_list, (1, max_sequence_length))

        probs = model.predict(token_list, verbose=0)[0]
        y_class = sample_with_temp(probs, temperature)

        output_word = tokenizer.index_word[y_class] if y_class > 0 else ""

        if output_word == "|":
            break

        seed_text += output_word + " "
        output_text += output_word + " "

    return output_text

In [21]:
# Seed text
seed_text = "the lion and the man"

In [18]:
output_text = generate_text(tokenizer=tokenizer,
                            model=lstm.model,
                            seed_text=seed_text,
                            nb_words=100,
                            max_sequence_length=20,
                            temperature=0.2)

print(output_text)

seed_text =  | | | | | | | | | | | | | | | | | | | |the lion and the man
the lion and the man. lion . a lion and a lion fell into the lamb and turned up the road and his driver , and the lion , and the other had she should the eagle and said , "i am not to go to kill the man . " the bull replied , "oh , i am never been down to the head , and you must be down to the land , and we shall not been to get over the head of being afraid of it , and said , "you am think , i am have to come to 


In [22]:
output_text = generate_text(tokenizer=tokenizer,
                            model=stacked_lstm.model,
                            seed_text=seed_text,
                            nb_words=100,
                            max_sequence_length=20,
                            temperature=0.2)

print(output_text)

seed_text =  | | | | | | | | | | | | | | | | | | | |the lion and the man
the lion and the man. fox . a fox was a man , and , and , , and , , said , " said he , " said the fox , "i you , " said he , " said he , " said the lion , "you i , " said you , " said he , " said the fox , "you have , " said he , " said he , "you you , " said the lion , " said the fox , and said , " said the fox , "you have not a time , " said 
