In [1]:
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, Dense, Dropout, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
import tensorflow.keras.utils as ku
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model

class TransformerBlock(layers.Layer):
    """Transformer encoder block: multi-head self-attention followed by a
    position-wise feed-forward network, each wrapped in dropout, a residual
    connection and layer normalisation.

    Args:
        embed_dim: Size of the last axis of the input. The feed-forward
            network projects back to this size, so the residual additions
            only work when the input's last axis equals `embed_dim`.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied to the attention output and to the
            feed-forward output.
        **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`),
            forwarded to `layers.Layer`.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can reproduce the layer.
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate

        # key_dim is the per-head query/key size; here every head uses the full
        # embed_dim (it is not divided by num_heads).
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = tf.keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to `inputs`.

        Args:
            inputs: Tensor whose last axis has size `embed_dim`.
            training: Forwarded to the dropout layers. Defaults to None so the
                layer can be called without the argument, in which case the
                dropout layers fall back to their own default handling.

        Returns:
            Tensor produced by the second layer normalisation.
        """
        # Self-attention: the same tensor is used as query and value.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)  # residual + norm

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)  # residual + norm

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update(
            {
                "embed_dim": self.embed_dim,
                "num_heads": self.num_heads,
                "ff_dim": self.ff_dim,
                "rate": self.rate,
            }
        )
        return config

# Importing text data
# Colab-only: the upload widget lets the user supply `data.py`. The import of
# `text_data_arr` has to stay below the upload because the module is not on disk
# before that point.
from google.colab import files
uploaded = files.upload()
from data import text_data_arr
data = text_data_arr  # presumably a list of strings, one training line each - verify against data.py

# Seed NumPy and TensorFlow so repeated runs start from the same random state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(data)
# Tokenizer indices start at 1; index 0 is left for the padding value.
total_words = len(tokenizer.word_index) + 1

# convert data to (prefix, next character) training pairs
input_sequences = []
output_sequences = []

for line in data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        input_sequences.append(token_list[:i])   # characters seen so far
        output_sequences.append(token_list[i])   # character to predict

if not input_sequences:
    raise ValueError(
        "No training pairs could be built: every line in `data` tokenizes to fewer than 2 characters."
    )

# pad sequences
# max_sequence_len counts the longest prefix plus its target character, so the
# model input length is max_sequence_len - 1 (generate_text pads to the same length).
max_sequence_len = max(len(x) for x in input_sequences) + 1

# create predictors and label
# The targets come from output_sequences. Slicing them off the padded inputs
# would drop the last character of every line and shift every pair by one.
predictors = pad_sequences(input_sequences, maxlen=max_sequence_len - 1, padding='pre')
label = ku.to_categorical(output_sequences, num_classes=total_words)
assert predictors.shape[0] == label.shape[0], "predictors and labels must have one row per pair"

embed_dim = 10
num_heads = 2
ff_dim = 32

# NOTE(review): no positional embedding is added and the transformer outputs are
# mean-pooled, so the model probably cannot tell character order apart; the
# padding zeros are also embedded and averaged in (no masking). Consider a
# token+position embedding and a padding mask if the generated text stays degenerate.
inputs = layers.Input(shape=(max_sequence_len - 1,))
# `input_length` is omitted: the Input layer already fixes the sequence length and
# the argument is deprecated in newer Keras releases.
embedding_layer = Embedding(total_words, embed_dim)(inputs)
transformer_block1 = TransformerBlock(embed_dim, num_heads, ff_dim)(embedding_layer)
transformer_block2 = TransformerBlock(embed_dim, num_heads, ff_dim)(transformer_block1)
pooling = GlobalAveragePooling1D()(transformer_block2)
mlp = layers.Dense(64, activation='relu')(pooling)
mlp = layers.Dense(32, activation='relu')(mlp)
outputs = layers.Dense(total_words, activation='softmax')(mlp)

model = Model(inputs=inputs, outputs=outputs)

# `metrics` is documented as a list; newer Keras releases reject a bare string.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(predictors, label, epochs=30, verbose=1)

Saving data.py to data.py
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f5b2ed545e0>

In [3]:
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Greedily extend `seed_text` one token at a time using `model`.

    Relies on the module-level `tokenizer` (the one fitted for training) to
    encode the text and to map predicted indices back to tokens.

    Args:
        seed_text: Starting string; generated tokens are appended to it.
        next_words: Maximum number of tokens to append.
        model: Object with a Keras-style `predict(x, verbose=0)` that returns
            one row of scores per input row.
        max_sequence_len: The model input length plus one; the encoded text is
            left-padded (and left-truncated) to `max_sequence_len - 1`.

    Returns:
        `seed_text` followed by the generated tokens. Generation stops early if
        the model predicts an index with no token (e.g. the padding index 0):
        the text would no longer change, so further predictions would be
        identical.
    """
    # index -> token lookup replaces a linear scan over word_index on every step.
    index_to_token = tokenizer.index_word

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted_probs = model.predict(token_list, verbose=0)
        # A single sequence is predicted, so take row 0 as a plain int.
        predicted = int(np.argmax(predicted_probs, axis=-1)[0])

        output_word = index_to_token.get(predicted, "")
        if not output_word:
            break
        seed_text += output_word
    return seed_text

# Demo: extend a short dialogue prompt by 100 generated tokens and print the result.
print(
    generate_text(
        seed_text="John: How are you, Sarah?",
        next_words=100,
        model=model,
        max_sequence_len=max_sequence_len,
    )
)

John: How are you, Sarah?                                                                                                    
