In [29]:
pip install -q "tensorflow-text==2.8.*"

Note: you may need to restart the kernel to use updated packages.


In [33]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
import numpy as np
import warnings
warnings.filterwarnings('always')
warnings.filterwarnings('ignore')

In [35]:
#Open data
#Read the whole corpus inside a context manager so the file handle is closed
#as soon as the text is in memory (the previous bare open() leaked it)
with open('Data.txt') as corpus_file:
    data_file = corpus_file.read()

#Create tokenizer object
tokenizer = Tokenizer()

#Convert data to lowercase and split it into sentences on '.'
data = data_file.lower().split('.')

In [24]:
def Text_Vectorization(tokenizer, data, max_sentence_length):
    """Fit ``tokenizer`` on ``data`` and encode every sentence as a padded row of integers.

    Args:
        tokenizer: Object providing ``fit_on_texts``, ``texts_to_sequences`` and
            ``word_index`` (the notebook passes a Keras ``Tokenizer``). It is
            fitted in place.
        data: Iterable of sentences (strings) to learn the vocabulary from and encode.
        max_sentence_length: Number of columns of the returned matrix (``maxlen``).

    Returns:
        Tuple ``(total_words, input_sequences)`` where ``total_words`` is the size
        of the learned word index plus one and ``input_sequences`` is the padded
        integer matrix.
    """
    #Learn the vocabulary: each word is mapped to its own index > 0,
    #index 0 stays free so it can be used for padding
    tokenizer.fit_on_texts(data)

    #Encode each sentence as word indices and append zeros up to max_sentence_length
    padded_sequences = tf.keras.preprocessing.sequence.pad_sequences(
        tokenizer.texts_to_sequences(data),
        maxlen = max_sentence_length,
        padding = 'post',
        value = 0,
    )

    #The extra 1 accounts for the reserved padding index 0
    vocabulary_size = 1 + len(tokenizer.word_index)

    return vocabulary_size, padded_sequences

In [25]:
def Create_Output_Sequences(input_sequences):
    """Build the target matrix by shifting the rows of ``input_sequences`` up by one.

    Row ``i`` of the result is row ``i + 1`` of the input; the last row has no
    successor and is filled with zeros. The input is left unmodified.

    Args:
        input_sequences: 2-D array of integer sequences, one sequence per row.

    Returns:
        A new array with the same shape and dtype as ``input_sequences``.
    """
    #Shift array by one row to create targets
    #np.roll returns a copy, so the caller's array is never touched
    output_sequences = np.roll(input_sequences, -1, axis = 0)

    #Last item is all 0s because it is not used
    #(the roll wrapped the first row around into that slot)
    if output_sequences.shape[0] > 0:
        output_sequences[-1] = 0

    #The original version built this array but never returned it
    return output_sequences

In [38]:
#Embed one input sentence at a time into a matrix of sentence length by embed_dim
#Adds position embeddings to word matrix
class Embed_Inputs(keras.layers.Layer):
    """Word embedding plus learned position embedding.

    Args:
        total_words: Size of the word-index space (``input_dim`` of the word embedding).
        embed_dim: Width of both embeddings.
        max_sentence_length: Number of positions that get their own embedding vector.
    """
    def __init__(self, total_words, embed_dim, max_sentence_length):
        #Keras layers must run the base initialiser before sub-layers are attached
        super().__init__()
        #Kept on the instance because Forward needs it to build the position indices
        self.max_sentence_length = max_sentence_length
        self.embedding_layer = keras.layers.Embedding(input_dim = total_words, output_dim = embed_dim, mask_zero = True, input_length = max_sentence_length)
        #Position embeddings are an ordinary Embedding indexed by position;
        #keras.layers has no `position_embedding_layer`
        self.positions_layer = keras.layers.Embedding(input_dim = max_sentence_length, output_dim = embed_dim)

    def Forward(self, data):
        """Return word embeddings of ``data`` with the position embeddings added.

        The position term has one row per position 0..max_sentence_length-1 and is
        added by broadcasting, so ``data`` is presumably padded to
        ``max_sentence_length`` columns - TODO confirm against the caller.
        """
        positions = tf.range(self.max_sentence_length)
        self.embedded_words = self.embedding_layer(data) + self.positions_layer(positions)
        return self.embedded_words

    def call(self, data):
        """Keras entry point; delegates to Forward so the layer can be called directly."""
        return self.Forward(data)

In [40]:
class Transformer_Block(keras.layers.Layer):
    """Self-attention followed by a feed-forward network, each with dropout,
    a residual connection and layer normalisation.

    Args:
        num_heads: Number of attention heads.
        embed_dim: Width of the incoming embeddings; also used as ``key_dim`` and
            as the output width of the feed-forward network.
        ff_dim: Hidden width of the feed-forward network.
        dropout: Dropout rate applied after the attention and feed-forward stages.
    """
    def __init__(self, num_heads, embed_dim, ff_dim, dropout):
        #Keras layers must run the base initialiser before sub-layers are attached
        super().__init__()
        self.attention = keras.layers.MultiHeadAttention(num_heads = num_heads, key_dim = embed_dim)
        self.layer_norm1 = keras.layers.LayerNormalization(epsilon = 1e-6)
        self.feed_forward = keras.Sequential([keras.layers.Dense(ff_dim, activation = "relu"), keras.layers.Dense(embed_dim)])
        #epsilon has to be passed by keyword: the first positional argument of
        #LayerNormalization is `axis`, so LayerNormalization(1e-6) set the wrong option
        self.layer_norm2 = keras.layers.LayerNormalization(epsilon = 1e-6)
        #Forward uses one dropout layer per stage; the class is `Dropout`, not `dropout`
        self.dropout1 = keras.layers.Dropout(dropout)
        self.dropout2 = keras.layers.Dropout(dropout)

    def Forward(self, embedded_words, training = None):
        """Run the block on ``embedded_words``.

        Args:
            embedded_words: Tensor used as both query and value of the attention layer.
            training: Forwarded to the dropout layers as their ``training`` argument.

        Returns:
            Output of the second layer normalisation.
        """
        attention_output = self.attention(embedded_words, embedded_words)
        attention_output = self.dropout1(attention_output, training=training)
        #Residual connections from input to add/norm
        res1 = self.layer_norm1(embedded_words + attention_output)
        ffn_output = self.feed_forward(res1)
        ffn_output = self.dropout2(ffn_output, training=training)
        #Residual connections from add/norm to add/norm
        final_output = self.layer_norm2(ffn_output + res1)
        return final_output

    def call(self, embedded_words, training = None):
        """Keras entry point; delegates to Forward so the layer can be called directly."""
        return self.Forward(embedded_words, training)

In [53]:
class Encoder(keras.layers.Layer):
    """Input embeddings followed by one transformer block.

    Args:
        data: Accepted to keep the existing signature; not used by the constructor.
        training: Default training flag used by Forward when none is passed.
        total_words, embed_dim, max_sentence_length: Forwarded to ``Embed_Inputs``.
        num_heads, embed_dim, ff_dim, dropout: Forwarded to ``Transformer_Block``.
    """
    def __init__(self, data, training, total_words, embed_dim, max_sentence_length, num_heads, ff_dim, dropout):
        #Keras layers must run the base initialiser before sub-layers are attached
        super().__init__()
        #Forward previously read a bare `training` name that only existed as an
        #argument of __init__; keep the value on the instance instead
        self.default_training = training
        self.embeddings = Embed_Inputs(total_words, embed_dim, max_sentence_length)
        self.transformer_block = Transformer_Block(num_heads, embed_dim, ff_dim, dropout)

    def Forward(self, data, training = None):
        """Embed ``data`` and pass it through the transformer block.

        Args:
            data: Input handed to ``Embed_Inputs.Forward``.
            training: Optional override of the flag given to the constructor.

        Returns:
            Output of the transformer block.
        """
        if training is None:
            training = self.default_training
        embeddings_output = self.embeddings.Forward(data)
        transformer_block_output = self.transformer_block.Forward(embeddings_output, training)
        return transformer_block_output

In [42]:
class Decoder(keras.layers.Layer):
    """Work-in-progress decoder: runs the encoder and feeds its output through
    one more transformer block.

    The signature used to list ``num_heads`` twice, which is a SyntaxError, so
    the trailing duplicate was dropped.

    Args:
        num_heads, embed_dim, ff_dim, dropout: Forwarded to ``Encoder`` and ``Transformer_Block``.
        data: Default input used by Forward when none is passed.
        training: Default training flag used by Forward when none is passed.
        total_words, max_sentence_length: Forwarded to ``Encoder``.
    """
    def __init__(self, num_heads, embed_dim, ff_dim, dropout,  data, training, total_words, max_sentence_length):
        #Keras layers must run the base initialiser before sub-layers are attached
        super().__init__()
        #Forward previously read undefined globals `data` and `encoder`;
        #keep what it needs on the instance
        self.data = data
        self.default_training = training
        #TODO: Masked positional embeddings
        #TODO: Masked multihead attention
        self.encoder = Encoder(data, training, total_words, embed_dim, max_sentence_length, num_heads, ff_dim, dropout)
        self.transformer_block = Transformer_Block(num_heads, embed_dim, ff_dim, dropout)

    def Forward(self, data = None, training = None):
        """Encode ``data`` and pass the result through the transformer block.

        Args:
            data: Input for the encoder; defaults to the data given to the constructor.
            training: Training flag for the transformer block; defaults to the
                flag given to the constructor.

        Returns:
            Output of the decoder's transformer block.
        """
        if data is None:
            data = self.data
        if training is None:
            training = self.default_training
        encoder_output = self.encoder.Forward(data)
        #Pass through transformer block
        #NOTE(review): no causal mask or cross-attention yet, see the TODOs in __init__
        return self.transformer_block.Forward(encoder_output, training)
        #Pass through transformer block

In [43]:
#class Transformer():
#    def __init__():
        #Input Embeddings
        #Encoder
            #Multi headed attention
            #Feed forward
        #Decoder
            #Masked multi headed attention
            #Multi headed attention
            #Feed forward 
        #Feed forward
        #Softmax