# Project 4
## Students:
 - Bryson Gullett
 - Robert Schaffer
 - Matthew Dixson
 

In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow import optimizers
from tensorflow.keras import layers
import numpy as np
import re
import random as rand

In [None]:
# Print the installed TensorFlow version; you may want to upgrade to 2.10.0 if it is older.
print(tf.__version__)

### Please Use Markdown
> for markdown, see here: https://www.ibm.com/docs/en/watson-studio-local/1.2.3?topic=notebooks-markdown-jupyter-cheatsheet

## Task 1

In [None]:
class TransformerModel():
    """Builder for a small causally-masked Transformer next-token model.

    The constructor only stores hyperparameters; ``create_model`` assembles
    and compiles the Keras functional model.
    """

    def __init__(self, vocab_size, embed_dim=256, num_heads=2, num_blocks=1, ff_dim=256, maxlen=80, rate=0.1):
        """Store the hyperparameters.

        Args:
            vocab_size: Number of distinct tokens (size of the output softmax).
            embed_dim: Width of the token/position embeddings and of every
                residual stream in the transformer blocks.
            num_heads: Number of attention heads per block.
            num_blocks: Number of stacked transformer blocks.
            ff_dim: Hidden width of the feed-forward sub-layer.
            maxlen: Fixed input sequence length.
            rate: Dropout rate.
        """
        self.vocab_size = vocab_size
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.num_blocks = num_blocks
        self.ff_dim = ff_dim
        self.maxlen = maxlen
        self.rate = rate

    def TransformerBlock(self, inputs):
        """Apply one transformer block to ``inputs`` and return the result.

        Layout: causal multi-head self-attention -> residual add -> layer
        norm -> two dense layers -> dropout -> residual add -> layer norm.

        Args:
            inputs: Symbolic tensor whose last dimension is ``embed_dim``
                (the output of ``EmbeddingLayer`` or of a previous block).

        Returns:
            Symbolic tensor with the same shape as ``inputs``.
        """
        # key_dim is the per-head projection size. It was previously set to
        # num_heads (i.e. 2 by default), which looks like a typo; embed_dim
        # follows the Keras miniature-GPT example.
        attention = layers.MultiHeadAttention(
            num_heads=self.num_heads,
            key_dim=self.embed_dim,
            dropout=self.rate,
        )(inputs, inputs, use_causal_mask=True)
        attention_residual = layers.Add()([attention, inputs])
        normed_attention = layers.LayerNormalization(epsilon=1e-6)(attention_residual)

        hidden = layers.Dense(self.ff_dim, activation='relu')(normed_attention)
        # The second dense layer must project back to embed_dim, otherwise the
        # residual Add below fails whenever ff_dim != embed_dim.
        # NOTE(review): the ReLU on this projection is kept from the original;
        # confirm against the assignment figure whether it should be linear.
        projected = layers.Dense(self.embed_dim, activation='relu')(hidden)
        projected = layers.Dropout(self.rate)(projected)
        ff_residual = layers.Add()([projected, normed_attention])

        return layers.LayerNormalization(epsilon=1e-6)(ff_residual)

    def EmbeddingLayer(self, inputs):
        """Return the sum of token embeddings and position embeddings.

        Args:
            inputs: Symbolic tensor of token ids with ``maxlen`` columns.

        Returns:
            Symbolic tensor with one ``embed_dim`` vector per input position.
        """
        token_embeddings = layers.Embedding(self.vocab_size, self.embed_dim)(inputs)

        # Build the position ids from the symbolic input so that the position
        # Embedding is a tracked, trainable layer of the functional model and
        # its output has an explicit batch axis. Calling the Embedding on a
        # bare tf.range tensor produced a constant outside the model graph.
        maxlen = self.maxlen
        position_ids = layers.Lambda(
            lambda ids: tf.zeros_like(ids, dtype=tf.int32) + tf.range(maxlen)
        )(inputs)
        position_embeddings = layers.Embedding(self.maxlen, self.embed_dim)(position_ids)

        return layers.Add()([token_embeddings, position_embeddings])

    def create_model(self):
        """Assemble and compile the full model.

        Stacks the embedding layer, ``num_blocks`` transformer blocks and a
        per-position softmax over the vocabulary, then compiles with Adam
        (learning rate 1e-4) and sparse categorical cross-entropy.

        Returns:
            The compiled ``keras.Model``.
        """
        inputs = keras.Input(shape=(self.maxlen,))
        hidden = self.EmbeddingLayer(inputs)
        for _ in range(self.num_blocks):
            hidden = self.TransformerBlock(hidden)
        outputs = layers.Dense(self.vocab_size, activation='softmax')(hidden)

        model = keras.Model(inputs=inputs, outputs=outputs)
        # Pass a loss *instance*: handing compile() the class itself makes
        # Keras call the constructor with (y_true, y_pred) at training time.
        # The output layer already applies softmax, so the default
        # from_logits=False is the matching setting.
        model.compile(
            loss=keras.losses.SparseCategoricalCrossentropy(),
            optimizer=optimizers.Adam(learning_rate=0.0001),
        )
        return model
    
# Smoke test: build a model for a 1000-token vocabulary and show its layers.
# Model.summary() prints the table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
my_model = TransformerModel(1000)
my_model.create_model().summary()

## Task 2

In [None]:
class DataSet():
    """Load a text file and turn it into fixed-length next-word sequences.

    Typical use is ``X, Y, vocab = DataSet(path, seq_len).create_dataset()``.
    """

    def __init__(self, filename, len):
        """Read the whole file into ``self.text``.

        Args:
            filename: Path of the text file to load.
            len: Number of tokens per training sequence. (The name shadows
                the builtin inside this method only; it is kept so existing
                callers keep working.)
        """
        self.len = len
        # Decode explicitly as UTF-8 instead of the platform default, which
        # turns curly quotes into sequences such as "â€™" on some systems.
        # Undecodable bytes become U+FFFD and are stripped as punctuation
        # by prep_text().
        with open(filename, encoding='utf-8', errors='replace') as fin:
            self.text = fin.read()

    def prep_text(self):
        """Normalise ``self.text`` in place.

        Lowercases the text, replaces punctuation with spaces, and collapses
        every run of non-newline whitespace into a single space. Newlines
        are left untouched.
        """
        cleaned = self.text.lower()
        cleaned = re.sub(r'[^\w\s]', ' ', cleaned)
        # Left-over from mis-decoded input; kept so previously loaded text
        # is cleaned the same way.
        cleaned = re.sub(r'â', '', cleaned)
        # [^\S\n] is "whitespace except newline": spaces, tabs, \r, ...
        cleaned = re.sub(r'[^\S\n]+', ' ', cleaned)
        self.text = cleaned

    def tokenize_text(self):
        """Split ``self.text`` into words and build the vocabulary.

        After this call ``self.text`` is a list of words, ``self.vocab`` is
        the sorted array of unique words with ``'EOT'`` appended as the last
        entry, ``self.vocab_nums`` lists the ids ``0..len(vocab)-1`` and
        ``self.vocab_dict`` maps each word to its id.
        """
        self.text = self.text.split()
        self.vocab = np.append(np.unique(self.text), ['EOT'])
        self.vocab_nums = list(range(len(self.vocab)))
        self.vocab_dict = {str(word): idx for idx, word in enumerate(self.vocab)}

    def create_dataset(self):
        """Build the training sequences.

        The token ids are cut into consecutive chunks of ``self.len`` tokens
        (``X``); ``Y`` holds the same chunks shifted one token to the right,
        i.e. the next word for every position. Positions past the end of the
        text are filled with the ``'EOT'`` id.

        Returns:
            Tuple ``(X, Y, vocab)`` where ``X`` and ``Y`` are lists of lists
            of ids and ``vocab`` is the vocabulary array.
        """
        # Only clean/tokenize while the text is still a raw string, so that
        # calling this method twice does not fail on the already-split list.
        if isinstance(self.text, str):
            self.prep_text()
            self.tokenize_text()

        eot_id = self.vocab_dict['EOT']
        ids = [self.vocab_dict[word] for word in self.text]
        seq_len = self.len

        # Ceiling division: a token count that is an exact multiple of
        # seq_len must not produce an extra sequence made only of padding.
        n_sequences = -(-len(ids) // seq_len)
        total = n_sequences * seq_len
        # One extra slot because Y looks one token ahead of X.
        padded = ids + [eot_id] * (total + 1 - len(ids))

        starts = range(0, total, seq_len)
        X = [padded[s:s + seq_len] for s in starts]
        Y = [padded[s + 1:s + 1 + seq_len] for s in starts]
        return X, Y, self.vocab

## Task 3

In [None]:
class GenerateText():
    """Produce text from a trained next-word model, or at random as a baseline."""

    def __init__(self, model, vocab):
        """Keep references to the model and its vocabulary.

        Args:
            model: Object with a ``predict`` method and an ``input_shape``
                attribute, e.g. a compiled Keras model.
                # assumes predict returns (batch, positions, vocab) scores -- TODO confirm
            vocab: Sequence of words where index ``i`` is the word with id ``i``.
        """
        self.model = model
        self.vocab = vocab

    def generate_text(self, start_string, num_generate=100):
        """Greedily extend ``start_string`` by up to ``num_generate`` words.

        The prompt is split on whitespace and mapped to ids (words missing
        from the vocabulary are skipped). Each step feeds the most recent
        tokens to the model, right-padded with the ``'EOT'`` id to the
        model's input length, and appends the highest-scoring word for the
        last real position. Generation stops early once ``'EOT'`` is
        produced (the ``'EOT'`` word is included in the output).

        Args:
            start_string: Prompt text.
            num_generate: Maximum number of words to generate.

        Returns:
            ``start_string`` followed by the generated words, space separated.
        """
        word_to_id = {str(word): idx for idx, word in enumerate(self.vocab)}
        pad_id = word_to_id.get('EOT', 0)
        # Second entry of input_shape is taken as the fixed sequence length;
        # None means the model accepts any length, so no padding is applied.
        maxlen = self.model.input_shape[1]

        token_ids = [word_to_id[w] for w in start_string.split() if w in word_to_id]
        if not token_ids:
            # Nothing usable in the prompt: seed with the padding token so
            # there is at least one position to predict from.
            token_ids = [pad_id]

        pieces = [start_string]
        for _ in range(num_generate):
            window = token_ids if maxlen is None else token_ids[-maxlen:]
            last_position = len(window) - 1
            padding = [] if maxlen is None else [pad_id] * (maxlen - len(window))
            batch = np.array([window + padding])

            scores = self.model.predict(batch, verbose=0)
            # Only the scores at the last real token describe the next word.
            next_id = int(np.argmax(scores[0][last_position]))
            next_word = str(self.vocab[next_id])

            token_ids.append(next_id)
            pieces.append(next_word)
            if next_word == 'EOT':
                break
        return ' '.join(pieces)

    def generate_random_text(self, num_generate=100):
        """Return up to ``num_generate`` uniformly random vocabulary words.

        Every word, including a terminating ``'EOT'``, is followed by a
        single space. Drawing ``'EOT'`` ends the text early.
        """
        chosen = []
        for _ in range(num_generate):
            word = self.vocab[rand.randrange(len(self.vocab))]
            chosen.append(word)
            if word == 'EOT':
                break
        return ''.join(f'{word} ' for word in chosen)

## Task 4: Model Training and Testing

In [None]:
#Train the model while periodically generating text to show progress
def train_model(model, vocab, x, y, epochs=50):
    """Placeholder for the Task 4 training loop.

    No training happens yet: ``vocab``, ``x``, ``y`` and ``epochs`` are
    accepted but unused, and ``model`` is handed back unchanged.

    Args:
        model: The model to train.
        vocab: Vocabulary (currently unused).
        x: Input sequences (currently unused).
        y: Target sequences (currently unused).
        epochs: Number of training epochs (currently unused).

    Returns:
        The same ``model`` object that was passed in.
    """
    # TODO: fit the model on (x, y) and periodically print generated text.
    return model


# Report

## Introduction

## Results

## Conclusion

## How to Run Code

Please include any special libraries and list your tf version here.