<a href="https://colab.research.google.com/github/ForestPearson/CS410-510-NLP-project/blob/Forest-Branch/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import tensorflow as tf
import os
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import StringLookup
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Embedding

# --- Configuration -----------------------------------------------------------
BATCH_SIZE = 64       # number of sequences per training batch
BUFFER_SIZE = 10000   # shuffle buffer size for the training dataset
EPOCHS = 30           # number of passes over the dataset in model.fit
DIM = 256             # embedding dimension handed to MyModel
RNN = 1024            # number of GRU units handed to MyModel

# Corpus used for training: "All's Well That Ends Well".
# Alternative corpus (tiny Shakespeare):
#   https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt
path = tf.keras.utils.get_file(
    fname='alls_well_that_ends_well.txt',
    origin='https://raw.githubusercontent.com/ForestPearson/CS410-510-NLP-project/Forest-Branch/data/alls_well_that_ends_well.txt',
)

In [None]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# (the bare open(...).read() form left it open until garbage collection).
with open(path, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
print("Length:", len(text))
print(text[:500])

# Sorted list of the distinct characters that occur in the corpus.
vocab = sorted(set(text))

Length: 129859
ACT I

SCENE I. Rousillon. The COUNT's palace.

Enter BERTRAM, the COUNTESS of Rousillon, HELENA, and LAFEU, all in black
COUNTESS
In delivering my son from me, I bury a second husband.
BERTRAM
And I in going, madam, weep o'er my father's death
anew: but I must attend his majesty's command, to
whom I am now in ward, evermore in subjection.
LAFEU
You shall find of the king a husband, madam; you,
sir, a father: he that so generally is at all times
good must of necessity hold his virtue to you; who


In [None]:
# Forward lookup: character -> integer id, built from the corpus vocabulary.
ids_from_chars = StringLookup(vocabulary=list(vocab), mask_token=None)

# Inverse lookup: integer id -> character, sharing the forward layer's vocabulary.
chars_from_ids = StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None)

# Vocabulary size as reported by the lookup layer; used to size the model.
# NOTE(review): this may differ from len(vocab) if the layer adds an
# out-of-vocabulary entry -- confirm against the StringLookup docs.
vocabSize = len(ids_from_chars.get_vocabulary())

# Round-trip demo: split two sample strings into characters, map them to ids,
# then map the ids back to characters.
example_texts = ['abcdefg', 'xyz']
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
ids = ids_from_chars(chars)
chars = chars_from_ids(ids)


In [None]:
def text_from_ids(ids):
  """Convert ids to characters via chars_from_ids and join them along the last axis."""
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [None]:
# Number of characters in each training input.
seq_length = 100

# Encode the whole corpus as character ids.
corpus_chars = tf.strings.unicode_split(text, 'UTF-8')
all_ids = ids_from_chars(corpus_chars)

# Dataset that yields the ids one element at a time.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

# Group the ids into chunks of seq_length + 1 so that each chunk can later be
# split into an input and a one-step-shifted target; a short trailing chunk is
# dropped.
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

In [None]:
# Build (input, target) training pairs from each sequence.
def split_input_target(sequence):
    """Return (input, target) slices of `sequence`, offset by one position.

    The input is every element except the last; the target is every element
    except the first, so target[i] is the element that follows input[i].
    """
    return sequence[:-1], sequence[1:]

# Turn every chunk into an (input, target) pair.
dataset = sequences.map(split_input_target)

# Shuffle, group into fixed-size batches (dropping a short final batch), and
# prefetch so input preparation can overlap with training.
# tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias.
dataset = (
    dataset
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    .prefetch(tf.data.AUTOTUNE))

dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [None]:
class MyModel(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense.

  The Dense layer has `vocab_size` units and no activation, so the model
  outputs unnormalised scores (logits) over the vocabulary for every position.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    """Create the three layers.

    Args:
      vocab_size: number of distinct ids; sizes the embedding table and the
        output layer.
      embedding_dim: width of the embedding vectors.
      rnn_units: number of units in the GRU.
    """
    # The base constructor takes no positional arguments here; passing `self`
    # explicitly (as the code previously did) hands it a stray extra argument.
    super().__init__()
    self.embedding = Embedding(vocab_size, embedding_dim)
    # return_sequences gives an output per position; return_state additionally
    # returns the final state so generation can carry it between calls.
    self.gru = GRU(rnn_units, return_sequences=True, return_state=True)
    self.dense = Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of id sequences.

    Args:
      inputs: integer ids fed to the embedding layer
        (presumably shaped (batch, sequence) -- confirm against the caller).
      states: optional GRU state to start from; when None the GRU's own
        initial state is used.
      return_state: when True, also return the GRU state after the last step.
      training: forwarded to every layer.

    Returns:
      The Dense layer's output, or a `(output, states)` tuple when
      `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      # NOTE(review): get_initial_state's expected argument differs between
      # Keras versions -- confirm against the installed version.
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
# Instantiate the network with the vocabulary size from the lookup layer and
# the embedding / GRU widths from the configuration constants.
model = MyModel(vocab_size=vocabSize, embedding_dim=DIM, rnn_units=RNN)

In [None]:
# Adam optimiser with sparse categorical cross-entropy on integer targets.
# from_logits=True because the model's final Dense layer has no activation.
model.compile(optimizer='adam', loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True))

# Directory where the checkpoints will be saved.
# Named checkpoint_dir (not `dir`) so the builtin dir() is not shadowed for the
# rest of the notebook session.
checkpoint_dir = './data/epochs'
# Checkpoint file name pattern containing an {epoch} placeholder.
fileName = os.path.join(checkpoint_dir, "ckpt_{epoch}")
# Callback that saves only the model weights during training.
results = tf.keras.callbacks.ModelCheckpoint(filepath=fileName, save_weights_only=True)

In [None]:
# Train for EPOCHS passes over the batched dataset with the checkpoint
# callback attached; keep the returned History object.
history = model.fit(
    dataset,
    epochs=EPOCHS,
    callbacks=[results],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
class Generate(tf.keras.Model):
  """One-step text sampler wrapped around a trained next-character model.

  Note that `predict` here takes strings and returns sampled characters; it
  replaces the `predict` method inherited from `tf.keras.Model`.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    """Store the model and lookups and precompute the '[UNK]' mask.

    Args:
      model: callable accepting `inputs`, `states` and `return_state` and
        returning `(logits, states)`.
      chars_from_ids: lookup mapping ids to characters.
      ids_from_chars: lookup mapping characters to ids.
      temperature: divisor applied to the logits before sampling.
    """
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive mask over the vocabulary: -inf at the id of '[UNK]' and zero
    # everywhere else, so that '[UNK]' can never be sampled.
    vocab_len = len(ids_from_chars.get_vocabulary())
    unk_indices = self.ids_from_chars(['[UNK]'])[:, None]
    neg_inf = -float('inf')
    self.prediction_mask = tf.sparse.to_dense(
        tf.SparseTensor(
            indices=unk_indices,
            values=[neg_inf] * len(unk_indices),
            dense_shape=[vocab_len]))

  def predict(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: batch of UTF-8 strings.
      states: model state from the previous call, or None to start fresh.

    Returns:
      `(predicted_chars, states)`: one sampled character per input string and
      the model state to pass to the next call.
    """
    # Strings -> characters -> ids, then convert the result to a dense tensor.
    ids = self.ids_from_chars(tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    logits, states = self.model(inputs=ids, states=states, return_state=True)

    # Keep only the last position, apply the temperature, then mask '[UNK]'.
    last_logits = logits[:, -1, :] / self.temperature + self.prediction_mask

    # Draw one id per batch row and drop the trailing sample axis.
    sampled_ids = tf.squeeze(
        tf.random.categorical(last_logits, num_samples=1), axis=-1)

    return self.chars_from_ids(sampled_ids), states

In [None]:
# Wrap the trained model in the one-step sampler.
Generator = Generate(model, chars_from_ids, ids_from_chars)

# Start from a seed string with no carried-over model state.
states = None
seed = tf.constant(['COUNTESS'])
result = [seed]

# Repeatedly sample one character, feeding the sampled character and the
# returned state back in as the next input.
for step in range(1000):
  seed, states = Generator.predict(seed, states=states)
  result.append(seed)

# Concatenate the seed and all sampled characters, then print the first
# (only) batch entry followed by a separator line.
result = tf.strings.join(result)
generated_text = result[0].numpy().decode('utf-8')
print(generated_text, '\n\n' + '_'*80)

COUNTESS
Ah, what sharp stings are in her mildest words!
Rinaldo, you did never lacquair have, you that
dreament of his life and in the highester for me.
BERTRAM
Nay, by your leave hope the, Sely amm'daits,
That ring was herelf ottended; this draves with viltue
Why, the trueps if our count to bear her quickl.
Enter PAROLLES

PAROLLES
[To BERTRAM] These things shall not hear.
Now will I charge you in the band of truth,
When you are dead, 'e, madam: I will stay at your swars.
BERTRAM
Your brother he shall go along with me.
Second Lord
As't please your lordship will entreed you, sir: a truth, as he does think
He had not my virginity.
KING
What say'st thou to her?
BERTRAM
She's impudent, my lord, in mine own direct knowledge,
without any malice, but to speak of him as my
kindly honour. But must by thysil tainga no princ, I
love my son; but of the finstand nature
With his induacy his majeisted of your dumbse inabibet! I do beseech you?
Widow
At the Saint France is a virgin: virginity freely