# In [None]:
import tensorflow as tf
from tensorflow.keras.layers.experimental import preprocessing

import numpy as np
import os
import time
# Cap TensorFlow's memory use on the first GPU (if there is one) at 4096 MB.
# This is best-effort: when no GPU is present, or the limit cannot be applied,
# the script carries on with TensorFlow's default device settings.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
  try:
    # The API takes a *list* of LogicalDeviceConfiguration objects (one entry
    # per logical device to create on the physical GPU), not a bare object.
    tf.config.set_logical_device_configuration(
        physical_devices[0],
        [tf.config.LogicalDeviceConfiguration(memory_limit=4096)])
  except (RuntimeError, ValueError) as err:
    # RuntimeError: logical devices cannot be modified once initialized.
    # ValueError: invalid device / configuration.
    print('Could not apply the GPU memory limit: {}'.format(err))
# Fetch the training corpus; get_file returns the local path of the file.
path_to_file = tf.keras.utils.get_file(
    'austen.txt',
    'https://raw.githubusercontent.com/byui-cse/cse450-course/master/data/austen/austen.txt')

# Alternative local corpora used during experimentation:
# path_to_file = r'D:\Coding\Eye-of-the-World_-The-Robert-Jordan.txt'
# path_to_file = r'D:\Coding\wotCombined.txt'

# Read the raw bytes and decode them explicitly as UTF-8. The context manager
# guarantees the file handle is closed even if reading or decoding fails.
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')
# print('Length of text: {} characters'.format(len(text)))
# print(text[:200])
# The vocabulary of the network is the sorted set of distinct characters that
# occur in the corpus. Some characters may be worth removing from this set as
# the network is refined.
vocab = sorted(set(text))
print('{} unique characters'.format(len(vocab)))

# Forward lookup: character -> integer id.
ids_from_chars = preprocessing.StringLookup(vocabulary=list(vocab))

# Inverse lookup: integer id -> character. It is built from the forward
# layer's own vocabulary so both directions agree on the id assignment.
chars_from_ids = preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True)

def text_from_ids(ids):
  """Turn a sequence of character ids back into a Python string.

  The ids are mapped to characters with `chars_from_ids`, joined along the
  last axis and decoded as UTF-8. `.decode` is called directly on the NumPy
  result, so this presumably expects a 1-D `ids` (scalar joined string) --
  TODO confirm against callers.
  """
  chars = chars_from_ids(ids)
  joined = tf.strings.reduce_join(chars, axis=-1)
  return joined.numpy().decode("utf-8")
# Length of each training example; every chunk below holds one extra id.
seq_length = 100

# Split the corpus into single characters and map each one to its id.
text_chars = tf.strings.unicode_split(text, 'UTF-8')
all_ids = ids_from_chars(text_chars)

# Stream the ids and cut them into chunks of seq_length + 1, dropping the
# final chunk if it is shorter than that.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)
sequences = ids_dataset.batch(seq_length + 1, drop_remainder=True)

def split_input_target(sequence):
    """Build an (input, target) pair from one sequence.

    The input is the sequence without its last element and the target is the
    sequence without its first element, i.e. the input shifted by one step.
    """
    return sequence[:-1], sequence[1:]

BATCH_SIZE = 64
BUFFER_SIZE = 10000

# Turn every sequence into an input -> target pair.
dataset = sequences.map(split_input_target)

# Randomize the order of the sequences so the network does not simply see the
# books in the order they were written (shuffle buffer of BUFFER_SIZE items).
dataset = dataset.shuffle(BUFFER_SIZE)

# Group the pairs into fixed-size batches, dropping a short final batch.
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

# Stream the data to the network bit by bit rather than holding it all in
# memory, letting tf.data decide how much to prefetch in the background.
dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

# Create our custom model. Given a sequence of characters, this
# model's job is to predict what character should come next.
class AustenTextModel(tf.keras.Model):
  """Character-level language model: Embedding -> LSTM -> Dense.

  Args:
    vocab_size: Number of distinct token ids; used as the embedding input
      size and as the width of the output (logits) layer.
    embedding_dim: Size of each embedding vector.
    rnn_units: Number of units in the LSTM layer.
    name: Optional Keras model name.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units, name=None):
    super().__init__(name=name)

    # 1. Embedding layer: encodes each vocabulary id as a dense vector.
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    # 2. Recurrent layer that provides the "memory" of the network. An LSTM
    #    is used here; for a discussion of GRU vs. LSTM see
    #    https://datascience.stackexchange.com/questions/14581/when-to-use-gru-over-lstm
    #    return_sequences gives an output for every time step, return_state
    #    additionally returns the final hidden and cell states.
    self.lstm = tf.keras.layers.LSTM(
        rnn_units, return_sequences=True, return_state=True)

    # 3. Output layer: one value per vocabulary entry. No activation is set,
    #    so these are raw scores rather than probabilities.
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run one forward pass, feeding each layer's output into the next.

    Args:
      inputs: Token ids to feed to the embedding layer.
      states: Optional LSTM state passed as `initial_state`; when None the
        LSTM layer's own initial state is used.
      return_state: When True, also return the LSTM state after this pass.
      training: Forwarded to every layer.

    Returns:
      The dense layer's output, or `(output, (state_h, state_c))` when
      `return_state` is True.
    """
    # 1. Embed the input ids.
    x = self.embedding(inputs, training=training)

    # 2. Without a carried-over state, start from the LSTM's initial state.
    if states is None:
      states = self.lstm.get_initial_state(x)

    # 3. Run the LSTM; it returns the per-step outputs plus the final hidden
    #    state and cell state.
    x, state_h, state_c = self.lstm(
        x, initial_state=states, training=training)

    # 4. Project the LSTM outputs onto the vocabulary.
    x = self.dense(x, training=training)

    # 5. Return the results, with the new state if it was requested.
    if return_state:
      return x, (state_h, state_c)
    return x



  # def get_config(self):
  #       config = super(AustenTextModel, self).get_config()
  #       config.update({
  #           'vocab_size': self.vocab_size,
  #           'embedding_dim': self.embedding_dim,
  #           'rnn_units': self.rnn_units
  #       })
  #       return config

# Model hyperparameters.
embedding_dim = 256
rnn_units = 2048
vocab_size = len(ids_from_chars.get_vocabulary())

model = AustenTextModel(
    vocab_size,
    embedding_dim,
    rnn_units,
    name='austen_text_model')

# Sanity check: push a single batch through the model and print the shape of
# its output. Calling the model once also creates its weights, so this step
# can take a moment.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

# The model outputs raw logits, hence from_logits=True.
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss)

history = model.fit(dataset, epochs=25)

class OneStep(tf.keras.Model):
  """Wraps a trained model so text can be sampled one character at a time.

  Args:
    model: Model called as `model(inputs=..., states=..., return_state=True)`
      and returning `(logits, states)`.
    chars_from_ids: Lookup layer mapping token ids back to characters.
    ids_from_chars: Lookup layer mapping characters to token ids.
    temperature: Divisor applied to the logits before sampling.
  """

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Create a mask to prevent "" or "[UNK]" from being generated.
    skip_ids = self.ids_from_chars(['', '[UNK]'])[:, None]
    sparse_mask = tf.SparseTensor(
        # Put a -inf at each bad index.
        values=[-float('inf')] * len(skip_ids),
        indices=skip_ids,
        # Match the shape to the vocabulary.
        dense_shape=[len(self.ids_from_chars.get_vocabulary())])
    # NOTE(review): validate_indices=False presumably because the two skipped
    # tokens may map to the same or unordered ids -- confirm.
    self.prediction_mask = tf.sparse.to_dense(
        sparse_mask, validate_indices=False)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Args:
      inputs: String tensor holding the text generated so far (or the seed).
      states: Model state returned by the previous step, or None to start.

    Returns:
      A tuple `(next_chars, states)`: the sampled characters and the model
      state to pass into the following step.
    """
    # Convert strings to token IDs.
    input_chars = tf.strings.unicode_split(inputs, 'UTF-8')
    input_ids = self.ids_from_chars(input_chars).to_tensor()

    # Run the model.
    # predicted_logits.shape is [batch, char, next_char_logits]
    predicted_logits, states = self.model(
        inputs=input_ids, states=states, return_state=True)

    # Only use the last prediction, scaled by the sampling temperature.
    predicted_logits = predicted_logits[:, -1, :]
    predicted_logits = predicted_logits / self.temperature

    # Apply the prediction mask: prevent "" or "[UNK]" from being generated.
    predicted_logits = predicted_logits + self.prediction_mask

    # Sample the output logits to generate token IDs.
    predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
    predicted_ids = tf.squeeze(predicted_ids, axis=-1)

    # Return the characters and model state. Use the lookup layer handed to
    # this instance rather than whatever module-level layer happens to exist.
    return self.chars_from_ids(predicted_ids), states
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

# Generate 1000 characters of text, starting from a seed sentence. Each step
# feeds the previously sampled character and the carried-over model state
# back into the model.
states = None
next_char = tf.constant(['The world seemed like such a peaceful place until the magic tree was discovered in London.'])
result = [next_char]

for _ in range(1000):
  next_char, states = one_step_model.generate_one_step(
      next_char, states=states)
  result.append(next_char)

# Concatenate the seed and every sampled character into one string tensor.
result = tf.strings.join(result)

# Print the results formatted.
print(result[0].numpy().decode('utf-8'))
# print(vocab_size)
# print(embedding_dim)
# inputs = (1,None)
# model.build(inputs)
# model.save('D:\Coding\ML\RNN\\1_epoch_LSTM_model',save_format="tf")

# print('before')
# new_model = tf.keras.models.load_model('D:\Coding\ML\RNN\\1_epoch_LSTM_model', custom_objects={'austen_text_model': AustenTextModel})
# # from tensorflow.keras.layers import change_model
# for input_example_batch, target_example_batch in dataset.take(1):
#     example_batch_predictions = model(input_example_batch)
#     print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")
# loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# new_model.compile(optimizer='adam', loss=loss)

# history = new_model.fit(dataset, epochs=110)
# # new_model = change_model(new_model,new_input_shape=(1,None))
# print('after')
# # new_model.compile(optimizer='adam', loss=loss)
# # history = new_model.fit(dataset, epochs=1, steps_per_epoch = 5)
# # Load the state of the old model
# print("after after")
# one_step_model = OneStep(new_model, chars_from_ids, ids_from_chars)
# # Now, let's generate a 1000 character chapter by giving our model "Chapter 1"
# # as its starting text

# states = None
# next_char = tf.constant(['Chapter 1'])
# result = [next_char]

# for n in range(1000):
#   next_char, states = one_step_model.generate_one_step(next_char, states=states)
#   result.append(next_char)

# result = tf.strings.join(result)