In [None]:
#%tensorflow_version 2.x  # this line is not required unless you are in a notebook
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

In [4]:
from google.colab import drive
drive.mount('/content/gdrive')

ModuleNotFoundError: No module named 'google.colab'

In [2]:
#path_to_file = 'FatherTime_Dialog_clean.txt'
#Length of text: 16995 characters
path_to_file = 'cleaner_script.txt'
#Length of text: 319880 characters

In [3]:
# Read, then decode for py2 compat.
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

Length of text: 319880 characters


In [4]:
#sort all the words and number them 0-'n' based on this order, if we had to rebuild this sequence, it'll be the same
vocab = sorted(set(text))
char2idx = {u:i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)

def text_to_int(text):
  return np.array([char2idx[c] for c in text])

def int_to_text(ints):
  try:
    ints = ints.numpy()
  except:
    pass
  return ''.join(idx2char[ints])

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim,
                              batch_input_shape=[batch_size, None]),
    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                        recurrent_initializer='glorot_uniform'),
    tf.keras.layers.Dense(vocab_size)
  ])
  return model

def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

In [5]:
# lets look at how part of our text is encoded
text_as_int = text_to_int(text)
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))
print(int_to_text(text_as_int[:13]))

Text: A Manikin mus
Encoded: [22  2 34 47 60 55 57 55 60  2 59 67 65]
A Manikin mus


we need to split our text data from above into many shorter sequences that we can pass to the model as training examples.
The training examples we will prepapre will use a seq_length sequence as input and a seq_length sequence as the output where that sequence is the original sequence shifted one letter to the right. For example:
input: Hell | output: ello

In [6]:
seq_length = 70  # length of sequence for a training example default is 100
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [7]:
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)
#when we run out of letters, we drop the rest

In [8]:
def split_input_target(chunk):  # for the example: hello
    input_text = chunk[:-1]  # hell
    target_text = chunk[1:]  # ello
    return input_text, target_text  # hell, ello

def split_input_left(chunk):  # for the example: hello
    #input_text = chunk[1:]  # ello
    #target_text = chunk[:-1]  # hell
    #LSTM doesn't flow backwards well, so we'll train our model with reversed text
    revChunk = tf.reverse(chunk, [-1])
    input_text = revChunk[:-1] #olle
    target_text = revChunk[1:] #lleh
    return input_text, target_text  # olle, lleh

dataset = sequences.map(split_input_target)  # we use map to apply the above function to every entry
dataset_left = sequences.map(split_input_left)  # we use map to apply the above function to every entry

In [149]:
#text to right
for x, y in dataset.take(147):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
A Manikin must have stolen it and fled, in hopes of returning to the V

OUTPUT
 Manikin must have stolen it and fled, in hopes of returning to the Vo


EXAMPLE

INPUT
rtex World. 
A big group of Manikins went downstairs. 
A bunch of Mant

OUTPUT
tex World. 
A big group of Manikins went downstairs. 
A bunch of Mantr


EXAMPLE

INPUT
a demons came through here on their way to fight Nihilo. 
A buncha Man

OUTPUT
 demons came through here on their way to fight Nihilo. 
A buncha Mani


EXAMPLE

INPUT
kins just walked right in there! 
A buncha people were killed. You cou

OUTPUT
ins just walked right in there! 
A buncha people were killed. You coul


EXAMPLE

INPUT
d hear the sirens wailing all night long. 
A ceremony is about to star

OUTPUT
 hear the sirens wailing all night long. 
A ceremony is about to start


EXAMPLE

INPUT
. It seems very fishy indeed 
A ceremony is being prepared. It seems f

OUTPUT
 It seems very fishy indeed 
A ceremony is being prepared. It seems f



EXAMPLE

INPUT
what it's gonna be like when it does get reborn 
And I mean, mop the f

OUTPUT
hat it's gonna be like when it does get reborn 
And I mean, mop the fl


EXAMPLE

INPUT
oor!! 
And I opened the way without anybody's help. 
And I was right L

OUTPUT
or!! 
And I opened the way without anybody's help. 
And I was right Lo


EXAMPLE

INPUT
oks like I hit the jackpot. 
And Kagutsuchi, the light that watches ov

OUTPUT
ks like I hit the jackpot. 
And Kagutsuchi, the light that watches ove


EXAMPLE

INPUT
r the Vortex World as well as creation 
And all those that we called c

OUTPUT
 the Vortex World as well as creation 
And all those that we called co


EXAMPLE

INPUT
mrades have deserted us. 
And by deciphering the symbols on it, 
And h

OUTPUT
rades have deserted us. 
And by deciphering the symbols on it, 
And he


EXAMPLE

INPUT
's got lots of it, that's for sure. 
And here I was wondering what kin

OUTPUT
s got lots of it, that's for sure. 
And here I was wondering what kin

In [148]:
#text to left
for x, y in dataset_left.take(147):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
oV eht ot gninruter fo sepoh ni ,delf dna ti nelots evah tsum nikinaM 

OUTPUT
V eht ot gninruter fo sepoh ni ,delf dna ti nelots evah tsum nikinaM A


EXAMPLE

INPUT
rtnaM fo hcnub A
 .sriatsnwod tnew snikinaM fo puorg gib A
 .dlroW xet

OUTPUT
tnaM fo hcnub A
 .sriatsnwod tnew snikinaM fo puorg gib A
 .dlroW xetr


EXAMPLE

INPUT
inaM ahcnub A
 .olihiN thgif ot yaw rieht no ereh hguorht emac snomed 

OUTPUT
naM ahcnub A
 .olihiN thgif ot yaw rieht no ereh hguorht emac snomed a


EXAMPLE

INPUT
luoc uoY .dellik erew elpoep ahcnub A
 !ereht ni thgir deklaw tsuj sni

OUTPUT
uoc uoY .dellik erew elpoep ahcnub A
 !ereht ni thgir deklaw tsuj snik


EXAMPLE

INPUT
trats ot tuoba si ynomerec A
 .gnol thgin lla gniliaw sneris eht raeh 

OUTPUT
rats ot tuoba si ynomerec A
 .gnol thgin lla gniliaw sneris eht raeh d


EXAMPLE

INPUT
if smees tI .deraperp gnieb si ynomerec A
 deedni yhsif yrev smees tI 

OUTPUT
f smees tI .deraperp gnieb si ynomerec A
 deedni yhsif yrev smees tI 



EXAMPLE

INPUT
nA
 ytilaer a tolp sih ekam ot elba saw eh nosaer eht dnA
 .ssorc a ek

OUTPUT
A
 ytilaer a tolp sih ekam ot elba saw eh nosaer eht dnA
 .ssorc a eki


EXAMPLE

INPUT
rb eht no sthgil esoht dnA
 .kaeps ew sa neve ,no segar llits raw eht 

OUTPUT
b eht no sthgil esoht dnA
 .kaeps ew sa neve ,no segar llits raw eht d


EXAMPLE

INPUT
iknam fo epocs eht dnoyeb dehcaer eh ,yletamitlu dnA
 .seixiP era segd

OUTPUT
knam fo epocs eht dnoyeb dehcaer eh ,yletamitlu dnA
 .seixiP era segdi


EXAMPLE

INPUT
eurt semoc hsiw sih nehw dnA
 .dlrow eht fo noitaerc eht dettolp dna d

OUTPUT
urt semoc hsiw sih nehw dnA
 .dlrow eht fo noitaerc eht dettolp dna dn


EXAMPLE

INPUT
kcatta t'nerew uoy dnA
 .taht hsilpmocca lliw ohw eno eht era uoy dnA


OUTPUT
catta t'nerew uoy dnA
 .taht hsilpmocca lliw ohw eno eht era uoy dnA
 


EXAMPLE

INPUT
ioj yllautneve llahs uoy ,dnA
 meht retfa pu naelc I ,dnA
 ???ino yb d

OUTPUT
oj yllautneve llahs uoy ,dnA
 meht retfa pu naelc I ,dnA
 ???ino yb d

In [11]:
BATCH_SIZE = 64
VOCAB_SIZE = len(vocab)  # vocab is number of unique characters
EMBEDDING_DIM = 256
RNN_UNITS = 1024

# Buffer size to shuffle the dataset
# (TF data is designed to work with possibly infinite sequences,
# so it doesn't attempt to shuffle the entire sequence in memory. Instead,
# it maintains a buffer in which it shuffles elements).
BUFFER_SIZE = 10000

In [12]:
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [13]:
data = dataset_left.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [14]:
model = build_model(VOCAB_SIZE,EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           18944     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 74)            75850     
Total params: 5,341,770
Trainable params: 5,341,770
Non-trainable params: 0
_________________________________________________________________


In [15]:
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)  # ask our model for a prediction on our first batch of training data (64 entries)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")  # print out the output shape

(64, 70, 74) # (batch_size, sequence_length, vocab_size)


In [20]:
model.compile(optimizer='adam', loss=loss)

In [21]:
# Directory where the checkpoints will be saved
#WARNING:tensorflow:Can save best model only with val_loss available, skipping.
#https://stackoverflow.com/questions/52776622/keras-callbacks-keep-skip-saving-checkpoints-claiming-val-acc-is-missing
checkpoint_dir = 'cleaner_script_Model_left.hdf5'
# Name of the checkpoint files
#checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
#checkpoint_prefix = os.path.join(checkpoint_dir, "model.hdf5")

checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    monitor='loss',
    mode='min',
    filepath=checkpoint_dir,
    save_best_only=True,
    save_weights_only=True,
    save_freq=2
)

In [22]:
#Training the model. we set the runtime to > GPU to speed up this process
history = model.fit(data, epochs=200, callbacks=[checkpoint_callback])

Epoch 1/200
15/70 [=====>........................] - ETA: 2:48 - loss: 3.5375

KeyboardInterrupt: 

In [None]:
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)

In [None]:
#tf.keras.models.load_model('/','FatherTimeModel.hdf5', custom_objects=None, compile=True)
model.build(tf.TensorShape([1, None]))

In [None]:
#This line was ommited from the visual demo. It's here for reference, 
#however it should have been checked to be working. As it is not.
checkpoint_num = 10
model.load_weights(tf.train.load_checkpoint("./training_checkpoints/ckpt_" + str(checkpoint_num)))
model.build(tf.TensorShape([1, None]))

In [23]:
modelR = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)
modelL = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)
modelL.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (1, None, 256)            18944     
_________________________________________________________________
lstm_2 (LSTM)                (1, None, 1024)           5246976   
_________________________________________________________________
dense_2 (Dense)              (1, None, 74)             75850     
Total params: 5,341,770
Trainable params: 5,341,770
Non-trainable params: 0
_________________________________________________________________


In [24]:
#We're changing the shape, but loading the weights of our trained model
#model.load_weights(tf.train.latest_checkpoint('FatherTimeModel.hdf5'))
modelR.load_weights('cleaner_script_Model_right.hdf5')
modelL.load_weights('cleaner_script_Model_left.hdf5')

In [25]:
modelR.build(tf.TensorShape([1, None]))
modelL.build(tf.TensorShape([1, None]))

In [109]:
def generate_text(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 200

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
        
      # remove the batch dimension 
      # squeeze take the nested arrays of [[]] and removes them
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)      
      text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [37]:
def generate_text_dual(modelL, modelR, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 100

  # Converting our start string to numbers (vectorizing)
  input_eval_R = [char2idx[s] for s in start_string]
  input_eval_R = tf.expand_dims(input_eval_R, 0)
  input_eval_L = [char2idx[s] for s in start_string]
  input_eval_L = tf.expand_dims(input_eval_L, 0)

  # Empty string to store our results
  text_generated = [start_string]

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 0.5

  # Here batch size == 1
  modelR.reset_states()
  modelL.reset_states()
  for i in range(num_generate):
      predictions_R = modelR(input_eval_R)
      predictions_L = modelL(input_eval_L)
        
      # remove the batch dimension 
      # squeeze take the nested arrays of [[]] and removes them
      predictions_R = tf.squeeze(predictions_R, 0)
      predictions_L = tf.squeeze(predictions_L, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions_R = predictions_R / temperature
      predictions_L = predictions_L / temperature
      predicted_id_R = tf.random.categorical(predictions_R, num_samples=1)[-1,0].numpy()
      predicted_id_L = tf.random.categorical(predictions_L, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval_R = tf.expand_dims([predicted_id_R], 0)
      input_eval_L = tf.expand_dims([predicted_id_L], 0)
      
      text_generated.append(idx2char[predicted_id_R])
      text_generated.insert(0, idx2char[predicted_id_L])

  return (''.join(text_generated))

In [31]:
def generate_text_left(model, start_string):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
  num_generate = 200

  # Converting our start string to numbers (vectorizing)
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
  text_generated = [start_string]

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
  temperature = 1.0

  # Here batch size == 1
  model.reset_states()
  for i in range(num_generate):
      predictions = model(input_eval)
        
      # remove the batch dimension 
      # squeeze take the nested arrays of [[]] and removes them
      predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
      predictions = predictions / temperature
      predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
      input_eval = tf.expand_dims([predicted_id], 0)
      
      text_generated.insert(0, idx2char[predicted_id])

  return (''.join(text_generated))

In [118]:
#Father Time tests Temp 1.0
inp = input("Type a starting string: ")

print(generate_text_dual(modelL, modelR, inp))

Type a starting string: time
mmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmbmbmbtimelous my whe Will returay of unayncare of wher you tell bef formyot.
If you exproing ons meats me. Y


In [136]:
#Father Time tests Temp 1.0
inp = input("Type a starting string: ")

print(generate_text_left(modelL, inp))

Type a starting string: time
ACTCvS8P-U8PPKKG.LKLL.L.




vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvevevevevtime


In [28]:
#Type left tests Temp 1.0
inp = input("Type a starting string: ")

print(generate_text_left(modelL, inp))

Type a starting string: answer
g from inside the home of thisky, but it's better feel the presence of superior is the seeds of creation, denying the reincarnation of the world. 
Phew! We it with demon was worried for nothing. Now, answer


In [40]:
#type dual tests Temp 0.5
inp = input("Type a starting string: ")

print(generate_text_dual(modelL, modelR, inp))

Type a starting string:  return
 well 
Man can change the wore junk in our way. 
Sorry, but I can't help you right now. I need to co return all the candelabra to the Labyrinth of Amala. 
I know the name of Nihilo. 
I'm not really in the Co


In [None]:
def generate_sentence(modelL, modelR, start_string):
    # Low temperatures results in more predictable text.
    # Higher temperatures results in more surprising text.
    temperature = 0.8
    # Max number of characters to generate in either direction
    num_generate = 100
    
    # reversing the starting string when using the left model as the model is built in reverse, then vectorize
    rev_string = reverse_string(start_string)
    input_eval_L = [char2idx[s] for s in rev_string]
    input_eval_L = tf.expand_dims(input_eval_L, 0)

    # Empty string to store our results
    text_generated = [start_string]
    
    modelL.reset_states()
    #Generate text to the left
    for i in range(num_generate):
        predictions_L = modelL(input_eval_L)
        predictions_L = tf.squeeze(predictions_L, 0)
        predictions_L = predictions_L / temperature
        predicted_id_L = tf.random.categorical(predictions_L, num_samples=1)[-1,0].numpy()

        input_eval_L = tf.expand_dims([predicted_id_L], 0)
        character = idx2char[predicted_id_L]
        #if period, remove it, and the following space then end
        if character == '.' or character == '?' or character == '!':
            text_generated.pop(0)
            break
        text_generated.insert(0, idx2char[predicted_id_L])
    
    # update our string to include the text we've generated
    start_string = ''.join(text_generated)
    input_eval_R = [char2idx[s] for s in start_string]
    input_eval_R = tf.expand_dims(input_eval_R, 0)
    
    modelR.reset_states()    
    #`generate text to the right
    for i in range(num_generate):
        predictions_R = modelR(input_eval_R)
        predictions_R = tf.squeeze(predictions_R, 0)
        predictions_R = predictions_R / temperature
        predicted_id_R = tf.random.categorical(predictions_R, num_samples=1)[-1,0].numpy()

        input_eval_R = tf.expand_dims([predicted_id_R], 0)
        character = idx2char[predicted_id_R]
        text_generated.append(idx2char[predicted_id_R])
        #If period, add to array then end
        if character == '.' or character == '?' or character == '!':
            break

    return (''.join(text_generated))

def reverse_string(string):
    reversedString = ""
    for r in range(len(string)-1,-1,-1):
        reversedString += string[r]
    return reversedString

In [52]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: life

When you have dlife can see your heart, but inside there is nothing.


In [62]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: life
life

one life and fulfill the part which you were originally intended for.


In [73]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: think
think
What the hell are you thinking about coming up with a Reason.


In [133]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: the answer

If you can reach the top, you should ckill the answer to the unanswerable question, Why is a world reborn?


In [142]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: I'm thinkin

That's why I'm thinking, and I'm going to continue on with my life.


In [143]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: I'm thinkin

That's why I'm thinking, but 
I had not to be functioning, even though the switch for really thinks of you.


In [266]:
inp = input("Type a starting string: ")
print(generate_sentence(modelL, modelR, inp))

Type a starting string: the important thing

I found the important things like this thing can keep record of your progress.


In [None]:
the treasure 