In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, LSTM, Bidirectional, Embedding
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [2]:
# Ask TensorFlow to log the device each op is placed on. This is the source of the
# "Executing op ... in device ..." lines that appear in the outputs of later cells.
tf.debugging.set_log_device_placement(True)

In [5]:
# Load the whole training corpus as one lower-cased string.
# The context manager closes the file handle deterministically (the bare open() call
# left that to the garbage collector), and the explicit encoding keeps the read
# independent of the platform's default locale.
# NOTE(review): the corpus appears to contain non-breaking spaces (U+00A0); consider
# normalising them with text.replace('\xa0', ' ') if they should not become tokens.
with open('/content/stories.txt', encoding='utf-8') as corpus_file:
  text = corpus_file.read().lower()

In [6]:
# Fit a word-level tokenizer on the corpus, passed in as a single document.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# word_index numbering starts at 1, so add 1 to leave room for index 0
# (the value pad_sequences fills with by default) when sizing vocabulary-wide layers.
total_words = len(tokenizer.word_index) + 1

In [8]:
# Inspect the learned word -> integer-index mapping.
# NOTE(review): this prints the entire vocabulary; a small preview may be enough.
print(tokenizer.word_index)

{'the': 1, 'and': 2, 'of': 3, 'to': 4, 'a': 5, 'they': 6, 'their': 7, 'with': 8, 'in': 9, 'that': 10, 'as': 11, 'her': 12, 'friends': 13, 'had': 14, 'for': 15, 'was': 16, 'she': 17, 'them': 18, '\xa0': 19, 'by': 20, 'on': 21, 'his': 22, 'through': 23, 'were': 24, 'but': 25, 'journey': 26, 'who': 27, 'world': 28, 'it': 29, 'from': 30, 'adventure': 31, 'each': 32, 'an': 33, 'garden': 34, 'importance': 35, 'city': 36, 'its': 37, 'learned': 38, 'forest': 39, 'wisdom': 40, 'power': 41, 'end': 42, 'hidden': 43, 'desert': 44, 'young': 45, 'would': 46, 'about': 47, 'knew': 48, 'into': 49, 'discovered': 50, 'great': 51, 'one': 52, 'he': 53, 'village': 54, 'shared': 55, 'heart': 56, 'group': 57, 'began': 58, 'at': 59, 'beauty': 60, 'magical': 61, 'oasis': 62, 'life': 63, 'ancient': 64, 'not': 65, 'found': 66, 'could': 67, 'together': 68, 'deep': 69, 'secret': 70, 'filled': 71, 'creatures': 72, 'lost': 73, 'quest': 74, 'wise': 75, 'lessons': 76, 'nature': 77, 'where': 78, 'also': 79, 'more': 80, 

In [9]:
# Build the training samples: for every line of the corpus, emit each prefix of its
# token sequence that is at least two tokens long (context tokens + the word to predict).
input_sequence = []
for token_list in tokenizer.texts_to_sequences(text.split('\n')):
  print(token_list)

  # Prefix lengths run from 2 up to the full line; lines with fewer than two tokens add nothing.
  input_sequence.extend(
      token_list[:prefix_len] for prefix_len in range(2, len(token_list) + 1)
  )

[1338, 73, 103, 665, 31]
[9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5, 348, 3, 1, 900, 104, 2, 238, 14, 901, 518, 519, 29, 1, 142, 16, 1341, 4, 33, 1342, 1343, 30, 7, 902, 903, 239, 1, 142, 3, 904, 1, 45, 186, 349, 48, 10, 29, 16, 126, 4, 12, 4, 105, 1, 73, 103, 2, 350, 12, 422, 142]
[186, 349, 16, 309, 15, 12, 168, 2, 1344, 25, 17, 14, 169, 187, 21, 170, 33, 423, 74, 143, 8, 83, 9, 12, 56, 17, 905, 5, 57, 3, 424, 13, 4, 1345, 12, 9, 12, 906, 208, 907, 908, 5, 667, 2, 171, 909, 668, 910, 5, 75, 2, 1346, 1347, 2, 911, 5, 271, 2, 520, 1348, 19]
[7, 26, 172, 18, 23, 93, 425, 127, 188, 173, 2, 69, 49, 426, 1349]
[32, 209, 6, 113, 351, 7, 90, 40, 2, 144, 349, 50, 10, 669, 12, 13, 16, 210, 912, 913, 25, 17, 427, 38, 1, 35, 3, 521, 1350, 2, 914, 9, 1, 522, 3, 12, 670, 19]
[11, 6, 272, 211, 49, 1, 1351, 1, 57, 189, 84, 33, 64, 352, 671, 49, 5, 523, 1352, 915, 4, 916, 1, 428, 3, 1, 73, 103]
[8, 668, 1353, 917, 2, 1354, 1355, 918, 6, 672, 1, 352, 2, 50, 1, 919, 43, 1356, 94, 208, 5

In [None]:
# Display the generated n-gram prefixes.
# NOTE(review): this renders the full list; a slice such as input_sequence[:10]
# would keep the notebook output short.
input_sequence

[[1338, 73],
 [1338, 73, 103],
 [1338, 73, 103, 665],
 [1338, 73, 103, 665, 31],
 [9, 1],
 [9, 1, 308],
 [9, 1, 308, 142],
 [9, 1, 308, 142, 3],
 [9, 1, 308, 142, 3, 666],
 [9, 1, 308, 142, 3, 666, 5],
 [9, 1, 308, 142, 3, 666, 5, 1339],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5, 348],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5, 348, 3],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5, 348, 3, 1],
 [9, 1, 308, 142, 3, 666, 5, 1339, 16, 1340, 1, 665, 103, 5, 348, 3, 1, 900],
 [9,
  1,
  308,
  142,
  3,
  666,
  5,
  1339,
  16,
  1340,
  1,
  665,
  103,
  5,
  348,
  3,
  1,
  900,
  104],
 [9,
  1,
  308,
  142,
  3,
  666,
  5,
  13

In [None]:
# Length of the longest n-gram; every sample is left-padded up to this length so the
# samples form one rectangular array with the most recent tokens at the right edge.
max_length = max(map(len, input_sequence))
input_pad = pad_sequences(input_sequence, padding='pre', maxlen=max_length)

In [None]:
# Every column except the last is the model input (the context tokens).
predictors = input_pad[:, :-1]
# The last column is the next-word target, one-hot encoded over the whole vocabulary.
label = tf.keras.utils.to_categorical(input_pad[:, -1], num_classes=total_words)

In [None]:
# Next-word prediction model: token embedding -> bidirectional LSTM -> softmax over
# the vocabulary.
model = Sequential([
    # `input_length` is deprecated (and ignored with a warning) in Keras 3; the layer
    # takes the sequence length from the data it is called on, so it is omitted here.
    Embedding(total_words, 200),
    Bidirectional(LSTM(200)),
    Dense(total_words, activation='softmax')
])
# categorical_crossentropy matches the one-hot encoded `label` array.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the returned History object so the per-epoch metrics can be inspected afterwards.
history = model.fit(predictors, label, epochs=100)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Executing op Range in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mean in device /job:localhost/replica:0/task:0/device:GPU:0
[1m 29/469[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m12s[0m 29ms/step - accuracy: 0.9797 - loss: 0.0644Executing op __inference_one_step_on_iterator_3031 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op __inference_one_step_on_iterator_3031 in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Range in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mean in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Range in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op Mean in device /job:localhost/replica:0/task:0/device:GPU:0
[1m 31/469

In [None]:
def generate_story(seed_text, next_words, model, max_length):
  """Greedily extend `seed_text` with up to `next_words` predicted words.

  Args:
    seed_text: Prompt string the generated words are appended to.
    next_words: Maximum number of words to generate.
    model: Trained model whose `predict` returns one row of scores per input
      sequence, one score per vocabulary index.
    max_length: Padded n-gram length used for training; the model input is
      `max_length - 1` tokens long.

  Returns:
    `seed_text` followed by the generated words, separated by single spaces.
    Generation stops early if the model predicts an index that has no word
    (for example the padding index 0), because appending nothing would leave the
    token sequence unchanged and every further step would repeat that prediction.
  """
  # The module-level tokenizer already holds the reverse (index -> word) mapping,
  # which avoids scanning word_index for every generated word.
  index_to_word = tokenizer.index_word

  for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_length-1, padding='pre')

    # verbose=0 stops predict() from printing a progress bar for every generated word.
    prediction = model.predict(token_list, verbose=0)
    # The batch holds a single sequence: take the best-scoring index of its row.
    predicted_index = int(np.argmax(prediction, axis=-1)[0])

    output_word = index_to_word.get(predicted_index, '')
    if not output_word:
      break
    seed_text += " " + output_word
  return seed_text

In [None]:
# Prompt to continue. NOTE(review): the tokenizer was created without an oov_token, so
# prompt words missing from the training vocabulary (possibly "ipoti") are presumably
# dropped before prediction — confirm.
input_text= "In the hustle and bustle of ipoti"
# Left disabled: re-fitting the tokenizer on the prompt after training could change the
# word indices the trained model relies on. NOTE(review): confirm and delete this line.
#tokenizer.fit_on_texts([input_text])
# Generate 50 words after the prompt and print the resulting story.
print(generate_story(input_text, 50, model, max_length))

Executing op RangeDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op MapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op FlatMapDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:GPU:0
Executing op TensorDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RepeatDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ZipDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op ParallelMapDatasetV2 in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op OptionsDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op OptionsDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op PrefetchDataset in device /job:localhost/replica:0/task:0/device:CPU:0
Executing o