In [1]:
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences, to_categorical
from tensorflow.keras.layers import Dense, InputLayer, GRU, LSTM, Embedding, Bidirectional, Flatten
from tensorflow.keras.models import Model, Sequential
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint

# Loading Data

In [2]:
text = ""
with open("Dataset.txt", 'r') as file:
    text = file.read()

# Tokenizing the data

In [3]:
tokenizer = Tokenizer(oov_token='OOV')
tokenizer.fit_on_texts([text])
word_index = tokenizer.word_index
len(word_index)

8922

In [4]:
# Filtering the text to create n-ragged sequences
print(len(text))
new_text = text.replace('_', ' ')
new_text = new_text.replace('.', '')
new_text = new_text.replace(',', '')
new_text = new_text.replace('!', '')
new_text = new_text.replace('$', '')
new_text = new_text.replace('-', '')
new_text = new_text.replace('(', '')
new_text = new_text.replace(')', '')
new_text = new_text.replace('  ', ' ')
new_text = new_text.replace('“', '')
new_text = new_text.replace('”', '')
print(len(new_text))

580614
559814


In [5]:
final_text = []
for x in new_text.split('\n'):
    if len(x) >= 20:
        final_text.append(x)

len(final_text)

8811

In [6]:
# Tokenizing the sequence
sequences = tokenizer.texts_to_sequences(final_text)
n_ragged_sequences = []
# Creating n-ragged sequences
for sequence in sequences:
    for i in range(len(sequence) - 1):
        new_sequence = sequence[0:i+2]
        n_ragged_sequences.append(new_sequence)
len(n_ragged_sequences)

96973

In [7]:
# Padding the sequences
padded_sequences = pad_sequences(n_ragged_sequences, padding='pre')
padded_sequences = np.array(padded_sequences)
max_length = len(padded_sequences[0])
max_length

18

# Preparing the Dataset

In [8]:
X = padded_sequences[:, :-1]
y = padded_sequences[:, -1]
y = to_categorical(y)
X.shape, y.shape

((96973, 17), (96973, 8923))

# Defining the Model

In [17]:
# Splitting the dataset in training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.9)

In [18]:
# Training and Testing Sizes
(len(X_train), len(y_train)), (len(X_test), len(y_test))

((87275, 87275), (9698, 9698))

In [16]:
num_labels = len(word_index) + 1
epochs = 20
batch_size = 1024

In [19]:
model = Sequential([
    Embedding(num_labels, 100, input_length=max_length - 1, ),
    LSTM(units= 64),
    Dense(units= num_labels, activation= 'softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 17, 100)           892300    
                                                                 
 lstm_2 (LSTM)               (None, 64)                42240     
                                                                 
 dense_2 (Dense)             (None, 8923)              579995    
                                                                 
Total params: 1,514,535
Trainable params: 1,514,535
Non-trainable params: 0
_________________________________________________________________


In [20]:
checkpoint = ModelCheckpoint(filepath= 'predictor.h5', monitor='val_loss', save_best_only=True, save_weights_only=False, mode='min', save_freq='epoch')
history = model.fit(X_train, y_train, epochs=100, batch_size=1, validation_data=(X_test, y_test), validation_batch_size=1, callbacks=[checkpoint])

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.

In [14]:
predictions = np.argmax(model.predict(X), 0)



ResourceExhaustedError: Graph execution error:

Detected at node 'sequential/dense_8/Softmax' defined at (most recent call last):
    File "d:\Applications\Anaconda\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "d:\Applications\Anaconda\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "d:\Applications\Anaconda\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
      app.start()
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\kernelapp.py", line 711, in start
      self.io_loop.start()
    File "d:\Applications\Anaconda\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "d:\Applications\Anaconda\lib\asyncio\base_events.py", line 603, in run_forever
      self._run_once()
    File "d:\Applications\Anaconda\lib\asyncio\base_events.py", line 1906, in _run_once
      handle._run()
    File "d:\Applications\Anaconda\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 406, in dispatch_shell
      await result
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\ipkernel.py", line 411, in do_execute
      res = shell.run_cell(
    File "d:\Applications\Anaconda\lib\site-packages\ipykernel\zmqshell.py", line 531, in run_cell
      return super().run_cell(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 2961, in run_cell
      result = self._run_cell(
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3016, in _run_cell
      result = runner(coro)
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3221, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3400, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "d:\Applications\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3460, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\asimw\AppData\Local\Temp\ipykernel_25724\4158841612.py", line 1, in <module>
      predictions = np.argmax(model.predict(X), 0)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 2253, in predict
      tmp_batch_outputs = self.predict_function(iterator)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 2041, in predict_function
      return step_function(self, iterator)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 2027, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 2015, in run_step
      outputs = model.predict_step(data)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 1983, in predict_step
      return self(x, training=False)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\sequential.py", line 410, in call
      return super().call(inputs, training=training, mask=mask)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\layers\core\dense.py", line 255, in call
      outputs = self.activation(outputs)
    File "d:\Applications\Anaconda\lib\site-packages\keras\activations.py", line 84, in softmax
      output = tf.nn.softmax(x, axis=axis)
Node: 'sequential/dense_8/Softmax'
OOM when allocating tensor with shape[32,9281] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node sequential/dense_8/Softmax}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_predict_function_202115]

In [37]:
for j in range(50):
    sentence = ""
    index = j
    print(f'\n\n{j}th Example')
    for num in X[index]:
        sentence += f' {unique_words[int(num * max_character)]}'
    print(f'Actual: {sentence} {unique_words[int(y[index] * max_character)]}')
    print(f'Predicted: {sentence} {unique_words[int(predictions[index] * max_character)]}')



0th Example
Actual:  To Sherlock Holland she is
Predicted:  To Sherlock Holland she intrusions


1th Example
Actual:  Sherlock Holland she is always
Predicted:  Sherlock Holland she is intrusions


2th Example
Actual:  Holland she is always the
Predicted:  Holland she is always intrusions


3th Example
Actual:  she is always the woman
Predicted:  she is always the intrusions


4th Example
Actual:  is always the woman I
Predicted:  is always the woman intrusions


5th Example
Actual:  always the woman I hauling
Predicted:  always the woman I intrusions


6th Example
Actual:  the woman I hauling seldom
Predicted:  the woman I hauling intrusions


7th Example
Actual:  woman I hauling seldom heard
Predicted:  woman I hauling seldom intrusions


8th Example
Actual:  I hauling seldom heard him
Predicted:  I hauling seldom heard intrusions


9th Example
Actual:  hauling seldom heard him mention
Predicted:  hauling seldom heard him intrusions


10th Example
Actual:  seldom heard him mention 