In this project I will create a Recurrent Neural Network (RNN) model for character-level prediction: it takes a variable-length sequence as input and predicts the next character.

In [None]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import numpy as np
import os

#Dataset
For this project we only need one piece of data, so I will extract it from Shakespeare's poem.

In [None]:
#Download the Shakespeare text and keep the local path returned by get_file
#NOTE: the cell that reads the corpus opens path_to_files (the uploaded file), not this path_to_file
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
#Colab-only: ask the user to upload a file from the browser
from google.colab import files
#Keep the first key of the mapping returned by upload() (the uploaded file's name); this is the file read below
path_to_files = list(files.upload().keys())[0]

Saving Abai K.txt to Abai K.txt


#Read the content of a file

In [None]:
#Read the file as raw bytes and decode it explicitly as UTF-8
#(the with-block closes the file handle as soon as the read is done)
with open(path_to_files, 'rb') as text_file:
  text = text_file.read().decode(encoding='utf-8')
#Length of the text is the number of characters in it
print('Length of text: {} characters'.format(len(text)))

Length of text: 146790 characters


In [None]:
#First 250 characters in the text
print(text[:250])

The first word
 
Was life good or bad, but it passed a lot. I had enough in this life: disputes, and painful gossip, and struggle and unworthy quarrels... But now, when I’m on the end of the road, being exhausted and tired, I am convinced of the fu


#Encoding
I will encode each character as a unique integer.


In [None]:
#Vocabulary: every distinct character of the text, in sorted order
vocab = sorted(set(text))
#Lookup table from a character to its position in the vocabulary
char2idx = dict(zip(vocab, range(len(vocab))))
#Reverse lookup (position -> character) kept as an array so it can be indexed with arrays of ids
idx2char = np.array(vocab)

def text_to_int(text):
  """Encode text as a NumPy array holding one char2idx id per character.

  Raises KeyError for a character that is not a key of char2idx.
  """
  encoded = list(map(char2idx.__getitem__, text))
  return np.array(encoded)

text_as_int = text_to_int(text)

In [None]:
#Let's look at how the beginning of the text is encoded
#(print and encode the same slice so the two output lines correspond to each other)
sample = text[:14]
print("Text:", sample)
print("Encoded:", text_to_int(sample))

Text: The first word
 
Encoded: [43 58 55  2 56 59 68 69 70  2 73 65 68 54]


In [None]:
#Here I also create the reverse conversion: from numeric ids back to text
def int_to_text(ints):
  """Convert character ids back into the string they encode.

  Args:
    ints: ids usable as an index into idx2char (e.g. a NumPy array).
      Objects exposing a .numpy() method, such as the tensors passed in
      elsewhere in this notebook, are converted with it first.

  Returns:
    The decoded characters joined into a single str.
  """
  #Only call .numpy() when the object actually provides it, instead of
  #hiding every possible error behind a bare except
  to_numpy = getattr(ints, 'numpy', None)
  if callable(to_numpy):
    ints = to_numpy()
  return ''.join(idx2char[ints])

print(int_to_text(text_as_int[:14]))

The first word


#Creating Training Examples
Our task is to feed a model a short sequence, and the model should give us the next character.

To do that we have to split our data into many shorter sequences that we can pass to the model as training examples.

The training examples we will prepare will use a seq_length sequence as input and a seq_length sequence as output, where that sequence is the original sequence shifted one letter to the right.

In [None]:
#First step is to create a stream of characters
seq_length = 100 #length of sequence for a training example
#Number of whole (seq_length+1)-character chunks that fit in the text
#NOTE(review): examples_per_epoch is not used by any other cell visible here
examples_per_epoch = len(text)//(seq_length+1)

#Creating training examples / targets
#The dataset is built from the encoded text (text_as_int), not from the raw string
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [None]:
#I use batch method to turn this stream of characters into batches of desired length
#Each chunk holds seq_length+1 ids so it can later be split into an input and a target shifted by one character
sequences = char_dataset.batch(seq_length+1, drop_remainder = True)

In [None]:
#Split the sequences into input and output
def split_input_target(chunk):
  """Split one chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the
  chunk without its first element, e.g. "Kitap" -> ("Kita", "itap").
  """
  return chunk[:-1], chunk[1:]

dataset = sequences.map(split_input_target) #Use map to apply the above function to every entry

In [None]:
#Show the first two (input, target) pairs decoded back to text
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  #"\O" is not a valid escape sequence and printed a literal backslash;
  #a newline before the label was presumably intended
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
The first word
 
Was life good or bad, but it passed a lot. I had enough in this life: disputes, a
\OUTPUT
he first word
 
Was life good or bad, but it passed a lot. I had enough in this life: disputes, an


EXAMPLE

INPUT
d painful gossip, and struggle and unworthy quarrels... But now, when I’m on the end of the road, be
\OUTPUT
 painful gossip, and struggle and unworthy quarrels... But now, when I’m on the end of the road, bei


In [None]:
#Defining training batches
BATCH_SIZE = 64 #Number of sequences per training batch
VOCAB_SIZE = len(vocab) #Number of unique characters
EMBEDDING_DIM = 256 #Passed to build_model as embedding_dim
RNN_UNITS = 1024 #Passed to build_model as rnn_units
BUFFER_SIZE = 10000 #Buffer size handed to dataset.shuffle() below
#Shuffle the sequences, then group them into batches of BATCH_SIZE;
#the model is built with that same fixed batch size, so incomplete batches are dropped
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

#Building the Model
Model consists of:
- Embedding layer
- LSTM layer
- Fully connected (Dense) layer with one node for each unique character

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct characters; used as the Embedding input
      size and as the number of Dense output units.
    embedding_dim: size of each character embedding vector.
    rnn_units: number of LSTM units.
    batch_size: fixed batch size baked into the input shape (the sequence
      length is left unspecified).

  Returns:
    An uncompiled tf.keras.Sequential model.
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])
  #return_sequences=True yields an output for every time step, not only the last
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  #One output per vocabulary entry; no activation is set on this layer
  to_vocab = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, to_vocab])

model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           21504     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 84)            86100     
                                                                 
Total params: 5354580 (20.43 MB)
Trainable params: 5354580 (20.43 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


#Creating a loss function


In [None]:
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 84) # (batch_size, sequence_length, vocab_size)


In [None]:
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 3.7177228e-03 -1.2507101e-03  1.5630319e-03 ... -1.2115527e-02
    4.4590207e-03 -3.0637651e-03]
  [-2.5488422e-03  2.4273833e-03 -4.2964900e-03 ... -9.5116682e-03
   -5.4982479e-04  1.7775595e-04]
  [-2.0083948e-04  2.4780068e-03  5.2834107e-03 ... -4.8134523e-03
   -4.8327609e-04  3.1723981e-03]
  ...
  [-1.1472092e-02  1.0050940e-02  9.1998400e-03 ...  6.6153742e-03
    5.7823898e-04  3.3277522e-03]
  [-6.4987596e-03  9.0348013e-03  8.4063075e-03 ...  3.4767233e-03
   -6.9641811e-04 -1.8311625e-03]
  [-5.3135073e-03  1.1449246e-02  1.3402300e-02 ... -4.4642389e-04
    9.1532955e-04  2.2375826e-03]]

 [[ 1.8683632e-03 -1.3328118e-04  8.0821458e-03 ...  2.5380973e-03
   -6.8139168e-04  3.6335518e-03]
  [ 3.5403832e-03  2.7771690e-04  9.0905763e-03 ...  2.0293088e-03
    1.0899315e-04  6.7269653e-03]
  [-1.4382420e-03  2.4881577e-03  6.1914613e-03 ... -2.4711615e-03
   -1.1063137e-03  9.0872925e-03]
  ...
  [-9.0158544e-03  1.6256396e-02 -4.4330657e-03 ... -2.6978513e

In [None]:
#Let's see the very first prediction
pred = example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[ 0.00371772 -0.00125071  0.00156303 ... -0.01211553  0.00445902
  -0.00306377]
 [-0.00254884  0.00242738 -0.00429649 ... -0.00951167 -0.00054982
   0.00017776]
 [-0.00020084  0.00247801  0.00528341 ... -0.00481345 -0.00048328
   0.0031724 ]
 ...
 [-0.01147209  0.01005094  0.00919984 ...  0.00661537  0.00057824
   0.00332775]
 [-0.00649876  0.0090348   0.00840631 ...  0.00347672 -0.00069642
  -0.00183116]
 [-0.00531351  0.01144925  0.0134023  ... -0.00044642  0.00091533
   0.00223758]], shape=(100, 84), dtype=float32)


In [None]:
#Look at the prediciton at the first time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)

84
tf.Tensor(
[ 0.00371772 -0.00125071  0.00156303 -0.00124311 -0.00667032 -0.00274784
 -0.00302543  0.00148637 -0.00221082 -0.00237075  0.00349066 -0.00164978
 -0.00320705  0.00030495  0.00872804  0.00277136  0.00248159  0.00663225
  0.00022087 -0.00356803 -0.00453318 -0.0006427  -0.00541036 -0.00111798
  0.0046152  -0.00211615 -0.00350131  0.00378645  0.00077043  0.00495689
 -0.00266347  0.00099624  0.00105153 -0.00707394 -0.00308315 -0.00133746
  0.0006271  -0.00468759  0.00308134  0.00038349 -0.01143216  0.00156933
  0.00090544  0.00300666 -0.00532748  0.00328557 -0.00883934 -0.00256045
  0.00436269  0.00015389 -0.00434203 -0.00103183 -0.00153853 -0.00292304
  0.00519049 -0.00601248  0.00045956 -0.00465124 -0.00385284 -0.00161661
 -0.00497318 -0.00226722 -0.00556591 -0.00231706 -0.00016475  0.00056381
  0.00065338  0.00418298 -0.000384    0.00018901 -0.00040335 -0.00465836
  0.00687522  0.00206253  0.00052432 -0.0014558   0.00067758  0.00052424
  0.00600118  0.00137676  0.00321106 

In [None]:
#Draw one character id per row of pred (pred holds one row of 84 scores per time step)
sampled_indices = tf.random.categorical(pred, num_samples=1)
#Reshape to a single row and take it, leaving a flat 1-D array of ids
sampled_indices = np.reshape(sampled_indices, (1, -1))[0]
#Decode the sampled ids; the model is still untrained here, so the text is random
predicted_chars = int_to_text(sampled_indices)
predicted_chars

';J\n«!-WMefLPI.Q\riQ5r»JjTdaBu»ND–uD3lL:6jCKH\nED\rJs9Srw’Of0rypVt!VzR(olPBAo cum3q…]MlmuLG-?B\re1DkfLI\n»'

In [None]:
def loss(labels, logits):
  """Sparse categorical cross-entropy of integer labels against raw logits."""
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)

#Compiling the model
We can consider our problem as a classification task, where the model predicts the probability of each unique character coming next.

In [None]:
model.compile(optimizer='adam', loss=loss)

#Creating the checkpoints
Here I will set up and configure checkpoints to save the model as it trains. This will allow us to load the weights from a checkpoint and continue training.

In [None]:
#Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
#Name of checkpoint_files; "{epoch}" is left in the path as a template placeholder
#(presumably filled in with the epoch number by ModelCheckpoint - see the Keras docs)
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

#Callback passed to model.fit(); save_weights_only=True stores only the weights
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only = True)

#Training

In [None]:
history = model.fit(data, epochs=40, callbacks=[checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


#Loading the Model
I will rebuild the model from a checkpoint using a batch_size of 1, so that we can feed one piece of text to the model and have it make a prediction

In [None]:
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size=1)


In [None]:
#Once the model is finished training we can find the last checkpoint that stores the model's weights using the following line
#NOTE(review): tf.train.latest_checkpoint presumably returns None when checkpoint_dir holds no checkpoint - confirm before relying on it
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
#Build with batch size 1 and an unspecified sequence length
model.build(tf.TensorShape([1, None]))

In [None]:
#We can load any checkpoint we want by specifying the exact file to load
#checkpoint_num = 10
#model.load_weights("./training_checkpoints/ckpt_" + str(checkpoint_num))
#model.build(tf.TensorShape([1, None]))

#Generating the text

In [None]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text by repeatedly sampling the next character from the model.

  Args:
    model: model rebuilt with batch_size=1; calling it on a batch of ids
      must return one row of logits per input character.
    start_string: non-empty seed text; every character must be a key of
      char2idx.
    num_generate: number of characters to generate (default 800).
    temperature: positive divisor applied to the logits before sampling
      (default 1.0). Values below 1 sharpen the distribution, values
      above 1 flatten it.

  Returns:
    start_string followed by the generated characters.

  Raises:
    ValueError: if start_string is empty or temperature is not positive.
    KeyError: if start_string contains a character missing from char2idx.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  #Encode the seed and add a leading batch dimension of size 1
  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)

  text_generated = []

  #Start from a clean recurrent state
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    #Remove the batch dimension
    predictions = tf.squeeze(predictions, 0)
    predictions = predictions / temperature
    #An id is sampled for every time step, but only the last one is kept
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

    #Feed only the new character back in; the LSTM is stateful, so the
    #earlier context is carried in its state
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])

  return start_string + ''.join(text_generated)

In [None]:
inp = input("Type a starting string:")
print(generate_text(model, inp))

Type a starting string:Kazakh
Kazakhatphe friends of the philosophers is reaching. Those who are even confented is able to appear all the best mersigle, guving your eight give milah is a proke». «A gett craite. Why do not your mist! Stupidice. If he is the truth in in the man. No not think the believing – in all spenisly, but by his dig it inimain by lacuiful. But they asked himself and elective asticred hoped to the cack? When they real the world the troublemakers?
 
 
 
1892
 
The twenty-fifth word
 
S will real the heart and the hungry other rive sersivered and a thirsh of all living long. Af they ware udfellighted because he is grand, intenty and Sow Mahe says, «Ak, why do not with th ommyops, and the other is were adous, learn? Why do not you affection to wealth, knowledge, can be called "Thirst word
 
Hey aur
