In [None]:
# We are going to build a model that predicts the next character in a sequence. Given some sequence as input,
# it predicts the most likely next character. The training text is Alice's Adventures in Wonderland.
# To generate text, we take the output of the model and feed it back in as the next input, then keep predicting.
# Each new character is predicted from the previous output, and this is repeated as many times as we want
# in order to generate an entire text.
%tensorflow_version 2.x
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np


Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [None]:
# Download the data set (Alice in Wonderland from Project Gutenberg) and keep
# the local path of the downloaded file.
path_to_file = tf.keras.utils.get_file(
    fname='alice_in_wonderland.txt',
    origin='https://www.gutenberg.org/files/11/11-0.txt',
)

In [None]:
# Open the file in binary ('rb') mode, read it whole, and decode the bytes as
# UTF-8 to get a normal string. Binary mode keeps the line endings exactly as
# stored. The with-statement closes the file handle as soon as the read is done.
with open(path_to_file, 'rb') as text_file:
  text = text_file.read().decode(encoding='utf-8')
# Report how many characters the text contains.
print('Length of the text: {} characters'.format(len(text)))

Length of the text: 148139 characters


In [None]:
# Preview the first 300 characters of the text.
print(text[:300])

﻿﻿*** START OF THE PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN
WONDERLAND ***
[Illustration]




Alice’s Adventures in Wonderland

by Lewis Carroll

THE MILLENNIUM FULCRUM EDITION 3.0

Contents

 CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.  


In [None]:
# The data set is raw text, so it has to be encoded as integers:
# every distinct character gets its own integer.

# Sorted list of the distinct characters that occur in the text.
vocab = list(set(text))
vocab.sort()

# Forward mapping: character -> index (its position in vocab).
char2idx = dict(zip(vocab, range(len(vocab))))
# Reverse mapping: index -> character. Stored as an array so that the position
# of a character in vocab can be used directly as the lookup index.
idx2char = np.array(vocab)

# Convert a piece of text into its integer representation:
# every character is replaced by its index from char2idx.
def text_to_int(text):
  """Return a NumPy array holding the char2idx index of each character in text.

  Raises KeyError if a character is not a key of char2idx.
  """
  lookup = char2idx.__getitem__
  return np.array(list(map(lookup, text)))



In [None]:
# Show the same 14-character sample both as raw text and as its integer
# encoding, so the two printed lines correspond one-to-one.
print("Text: ", text[:14])
print("Encoded Text: ", text_to_int(text[:14]))

Text:  ﻿﻿*** START OF 
Encoded Text:  [77 77  7  7  7  2 34 35 16 33 35  2 30 21]


In [None]:
# int_to_text (defined below) may receive different kinds of objects, so anything that is not already a NumPy array is converted to one first; the index lookup needs that to work.

# Encode the entire text as an array of character indices.
text_as_int = text_to_int(text)

def int_to_text(ints):
  """Decode a sequence of character indices back into a string.

  Args:
    ints: Character indices. May be a NumPy array, a list, or any object that
      exposes a numpy() method (such as an eager tensor), which is converted
      to a NumPy array first.

  Returns:
    The string formed by joining idx2char[i] for every index i.
  """
  # Convert only when the object offers a numpy() method. Unlike a blanket
  # try/except, this lets a genuine conversion failure surface instead of
  # being silently ignored.
  to_numpy = getattr(ints, "numpy", None)
  if callable(to_numpy):
    ints = to_numpy()
  return ''.join(idx2char[ints])

# Round-trip check: decode the first 14 encoded indices back into text.
print(int_to_text(text_as_int[:14]))

﻿﻿*** START OF


In [None]:
# Training on the whole text as one single sequence is impractical, so the text
# is cut into short chunks that serve as training examples. Each example takes
# seq_length characters as input and has the same text shifted forward by one
# character as its target.

# Number of input characters in one training example.
seq_length = 300
# Number of whole training examples that fit in the text. Each example needs
# seq_length + 1 characters (301 here): seq_length input characters plus one
# extra character so that the target can be shifted by one.
# NOTE(review): examples_per_epoch is not used anywhere else in the visible notebook.
examples_per_epoch = len(text)//(seq_length+1)

# Turn the encoded text into a tf.data stream of individual character indices.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)


In [None]:
# Group the character stream into chunks of seq_length + 1 (301) indices. With drop_remainder=True a final chunk shorter than that is discarded (for example, 4 leftover characters at the end would be dropped).
sequences = char_dataset.batch(seq_length+1, drop_remainder= True)

In [None]:
# Split every chunk into an (input, target) pair; these pairs are the training examples.

def split_input_target(chunk):
  """Return (chunk without its last element, chunk without its first element).

  The second item is the first shifted forward by one position, so at every
  position the target is the character that follows the input character.
  """
  return chunk[:-1], chunk[1:]

# Apply split_input_target to every chunk; the result is a dataset of (input, target) pairs.
dataset = sequences.map(split_input_target)

In [None]:
# Print the first two (input, target) examples as text to confirm that the
# target is the input shifted by one character.
for x, y in dataset.take(2):
  print("!!!THE INPUT!!!")
  print(int_to_text(x))
  print("\n\nTHE OUTPUT")
  print(int_to_text(y))

!!!THE INPUT!!!
﻿﻿*** START OF THE PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN
WONDERLAND ***
[Illustration]




Alice’s Adventures in Wonderland

by Lewis Carroll

THE MILLENNIUM FULCRUM EDITION 3.0

Contents

 CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.  


THE OUTPUT
﻿*** START OF THE PROJECT GUTENBERG EBOOK ALICE'S ADVENTURES IN
WONDERLAND ***
[Illustration]




Alice’s Adventures in Wonderland

by Lewis Carroll

THE MILLENNIUM FULCRUM EDITION 3.0

Contents

 CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.   
!!!THE INPUT!!!
A Caucus-Race and a Long Tale
 CHAPTER IV.    The Rabbit Sends in a Little Bill
 CHAPTER V.     Advice from a Caterpillar
 CHAPTER VI.    Pig and Pepper
 CHAPTER VII.   A Mad Tea-Party
 CHAPTER VIII.  The Queen’s Croquet-Ground
 CHAPTER IX.    The Mock Turtle’s Story
 CHAPTER X.     The Lobst


THE OUTPUT
 Caucus-Race and a Long Tale
 CHAPTER 

In [None]:
# Prepare the training batches.

# Number of sequences the model receives in one batch.
BATCH_SIZE = 64
# Number of distinct characters in the vocabulary.
VOCAB_SIZE =len(vocab)
# Size of the vector that represents each character in the embedding layer.
EMBEDDING_DIM = 256
# Number of units in the LSTM layer.
RNN_UNITS =1024

# Buffer size used when shuffling the dataset.
BUFFER_SIZE =10000

# Shuffle the examples, then group them into batches of BATCH_SIZE; with
# drop_remainder = True a final batch smaller than BATCH_SIZE is discarded.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder = True)


In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character-level network: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: Number of distinct characters the model can recognise; also
      the number of output nodes of the final layer.
    embedding_dim: Length of the vector used to represent each character.
    rnn_units: Number of units in the LSTM layer.
    batch_size: Fixed number of sequences per batch. The sequence length is
      left as None, so sequences of any length are accepted.

  Returns:
    A tf.keras.Sequential model that maps a (batch_size, sequence_length)
    batch of character indices to (batch_size, sequence_length, vocab_size)
    scores.
  """
  # The "translator": turns each character index into an embedding_dim-long
  # vector. batch_input_shape fixes the batch size but leaves the sequence
  # length open (None).
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None])

  # The "writer": remembers what came before when deciding what comes next.
  # return_sequences=True gives an output at every time step instead of only
  # the last one. stateful=True makes the layer keep its state between calls
  # rather than resetting it for every batch.
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')

  # The "estimator": one output node per vocabulary character, so each time
  # step yields a score for every character that could come next. No
  # activation is applied, so these are raw scores (logits).
  char_scores = tf.keras.layers.Dense(vocab_size)

  return tf.keras.Sequential([embedding, recurrent, char_scores])

# Build the training model by calling build_model with the training batch size, then print a summary of its layers.
model = build_model(VOCAB_SIZE , EMBEDDING_DIM , RNN_UNITS , BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           19968     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 78)            79950     
                                                                 
Total params: 5346894 (20.40 MB)
Trainable params: 5346894 (20.40 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Before defining the loss function, look at one input batch and the output the untrained model produces for it.

# data.take(1) yields a single batch from the training data. The loop unpacks it into:
#   input_example_batch:  the sample data given to the model as input.
#   target_example_batch: the target outputs the model needs to learn.
for input_example_batch, target_example_batch in data.take(1):
  # Ask the model for predictions on this first batch of training data (64 sequences).
  # The model has not been trained yet, so these predictions are not meaningful.
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")  # print out the output shape

# The last dimension is 78 because the final Dense layer has 78 nodes (one per vocabulary character),
# so every prediction holds 78 scores (logits), one for each character that could come next.

(64, 300, 78) # (batch_size, sequence_length, vocab_size)


In [None]:
# The first dimension of the prediction tensor is the batch: len() returns 64, one entry per sequence in the batch.
# Each of those entries holds one row of 78 scores for every time step of the sequence.

print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[-1.27260350e-02  7.26364646e-03 -8.42981227e-03 ... -2.20729271e-04
   -2.05158582e-03  1.36162143e-03]
  [-9.98254213e-03  4.79651615e-03 -7.48366676e-03 ... -1.04447955e-03
   -5.76748187e-03 -6.76101248e-04]
  [-1.20214811e-02  3.17729171e-03 -1.18758641e-02 ...  1.94440479e-03
   -7.51129910e-03 -2.83305999e-03]
  ...
  [-1.48498053e-02  2.38533341e-03 -1.25077944e-02 ... -5.53831458e-04
    5.31782024e-03 -3.81807168e-03]
  [-6.10411353e-03 -3.20379680e-04 -1.02074463e-02 ... -5.33176586e-04
    5.37356222e-03 -1.89389405e-03]
  [ 1.04208663e-03 -5.78439096e-04 -7.30897021e-03 ... -2.13013031e-04
    4.69692238e-03 -1.37889758e-04]]

 [[-1.87387019e-02  9.49239917e-03  1.63768069e-03 ...  2.15098169e-03
   -1.70066929e-03  1.37356180e-03]
  [-1.06666498e-02  6.33637421e-03  1.07635884e-03 ...  4.91729937e-04
    7.03625847e-04  2.49040662e-03]
  [-9.18274466e-03  8.14232789e-03  1.03456446e-03 ... -3.35736573e-03
   -3.25542246e-03  7.20131677e-03]
  ...
  [-1.142

In [None]:
# Look at the predictions for the first sequence in the batch.
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# This is a 2-D tensor with 300 rows, one per time step; each row holds the
# model's scores for the next character at that time step.

300
tf.Tensor(
[[-0.01272603  0.00726365 -0.00842981 ... -0.00022073 -0.00205159
   0.00136162]
 [-0.00998254  0.00479652 -0.00748367 ... -0.00104448 -0.00576748
  -0.0006761 ]
 [-0.01202148  0.00317729 -0.01187586 ...  0.0019444  -0.0075113
  -0.00283306]
 ...
 [-0.01484981  0.00238533 -0.01250779 ... -0.00055383  0.00531782
  -0.00381807]
 [-0.00610411 -0.00032038 -0.01020745 ... -0.00053318  0.00537356
  -0.00189389]
 [ 0.00104209 -0.00057844 -0.00730897 ... -0.00021301  0.00469692
  -0.00013789]], shape=(300, 78), dtype=float32)


In [None]:
# Now look at the first time step of the first sequence in the batch.

time_pred = pred[0]
print(len(time_pred))
print(time_pred)

# These are 78 raw scores (logits), one per vocabulary character, for the character that follows the first time step.
# They are not probabilities yet: the final Dense layer applies no activation, which is why some values are negative.

78
tf.Tensor(
[-1.27260350e-02  7.26364646e-03 -8.42981227e-03 -1.41266715e-02
  2.56846496e-03  1.54364761e-02  3.29441857e-03  1.91028696e-03
 -7.64808850e-03  4.53224313e-03 -2.96300394e-03 -2.75725778e-03
 -1.09919393e-02 -5.04809991e-03  1.12738106e-02 -1.11805955e-02
 -7.99321290e-03 -3.71907139e-04  4.08116262e-04  3.19566438e-03
  1.30655589e-02 -7.02787098e-03  6.49509486e-03 -1.08425654e-02
 -6.73757773e-03  5.61409397e-03  1.72738917e-03 -1.00233676e-02
 -9.26385401e-05 -9.65325627e-03  5.97952306e-03 -2.18841783e-03
  6.21370785e-03  6.96362415e-03  9.07788984e-03 -4.57827281e-03
  6.01531286e-03  8.08725413e-03 -2.62106350e-03  7.50992820e-03
 -1.11967651e-03 -7.11710611e-03 -1.06015918e-03 -3.27578397e-03
 -3.36000603e-03 -1.00620883e-02 -1.60992797e-03  1.46036176e-03
  7.37965573e-04 -6.72496296e-03  1.50577128e-02 -2.62487144e-03
 -6.25204528e-03  7.61709176e-04  2.44490546e-03 -2.83480808e-03
  9.00028273e-03 -1.17240092e-02 -3.16363433e-03 -7.20179360e-03
  2.9277941

In [None]:
# To get predicted characters from the scores above, sample from the output distribution
# (pick an index at random, weighted by the scores) instead of always taking the largest value.
# tf.random.categorical treats every row of pred (one row per time step) as the logits of a
# distribution, and num_samples=1 draws one index per row, giving shape (sequence_length, 1).
sampled_indicies = tf.random.categorical(pred, num_samples=1)

# np.reshape(..., (1, -1)) puts all sampled indices on a single row, and [0] takes that row,
# which leaves a flat array of indices.
# NOTE(review): this name differs from the variable above only by spelling (indices vs. indicies).
sampled_indices = np.reshape(sampled_indicies, (1, -1))[0]

# Convert the indices to characters and join them with int_to_text.
predicted_chars = int_to_text(sampled_indices)

# The sequence of characters sampled from the untrained model; it is displayed below.
predicted_chars

"X’—Qn\ufeffsyWU3!W\rYakPs_CYw]M'JlayyHnpogA3JE*AVcW3LcLET—'SkBtE\rT[xMV’ZS\r’p!CEg's:x; E]_wdhHjY*XR‘‘[3v[Pw0thqjS,f)tbYIl;]EUDw’YvUTYr.L)mALksqz’;Pdf-‘dIfE\r\n*RW‘”bGCQ[vo kJN-3shwrAJdFFS:(?hdE?Ym(,x',\rjzmQ‘ù?E\ufeff’g.h[’Dgd;rl'jjvcJXW“cU3EùB‘EXtfw!if\nA‘jOTDAK :dobjCC”qRV0o]wE”S!)J“S?VeyL’(\rpBlc])-3\nPZk_LII0FFdW"

In [None]:
# The loss function compares the model output with the expected output and
# returns a number that says how far apart the two are.
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True is passed because the model outputs raw scores rather
  than probabilities.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)

In [None]:
# Compile the model with the Adam optimizer and the loss function defined above.
model.compile(optimizer = 'adam', loss = loss)

In [None]:
# Configure the model to save checkpoints while it trains, so that it can later be loaded from a checkpoint.

# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Path template for the checkpoint files: os.path.join combines checkpoint_dir with "ckpt_{epoch}".
# {epoch} is replaced with the current epoch number during training, giving a separate checkpoint per epoch.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# tf.keras.callbacks.ModelCheckpoint saves the model during training.
#   filepath: where to write the checkpoint files (checkpoint_prefix here).
#   save_weights_only=True: save only the weights of the model, not its full architecture.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)
# With this configuration a checkpoint containing the model weights is written at the end of each epoch.

In [None]:
# Train the model for 40 epochs; checkpoint_callback saves the weights as training progresses.
history = model.fit(data, epochs = 40, callbacks = [checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
# After training, rebuild the model with a batch size of 1 so that it runs on a single sequence at a time.
model = build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, batch_size = 1)

SyntaxError: invalid syntax. Perhaps you forgot a comma? (<ipython-input-1-505df83635e0>, line 2)

In [None]:
# Training wrote one checkpoint per epoch (ckpt_1, ckpt_2, ..., ckpt_40).
# tf.train.latest_checkpoint returns the path of the most recent checkpoint in
# checkpoint_dir, or None when there is none.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError("No checkpoint found in {}".format(checkpoint_dir))
model.load_weights(latest_checkpoint)
# 1 is the batch size; None means the length of the sequence is not known in advance.
model.build(tf.TensorShape([1,None]))

In [None]:
# Optionally load the weights saved after one specific epoch instead of the latest ones.
# This replaces whatever weights the model currently holds. checkpoint_num is the epoch to load.
checkpoint_num = 10

# model.load_weights expects the checkpoint path itself, not a checkpoint reader object.
# The path is built from the same "ckpt_{epoch}" template that was used when saving,
# so for checkpoint 10 it points at ckpt_10 inside checkpoint_dir.
model.load_weights(checkpoint_prefix.format(epoch=checkpoint_num))

# Build with input shape (1, None): batch size 1 and a variable sequence length.
model.build(tf.TensorShape([1, None]))

In [None]:
def generate_text(model, start_string, num_generate=1000, temperature=1.0):
  """Generate text one character at a time, seeded with start_string.

  The whole seed is fed to the model first. After that only the most recently
  sampled character is fed back in, together with the state the model kept
  from the previous steps, so the model passed in is expected to be the
  stateful one rebuilt with batch size 1.

  Args:
    model: Trained model that maps a (1, sequence_length) batch of character
      indices to per-step logits over the vocabulary.
    start_string: Non-empty seed text. Every character must be a key of
      char2idx.
    num_generate: Number of characters to generate (default 1000).
    temperature: Positive number the logits are divided by before sampling.
      Low temperatures give more predictable text, higher temperatures give
      more surprising text (default 1.0).

  Returns:
    start_string followed by the generated characters.

  Raises:
    ValueError: If start_string is empty, num_generate is negative, or
      temperature is not positive.
    KeyError: If start_string contains a character that is not in char2idx.
  """
  # Validate up front so that bad arguments fail with a clear message instead
  # of an obscure error from deep inside the sampling loop.
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if num_generate < 0:
    raise ValueError("num_generate must not be negative")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Vectorize the seed and add a leading batch dimension of size 1.
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters are collected here and joined once at the end.
  text_generated = []

  # Clear any state left over from earlier calls before starting a new text.
  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)
    # Drop the batch dimension: (1, steps, vocab) -> (steps, vocab).
    predictions = tf.squeeze(predictions, 0)

    # Scale the logits, then sample from the resulting categorical
    # distribution; [-1, 0] picks the sample drawn for the last time step.
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # The sampled character becomes the next input; the earlier context is
    # carried by the state kept inside the model.
    input_eval = tf.expand_dims([predicted_id], 0)

    # Turn the sampled index back into its character.
    text_generated.append(idx2char[predicted_id])

  return start_string + ''.join(text_generated)

In [None]:
# Ask for a seed string, then print the text generated from it.
# Every character typed must occur in the training text, because generate_text looks each one up in char2idx.
user = input("Type a starting string: ")
print(generate_text(model, user))