<a href="https://colab.research.google.com/github/Daksh-Dhaker/Neural-Networks/blob/main/NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
import tensorflow as tf

In [3]:
# Read the whole corpus into memory. The context manager closes the file handle
# as soon as the read finishes, and the explicit encoding keeps the result
# independent of the platform's default encoding.
with open("shakespeare.txt", "r", encoding="utf-8") as corpus_file:
  text = corpus_file.read()

In [6]:
# Vocabulary: every distinct character of the corpus, in sorted order.
# A character's position in this list is used as its integer id.
vocab = list(set(text))
vocab.sort()

In [8]:
len(vocab) # number of distinct characters found in the corpus (the vocabulary size)

84

In [10]:
# Text processing: list each (index, character) pair that the encoding
# dictionary built from `vocab` will use.
for idx, ch in enumerate(vocab):
  print((idx, ch))

(0, '\n')
(1, ' ')
(2, '!')
(3, '"')
(4, '&')
(5, "'")
(6, '(')
(7, ')')
(8, ',')
(9, '-')
(10, '.')
(11, '0')
(12, '1')
(13, '2')
(14, '3')
(15, '4')
(16, '5')
(17, '6')
(18, '7')
(19, '8')
(20, '9')
(21, ':')
(22, ';')
(23, '<')
(24, '>')
(25, '?')
(26, 'A')
(27, 'B')
(28, 'C')
(29, 'D')
(30, 'E')
(31, 'F')
(32, 'G')
(33, 'H')
(34, 'I')
(35, 'J')
(36, 'K')
(37, 'L')
(38, 'M')
(39, 'N')
(40, 'O')
(41, 'P')
(42, 'Q')
(43, 'R')
(44, 'S')
(45, 'T')
(46, 'U')
(47, 'V')
(48, 'W')
(49, 'X')
(50, 'Y')
(51, 'Z')
(52, '[')
(53, ']')
(54, '_')
(55, '`')
(56, 'a')
(57, 'b')
(58, 'c')
(59, 'd')
(60, 'e')
(61, 'f')
(62, 'g')
(63, 'h')
(64, 'i')
(65, 'j')
(66, 'k')
(67, 'l')
(68, 'm')
(69, 'n')
(70, 'o')
(71, 'p')
(72, 'q')
(73, 'r')
(74, 's')
(75, 't')
(76, 'u')
(77, 'v')
(78, 'w')
(79, 'x')
(80, 'y')
(81, 'z')
(82, '|')
(83, '}')


In [13]:
# Encoding table: character -> its position in `vocab`.
char_to_ind = dict(zip(vocab, range(len(vocab))))

In [14]:
# Decoding table: indexing this array with an id (or an array of ids)
# gives back the corresponding character(s).
ind_to_char = np.asarray(vocab)

In [16]:
# The full corpus with every character replaced by its integer id.
encoded_text = np.array(list(map(char_to_ind.__getitem__, text)))

In [17]:
#Creating batches
# Length of each input sequence; the corpus is later cut into chunks of
# seq_len + 1 ids so that every chunk can supply both an input and a target.
seq_len = 120


In [18]:
# Number of complete chunks of seq_len + 1 characters that fit in the corpus.
total_seq = len(text)//(seq_len+1)

In [20]:
total_seq  # display the chunk count computed from the corpus length

45005

In [21]:
# Wrap the encoded corpus in a tf.data.Dataset, sliced along its first axis
# (one character id per element).
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [22]:
type(char_dataset)  # inspect the concrete dataset class that was created

tensorflow.python.data.ops.from_tensor_slices_op._TensorSliceDataset

In [24]:
# Group the character stream into chunks of seq_len + 1 ids; the final,
# incomplete chunk is discarded (drop_remainder).
sequences = char_dataset.batch(seq_len+1,drop_remainder = True)

In [25]:
def create_seq_targets(seq):
  """Split one chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the
  chunk without its first element, so target[i] is the element that
  follows input[i]. Example: "Hello my name" gives
  ("Hello my nam", "ello my name").
  """
  return seq[:-1], seq[1:]

In [26]:
# Turn every chunk into an (input, target) pair via create_seq_targets.
dataset = sequences.map(create_seq_targets)

In [27]:
# Number of (input, target) sequences per training batch; also passed to
# create_model as the fixed batch dimension of the training model.
batch_size = 128

In [28]:
# Size of the shuffle buffer used when randomizing the order of sequences.
buffer_size = 10000
# Build the batched dataset directly from `sequences` instead of from the
# previous value of `dataset`, so re-running this cell does not shuffle and
# batch data that has already been batched.
dataset = (
    sequences.map(create_seq_targets)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [29]:
#Creating the model
# Vocabulary size: passed to create_model, where it sizes both the Embedding
# input and the Dense output layer.
vocab_size = len(vocab)
# Second argument of the Embedding layer (size of each character embedding).
embed_dim = 64

In [30]:
# Number of units in the GRU layer.
# NOTE(review): 1026 is an unusual size (1024 is the common choice). It
# presumably has to match the weights file loaded later, so confirm before changing.
rnn_neurons = 1026

In [31]:
from tensorflow.keras.losses import sparse_categorical_crossentropy

In [32]:
def sparse_cat_loss(y_true,y_pred):
  """Sparse categorical cross-entropy for a model that outputs raw logits.

  Thin wrapper around Keras' sparse_categorical_crossentropy that fixes
  from_logits=True, so it can be passed directly as the `loss` argument
  of model.compile.
  """
  loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
  return loss

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding,GRU,Dense

def create_model(vocab_size,embed_dim,rnn_neurons,batch_size):
  """Build and compile the character-level model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: number of distinct characters; sizes the Embedding input
      and the Dense output.
    embed_dim: second argument of the Embedding layer.
    rnn_neurons: number of units in the GRU layer.
    batch_size: fixed batch dimension of the input; the GRU is created with
      stateful=True, so the batch size is part of the input shape.

  Returns:
    A compiled Sequential model (Adam optimizer, sparse_cat_loss). The Dense
    layer has no activation, so the outputs are logits.
  """
  layers = [
    # Sequence length is left as None so inputs of any length are accepted.
    Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
    GRU(
      rnn_neurons,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform',
    ),
    Dense(vocab_size),
  ]
  model = Sequential(layers)
  model.compile(optimizer='adam', loss=sparse_cat_loss)
  return model

In [34]:
# Model instance for training: its fixed batch dimension equals batch_size.
model = create_model(vocab_size = vocab_size,embed_dim = embed_dim,rnn_neurons = rnn_neurons,batch_size = batch_size )

In [35]:
model.summary()  # print the layer-by-layer output shapes and parameter counts

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (128, None, 64)           5376      
                                                                 
 gru (GRU)                   (128, None, 1026)         3361176   
                                                                 
 dense (Dense)               (128, None, 84)           86268     
                                                                 
Total params: 3452820 (13.17 MB)
Trainable params: 3452820 (13.17 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [36]:
# Take a single batch from the dataset and run the freshly created model on
# it, keeping the inputs, targets and predictions for inspection.
for input_example_batch,target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)

In [40]:
# Sample one character id per time step from the logits of the first sequence
# in the example batch.
sampled_indices = tf.random.categorical(example_batch_predictions[0],num_samples =1)

In [42]:
# Drop the trailing num_samples axis and convert the result to a NumPy array.
# NOTE(review): this rebinds sampled_indices, so re-running this cell without
# re-running the sampling cell first will likely fail — confirm.
sampled_indices = tf.squeeze(sampled_indices,axis =-1).numpy()

In [44]:
# Decode the sampled ids back into characters. The expression is live (not
# commented out) so that the array shown as this cell's output is actually
# produced when the notebook is run from top to bottom.
ind_to_char[sampled_indices]

array(['A', ' ', 'V', 'x', "'", 'B', 'r', ')', '!', '\n', 'a', '3', 'r',
       'Y', 'T', 'd', 'E', 'K', 'u', 'V', '4', 'q', 'S', '>', 'r', 'B',
       's', 'z', 'X', 'x', 'Y', 'A', 'u', 'U', 'S', ';', 's', "'", '0',
       'a', ',', 'C', '6', 'Y', 'k', '6', '_', 'n', 'z', 'g', ';', 'L',
       'L', 'n', 'g', '<', ';', '>', 'C', 'h', '?', '\n', 'u', 'f', 'l',
       'O', 'D', '_', '0', '&', '?', 'j', 'c', 'K', 'z', '1', 'h', 'Q',
       'H', 'M', '"', 'E', 'x', 'g', ';', '2', 'O', 'W', 'E', 'e', 'Q',
       '<', '<', 'h', 'S', 'i', 'C', 'n', 'm', '_', '0', 'A', 'M', 'k',
       'i', 'R', "'", '4', 'U', 'F', '&', 'R', '9', '3', 'x', 'k', 'Z',
       'G', 'V', 'b'], dtype='<U1')

In [47]:
# Training is left disabled in this notebook; a later cell loads weights from
# 'shakespeare_gen.h5' instead of fitting the model here.
# epochs = 30
# model.fit(dataset,epochs = epochs)
from tensorflow.keras.models import load_model  # NOTE(review): load_model is not used in the visible cells

In [48]:
# Rebuild the same architecture with a batch dimension of 1 so a single seed
# sequence can be fed to the stateful model. This replaces the earlier `model`.
model = create_model(vocab_size,embed_dim,rnn_neurons,batch_size=1)

In [49]:
# Load the saved weights from the HDF5 file into the batch-size-1 model.
model.load_weights('shakespeare_gen.h5')

# Build the model for inputs of shape (1, any sequence length).
model.build(tf.TensorShape([1,None]))

In [50]:
def generate_text(model,start_seed,gen_size = 500,temp =1.0):
  """Generate text one character at a time, starting from a seed string.

  Args:
    model: stateful character-level model built with a batch dimension of 1.
      It is called on integer id tensors and must return logits per time step.
    start_seed: non-empty string that primes the model. Every character must
      be a key of char_to_ind.
    gen_size: number of characters to generate after the seed.
    temp: sampling temperature, must be > 0. The logits are divided by it, so
      values below 1 make sampling more conservative and values above 1 make
      it more random.

  Returns:
    start_seed followed by the gen_size generated characters.

  Raises:
    ValueError: if temp is not positive or start_seed is empty.
    KeyError: if start_seed contains a character missing from char_to_ind.
  """
  # Dividing logits by zero or a negative temperature yields inf/NaN or an
  # inverted distribution, so reject it before doing any work.
  if temp <= 0:
    raise ValueError("temp must be a positive number")
  # An empty seed would give the model a zero-length input sequence.
  if not start_seed:
    raise ValueError("start_seed must contain at least one character")

  # Encode the seed and add the batch dimension: shape (1, len(start_seed)).
  input_eval = tf.expand_dims([char_to_ind[s] for s in start_seed],0)
  text_generated = []
  # The GRU is stateful: clear state left over from any previous call.
  model.reset_states()

  for _ in range(gen_size):
    # Remove the batch dimension and apply the temperature to the logits.
    logits = tf.squeeze(model(input_eval),0)/temp
    # Sample for every time step, but keep only the last step's sample.
    predicted_id = tf.random.categorical(logits,num_samples =1)[-1,0].numpy()
    # Only the new character is fed next; earlier context lives in the GRU state.
    input_eval = tf.expand_dims([predicted_id],0)
    text_generated.append(ind_to_char[predicted_id])

  return (start_seed+"".join(text_generated))

In [57]:
# Generate 100 characters continuing from the seed "JULIET" and print the
# result; sampling is random, so the output differs from run to run.
print(generate_text(model,"JULIET",gen_size =100))

JULIET6

Sense? conclusion! and I hold him sleep.
  VALENTINE. Thus, in the dead Lopitus of Venice,
    An
