<a href="https://colab.research.google.com/github/Azhar-999-coder/conversational_bot/blob/master/Response_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, Concatenate, Permute, Dot, Input, LSTM, Multiply, GRU
from tensorflow.keras.layers import RepeatVector, Dense, Activation, Lambda
from tensorflow.keras.models import Model
import numpy as np
import os
import matplotlib.pyplot as plt

num_words=30000
max_length=30
Tx = max_length
Ty = max_length

def preprocess(path):
    dirlist = os.listdir(path)
    human_sentences=[]
    machine_sentences=[]
    for File in dirlist:
        with open(path+"/"+File, 'r') as raw_lines:
            lineList = []
            while True:        
                line = raw_lines.readline()
                if not line:
                    break
                lineList.append(line)

        for i in range(0, len(lineList)):
            if(i%2)==0:
                human_sentences.append(lineList[i])
            else:
                machine_sentences.append(lineList[i])    

    tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=num_words, oov_token='<OOV>')
    tokenizer.fit_on_texts(human_sentences)
    human_word_index = tokenizer.word_index
    human_reverse_word_index = {a:b for (b,a) in human_word_index.items()}

    tokenizer2 = tf.keras.preprocessing.text.Tokenizer(num_words=num_words, oov_token='<OOV>')
    
    tokenizer2.fit_on_texts(machine_sentences)
    machine_word_index = tokenizer2.word_index
    machine_reverse_word_index = {a:b for (b,a) in machine_word_index.items()}

    human_sequences = tokenizer.texts_to_sequences(human_sentences)
    human_padded = tf.keras.preprocessing.sequence.pad_sequences(human_sequences, maxlen=max_length, truncating = "post")

    machine_sequences = tokenizer2.texts_to_sequences(machine_sentences)
    machine_padded = tf.keras.preprocessing.sequence.pad_sequences(machine_sequences, maxlen=max_length, truncating = "post")  


    return human_padded, machine_padded, human_word_index, human_reverse_word_index, machine_word_index, machine_reverse_word_index

X, Y, human_vocab, reverse_human_vocab, machine_vocab, reverse_machine_vocab = preprocess('Data/')


In [3]:
!wget http://nlp.stanford.edu/data/glove.twitter.27B.zip

--2020-06-21 08:58:14--  http://nlp.stanford.edu/data/glove.twitter.27B.zip
Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://nlp.stanford.edu/data/glove.twitter.27B.zip [following]
--2020-06-21 08:58:14--  https://nlp.stanford.edu/data/glove.twitter.27B.zip
Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://downloads.cs.stanford.edu/nlp/data/glove.twitter.27B.zip [following]
--2020-06-21 08:58:15--  http://downloads.cs.stanford.edu/nlp/data/glove.twitter.27B.zip
Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22
Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1520408563 (1.4G) [appli

In [4]:
!unzip glove.twitter.27B.zip

Archive:  glove.twitter.27B.zip
  inflating: glove.twitter.27B.25d.txt  
  inflating: glove.twitter.27B.50d.txt  
  inflating: glove.twitter.27B.100d.txt  
  inflating: glove.twitter.27B.200d.txt  


In [2]:
embeddings_index = {}
f = open('glove.twitter.27B.100d.txt')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()

print('Found %s word vectors.' % len(embeddings_index))

Found 1193514 word vectors.


In [3]:
embedding_dim=100
embedding_matrix = np.zeros((len(human_vocab)+1, embedding_dim))
for word, i in human_vocab.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in embedding index will be all-zeros.
        embedding_matrix[i] = embedding_vector

In [4]:
embeddingLayer = tf.keras.layers.Embedding((len(human_vocab)+1),embedding_dim, weights=[embedding_matrix], trainable=False, input_length=max_length)

In [5]:
class Encoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
    super(Encoder, self).__init__()
    self.batch_sz = batch_sz
    self.enc_units = enc_units
    self.embedding = embeddingLayer
    self.gru = GRU(self.enc_units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')

  def call(self, x, hidden):
    x = self.embedding(x)
    output, state = self.gru(x, initial_state=hidden)
    return output, state

  def initialize_hidden_state(self):
    return tf.zeros((self.batch_sz, self.enc_units))


In [6]:
class BahdanauAttention(tf.keras.layers.Layer):
  def __init__(self, units):
    super(BahdanauAttention, self).__init__()
    self.W1 = tf.keras.layers.Dense(units)
    self.W2 = tf.keras.layers.Dense(units)
    self.V = tf.keras.layers.Dense(1)

  def call(self, hidden_state, values):
    hidden_state_with_time_axis = tf.expand_dims(hidden_state, 1)

    concat = self.V(tf.nn.tanh(
        self.W1(hidden_state_with_time_axis) + self.W2(values)))

    attention_weights = tf.nn.softmax(concat, axis=1)

    context_vector = attention_weights * values
    context_vector = tf.reduce_sum(context_vector, axis=1)

    return context_vector, attention_weights

In [23]:
class Decoder(tf.keras.Model):
  def __init__(self, vocab_size, embedding_dim, n_s, batch_size):
    super(Decoder, self).__init__()
    self.vocab_size = vocab_size
    self.embedding_dim = embedding_dim
    self.n_s = n_s
    self.batch_size = batch_size
    self.embedding = embeddingLayer
    self.gru = GRU(self.n_s, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
    self.attention = BahdanauAttention(n_s)
    self.fc = Dense(n_s)

  def call(self, x, hidden, encoder_output):
    context_vector, attention_weights = self.attention(hidden, encoder_output)

    x = self.embedding(x)

    x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

    output, state = self.gru(x)

    output = tf.reshape(output, (-1, output.shape[2]))

    x = self.fc(output)

    return x, state, attention_weights


In [27]:
BATCH_SIZE=len(X)
n_a = 64
n_s = 64

In [13]:
def create_model(n_a, n_s, human_vocab_size, machine_vocab_size):

  inputs = Input(shape=(30,))
    
  encoder = Encoder(human_vocab_size, embedding_dim, n_a, BATCH_SIZE)

  hidden = encoder.initialize_hidden_state()
  output, hidden = encoder(inputs, hidden)
  
  attention_layer = BahdanauAttention(10)
  context, attention_weights = attention_layer(hidden, output)

  decoder = Decoder(machine_vocab_size, embedding_dim, n_s, BATCH_SIZE)

  outputs = []
  for t in range (0,Ty):
    x, state, attention_weights = decoder.decode(tf.random.uniform((BATCH_SIZE, 1)), hidden, output)
    outputs.append(x)
  

  model = Model(inputs=inputs, outputs=outputs)
    
  return model

In [14]:
model = create_model(n_a,n_s,len(human_vocab),len(machine_vocab))

In [15]:
model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [16]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 30)]         0                                            
__________________________________________________________________________________________________
encoder_1 (Encoder)             ((50000, 30, 64), (5 942672      input_2[0][0]                    
__________________________________________________________________________________________________
bahdanau_attention_3 (BahdanauA ((50000, 64), (50000 16769       encoder_1[0][1]                  
                                                                 encoder_1[0][1]                  
                                                                 encoder_1[0][1]                  
                                                                 encoder_1[0][1]            

In [17]:
model.fit(X,Y, epochs=10)

Epoch 1/10


InvalidArgumentError: ignored

In [172]:
print(Y.shape)

(50000, 30)


In [24]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction='none')

def loss_function(real, pred):
  mask = tf.math.logical_not(tf.math.equal(real, 0))
  loss_ = loss_object(real, pred)

  mask = tf.cast(mask, dtype=loss_.dtype)
  loss_ *= mask

  return tf.reduce_mean(loss_)


In [25]:
encoder = Encoder(len(human_vocab), embedding_dim, n_a, BATCH_SIZE)
decoder = Decoder(len(machine_vocab), embedding_dim, n_s, BATCH_SIZE)
@tf.function
def train_step(inp, targ, enc_hidden):
  loss = 0


  with tf.GradientTape() as tape:
    enc_output, enc_hidden = encoder(inp, enc_hidden)

    dec_hidden = enc_hidden

    dec_input = tf.expand_dims([machine_vocab['<OOV>']] * BATCH_SIZE, 1)

    # Teacher forcing - feeding the target as the next input
    for t in range(1, targ.shape[1]):
      # passing enc_output to the decoder
      predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

      loss += loss_function(targ[:, t], predictions)

      # using teacher forcing
      dec_input = tf.expand_dims(targ[:, t], 1)

  batch_loss = (loss / int(targ.shape[1]))

  variables = encoder.trainable_variables + decoder.trainable_variables

  gradients = tape.gradient(loss, variables)

  optimizer.apply_gradients(zip(gradients, variables))

  return batch_loss

In [28]:
EPOCHS = 10
inp = X
targ=Y

for epoch in range(EPOCHS):

  enc_hidden = encoder.initialize_hidden_state()
  total_loss = 0

  batch_loss = train_step(inp, targ, enc_hidden)
  total_loss += batch_loss

  if batch % 100 == 0:
    print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1,
                                                   batch,
                                                   batch_loss.numpy()))
  # saving (checkpoint) the model every 2 epochs
  if (epoch + 1) % 2 == 0:
    checkpoint.save(file_prefix = checkpoint_prefix)

  print('Epoch {} Loss {:.4f}'.format(epoch + 1,
                                      total_loss / steps_per_epoch))
  print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


TypeError: ignored