In [40]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers
import time

In [41]:
df = pd.read_csv('clean_data.csv')

In [42]:
df.shape

(3750, 3)

In [43]:
# filtering out the required length of sentences 
seq_len = 30
new_df = pd.DataFrame(columns = ['title', 'response'])
for i in range(1, len(df.title)):
    if len(df['title'][i].split()) <= seq_len and len(df['response'][i].split()) <= seq_len:
        new_df = new_df.append({'title': df.title[i], 'response': df.response[i]}, ignore_index=True)

In [44]:
new_df.shape 

(920, 2)

In [45]:
new_df.tail()

Unnamed: 0,title,response
915,<start> came to terms with stress <end>,<start> where i can take this test online <end>
916,<start> if this is any indication of how my li...,<start> i feel this on multiple levels . hang ...
917,<start> cod can help manage your stress <end>,<start> less cod oils helps me a lot dealing w...
918,<start> relative stress of confronting versus ...,"<start> this sounds interesting , i would alwa..."
919,<start> need to fulfill your promise <end>,<start> grandma i think you meant to send this...


In [46]:
# creating vocabulary funct
def tokenize(text, lang):
    tensor = lang.texts_to_sequences(list(text))   
    tensor = pad_sequences(tensor, padding='post')

    return tensor

In [47]:
# creating same vocab for both input and outputs/target
inp = new_df.title.values.tolist()
op = new_df.response.values.tolist()
inp.extend(op)
lang = tf.keras.preprocessing.text.Tokenizer(filters='')
lang.fit_on_texts(inp)

In [48]:
input_tensor = tokenize(new_df['title'], lang)
target_tensor = tokenize(new_df['response'], lang)

In [49]:
len(lang.word_counts) # vocab size - 1

2938

In [50]:
len(input_tensor)

920

In [51]:
max_len_target, max_len_input = target_tensor.shape[1], input_tensor.shape[1]

model

In [52]:
BUFFER_SIZE = len(input_tensor)
BATCH_SIZE = 64
steps_per_epoch = len(input_tensor)//BATCH_SIZE
embedding_dim = 256
units = 1024
vocab_size = len(lang.word_index)+1

# creating dataset
dataset = tf.data.Dataset.from_tensor_slices((input_tensor, target_tensor)).shuffle(BUFFER_SIZE)
# combining consecutive elements of the dataset into batches
dataset = dataset.batch(BATCH_SIZE, drop_remainder=True)

In [53]:
# example batches to check the functions
example_input_batch, example_target_batch = next(iter(dataset))
example_input_batch.shape, example_target_batch.shape

(TensorShape([64, 30]), TensorShape([64, 30]))

In [54]:
class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, enc_units, batch_sz):
        super(Encoder, self).__init__()
        self.batch_sz = batch_sz
        self.enc_units = enc_units
        # layers 
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.enc_units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.enc_units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.enc_units,), initializer="random_normal", trainable=True)

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        return tf.zeros((self.batch_sz, self.enc_units))    

In [55]:
encoder = Encoder(vocab_size, embedding_dim, units, BATCH_SIZE)

# testing the encoder layers on sample 
sample_hidden = encoder.initialize_hidden_state()
sample_output, sample_hidden = encoder(example_input_batch, sample_hidden)
print ('Encoder output shape: (batch size, sequence length, units) {}'.format(sample_output.shape))
print ('Encoder Hidden state shape: (batch size, units) {}'.format(sample_hidden.shape))

encoder.summary()

Encoder output shape: (batch size, sequence length, units) (64, 30, 1024)
Encoder Hidden state shape: (batch size, units) (64, 1024)
Model: "encoder_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      multiple                  752384    
_________________________________________________________________
gru_2 (GRU)                  multiple                  3938304   
Total params: 4,722,432
Trainable params: 4,722,432
Non-trainable params: 0
_________________________________________________________________


In [56]:
class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        query_with_time_axis = tf.expand_dims(query, 1)

        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))

        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)

        return context_vector, attention_weights

In [57]:
attention_layer = BahdanauAttention(10)

# testing the attention layer on thr sample data
attention_result, attention_weights = attention_layer(sample_hidden, sample_output) # give the encoder layer's output to the  attention model
print("Attention result shape: (batch size, units) {}".format(attention_result.shape))
print("Attention weights shape: (batch_size, sequence_length, 1) {}".format(attention_weights.shape))

Attention result shape: (batch size, units) (64, 1024)
Attention weights shape: (batch_size, sequence_length, 1) (64, 30, 1)


In [58]:
class Decoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, dec_units, batch_sz):
        super(Decoder, self).__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.dec_units, return_sequences=True, return_state=True, recurrent_initializer='glorot_uniform')
        self.fc = tf.keras.layers.Dense(vocab_size)

        self.attention = BahdanauAttention(self.dec_units)

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.dec_units), initializer="random_normal", trainable=True)
        self.b = self.add_weight(shape=(self.dec_units,), initializer="random_normal", trainable=True)

    def call(self, x, hidden, enc_output):
        context_vector, attention_weights = self.attention(hidden, enc_output)
        x = self.embedding(x)
        x = tf.concat([tf.expand_dims(context_vector, 1), x], axis=-1)

        output, state = self.gru(x)

        output = tf.reshape(output, (-1, output.shape[2])) # (batch_size, hidden_size)
        x = self.fc(output)

        return x, state, attention_weights

In [59]:
decoder = Decoder(vocab_size, embedding_dim, units, BATCH_SIZE)

sample_decoder_output, _, _ = decoder(tf.random.uniform((BATCH_SIZE, 1)),sample_hidden, sample_output)
print ('Decoder output shape: (batch_size, vocab size) {}'.format(sample_decoder_output.shape))

decoder.summary()

Decoder output shape: (batch_size, vocab size) (64, 2939)
Model: "decoder_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      multiple                  752384    
_________________________________________________________________
gru_3 (GRU)                  multiple                  7084032   
_________________________________________________________________
dense_16 (Dense)             multiple                  3012475   
_________________________________________________________________
bahdanau_attention_5 (Bahdan multiple                  2100225   
Total params: 12,951,164
Trainable params: 12,951,164
Non-trainable params: 0
_________________________________________________________________


In [60]:
optimizer = tf.keras.optimizers.Adam()
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def loss_function(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_ = loss_object(real, pred)

    mask = tf.cast(mask, dtype=loss_.dtype)
    loss_ *= mask

    return tf.reduce_mean(loss_)

In [61]:
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")
checkpoint = tf.train.Checkpoint(optimizer=optimizer, encoder=encoder, decoder=decoder)

Model Training

In [62]:
import gc

In [63]:
@tf.function
def train_step(inp, targ, enc_hidden):
    loss = 0

    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(inp, enc_hidden)

        dec_hidden = enc_hidden

        dec_input = tf.expand_dims([lang.word_index['<start>']] * BATCH_SIZE, 1)

        # print(targ.shape[1])
        # using Teacher forcing
        for t in range(1, targ.shape[1]):

            # passing enc_output to the decoder
            # decoder creates a context vector based on the input

            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden, enc_output)

            loss += loss_function(targ[:, t], predictions)

            dec_input = tf.expand_dims(targ[:, t], 1)
            del predictions
            gc.collect()

    del dec_hidden
    del dec_input
    gc.collect()

    batch_loss = (loss / int(targ.shape[1]))

    variables = encoder.trainable_variables + decoder.trainable_variables

    gradients = tape.gradient(loss, variables)

    optimizer.apply_gradients(zip(gradients, variables))

    del variables
    del gradients
    gc.collect()

    return batch_loss

In [67]:
EPOCHS = 30

for epoch in range(EPOCHS):
    start = time.time()

    enc_hidden = encoder.initialize_hidden_state()
    total_loss = 0

    for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = train_step(inp, targ, enc_hidden)
        total_loss += batch_loss

        if batch % 10 == 0:
            print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss.numpy()))
        
    #   saving the model every 10 epochs
    if (epoch + 1) % 5 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 1.7373
Epoch 1 Batch 10 Loss 1.6272
Epoch 1 Loss 1.7040
Time taken for 1 epoch 7.753515720367432 sec

Epoch 2 Batch 0 Loss 1.7368
Epoch 2 Batch 10 Loss 1.6692
Epoch 2 Loss 1.6434
Time taken for 1 epoch 7.656006097793579 sec

Epoch 3 Batch 0 Loss 1.6193
Epoch 3 Batch 10 Loss 1.6094
Epoch 3 Loss 1.5911
Time taken for 1 epoch 7.641568422317505 sec

Epoch 4 Batch 0 Loss 1.5565
Epoch 4 Batch 10 Loss 1.5510
Epoch 4 Loss 1.5421
Time taken for 1 epoch 7.670429944992065 sec

Epoch 5 Batch 0 Loss 1.4945
Epoch 5 Batch 10 Loss 1.8029
Epoch 5 Loss 1.4991
Time taken for 1 epoch 7.633963584899902 sec

Epoch 6 Batch 0 Loss 1.2832
Epoch 6 Batch 10 Loss 1.5200
Epoch 6 Loss 1.4522
Time taken for 1 epoch 7.6587982177734375 sec

Epoch 7 Batch 0 Loss 1.4204
Epoch 7 Batch 10 Loss 1.3455
Epoch 7 Loss 1.3915
Time taken for 1 epoch 7.647753477096558 sec

Epoch 8 Batch 0 Loss 1.4039
Epoch 8 Batch 10 Loss 1.4676
Epoch 8 Loss 1.3608
Time taken for 1 epoch 7.657830238342285 sec

Epoch 9 Batch 0

In [None]:
# saving the model
encoder.save("my_model/encoder")
decoder.save("my_model/decoder")

In [None]:
!ls {checkpoint_dir}

In [None]:
from google.colab import files
files.download('./training_checkpoints')

Evaluating the Model

In [69]:
import re

In [70]:
def preprocess_sentence(w):

    # creating a space between a word and the punctuation following it
    w = re.sub(r'[" "]+', " ", w)

    # replacing everything with space except (a-z, A-Z, ".", "?", "!", ",")
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)

    w = w.strip()
    w = w.lower()

    w = '<start> ' + w + ' <end>'
    # functions for spelling mistakes removals ans contractions are in other file...leave them be for now.

    return w

In [71]:
def evaluate(sentence):

    # modifying the input sentence to feed into the model
    sentence = preprocess_sentence(sentence)
    inputs = [lang.word_index[i] for i in sentence.split(' ') if i in lang.word_index.keys()] # conversion into tokens, skipping unknown words
    inputs = tf.keras.preprocessing.sequence.pad_sequences([inputs], maxlen=max_len_input, padding='post')
    inputs = tf.convert_to_tensor(inputs)

    result = ''

    hidden = [tf.zeros((1, units))]
    enc_out, enc_hidden = encoder(inputs, hidden)

    dec_hidden = enc_hidden
    dec_input = tf.expand_dims([lang.word_index['<start>']], 0)

    for t in range(max_len_target):
        predictions, dec_hidden, attention_weights = decoder(dec_input, dec_hidden, enc_out)

        predicted_id = tf.argmax(predictions[0]).numpy()

        result += lang.index_word[predicted_id] + ' '

        # stopping the sentence prediction process as soon as end is predicted
        if lang.index_word[predicted_id] == '<end>':
            return result

    # feeding the predicted Id back into the model
    dec_input = tf.expand_dims([predicted_id], 0)

    return result

In [72]:
def bot_says(sentence):
    result = evaluate(sentence)

    # print('Input: %s' % (sentence))
    print('Jolly : {}'.format(result))

In [None]:
# restoring the latest checkpoint in checkpoint_dir
# checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))

In [None]:
bot_says('It makes me sad to go on like this without hope')
bot_says('I do not know what to do')
bot_says('I feel tired all the time')
bot_says('I think no one likes me ')


In [None]:
greeting_words = ['hi', 'hello', 'wassup', 'good morning', 'good evening', 'good afternoon']
# the chatting loop
while True:
    sentence = input('User : ')
    if 'bye' in sentence.split(' '):
        print('Jolly : Okay bye and take care.')
        break
    elif set(greeting_words) & set(sentence.split(' ')):
        print('Jolly : Hello, I am a Jolly. I am here to help you. Please share your problem')
    else:
        bot_says(sentence)
