In [None]:
import os 
import numpy as np
import pandas as pd
import re
import pickle #serialization and deserialization
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

os.environ['TF_CPP_MIN_LOG_LEVEL']='2'#limit the tensorflow warnings

In [10]:
"""
INPUTS => Encoder=>EVC OUTPUTS,THOUGHT VECTOR =>

ATTENTION NETWORK => ATTENTION WEIGHTS (X ENC OUTPUTS) =>ATTENTION OUTPUT

ATTENTION OUTPUT,ACTUAL OUTPUT(input) =>DECODER => FINAL OUTPUT

*total words [he sghe name is deas ghj]

name=> Embedding layer => embedding for 'name'(one hot encoding)*

** ENCODER ARCHITECTURE:-

INPUTS->EMBEDDING -> GRU
**
ATTENTION VECTOR ARCHITECTURE

ENC OUTPUTS => ENC LAYER------------
                   +              ------- ACTIVATION => FINAL LAYER => ATTENTION WEIGHTS  (here rather performing activation on each layes it would be great if we do activation on combined layer)
THOUGHT VECTOR => THOUGHT LAYER -----

-------------------------------------------------"""


"\nINPUTS => Encoder=>EVC OUTPUTS,THOUGHT VECTOR =>\n\nATTENTION NETWORK => ATTENTION WEIGHTS (X ENC OUTPUTS) =>ATTENTION OUTPUT\n\nATTENTION OUTPUT,ACTUAL OUTPUT(input) =>DECODER => FINAL OUTPUT\n\n*total words [he sghe name is deas ghj]\n\nname=> Embedding layer => embedding for 'name'(one hot encoding)*\n\n** ENCODER ARCHITECTURE:-\n\nINPUTS->EMBEDDING -> GRU\n**\nATTENTION VECTOR ARCHITECTURE\n\nENC OUTPUTS => ENC LAYER------------\n                   +              ------- ACTIVATION => FINAL LAYER => ATTENTION WEIGHTS  (here rather performing activation on each layes it would be great if we do activation on combined layer)\nTHOUGHT VECTOR => THOUGHT LAYER -----\n\n-------------------------------------------------"

In [2]:
#tf.keras.Model==>Parent class
#encoder_units==> no.of parameters gru uses like how neurons in neural network
class Encoder(tf.keras.Model):
    def __init__(self,vocab_size,embedding,encoder_units,batch_size):
        super(Encoder,self).__init__()
        
        self.batch_size=batch_size
        self.enc_units=encoder_units
        self.embedding=tf.keras.layers.Embedding(vocab_size,embedding) #the embedding layer works this way like it takes word and send the embedding for that word. The embedding layer takes the input in one hot encoding so that is the reason the vocab_size is sent as input and next parameter enbedding is about the size of the embedding which we want
        self.gru=tf.keras.layers.GRU(self.enc_units,return_sequences=True,return_state=True,recurrent_initializer='glorot_uniform', kernel_regularizer=tf.keras.regularizers.L2(0.001)) #return_sequences=True The every output is used as next input so True is kept,return_state=True-->the thougth vector,kernel_regularizer-->it is used to because sometimes gru ,lst models make the model overfit to avoid that we used this parameter we have regularizer models L1,L2 the L2 is best than L1
    
    def call(self,inputs,hidden_state):
        embedded_inputs = self.embedding(inputs)
        enc_outputs,thought_vector=self.gru(embedded_inputs,initial_state=hidden_state)
        return enc_outputs,thought_vector

In [3]:
class Attention(tf.keras.layers.Layer):
    def __init__(self,units):
        super(Attention,self).__init__()
        
        self.enc_output_layer=tf.keras.layers.Dense(units,kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.thought_layer   =tf.keras.layers.Dense(units,kernel_regularizer=tf.keras.regularizers.L2(0.001))
        self.final_layer     =tf.keras.layers.Dense(1    ,kernel_regularizer=tf.keras.regularizers.L2(0.001))
    
    def call(self,enc_outputs,thought_vector):
        thought_matrix= tf.expand_dims(thought_vector,1) # converts the vector to a matrix
        
        
        scores = self.final_layer(tf.keras.activations.tanh(self.enc_output_layer(enc_outputs)+self.thought_layer(thought_matrix)))
        attention_weights = tf.keras.activations.softmax(scores,axis=-1)
        
        attention_output = attention_weights * enc_outputs # shape (batch_size, num_outputs , output_size)
        attention_output = tf.reduce_sum(attention_output,axis=1)#New shape (batch_size,output_size) the axis consists of the dimension which we don't want
        
        return attention_output, attention_weights

In [4]:
class Decoder(tf.keras.Model):
    def __init__(self,vocab_size,embedding,decoder_units,batch_size):
        super(Decoder,self).__init__()
        
        self.batch_size=batch_size
        self.dec_units=decoder_units
        self.embedding=tf.keras.layers.Embedding(vocab_size,embedding)
        self.gru=tf.keras.layers.GRU(self.dec_units,return_sequences=True,return_state=True,recurrent_initializer='glorot_uniform', kernel_regularizer=tf.keras.regularizers.L2(0.001))
        
        self.attention = Attention(self.dec_units)
        self.word_output = tf.keras.layers.Dense(vocab_size,kernel_regularizer=tf.keras.regularizers.L2(0.001))

    def call(self,inputs,enc_outputs,thought_vector):
        attention_output,attention_weights=self.attention(enc_outputs,thought_vector)
        #shape of attention_output (batch_size,size_of_embedding)
        embedded_inputs=self.embedding(inputs) #shape (batch_size,num_words,size_of_embedding)
        attention_output=tf.expand_dims(attention_output,1)#shape of attention_output (batch_size,1,size_of_embedding) why this inorder to make the actual ouput and attention output to smae dimensions
        concat_inputs = tf.concat([attention_output,embedded_inputs],axis=-1)
        
        decoder_outputs , hidden_state = self.gru(concat_inputs)#shape (batch_size,1,size_of embedding)
        decoder_outputs = tf.reshape(decoder_outputs,(-1,decoder_outputs.shape[2])) #shape (batch_size,size_of embedding)
        
        final_outputs = self.word_output(decoder_outputs) #gives one hot encoding of vocab size
        return final_outputs, hidden_state,attention_weights

In [5]:
#custom class
class Train:
    def __init__(self):
        self.optimizer=tf.keras.optimizers.Adam()
        self.base_loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,reduction='none')#why SparseCategoricalCrossentropy() because of multi class classification #from_logits=True thi stells that the model has not applied any softmax on loss function #reduction='none' just apply the softmax after the output
        
        
    def loss_function(self,y_real,y_pred):
        base_mask = tf.math.logical_not(tf.math.equal(y_real,0))
        base_loss = self.base_loss_function(y_real,y_pred)
        
        mask = tf.cast(base_mask,dtype=base_loss.dtype)#converts to one datatype to another datattype
        final_loss=mask*base_loss
        
        return tf.reduce_mean(final_loss)
    
    def train_step(self,train_data,label_data,enc_hidden,encoder,decoder,batch_size,label_tokenizer):
        #train data [Neha  lives in noida]
        #label_data [Neha noida me rahatha hey] shape(embedding_size,num_words)
        #enc_hidden [very starting we need to send a values to start]
        #batch_size how many examples we are taking at once
        #label_tokenizer converts vocab to one hot encoding
        
        loss=0
        
        with tf.GradientTape() as tape:
            enc_outputs,thought_vector = encoder(train_data,enc_hidden)
            dec_hidden = thought_vector
            dec_input = tf.expand_dims([label_tokenizer.word_index['<start>']]*batch_size,1)# the 1 is used to make the num_words to 1 in decoder
            
            for index in range(1,label_data.shape[1]):
                outputs,dec_hidden, _ = decoder(dec_input,enc_outputs,dec_hidden)
                
                dec_input = tf.expand_dims(label_data[:,index],1)
                loss= loss+self.loss_function(label_data[:,index],outputs)
        word_loss = loss / int(label_data.shape[1])
        
        variables = encoder.trainable_variables + decoder.trainable_variables
        gradients = tape.gradient(loss,variables)
        self.optimizer.apply_gradients(zip(gradients,variables)) #performs w=w+alpha*dw
        
        return word_loss

In [15]:
#def loss_function
"""[he she it name this that thses those their you]

the output should be [0,0,0,0,1,0,0,0,0,0]

predictions [0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]-->(sum up to 1) the model is making good enough precdictions

here now we are gonna penelize the places wehre it should be 0 but action not 0 and excusing the 1 place i.e is 0.9 in order make things easy"""

'[he she it name this that thses those their you]\n\nthe output should be [0,0,0,0,1,0,0,0,0,0]\n\npredictions [0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]-->(sum up to 1) the model is making good enough precdictions\n\nhere now we are gonna penelize the places wehre it should be 0 but action not 0 and excusing the 1 place i.e is 0.9 in order make things easy'

In [16]:
#def loss_function
"""
y_real = [0,0,0,0,'1',0,0,0,0,0]
math.equal()=[True,True,True,True,False,True,True,True,True,True]
logical not =[False,False,False,False,True,False,False,False,False,False]
mask = [1,1,1,1,'0',1,1,1,1,1]
predictions [0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]
[1,1,1,1,'0',1,1,1,1,1]*[0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]
final_loss = [0.001,0.001,0.001,0.001,0,0.001,0.001,0.001,0.001,0.002]
"""

"\ny_real = [0,0,0,0,'1',0,0,0,0,0]\nmath.equal()=[True,True,True,True,False,True,True,True,True,True]\nlogical not =[False,False,False,False,True,False,False,False,False,False]\nmask = [1,1,1,1,'0',1,1,1,1,1]\npredictions [0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]\n[1,1,1,1,'0',1,1,1,1,1]*[0.001,0.001,0.001,0.001,0.9,0.001,0.001,0.001,0.001,0.002]\nfinal_loss = [0.001,0.001,0.001,0.001,0,0.001,0.001,0.001,0.001,0.002]\n"

In [30]:
#def train_step 
"""1)inputs-> ------- -> final outputs
2)calculate the loss between final outputs and label data
3)use this to calculate gradients and then update paramters
here 1,2 are done by us the 3 one is taken care by tensor flow

decoder function
                             OUTPUT
ENC OUTPUTS,THOUGHT VECTOR ->[DECODER -1(iteration)] -> HIDDEN STATE, ENC OUTPUTS =>[DECODER - 2(iteration)]
"""

'1)inputs-> ------- -> final outputs\n2)calculate the loss between final outputs and label data\n3)use this to calculate gradients and then update paramters\nhere 1,2 are done by us the 3 one is taken care by tensor flow\n\ndecoder function\n                             OUTPUT\nENC OUTPUTS,THOUGHT VECTOR ->[DECODER -1(iteration)] -> HIDDEN STATE, ENC OUTPUTS =>[DECODER - 2(iteration)]\n'

In [6]:
class Data_Preprocessing:
    def __init__(self):
        self.temp=None
        
    def get_data(self,path):
        file=open(path,'r').read()
        lists = [f.split('\t') for f in file.split('\n')]
        
        questions = [x[0] for x in lists]
        answers = [x[1] for x in lists]
        
        return questions,answers
    
    def process_sentence(self,line):
        line = line.lower().strip()
        
        line = re.sub(r"([?!.,])", r" \1 ",line) #it replaces the ?!., these all with ( ?),( !)Like every symbol before there willl be a space
        
        line = re.sub(r'[" "]+', " ",line) #it replaces the extra spaces with single space
        line = re.sub(r"[^a-zA-Z?!.,]+", " ", line)
        line = line.strip()
        
        line = '<start> '+line+' <end>'
        return line

    def word_to_vec(self,inputs):
        tokenizer = Tokenizer(filters='')
        tokenizer.fit_on_texts(inputs)#one hot encoding
        
        vectors = tokenizer.texts_to_sequences(inputs)
        vectors = pad_sequences(vectors,padding='post')#padding is done at the end of the sentence
        
        return vectors,tokenizer
    

In [7]:
data = Data_Preprocessing()
questions,answers = data.get_data('/kaggle/input/chatbot-dataset/chatbot.txt')


In [8]:
questions = [data.process_sentence(str(sentence)) for sentence in questions]
answers = [data.process_sentence(str(sentence)) for sentence in answers]

In [9]:
questions

['<start> hi <end>',
 '<start> who are you ? <end>',
 '<start> what do you do ? <end>',
 '<start> hi , how are you doing ? <end>',
 '<start> i m fine . how about yourself ? <end>',
 '<start> i m pretty good . thanks for asking . <end>',
 '<start> no problem . so how have you been ? <end>',
 '<start> i ve been great . what about you ? <end>',
 '<start> i ve been good . i m in school right now . <end>',
 '<start> what school do you go to ? <end>',
 '<start> i go to pcc . <end>',
 '<start> do you like it there ? <end>',
 '<start> it s okay . it s a really big campus . <end>',
 '<start> good luck with school . <end>',
 '<start> how s it going ? <end>',
 '<start> i m doing well . how about you ? <end>',
 '<start> never better , thanks . <end>',
 '<start> so how have you been lately ? <end>',
 '<start> i ve actually been pretty good . you ? <end>',
 '<start> i m actually in school right now . <end>',
 '<start> which school do you attend ? <end>',
 '<start> i m attending pcc right now . <end>

In [10]:
train_vectors, train_tokenizer = data.word_to_vec(questions)
label_vectors, label_tokenizer = data.word_to_vec(answers)

In [11]:
train_vectors

array([[  1, 213,   2, ...,   0,   0,   0],
       [  1, 120,  18, ...,   0,   0,   0],
       [  1,  12,  15, ...,   0,   0,   0],
       ...,
       [  1,  17,  47, ...,   0,   0,   0],
       [  1,  26,   6, ...,   0,   0,   0],
       [  1,  12,  17, ...,   0,   0,   0]], dtype=int32)

In [12]:
max_length_train = train_vectors.shape[1]
max_length_label = label_vectors.shape[1]

In [13]:
batch_size = 64
buffer_size = train_vectors.shape[0]#the no. of examples we have
embedding_dim = 256
steps_per_epoch = buffer_size//batch_size
units = 1024


In [14]:
train_tokenizer.word_index

{'<start>': 1,
 '<end>': 2,
 '.': 3,
 '?': 4,
 'i': 5,
 'you': 6,
 ',': 7,
 'to': 8,
 'the': 9,
 'it': 10,
 'a': 11,
 'what': 12,
 's': 13,
 'that': 14,
 'do': 15,
 't': 16,
 'is': 17,
 'are': 18,
 'have': 19,
 'of': 20,
 'and': 21,
 'like': 22,
 'how': 23,
 'for': 24,
 'in': 25,
 'did': 26,
 'can': 27,
 'they': 28,
 'my': 29,
 'so': 30,
 'm': 31,
 'was': 32,
 'he': 33,
 'me': 34,
 'your': 35,
 'we': 36,
 'be': 37,
 'don': 38,
 'but': 39,
 'on': 40,
 'about': 41,
 'yes': 42,
 'go': 43,
 'no': 44,
 'why': 45,
 'not': 46,
 'this': 47,
 'with': 48,
 'want': 49,
 'going': 50,
 'll': 51,
 'good': 52,
 '!': 53,
 'would': 54,
 'there': 55,
 'think': 56,
 'really': 57,
 'know': 58,
 'too': 59,
 'at': 60,
 're': 61,
 'get': 62,
 'will': 63,
 'all': 64,
 'well': 65,
 'one': 66,
 'need': 67,
 'she': 68,
 'just': 69,
 'see': 70,
 'time': 71,
 'should': 72,
 'much': 73,
 'then': 74,
 'out': 75,
 'if': 76,
 'right': 77,
 'today': 78,
 'nice': 79,
 'money': 80,
 'people': 81,
 'didn': 82,
 've': 83,


In [15]:
vocab_train = len(train_tokenizer.word_index)+1
vocab_label = len(label_tokenizer.word_index)+1

In [16]:
dataset = tf.data.Dataset.from_tensor_slices((train_vectors, label_vectors))
dataset = dataset.shuffle(buffer_size)
dataset = dataset.batch(batch_size,drop_remainder=True) #drop_remainder=True some times some examples will be left over so we drop them


In [17]:
encoder = Encoder(vocab_train,embedding_dim,units,batch_size)
decoder = Decoder(vocab_label,embedding_dim,units,batch_size)
trainer = Train()


In [18]:
EPOCHS = 20

for epoch in range(1,EPOCHS+1):
    enc_hidden = tf.zeros((batch_size,units))
    total_loss = 0
    
    for (batch_num, (train_data,label_data)) in enumerate(dataset.take(steps_per_epoch)):
        batch_loss = trainer.train_step(train_data,label_data,enc_hidden,encoder,decoder,batch_size,label_tokenizer)
        
        total_loss = total_loss+batch_loss
    print(f"Epoch: {epoch}, Loss: {total_loss/steps_per_epoch}")

Epoch: 1, Loss: 1.8844118118286133
Epoch: 2, Loss: 1.5712765455245972
Epoch: 3, Loss: 1.425916314125061
Epoch: 4, Loss: 1.30825936794281
Epoch: 5, Loss: 1.1927138566970825
Epoch: 6, Loss: 1.069082498550415
Epoch: 7, Loss: 0.9441434741020203
Epoch: 8, Loss: 0.8177129626274109
Epoch: 9, Loss: 0.7130023837089539
Epoch: 10, Loss: 0.619486391544342
Epoch: 11, Loss: 0.5413640141487122
Epoch: 12, Loss: 0.4729330837726593
Epoch: 13, Loss: 0.40991875529289246
Epoch: 14, Loss: 0.3492942750453949
Epoch: 15, Loss: 0.2995589077472687
Epoch: 16, Loss: 0.2531910836696625
Epoch: 17, Loss: 0.21724654734134674
Epoch: 18, Loss: 0.18526099622249603
Epoch: 19, Loss: 0.16190499067306519
Epoch: 20, Loss: 0.13967257738113403


In [19]:
class Chatbot:
    def __init__(self,encoder,decoder,train_tokenizer,label_tokenizer,max_length_train,units):
        self.train_tokenizer=train_tokenizer
        self.label_tokenizer=label_tokenizer
        self.encoder=encoder
        self.decoder=decoder
        self.units=units
        self.data=Data_Preprocessing()
        self.maxlen=max_length_train
    
    def clean_answer(self,answer):
        answer = answer[:-1]
        answer = ' '.join(answer)
        return answer
        
    def predict(self, sentence):
        sentence = self.data.process_sentence(sentence)
        sentence_mat = []
        for word in sentence.split(" "):
            try:
                sentence_mat.append(self.train_tokenizer.word_index[word])

            except:

                return "Could not understand that, can you re-phrase?"

        sentence_mat = pad_sequences([sentence_mat], maxlen=self.maxlen, padding='post') 
        sentence_mat = tf.convert_to_tensor(sentence_mat)

        enc_hidden = [tf.zeros((1, self.units))]

        encoder_outputs, thought_vector= self.encoder(sentence_mat, enc_hidden)

        dec_hidden=thought_vector

        dec_input = tf.expand_dims([label_tokenizer.word_index['<start>']], 0)

        answer = []

        for i in range(1, self.maxlen):
            pred, dec_hidden,_= decoder(dec_input, encoder_outputs, dec_hidden)
            word = self.label_tokenizer.index_word[np.argmax(pred[0])]
            answer.append(word)
            
            

            if word == '<end>':

                return self.clean_answer(answer)

                I

            dec_input = tf.expand_dims([np.argmax(pred[0])], 0)

        return self.clean_answer(answer)

In [20]:
bot = Chatbot(encoder,decoder,train_tokenizer,label_tokenizer,max_length_train,units)

In [None]:
question=''
while True:
    question = str(input('You'))
    if question == 'quit' or question =='Quit':
        break
    answer = bot.predict(question)
    print(f'Bot: {answer}')
    

You Hello


Bot: how may i help you ?


You Hi


Bot: how can i help you ?


You thre


Bot: Could not understand that, can you re-phrase?


You What is the weather?


Bot: i don t know .


You Please let me know


Bot: are you know


You Wat is My dress colour?


Bot: Could not understand that, can you re-phrase?


You what is my dress colour?


Bot: Could not understand that, can you re-phrase?


You What is the weather?


Bot: i don t know .


You Hi


Bot: how can i help you ?


You Bye


Bot: bye


You Good Bye


Bot: good bye


You Take Care


Bot: yes , that s all i think that s all i think that s all i think that s all i think


You Hello?


Bot: hello
