In [1]:
! pwd

/content


In [2]:
! mkdir simple_bot

In [3]:
# Importing Libraries for text processing.
import nltk
import numpy as np

**Tokenization**

It is the process of breaking down sentence or paragraphs into smaller chunks of words called tokens.

In [4]:
nltk.download('punkt')      # Downloading model to tokenize message.
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


**Stop Words Removal**

by removing these words theh meaning of the sentences dose not changes. Words like not, like, etc. are called stopwords and should be removed before feeding to any algorithm.

In [5]:
nltk.download('stopwords')
from nltk.corpus import stopwords

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


**Lemmatization**

It is the process of converting a word into its base form. 
Eg.: 'Moving' -> Lemmatization -> 'Move'

In [6]:
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


**Vectorization**

We need to convert the strings to numbers based on their importance. Here we use TF_IDF vectorization to convert those text to vector of importance.

In [7]:
def clean_corpus(corpus):
  corpus = [doc.lower() for doc in corpus]
  cleaned_corpus = []
  stop_words = stopwords.words('english')
  wordnet_lemmatizer = WordNetLemmatizer()
  # iteration over every text
  for doc in corpus :
    tokens = word_tokenize(doc)
    cleaned_sentence = []
    for token in tokens :
      if token not in stop_words and token.isalpha():
        cleaned_sentence.append(wordnet_lemmatizer.lemmatize(token))
    cleaned_corpus.append(''.join(cleaned_sentence))
  return cleaned_corpus

In [8]:
import json
with open('/content/simple_bot/intents.json') as file:
  intents = json.load(file)

Clean and stored in form of vectors.

In [9]:
corpus = []
tags = []
for intent in intents['intents']:
  for pattern in intent['patterns']:
    corpus.append(pattern)
    tags.append(intent['tag'])

In [10]:
cleaned_corpus = clean_corpus(corpus)
cleaned_corpus

['hi',
 'anyone',
 'hey',
 'hola',
 'hello',
 'goodday',
 'bye',
 'seelater',
 'goodbye',
 'nicechattingbye',
 'tillnexttime',
 '',
 'thanks',
 'thank',
 'helpful',
 'awesomethanks',
 'thankshelping',
 'couldhelp',
 'helpprovide',
 'helpful',
 'supportoffered',
 'pleasecheckorderstatus',
 'ablecheckorderstatus',
 'helporderstatus',
 'orderstatus',
 'order',
 'food',
 'trackorder',
 'trackfood',
 'hiwantcancelorder',
 'wantcancelorder',
 'pleasecancelorder',
 'cancelorder',
 'wantadddeliveryinstruction',
 'pleaseadddeliveryinstruction',
 'includedeliveryinstruction',
 'telljoke',
 'feelingbored',
 'jokeplease',
 'makelaugh',
 'wantlaugh']

**Vectorising Intents**

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(cleaned_corpus)

**Reshaping the vectors for our neural network**

In [14]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder()
y = encoder.fit_transform(np.array(tags).reshape(-1,1))

**Training Neural Network**

In [16]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

model = Sequential([
                    Dense(128, input_shape=(X.shape[1],), activation='relu'),
                    Dropout(0.2),
                    Dense(64, activation = 'relu'),
                    Dropout(0.2),
                    Dense(y.shape[1], activation = 'softmax')
])
model.compile(loss= 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               5120      
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 8)                 520       
Total params: 13,896
Trainable params: 13,896
Non-trainable params: 0
_________________________________________________________________


In [20]:
history = model.fit(X.toarray(), y.toarray(), epochs=21, batch_size=1)

Epoch 1/21
Epoch 2/21
Epoch 3/21
Epoch 4/21
Epoch 5/21
Epoch 6/21
Epoch 7/21
Epoch 8/21
Epoch 9/21
Epoch 10/21
Epoch 11/21
Epoch 12/21
Epoch 13/21
Epoch 14/21
Epoch 15/21
Epoch 16/21
Epoch 17/21
Epoch 18/21
Epoch 19/21
Epoch 20/21
Epoch 21/21


**Intent Classification**

In [25]:
# Defining the function to predict intent tag of a particular message.

# if the prediction for every tag is low, then ew want to classify that message as noanswer.
INTENT_NOT_FOUND_THRESHOLD = 0.40

def predict_intent_tag(message):
  message = clean_corpus([message])
  X_test = vectorizer.transform(message)
  y = model.predict(X_test.toarray())
  # if probability of all intents is low, classify it as noanswer.
  if y.max() < INTENT_NOT_FOUND_THRESHOLD:
    return 'noanswer'

  prediction = np.zeros_like(y[0])
  prediction[y.argmax()] = 1
  tag = encoder.inverse_transform([prediction])[0][0]
  return tag

print(predict_intent_tag('How you could help me ?'))
print(predict_intent_tag('Swiggy chat bot'))
print(predict_intent_tag('Where\'s my order ?'))

options
goodbye
order-status-request


**Define the function to fetch the tag from the intent**

In [26]:
import random
import time

def get_intent(tag):
  # to return complete intent from intent tag.
  for intent in intents['intents']:
    if intent['tag'] == tag:
      return intent

**Intent Classifiaction**





In [27]:
def perform_action(action_code, intent):
  # function to perform an action which is required by intent.
  if action_code == 'CHECK_ORDER_STATUS':
    print('\n Checking database \n')
    time.sleep(2)
    order_status = ['in kitchen', 'with delivery executive']
    delivery_time = []
    return {'intent_tag' : intent['next-intent-tag'][0],
            'order status' : random.choice(order_status),
            'delivery_time' : random.randint(10, 30)}

  elif action_code == 'ORDER_CANCEL_CONFIRMATION':
    ch = imput('BOT: Do you want to continue (Y/N) ?')
    if ch == 'y' or ch == 'Y':
      choice = 0
    else : 
      choice = 1
    return {'intent_tag' : intent['next-intent-tag'][choice]}

  elif action_code == 'ADD_DELIVERY_INSTRUCTIONS':
    instructions = input('Your Instructions: ')
    return {'intent-tag' : intent['next-intent-tag'][0]}    

**Complete ChatBot**

In [None]:
while True:
  # get message from user.
  message = input('You: ')
  # predict the intent tag using the trained neural network.
  tag = predict_intent_tag(message)
  # get complete intent from intent tag.
  intent = get_intent(tag)
  # generate random response from the intent.
  response = random.choice(intent['responses'])
  print('Bot : ', response)

  # check if there is any need to perform some action.
  if 'action' in intent.keys():
    action_code = intent['action']
    # perform action
    data = perform_action(action_code, intent)
    # get follow up intent after performing action.
    followup_intent = get_intent(data['intent-tag'])
    # generate random response from follow up intent.
    response = random.choice(followup_intent['responses'])

    # print randomly selected response.
    if len(data.keys()) > 1:
      print('Bot: ', response.format(**data))
    else:
      print('Bot: ', response)
  # break loop if intent was goodbye
  if tag == 'goodbye':
    break    

You: hi
Bot :  Good to see you again
