# Importing libraries and downloading packages

In [1]:
import nltk
import numpy as np
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [2]:
# Fetch the NLTK resources used later in the notebook.
# 'punkt': tokenizer model used to split messages into tokens
nltk.download('punkt', quiet=True)
# 'stopwords': stop-word lists read through stopwords.words('english')
nltk.download('stopwords', quiet=True)
# 'wordnet': lexical database consulted by WordNetLemmatizer
# (being the last expression, this call's return value is what the cell displays)
nltk.download('wordnet', quiet=True)

True

In [3]:
# English stop-word list, shown here for inspection only:
# clean_corpus() loads its own copy and does not read this variable.
stop_words = stopwords.words('english')
stop_words

['i',
 'me',
 'my',
 'myself',
 'we',
 'our',
 'ours',
 'ourselves',
 'you',
 "you're",
 "you've",
 "you'll",
 "you'd",
 'your',
 'yours',
 'yourself',
 'yourselves',
 'he',
 'him',
 'his',
 'himself',
 'she',
 "she's",
 'her',
 'hers',
 'herself',
 'it',
 "it's",
 'its',
 'itself',
 'they',
 'them',
 'their',
 'theirs',
 'themselves',
 'what',
 'which',
 'who',
 'whom',
 'this',
 'that',
 "that'll",
 'these',
 'those',
 'am',
 'is',
 'are',
 'was',
 'were',
 'be',
 'been',
 'being',
 'have',
 'has',
 'had',
 'having',
 'do',
 'does',
 'did',
 'doing',
 'a',
 'an',
 'the',
 'and',
 'but',
 'if',
 'or',
 'because',
 'as',
 'until',
 'while',
 'of',
 'at',
 'by',
 'for',
 'with',
 'about',
 'against',
 'between',
 'into',
 'through',
 'during',
 'before',
 'after',
 'above',
 'below',
 'to',
 'from',
 'up',
 'down',
 'in',
 'out',
 'on',
 'off',
 'over',
 'under',
 'again',
 'further',
 'then',
 'once',
 'here',
 'there',
 'when',
 'where',
 'why',
 'how',
 'all',
 'any',
 'both',
 'each

# Function to clean the text

### Tokenizing, removing stopwords and punctuation, then lemmatizing

In [4]:
def clean_corpus(corpus):
    """Normalize raw texts into space-joined strings of lemmatized tokens.

    Each document is lower-cased and tokenized with ``word_tokenize``.
    English stop words and every token that is not purely alphabetic
    (punctuation, numbers, ...) are dropped, and the remaining tokens are
    lemmatized with ``WordNetLemmatizer``.

    Parameters
    ----------
    corpus : iterable of str
        Raw documents.

    Returns
    -------
    list of str
        One cleaned string per input document, in input order. A document
        whose tokens are all filtered out yields ``''``.
    """
    # A set gives O(1) membership tests instead of scanning a list per token.
    stop_words = frozenset(stopwords.words('english'))
    lemmatize = WordNetLemmatizer().lemmatize

    cleaned_corpus = []
    for doc in corpus:
        tokens = word_tokenize(doc.lower())
        kept = [
            lemmatize(token)
            for token in tokens
            if token.isalpha() and token not in stop_words
        ]
        cleaned_corpus.append(' '.join(kept))
    return cleaned_corpus

# Loading and Cleaning our intents

In [5]:
import json
# Load the intent definitions. Later cells read a top-level 'intents' list
# whose items carry 'tag', 'patterns' and 'responses' keys.
# The encoding is explicit so the load does not depend on the platform default.
with open('intents.json', encoding='utf-8') as file:
    intents = json.load(file)

In [6]:
# Flatten the intents into two parallel lists: corpus[i] is a training
# pattern and tags[i] is the tag of the intent that pattern belongs to.
corpus, tags = [], []

for intent in intents['intents']:
    patterns = intent['patterns']
    corpus.extend(patterns)
    # repeat the intent's tag once per pattern to keep both lists aligned
    tags.extend(intent['tag'] for _ in patterns)

In [7]:
# Normalize every training pattern. A pattern whose tokens are all stop words
# or non-alphabetic comes back as '' (see the empty entries in the output).
cleaned_corpus = clean_corpus(corpus)
cleaned_corpus

['hello',
 'hey',
 'hi',
 '',
 'good day',
 'greeting',
 '',
 'going',
 'see later',
 'goodbye',
 'leaving',
 'good day',
 'bye',
 'greeting',
 'see ya',
 'bbye',
 'name',
 'call',
 'whats name',
 '',
 'tell',
 'thanks',
 'thank',
 'helpful',
 'awesome thanks',
 'thanks helping',
 'tell joke',
 'feeling bored',
 'joke please',
 'make laugh',
 'want laugh']

# Vectorizing the intents

In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Learn the TF-IDF vocabulary from the cleaned patterns and encode them as a
# document-term matrix; the fitted vectorizer is reused at prediction time.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(cleaned_corpus)

In [9]:
# (number of training patterns, number of TF-IDF features)
X.shape

(31, 30)

In [10]:
from sklearn.preprocessing import OneHotEncoder
# One-hot encode the intent tags. reshape(-1, 1) turns the flat tag list into
# a single-column 2-D array before it is handed to the encoder.
encoder = OneHotEncoder()
Y = encoder.fit_transform(np.array(tags).reshape(-1,1))

In [11]:
# (number of training patterns, number of distinct intent tags)
Y.shape

(31, 5)

# Training Neural Network

In [12]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Small feed-forward classifier: a TF-IDF vector goes in, one probability per
# intent tag comes out.
model = Sequential([
    Dense(128, input_shape=(X.shape[1],), activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    # one softmax unit per one-hot column of Y
    Dense(Y.shape[1], activation='softmax'),
])

# `metrics` is passed as a list: newer Keras releases reject a bare string
# here, while a list is accepted by old and new versions alike.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               3968      
_________________________________________________________________
dropout (Dropout)            (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 5)                 325       
Total params: 12,549
Trainable params: 12,549
Non-trainable params: 0
_________________________________________________________________


In [13]:
# Train on dense copies of X and Y (hence .toarray()). With batch_size=1 each
# batch holds a single pattern; the returned History object is kept in `hist`.
hist = model.fit(X.toarray(), Y.toarray(), epochs=20, batch_size=1)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


# Classifying messages to intents

In [14]:
def predict_intent_tag(message):
    """Return the intent tag the trained model assigns to a single message.

    The message is cleaned with ``clean_corpus``, vectorized with the fitted
    ``vectorizer`` and scored by ``model``; the highest-scoring class is then
    mapped back to its tag through ``encoder``.
    """
    cleaned = clean_corpus([message])
    features = vectorizer.transform(cleaned).toarray()
    scores = model.predict(features)

    # Rebuild a one-hot row for the winning class so that the encoder can
    # translate it back into the original tag.
    one_hot = np.zeros_like(scores[0])
    one_hot[scores.argmax()] = 1
    return encoder.inverse_transform([one_hot])[0][0]


# Quick sanity check of the classifier on a few sample messages.
for sample in ('Hello', 'Tell me a joke', 'What is your name?'):
    print(predict_intent_tag(sample))

greetings
joke
name


In [15]:
import random

In [16]:
def get_intent(tag):
    """Return the first intent dict whose 'tag' equals ``tag``, or None if absent."""
    matches = (
        candidate
        for candidate in intents['intents']
        if candidate['tag'] == tag
    )
    return next(matches, None)

# Complete Chatbot

In [17]:
# Interactive chat: keep answering until a message is classified as 'goodbye'
# (the farewell response is still printed before the loop ends).
tag = None
while tag != 'goodbye':
    message = input('You: ')
    # classify the message with the trained network
    tag = predict_intent_tag(message)
    # look up the full intent for that tag and answer with one of its responses
    intent = get_intent(tag)
    response = random.choice(intent['responses'])
    print('Bot: ', response)

You: Hey
Bot:  What can I do for you?
You: How are you
Bot:  Hello!
You: What is your name?
Bot:  I'm Mike the assistant of Mihir!
You: Tell me a joke
Bot:  What has T in the beginning, T in the middle, and T at the end? 
- A teapot.
You: oh thanks
Bot:  Happy to help!
You: goodbye
Bot:  Goodbye!
