# Importing Libraries

In [10]:
import tensorflow as tf
import numpy as np
import pandas as pd
import json
from nltk.stem import WordNetLemmatizer
import nltk
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Bidirectional, Dropout, Flatten
from tensorflow import keras
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard , ReduceLROnPlateau 
from tensorflow.keras.optimizers import Adam
import random
import re
import pickle
import os

In [11]:
# Creating Object for Lemmatizer
lemmatizer = WordNetLemmatizer()

# Content loading

Content is a JSON file containing tag, intent and response.

In [12]:
with open("intents.json") as ChatbotData:
  intents = json.load(ChatbotData)

In [13]:
intents

{'intents': [{'tag': 'ml_intro',
   'patterns': ['What is machine learning?',
    'Explain machine learning',
    'What are the basics of ML?',
    'Define machine learning'],
   'responses': ['Machine learning is a subset of artificial intelligence (AI) that enables computers to learn from data without being explicitly programmed. It involves developing algorithms and models that can improve their performance over time as they are exposed to more data.',
    'In machine learning, computers learn patterns and insights from data to make predictions, decisions, and recommendations. It encompasses various techniques like supervised learning, unsupervised learning, and reinforcement learning.',
    'The basics of machine learning include understanding key concepts like algorithms, models, features, labels, training data, testing data, evaluation metrics, and optimization techniques.',
    'Machine learning refers to the process of teaching computers to learn from data and improve their per

In [14]:
def clean_text(text):
    text = text.lower()
    text = re.sub(r"i'm", "i am", text)
    text = re.sub(r"he's", "he is", text)
    text = re.sub(r"she's", "she is", text)
    text = re.sub(r"that's", "that is", text)
    text = re.sub(r"what's", "what is", text)
    text = re.sub(r"where's", "where is", text)
    text = re.sub(r"how's", "how is", text)
    text = re.sub(r"wasn't","was not",text)
    text = re.sub(r"\'ll", "will", text)
    text = re.sub(r"\'ve", "have", text)
    text = re.sub(r"\'re", "are", text)
    text = re.sub(r"\'d", "would", text)
    text = re.sub(r"n't", "not", text)
    text = re.sub(r"won't", "will not", text)
    text = re.sub(r"can't", "cannot", text)
    text = re.sub(r"[-()\"#/@;:<>{}`+=~|.!?,]", "", text)

    # Tokenize the text
    words = nltk.word_tokenize(text)
    
    # Lemmatize and remove stop words
    words = [lemmatizer.lemmatize(word.lower()) for word in words]
    #words = [lemmatizer.lemmatize(word.lower()) for word in words if word.lower() not in stop_words]
    
    return ' '.join(words)

In [52]:
tags = []
patterns = []
responses = {}
for intent in intents['intents']:
    responses[intent['tag']] = intent["responses"]
    for lines in intent['patterns']:
        patterns.append(clean_text(lines))
        tags.append(intent['tag'])


print("this is tags",tags)
print("this is input",patterns)
print("this is responses",responses)

this is tags ['ml_intro', 'ml_intro', 'ml_intro', 'ml_intro', 'ml_types', 'ml_types', 'ml_types', 'ml_types', 'ml_algorithms', 'ml_algorithms', 'ml_algorithms', 'ml_algorithms', 'ml_preprocessing', 'ml_preprocessing', 'ml_preprocessing', 'ml_preprocessing', 'ml_evaluation', 'ml_evaluation', 'ml_evaluation', 'ml_evaluation', 'ml_applications', 'ml_applications', 'ml_applications', 'ml_applications', 'ml_workflow', 'ml_workflow', 'ml_workflow', 'ml_tools', 'ml_tools', 'ml_tools', 'ml_performance', 'ml_performance', 'ml_performance', 'ml_hyperparameters', 'ml_hyperparameters', 'ml_hyperparameters', 'ml_bias_variance', 'ml_bias_variance', 'ml_bias_variance', 'ml_interpretability', 'ml_interpretability', 'ml_interpretability', 'ml_deployment', 'ml_deployment', 'ml_deployment', 'ml_automation', 'ml_automation', 'ml_automation', 'ml_ethics', 'ml_ethics', 'ml_ethics', 'ml_security', 'ml_security', 'ml_security', 'ml_responsibility', 'ml_responsibility', 'ml_responsibility', 'ml_types', 'ml_typ

# Data Preprocessing

Now we construct a dataframe consist of patterns and their respective tags.

In [16]:
data = pd.DataFrame({"inputs":patterns,"tags":tags})

In [17]:
data.head(10)

Unnamed: 0,inputs,tags
0,what is machine learning,ml_intro
1,explain machine learning,ml_intro
2,what are the basic of ml,ml_intro
3,define machine learning,ml_intro
4,what are the type of machine learning,ml_types
5,explain supervised learning,ml_types
6,what is unsupervised learning,ml_types
7,tell me about reinforcement learning,ml_types
8,what are some common ml algorithm,ml_algorithms
9,explain linear regression,ml_algorithms


# Tokenizing & Padding

In [53]:
oov_token = "<OOV>"  # Add out of vocabulary token

tokenizer = Tokenizer(num_words = 4000 , oov_token=oov_token)

In [54]:
tokenizer.fit_on_texts(data["inputs"])

In [55]:
tokenizer.word_index

{'<OOV>': 1,
 'what': 2,
 'explain': 3,
 'learning': 4,
 'me': 5,
 'tell': 6,
 'about': 7,
 'is': 8,
 'the': 9,
 'in': 10,
 'are': 11,
 'of': 12,
 'you': 13,
 'machine': 14,
 'ml': 15,
 'algorithm': 16,
 'how': 17,
 'ai': 18,
 'and': 19,
 'for': 20,
 'model': 21,
 'supervised': 22,
 'clustering': 23,
 'decision': 24,
 'tree': 25,
 'who': 26,
 'technique': 27,
 'do': 28,
 'ensemble': 29,
 'network': 30,
 'deep': 31,
 'svm': 32,
 'pca': 33,
 'classification': 34,
 'dimensionality': 35,
 'neural': 36,
 'data': 37,
 'concept': 38,
 'unsupervised': 39,
 'regression': 40,
 'a': 41,
 'to': 42,
 'statistical': 43,
 'method': 44,
 'name': 45,
 'used': 46,
 'with': 47,
 'reduction': 48,
 'i': 49,
 'your': 50,
 'some': 51,
 'project': 52,
 'dl': 53,
 'prove': 54,
 'agent': 55,
 'search': 56,
 'linear': 57,
 'artificial': 58,
 'risk': 59,
 'bayes': 60,
 'classifier': 61,
 'kmeans': 62,
 'tensorflow': 63,
 'can': 64,
 'strategy': 65,
 'preprocessing': 66,
 'role': 67,
 'training': 68,
 'tradeoff': 

In [56]:
data['inputs']

0                               what is machine learning
1                               explain machine learning
2                               what are the basic of ml
3                                define machine learning
4                  what are the type of machine learning
                             ...                        
381          explain the representation of decision tree
382                   what doe a decision tree look like
383    what is the basic decision tree learning algor...
384         explain the decision tree learning algorithm
385             how doe the decision tree algorithm work
Name: inputs, Length: 386, dtype: object

In [59]:
for i in patterns:
    train = tokenizer.texts_to_sequences([i])

In [60]:
train

[[17, 96, 9, 24, 25, 16, 152]]

In [13]:
# Finding the Max length Sentence
def get_maxlen(data):
    maxlen=0
    for sent in data:
        maxlen=max(maxlen,len(sent))
    return maxlen
maxlen=get_maxlen(train)
print(maxlen)

12


In [14]:
x_train = pad_sequences(train, maxlen=maxlen, padding='post', truncating='post')

In [15]:
x_train

array([[ 2,  8, 14, ...,  0,  0,  0],
       [ 3, 14,  4, ...,  0,  0,  0],
       [ 2, 11,  9, ...,  0,  0,  0],
       ...,
       [ 2,  8,  9, ...,  0,  0,  0],
       [ 3,  9, 24, ...,  0,  0,  0],
       [17, 96,  9, ...,  0,  0,  0]])

In [16]:
x_train.shape

(386, 12)

# Encoding the output

In [17]:
lbl_encoder = LabelEncoder()

In [18]:
y_train = lbl_encoder.fit_transform(data["tags"])

In [19]:
y_train

array([ 64,  64,  64,  64,  71,  71,  71,  71,  54,  54,  54,  54,  67,
        67,  67,  67,  60,  60,  60,  60,  55,  55,  55,  55,  72,  72,
        72,  70,  70,  70,  66,  66,  66,  62,  62,  62,  57,  57,  57,
        63,  63,  63,  58,  58,  58,  56,  56,  56,  59,  59,  59,  69,
        69,  69,  68,  68,  68,  71,  71,  71,  71,   9,   9,   9,   9,
         5,   5,   5,   5,  65,  65,  65,  65,  61,  61,  61,  61,   6,
         6,   6,   6,   7,   7,   7,   8,   8,   8,  92,  92,  92,  93,
        93,  93,  99,  99,  99,  98,  98,  98,  97,  97,  97,  88,  88,
        88,  89,  89,  89,  30,  30,  30,  94,  94,  94,  29,  29,  29,
        26,  26,  26,  74,  74,  74,  48,  48,  48,  95,  95,  95,  13,
        13,  13,  73,  73,  73,  86,  86,  86,  31,  31,  31, 105, 105,
       105,  11,  11,  11,  84,  84,  84,  14,  14,  14,  91,  91,  91,
        49,  49,  49,  79,  79,  79,  96,  96,  96,  75,  75,  75, 100,
       100, 100,  17,  17,  17,  43,  43,  43,  47,  47,  47, 10

In [20]:
input_shape = x_train.shape[1]
print("input sentence len : ",input_shape)

input sentence len :  12


In [21]:
word2index = tokenizer.word_index
unique_words = len(word2index)
output_length = lbl_encoder.classes_.shape[0]
print("Number of unique words: ", unique_words)
print(tokenizer.word_index)
print("Output size: ", output_length)

Number of unique words:  383
{'<OOV>': 1, 'what': 2, 'explain': 3, 'learning': 4, 'me': 5, 'tell': 6, 'about': 7, 'is': 8, 'the': 9, 'in': 10, 'are': 11, 'of': 12, 'you': 13, 'machine': 14, 'ml': 15, 'algorithm': 16, 'how': 17, 'ai': 18, 'and': 19, 'for': 20, 'model': 21, 'supervised': 22, 'clustering': 23, 'decision': 24, 'tree': 25, 'who': 26, 'technique': 27, 'do': 28, 'ensemble': 29, 'network': 30, 'deep': 31, 'svm': 32, 'pca': 33, 'classification': 34, 'dimensionality': 35, 'neural': 36, 'data': 37, 'concept': 38, 'unsupervised': 39, 'regression': 40, 'a': 41, 'to': 42, 'statistical': 43, 'method': 44, 'name': 45, 'used': 46, 'with': 47, 'reduction': 48, 'i': 49, 'your': 50, 'some': 51, 'project': 52, 'dl': 53, 'prove': 54, 'agent': 55, 'search': 56, 'linear': 57, 'artificial': 58, 'risk': 59, 'bayes': 60, 'classifier': 61, 'kmeans': 62, 'tensorflow': 63, 'can': 64, 'strategy': 65, 'preprocessing': 66, 'role': 67, 'training': 68, 'tradeoff': 69, 'using': 70, 'am': 71, 'have': 72, 

## Constructing a Neural Network


The initial layer is an Embedding layer, facilitating the transformation of input tokens into dense vectors of fixed size. Subsequently, two Bidirectional Long Short-Term Memory (LSTM) layers are configured to return sequences. A Dropout layer follows, providing regularization to prevent overfitting. The model then flattens the output and passes it through a Dense layer. Finally, the output layer consists of a Dense layer with output_length units and a softmax activation function.

In [26]:
embed_size=100
model = tf.keras.Sequential()
model.add(Embedding(unique_words + 1, embed_size , input_length=input_shape))
model.add(Bidirectional(LSTM(1024, return_sequences=True)))
model.add(Bidirectional(LSTM(1024)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
model.add(Dense(units=output_length, activation='softmax'))




In [27]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 12, 100)           38400     
                                                                 
 bidirectional (Bidirection  (None, 12, 2048)          9216000   
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 2048)              25174016  
 onal)                                                           
                                                                 
 dropout (Dropout)           (None, 2048)              0         
                                                                 
 flatten (Flatten)           (None, 2048)              0         
                                                                 
 dense (Dense)               (None, 512)               1

In [24]:
# CALLBACKS
chechpoint = ModelCheckpoint('model.h5' , monitor='loss' , verbose=1 , save_best_only=True , mode='auto')
reduces = ReduceLROnPlateau(monitor='loss', factor=0.2 , patience=3 , min_lr=0.0001 , verbose=1)
logdir = './logs'
tensorboard_vis = TensorBoard(log_dir=logdir)


In [28]:
model.compile(loss = "sparse_categorical_crossentropy", optimizer =Adam(lr=0.001), metrics=['accuracy'])



# Training the Model

Here, I am train the neural network. The training is performed on a dataset for 400 epochs.

In [34]:
chatbot = model.fit(x_train, y_train, epochs=150 ,batch_size=64, callbacks=[chechpoint , reduces , tensorboard_vis])

Epoch 1/150
Epoch 1: loss improved from 1.01018 to 0.93924, saving model to chatbot.h5
Epoch 2/150
Epoch 2: loss did not improve from 0.93924
Epoch 3/150
Epoch 3: loss improved from 0.93924 to 0.92544, saving model to chatbot.h5
Epoch 4/150
Epoch 4: loss did not improve from 0.92544
Epoch 5/150
Epoch 5: loss improved from 0.92544 to 0.84132, saving model to chatbot.h5
Epoch 6/150
Epoch 6: loss improved from 0.84132 to 0.78257, saving model to chatbot.h5
Epoch 7/150
Epoch 7: loss improved from 0.78257 to 0.73008, saving model to chatbot.h5
Epoch 8/150
Epoch 8: loss improved from 0.73008 to 0.68253, saving model to chatbot.h5
Epoch 9/150
Epoch 9: loss improved from 0.68253 to 0.67333, saving model to chatbot.h5
Epoch 10/150
Epoch 10: loss improved from 0.67333 to 0.64780, saving model to chatbot.h5
Epoch 11/150
Epoch 11: loss improved from 0.64780 to 0.62060, saving model to chatbot.h5
Epoch 12/150
Epoch 12: loss improved from 0.62060 to 0.61100, saving model to chatbot.h5
Epoch 13/150
E

# Visualizing Accuracy & Loss


In [29]:
%load_ext tensorboard
%tensorboard --logdir='./logs'

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 19028), started 6:21:15 ago. (Use '!kill 19028' to kill it.)

In [36]:
print("Accuracy: ",chatbot.history['accuracy'][-1])

Accuracy:  0.9689119458198547


Saving the Tokenizer & Label_Encoder

In [37]:
#Saving Tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Saving Label_Encoder
with open('label_encoder.pickle', 'wb') as ecn_file:
    pickle.dump(lbl_encoder, ecn_file, protocol=pickle.HIGHEST_PROTOCOL)