Importing Libraries

In [None]:
%tensorflow_version 2.x

import tensorflow as tf
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import random
import json

# Fetch the NLTK resources this notebook relies on:
# 'punkt' backs nltk.word_tokenize, 'wordnet' backs WordNetLemmatizer.
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Unzipping corpora/wordnet.zip.


True

Loading intents and initialization

In [None]:
# Location of the intents file on Google Drive.
# NOTE(review): assumes Drive is already mounted at /content/drive; the mount
# step is not shown in this section - confirm before running on a fresh runtime.
INTENTS_PATH = "/content/drive/MyDrive/Colab Data/AmazonML/new_intents.json"

# Decode explicitly as UTF-8 instead of relying on the platform's default
# encoding, so non-ASCII patterns/responses load the same way everywhere.
with open(INTENTS_PATH, encoding="utf-8") as f:
    data = json.load(f)

# Shared text-normalisation and feature-extraction objects used by later cells.
lemmatizer = WordNetLemmatizer()
vectorizer = TfidfVectorizer()

Processing intents

In [None]:
# Parallel lists: patterns[i] is a normalised training sentence, tags[i] its intent.
patterns = []
tags = []
# Maps each intent tag to its list of candidate responses.
responses = {}

for entry in data["intents"]:
    replies = entry["responses"]
    label = entry["tag"]
    responses[label] = replies

    # Tokenise, lower-case and lemmatise every example sentence of this intent,
    # then glue the tokens back together with single spaces.
    normalised = [
        " ".join(
            lemmatizer.lemmatize(token.lower())
            for token in nltk.word_tokenize(sentence)
        )
        for sentence in entry["patterns"]
    ]

    patterns.extend(normalised)
    tags.extend([label] * len(normalised))

# Quick sanity check of what was collected.
print(patterns[:5])
print(tags[:5])
print(responses)

['hi', 'how are you', 'is anyone there ?', 'hello', 'good day']
['greeting', 'greeting', 'greeting', 'greeting', 'greeting']
{'greeting': ['Hello, how do you feel', 'Hi, How is it going?', 'How are you doing?', 'How do you do?', 'Hi, nice to meet you.', 'It is a pleasure to talk with you.'], 'affirmative': ['__affirmative__'], 'negative': ['__negative__'], 'query': ['__query__'], 'information': ['__info__'], 'nearby': ['__nearby__'], 'precaution': ['__precaution__']}


Converting sentences to vectors using the TF-IDF vectorizer

In [None]:
# Learn the vocabulary/IDF weights from the normalised patterns and encode them.
vectors = vectorizer.fit_transform(patterns)

# The Keras model below is fed a dense array, so densify the encoded matrix.
X = vectors.toarray()

# Number of features per sentence, followed by the feature matrix itself.
print(X.shape[1])
print(X)

612
[[0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.48868897 0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


Saving the vectorizer

In [None]:
import pickle

# Persist the fitted vectorizer so inference code can rebuild the exact same
# feature space. The context manager guarantees the file is flushed and closed;
# the previous bare open() left the handle dangling.
with open("/content/drive/MyDrive/Colab Data/AmazonML/amazon_tfidf_vector.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

Preparing data

In [None]:
# Unique intent tags in alphabetical order; a tag's position in this list is
# the class index used for training and for decoding predictions.
all_tags = sorted(set(tags))

print("All tags:", all_tags)
for position, name in enumerate(all_tags):
    print(position, name)

All tags: ['affirmative', 'greeting', 'information', 'nearby', 'negative', 'precaution', 'query']
0 affirmative
1 greeting
2 information
3 nearby
4 negative
5 precaution
6 query


In [None]:
# Integer class label for every training pattern (position of its tag in
# all_tags). A lookup table replaces the repeated linear all_tags.index() scans,
# and the unused one-hot scratch list has been dropped: the model is trained
# with a sparse loss, so plain indices are all that is needed.
tag_to_index = {name: position for position, name in enumerate(all_tags)}
Y = [tag_to_index[tag] for tag in tags]

Defining model

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

# Fix TensorFlow's global seed before any weights are created so that the
# initialisation (and therefore training) is repeatable between runs.
SEED = 42
tf.random.set_seed(SEED)

# Width of each hidden layer.
HIDDEN_UNITS = 16

# Small feed-forward classifier: TF-IDF features -> two ReLU layers -> softmax
# over the intent tags.
inputs = keras.Input(shape=(len(X[0]),))
hidden = layers.Dense(HIDDEN_UNITS, activation='relu')(inputs)
hidden = layers.Dense(HIDDEN_UNITS, activation='relu')(hidden)
outputs = layers.Dense(len(all_tags), activation='softmax')(hidden)
model = keras.Model(inputs, outputs)

model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 612)]             0         
_________________________________________________________________
dense (Dense)                (None, 16)                9808      
_________________________________________________________________
dense_1 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_2 (Dense)              (None, 7)                 119       
Total params: 10,199
Trainable params: 10,199
Non-trainable params: 0
_________________________________________________________________


Compiling model

In [None]:
# Targets are integer class indices, hence the sparse variant of
# categorical cross-entropy.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

Training the model

In [None]:
# Train on the full pattern matrix, five steps per epoch. The call is kept as
# the cell's last expression so the returned History object is still displayed.
model.fit(
    np.array(X),
    np.array(Y),
    epochs=1000,
    steps_per_epoch=5,
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fea293dbb90>

Function to preprocess the input text and convert it into a vector using the fitted TF-IDF vectorizer

In [None]:
def process_text(message):
    """Turn a raw user message into a TF-IDF feature row.

    The message goes through the same normalisation as the training patterns
    (tokenise, lower-case, lemmatise, re-join with spaces) and is then encoded
    with the notebook-level ``vectorizer``.

    Args:
        message: Raw text to classify.

    Returns:
        Dense array holding a single row of TF-IDF features for ``message``.
    """
    tokens = nltk.word_tokenize(message)
    lemmatized = " ".join(lemmatizer.lemmatize(token.lower()) for token in tokens)

    # transform() expects an iterable of documents, so wrap the one sentence.
    return vectorizer.transform([lemmatized]).toarray()

Predicting intent

In [None]:
# Encode a sample utterance and run it through the trained classifier.
X_test = process_text("I am not feeling well")
y_pred = model.predict(X_test)
print(y_pred)

# The highest-scoring position maps back to its tag via all_tags.
index = y_pred.argmax()
print(f"Intent: {all_tags[index]}")

[[8.7546305e-06 4.5303782e-06 2.7234115e-05 1.3364858e-07 2.7238257e-06
  1.3641272e-04 9.9982029e-01]]
Intent: query


Saving the model

In [None]:
# Store the trained classifier on Drive next to the pickled vectorizer.
model_path = "/content/drive/MyDrive/Colab Data/AmazonML/amazon_cb.h5"
model.save(model_path)