## Libraries

In [1]:
import nltk
import json
import tensorflow as tf
from nltk.stem import WordNetLemmatizer
import pickle

import random
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD

from keras.models import load_model

# Shared lemmatizer applied to every token when the vocabulary is built.
lemmatizer = WordNetLemmatizer()
# Punctuation tokens dropped right after tokenisation.
ignore = ['!', '?']


In [2]:
import nltk

# Tokenizer models used by nltk.word_tokenize.
nltk.download('punkt')
# WordNet corpus used by WordNetLemmatizer.lemmatize; without it a fresh
# environment raises LookupError on the first lemmatize() call.
# NOTE(review): recent NLTK releases may additionally need 'punkt_tab' for
# word_tokenize — confirm against the installed NLTK version.
nltk.download('wordnet')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\ASUS\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

## Implementation :

In [3]:
# Load the intents file. The context manager closes the handle deterministically
# and the explicit encoding stops the platform default (e.g. cp1252 on Windows)
# from mis-decoding non-ASCII characters in the JSON.
with open("C:/Users/ASUS/Desktop/Projects/AI_ChatBot/data/data.json", encoding="utf-8") as intents_file:
    data = json.load(intents_file)

words_Repository = []  # every normalised token, duplicates included
corpus = []            # (token list, title) pair for each request
classes = []           # distinct titles, in first-seen order


for record in data['info']:
    t = record['title']
    for request in record['requests']:
        # Tokenise, drop ignored punctuation, then lowercase and lemmatise.
        tokens = [
            lemmatizer.lemmatize(w.lower())
            for w in nltk.word_tokenize(request)
            if w not in ignore
        ]
        words_Repository.extend(tokens)
        corpus.append((tokens, t))
        # Registered inside the request loop on purpose: a record without
        # any request contributes no class.
        if t not in classes:
            classes.append(t)

# Vocabulary size (number of distinct tokens).
len(set(words_Repository))

88

In [4]:
# De-duplicate and sort so every vocabulary word / class has a stable index.
words = sorted(set(words_Repository))
classes = sorted(classes)

# Persist the artefacts. Each file is written inside a context manager so the
# handle is flushed and closed before the next cell reads it back.
for artefact, target in (
    (words, 'C:/Users/ASUS/Desktop/Projects/AI_ChatBot/words.pkl'),
    (classes, 'C:/Users/ASUS/Desktop/Projects/AI_ChatBot/classes.pkl'),
    (corpus, 'C:/Users/ASUS/Desktop/Projects/AI_ChatBot/corpus.pkl'),
):
    with open(target, 'wb') as handle:
        pickle.dump(artefact, handle)

In [5]:
### load data
# NOTE: pickle.load can execute arbitrary code stored in the file; only load
# artefacts that this notebook produced itself.
with open('C:/Users/ASUS/Desktop/Projects/AI_ChatBot/words.pkl', 'rb') as handle:
    words = pickle.load(handle)
with open('C:/Users/ASUS/Desktop/Projects/AI_ChatBot/classes.pkl', 'rb') as handle:
    classes = pickle.load(handle)
with open('C:/Users/ASUS/Desktop/Projects/AI_ChatBot/corpus.pkl', 'rb') as handle:
    corpus = pickle.load(handle)

# NOTE(review): this reads .../AI_ChatBot/model/model.h5, while the save cell
# further down writes .../AI_ChatBot/model.h5 — confirm which path is intended.
# `model` is also rebound by createModel(...) before training, so the network
# loaded here is only used if the build/train cells are skipped.
model = load_model('C:/Users/ASUS/Desktop/Projects/AI_ChatBot/model/model.h5')



### Numerical representation (bag-of-words inputs, one-hot labels)

In [6]:
# Position lookups built once, instead of scanning the lists with .index()
# for every token. `words` and `classes` hold unique entries, so the dict
# index equals what list.index() would return.
word_index = {w: i for i, w in enumerate(words)}
class_index = {c: i for i, c in enumerate(classes)}

# One [input_vec, output_vec] pair per request:
#   input_vec  - 1 at the position of every vocabulary word in the request
#   output_vec - 1 at the position of the request's class
dataSet = []
for request, class_r in corpus:
    input_vec = [0] * len(words)
    output_vec = [0] * len(classes)

    for w in request:
        input_vec[word_index[w]] = 1

    output_vec[class_index[class_r]] = 1

    dataSet.append([input_vec, output_vec])

random.shuffle(dataSet)

# Split the pairs directly. Going through np.array(dataSet, dtype=object)
# first is fragile: when len(words) == len(classes) NumPy builds a 3-D object
# array and x / y come out with dtype=object instead of as numeric arrays.
# dataSet therefore stays a plain list of pairs.
x1 = [pair[0] for pair in dataSet]
y1 = [pair[1] for pair in dataSet]
x = np.array(x1)
y = np.array(y1)

## Build Model

In [7]:
def createModel(input_size, output_size):
    """Build (but do not compile) the feed-forward classifier.

    Args:
        input_size: Width of the input vector.
        output_size: Number of units in the final softmax layer.

    Returns:
        A Sequential model: Input -> Dense(128, relu) -> Dropout(0.5)
        -> Dense(64, relu) -> Dropout(0.5) -> Dense(output_size, softmax).
    """
    layers = [
        tf.keras.Input(shape=(input_size,)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(output_size, activation='softmax'),
    ]
    return Sequential(layers)
# One input per vocabulary word, one output per class. This rebinds `model`,
# replacing the network that the load-data cell read from disk.
model = createModel(len(words), len(classes))
model.summary()

### Train the Model

In [8]:
# Stochastic gradient descent with Nesterov momentum.
sgd = SGD(
    learning_rate=0.01,
    momentum=0.8,
    nesterov=True,
)

# Multi-class setup: softmax outputs scored against one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy'],
)

# Train on the whole data set; the returned History object is the cell output.
model.fit(x, y, epochs=200, batch_size=3, verbose=1)

Epoch 1/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 11ms/step - accuracy: 0.1820 - loss: 2.1554
Epoch 2/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3083 - loss: 1.9935
Epoch 3/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3899 - loss: 1.8894
Epoch 4/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2883 - loss: 1.9721
Epoch 5/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4152 - loss: 1.8011
Epoch 6/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4555 - loss: 1.6758
Epoch 7/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5478 - loss: 1.5709
Epoch 8/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5953 - loss: 1.3879
Epoch 9/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1cf44982150>

In [9]:
# Persist the trained network to disk.
# NOTE(review): the load-data cell reads .../AI_ChatBot/model/model.h5, not
# this path — confirm which location is intended.
model.save('C:/Users/ASUS/Desktop/Projects/AI_ChatBot/model.h5')



### Preprocessing and prediction helpers

In [36]:
# Sample utterance passed to getResponse on the last line of this cell.
sen = "bye"
def senwords(sen):
    """Tokenise a sentence the same way the training corpus was tokenised.

    Tokens listed in the module-level `ignore` are dropped; the rest are
    lowercased and lemmatised. Lemmatising here matters: the vocabulary was
    built from lemmatised tokens, so without it inflected forms would never
    match a vocabulary entry and would be silently ignored downstream.

    Args:
        sen: Raw input sentence.

    Returns:
        List of normalised tokens, in sentence order.
    """
    tokens = nltk.word_tokenize(sen)
    return [lemmatizer.lemmatize(tok.lower()) for tok in tokens if tok not in ignore]

def digitize(sen , words):
    """Encode a sentence as a 0/1 vector aligned with `words`.

    Args:
        sen: Raw input sentence; tokenised with senwords().
        words: Vocabulary list; position i of the result corresponds to words[i].

    Returns:
        List of len(words) integers, 1 where the vocabulary word occurs in the
        sentence and 0 elsewhere. Tokens missing from `words` are skipped.
    """
    vector = [0] * len(words)

    for token in senwords(sen):
        try:
            position = words.index(token)
        except ValueError:
            # Token is not part of the vocabulary.
            continue
        vector[position] = 1

    return vector

def predictClass(sen , words,classes, model):
    """Return the class whose model score is highest for the sentence.

    Args:
        sen: Raw input sentence.
        words: Vocabulary list used to encode the sentence.
        classes: Class labels; index i corresponds to model output i.
        model: Object exposing predict(); called with a batch of one sample.

    Returns:
        The entry of `classes` at the position of the largest score
        (the first such position if several scores tie).
    """
    batch = np.array([digitize(sen, words)])
    scores = model.predict(batch)[0]

    best = int(np.argmax(scores))
    return classes[best]

def getResponse(sen , words,classes, model):
    """Pick a random response for the class predicted for the sentence.

    Looks the predicted title up in the module-level `data['info']` records.

    Args:
        sen: Raw input sentence.
        words: Vocabulary list used to encode the sentence.
        classes: Class labels aligned with the model outputs.
        model: Trained classifier passed through to predictClass().

    Returns:
        One randomly chosen entry of the first matching record's 'responses',
        or None when no record carries the predicted title.
    """
    tag = predictClass(sen, words, classes, model)
    match = next((rec for rec in data['info'] if rec['title'] == tag), None)
    if match is None:
        return None
    return random.choice(match['responses'])

# Smoke test: classify the sample sentence and show one response for its class.
getResponse(sen , words,classes, model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step


'Bye! Come back again soon.'