In [1]:
import json
import pickle

import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow import keras
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

In [2]:
# Load the intent definitions from disk into `data`.
# The encoding is pinned to UTF-8: JSON text is UTF-8 by specification, whereas
# open() without an explicit encoding falls back to the platform locale codec,
# which can mis-decode or reject non-ASCII characters on some systems.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)

In [3]:
# Flatten the intents into parallel lists used for training.
training_sentences = []  # every example pattern, in file order
training_labels = []     # tag of the intent each pattern belongs to
labels = []              # distinct tags, in first-seen order
responses = []           # one responses entry per intent record

for intent in data['intents']:
    intent_patterns = intent['patterns']
    intent_tag = intent['tag']

    # One (sentence, tag) pair per pattern.
    training_sentences.extend(intent_patterns)
    training_labels.extend([intent_tag] * len(intent_patterns))

    # NOTE(review): appended for every intent record, whereas `labels` is
    # de-duplicated below -- the two lists only line up if tags are unique.
    responses.append(intent['responses'])

    if intent_tag not in labels:
        labels.append(intent_tag)

# Number of output units the classifier needs.
num_classes = len(labels)

In [4]:
# Encode the string tags as integer class ids and keep the fitted encoder.
# NOTE(review): `training_labels` is rebound from tags to integer codes here,
# so re-running this cell on its own would fit the encoder on the codes
# instead of the original tags.
lbl_encoder = LabelEncoder()
training_labels = lbl_encoder.fit_transform(training_labels)


In [5]:
# Text-vectorisation settings.
vocab_size = 1000     # passed to the tokenizer as `num_words`
embedding_dim = 16    # width of each embedding vector
max_len = 20          # every sequence is padded / truncated to this length
oov_token = "<OOV>"   # stand-in token for out-of-vocabulary words

# Fit the tokenizer on the training sentences and keep its word -> id map.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentences)
word_index = tokenizer.word_index

# Turn each sentence into a fixed-length row of word ids.
sequences = tokenizer.texts_to_sequences(training_sentences)
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_len,
    padding='pre',      # the Keras default, spelled out for readers
    truncating='post',  # over-long sentences lose their tail
)

In [6]:
# Classifier: embed the word ids, average the embeddings over the sequence,
# then two small ReLU layers and a softmax over the intent classes.
model = Sequential([
    Embedding(vocab_size, embedding_dim, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(num_classes, activation='softmax'),
])

# The sparse loss takes integer class ids directly (no one-hot targets).
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 20, 16)            16000     
_________________________________________________________________
global_average_pooling1d (Gl (None, 16)                0         
_________________________________________________________________
dense (Dense)                (None, 16)                272       
_________________________________________________________________
dense_1 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_2 (Dense)              (None, 10)                170       
Total params: 16,714
Trainable params: 16,714
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Train on the full set of padded sequences. No validation data is passed,
# so the metrics Keras reports are measured on the training data itself.
epochs = 550
history = model.fit(
    x=padded_sequences,
    y=np.array(training_labels),
    epochs=epochs,
)

Epoch 349/550
Epoch 350/550
Epoch 351/550
Epoch 352/550
Epoch 353/550
Epoch 354/550
Epoch 355/550
Epoch 356/550
Epoch 357/550
Epoch 358/550
Epoch 359/550
Epoch 360/550
Epoch 361/550
Epoch 362/550
Epoch 363/550
Epoch 364/550
Epoch 365/550
Epoch 366/550
Epoch 367/550
Epoch 368/550
Epoch 369/550
Epoch 370/550
Epoch 371/550
Epoch 372/550
Epoch 373/550
Epoch 374/550
Epoch 375/550
Epoch 376/550
Epoch 377/550
Epoch 378/550
Epoch 379/550
Epoch 380/550
Epoch 381/550
Epoch 382/550
Epoch 383/550
Epoch 384/550
Epoch 385/550
Epoch 386/550
Epoch 387/550
Epoch 388/550
Epoch 389/550
Epoch 390/550
Epoch 391/550
Epoch 392/550
Epoch 393/550
Epoch 394/550
Epoch 395/550
Epoch 396/550
Epoch 397/550
Epoch 398/550
Epoch 399/550
Epoch 400/550
Epoch 401/550
Epoch 402/550
Epoch 403/550
Epoch 404/550
Epoch 405/550
Epoch 406/550
Epoch 407/550
Epoch 408/550
Epoch 409/550
Epoch 410/550
Epoch 411/550
Epoch 412/550
Epoch 413/550
Epoch 414/550
Epoch 415/550
Epoch 416/550
Epoch 417/550
Epoch 418/550
Epoch 419/550
Epoch 420/550

In [9]:
# Persist the trained model together with the fitted tokenizer and label
# encoder. `pickle` is imported in the notebook's top import cell.
# NOTE: unpickling can execute arbitrary code -- only load these .pickle
# files from a location you trust.

# saving model
model.save("chat_model")

# saving tokenizer
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)

# saving label encoder
with open('label_encoder.pickle', 'wb') as enc_file:
    pickle.dump(lbl_encoder, enc_file, protocol=pickle.HIGHEST_PROTOCOL)

INFO:tensorflow:Assets written to: chat_model\assets
