In [None]:
#kinjalkumari Dhimmar: 301239901
import json
import pickle
import tensorflow as tf
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense

# Load the intents definition (tags, example patterns, responses) from JSON.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open('kinjalkumari_intents.json', encoding='utf-8') as file:
    data = json.load(file)

# Flatten the intents data into four lists:
#   tags      - one entry per intent
#   patterns  - every pattern of every intent
#   intents   - the tag belonging to each pattern (aligned with `patterns`)
#   responses - every response of every intent
intents, patterns, responses, tags = [], [], [], []

for intent in data['intents']:
    tag = intent['tag']
    tags.append(tag)
    patterns.extend(intent['patterns'])
    # Repeat the tag once per pattern so labels and patterns stay aligned.
    intents.extend(tag for _ in intent['patterns'])
    responses.extend(intent['responses'])

# Model inputs: fit a Keras tokenizer on the patterns, convert each pattern
# to a sequence of word indices, then pad/truncate every sequence at its end
# to a fixed length of 35.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(patterns)
word_index = tokenizer.word_index
sequences = tokenizer.texts_to_sequences(patterns)
padded_sequences = pad_sequences(
    sequences,
    maxlen=35,
    padding='post',
    truncating='post',
)

# Model targets: map each pattern's intent tag to an integer class id.
le = LabelEncoder()
encoded_labels = le.fit_transform(intents)

# Define the model architecture.
# The embedding table needs one row for every index that can appear in the
# padded sequences: 0 (the padding value) plus 1..len(word_index) from the
# tokenizer. The original fixed size of 1100 is kept as a floor, and the
# table grows automatically if the vocabulary ever exceeds it, instead of
# feeding out-of-range indices to the Embedding layer.
vocab_size = max(1100, len(word_index) + 1)

model = Sequential()
# input_length must match the maxlen used when padding the sequences (35).
model.add(Embedding(input_dim=vocab_size, output_dim=20, input_length=35))
# Average the token embeddings into a single vector per pattern.
model.add(GlobalAveragePooling1D())
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=10, activation='sigmoid'))
# One softmax output per distinct intent tag.
model.add(Dense(units=len(set(intents)), activation='softmax'))

# Compile the model. The labels are integer class ids (LabelEncoder output),
# hence the sparse variant of categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Print the model summary
model.summary()

# Stage 1: train for 500 epochs. The history is kept under its own name so
# it is not lost when `history` is rebound by the second stage below.
history_500 = model.fit(padded_sequences, encoded_labels, epochs=500, verbose=1)

# Stage 2: train for another 1000 epochs. fit() continues from the weights
# learned in stage 1 (the model is not re-initialised), so the saved model
# has been trained for 1500 epochs in total. `history` holds this run only.
history = model.fit(padded_sequences, encoded_labels, epochs=1000, verbose=1)

# Persist the fitted preprocessing objects (tokenizer and label encoder) as
# pickle files in the current working directory.
for artifact_path, artifact in (
    ('tokenizer.pickle', tokenizer),
    ('encoder.pickle', le),
):
    with open(artifact_path, 'wb') as handle:
        pickle.dump(artifact, handle, protocol=pickle.HIGHEST_PROTOCOL)

# Persist the trained model itself in HDF5 format.
model.save('chatbot_model.h5')


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_2 (Embedding)     (None, 35, 20)            22000     
                                                                 
 global_average_pooling1d_2   (None, 20)               0         
 (GlobalAveragePooling1D)                                        
                                                                 
 dense_6 (Dense)             (None, 16)                336       
                                                                 
 dense_7 (Dense)             (None, 10)                170       
                                                                 
 dense_8 (Dense)             (None, 13)                143       
                                                                 
Total params: 22,649
Trainable params: 22,649
Non-trainable params: 0
__________________________________________________