In [2]:
import json
import os
import logging
import numpy as np
import pickle
import keras
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Dense, Embedding
from keras.layers import LSTM

In [3]:
# Emit INFO-level records: without a configured handler the root logger only
# shows WARNING and above, so every logging.info() call in this notebook would
# be dropped silently. basicConfig() does nothing if handlers already exist.
logging.basicConfig(level=logging.INFO)

# Raw string: '\A' and '\d' are not valid escape sequences, so the non-raw
# literal only produced the intended path by accident (and triggers an
# invalid-escape warning). The resulting path value is unchanged.
file_path = r'D:\AI Bot\data.json'

# JSON text is UTF-8; be explicit instead of relying on the platform's
# default text encoding.
with open(file_path, encoding='utf-8') as file:
    data = json.load(file)
logging.info("imported json data to preprocess")

In [7]:
# Accumulators filled while walking the intents: four independent empty lists
# (input phrases, their tags, the distinct tags, and the response lists).
training_sentence, training_labels, labels, responses = [], [], [], []

In [8]:
# Flatten every intent into parallel lists: one entry in `training_sentence`
# and one matching tag in `training_labels` per example input.
for intent in data['intents']:
    training_sentence.extend(intent['input'])
    training_labels.extend([intent['tag']] * len(intent['input']))
    responses.append(intent['response'])

    # Keep `labels` free of duplicates while preserving first-seen order.
    if intent['tag'] in labels:
        continue
    labels.append(intent['tag'])
logging.info("Created seperated file for my training sentences and training labels and label and reponses")

# Number of distinct tags; used as the size of the model's output layer.
num_classes = len(labels)

# Fit the encoder on the string tags, then replace them with integer ids.
lbl_encoder = LabelEncoder().fit(training_labels)
training_labels = lbl_encoder.transform(training_labels)
logging.info("Converted labels into numbers of  training labels")
      

In [10]:
# Tokenizer / padding settings.
max_len = 20          # length every sequence is padded or truncated to
embedding_dim = 25    # size of each word vector produced by the Embedding layer
vocab_size = 1000     # passed to the tokenizer as num_words
oov_token = "<OOV>"   # token substituted for out-of-vocabulary words

# Learn the vocabulary from the training phrases.
tokenizer = Tokenizer(oov_token=oov_token, num_words=vocab_size)
tokenizer.fit_on_texts(training_sentence)
word_index = tokenizer.word_index

# Convert phrases to integer sequences and bring them all to `max_len`;
# sequences that are too long are cut at the end (truncating='post').
sequence = tokenizer.texts_to_sequences(training_sentence)
padded_sequences = pad_sequences(
    sequence,
    maxlen=max_len,
    truncating='post',
)
logging.info("Done Creating Embedding Layer")


In [14]:
logging.info("Stared Creating Model")
model = Sequential()
# Declare the input explicitly so the model is built right away and
# model.summary() below can report real output shapes and parameter counts.
# Each sample is one padded sequence of `max_len` token ids.
model.add(keras.Input(shape=(max_len,)))
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim))
# `input_shape` is only honoured on the first layer of a Sequential model, so
# the argument previously passed here had no effect and has been removed; the
# LSTM takes its input shape from the Embedding output.
model.add(LSTM(128))
model.add(Dense(16, activation='relu'))
model.add(Dense(16, activation='relu'))
# One output unit per intent tag.
model.add(Dense(num_classes, activation='softmax'))

# Sparse loss: the targets are the integer ids produced by the label encoder,
# not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
logging.info("Compiled the model")

model.summary()
epochs = 100
logging.info("Started training the model")
model.fit(padded_sequences, np.array(training_labels), epochs=epochs)
    
# Everything needed at inference time is written to one folder.
artifacts_folder = "artifacts"
os.makedirs(artifacts_folder, exist_ok=True)

# Save the trained model to the "artifacts" folder
model_filename = os.path.join(artifacts_folder, 'chat_model.h5')
model.save(model_filename)
logging.info("Model saved to %s", model_filename)

# Save the fitted tokenizer to the "artifacts" folder so inference uses the
# same word -> id mapping as training.
tokenizer_filename = os.path.join(artifacts_folder, 'tokenizer.pickle')
with open(tokenizer_filename, 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f"Tokenizer saved to {tokenizer_filename}")
# The previous message lacked the f-prefix and logged the literal
# "{tokenizer_filename}"; pass the value as a logging argument instead.
logging.info("Tokenizer saved to %s", tokenizer_filename)

# Save the fitted label encoder to the "artifacts" folder so predicted class
# ids can be mapped back to tag names.
lbl_encoder_filename = os.path.join(artifacts_folder, 'label_encoder.pickle')
with open(lbl_encoder_filename, 'wb') as enc:
    pickle.dump(lbl_encoder, enc, protocol=pickle.HIGHEST_PROTOCOL)
    # Added the missing space between "to" and the path.
    print(f"Label Encoder saved to {lbl_encoder_filename}")
# Same fix as above: the placeholder was never interpolated.
logging.info("label saved to %s", lbl_encoder_filename)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.3125 - loss: 1.6097
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.2500 - loss: 1.6065
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.2500 - loss: 1.6041
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step - accuracy: 0.2500 - loss: 1.6016
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - accuracy: 0.2500 - loss: 1.5993
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step - accuracy: 0.2500 - loss: 1.5966
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step - accuracy: 0.2500 - loss: 1.5930
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step - accuracy: 0.2500 - loss: 1.5902
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m



Tokenizer saved to artifacts\tokenizer.pickle
Label Encoder saved toartifacts\label_encoder.pickle


Label Encoder saved toartifacts\label_encoder.pickle


In [15]:
import pandas as pd
from pandas import json_normalize

# Inline intents definition. NOTE: this rebinds the module-level name `data`.
data = {
    "intents": [
        {
            "tag": "greeting",
            "input": [
                "hello",
                "hi",
                "hey",
                "hii",
                "heya",
            ],
            "response": [
                "hi, Welcome to Decentrawood",
                "hi",
            ],
        },
        {
            "tag": "goodbye",
            "input": [
                "bye",
                "ok bye",
                "thanks bye",
                "goodbye",
            ],
            "response": [
                "bye",
                "Visit again",
                "goodbye",
            ],
        },
        {
            "tag": "howami",
            "input": [
                "how are you?",
                "whats up",
                "how you doing?",
            ],
            "response": [
                "im fine, how are you?",
                "im great, thank you",
                "im doing good",
            ],
        },
        {
            "tag": "default",
            "input": [
                "23",
                "kya karu mai",
                "I have to catch a train today",
                " ",
                "i don't know",
                "24",
                "i keep my books in my bag",
                "hii i don' know you",
                "i lost my phone",
                "where are your clothes",
            ],
            "response": [
                "I'm sorry, I didn't understand that. Could you please rephrase or ask another question Related to Game?",
            ],
        },
        {
            "tag": "gamequestion",
            "input": [
                "how to play this game?",
                "how do you play this game",
            ],
            "response": [
                "you can play this game testings",
            ],
        },
    ]
}

# Collapse each intent's list fields into a single comma-separated string.
# This mutates the dicts inside `data` in place.
for intent in data['intents']:
    intent.update(
        input=', '.join(intent['input']),
        response=', '.join(intent['response']),
    )

# One row per intent, one column per key (tag / input / response).
df = json_normalize(data['intents'])

# Show the resulting table.
print(df)


            tag                                              input  \
0      greeting                        [hello, hi, hey, hii, heya]   
1       goodbye                 [bye, ok bye, thanks bye, goodbye]   
2        howami           [how are you?, whats up, how you doing?]   
3       default  [23, kya karu mai, I have to catch a train tod...   
4  gamequestion  [how to play this game?, how do you play this ...   

                                            response  
0                  [hi, Welcome to Decentrawood, hi]  
1                        [bye, Visit again, goodbye]  
2  [im fine, how are you?, im great, thank you, i...  
3  [I'm sorry, I didn't understand that. Could yo...  
4                  [you can play this game testings]  
