In [2]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

# ---------------------------------------------------------------------------
# Load the intent definitions
# ---------------------------------------------------------------------------
words = []
classes = []
documents = []
ignore_words = ['?', '!']

# Read with an explicit encoding (the inference cells also read this file as
# UTF-8) and let the context manager close the handle.
with open('data.json', encoding='utf8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)


# ---------------------------------------------------------------------------
# Build the raw vocabulary, the class list and the (tokens, tag) corpus
# ---------------------------------------------------------------------------
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Tokenize the pattern and add its tokens to the raw vocabulary
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # Keep the tokenized pattern together with the tag it belongs to
        documents.append((w, intent['tag']))

        # Register the tag the first time it is seen
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Lowercase + lemmatize, drop the ignored punctuation, de-duplicate and sort so
# that every vocabulary word has a stable index.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})

classes = sorted(set(classes))

print (len(documents), "documents")

print (len(classes), "classes", classes)

print (len(words), "unique lemmatized words", words)


# Persist the vocabulary and class order; the inference cells read these files
# to rebuild the same feature / label indexing.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

# ---------------------------------------------------------------------------
# Build the training matrix
# ---------------------------------------------------------------------------
training = []
output_empty = [0] * len(classes)

for doc in documents:
    # Lemmatized, lowercased tokens of this pattern (a set: only membership matters)
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}
    # Bag of words: 1 where the vocabulary word occurs in the pattern, else 0
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot label: '0' for each tag and '1' for the current tag
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append((bag, output_row))

# Shuffle the (bag, label) pairs together so every feature row stays aligned
# with its label.
random.shuffle(training)

# Split the shuffled pairs into the feature and label arrays
train_x = np.array([bag for bag, _ in training])
train_y = np.array([output_row for _, output_row in training])

print("Training data created")


# ---------------------------------------------------------------------------
# Model: 128 -> 64 -> len(classes) dense layers with dropout, softmax output
# ---------------------------------------------------------------------------
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile with the Adam optimizer and categorical cross-entropy (one-hot labels)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


# Fit and save the model. train_x / train_y are already NumPy arrays.
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)
# Only the path is passed: the second positional parameter of Model.save is
# `overwrite`, not a training history.
model.save('chatbot_model.h5')

print("model created")

80 documents
27 classes ['advancement of ayurveda', 'arthritis', 'ayurbot', 'ayurveda and homeopathy', 'ayurvedic medication', 'body pain', 'cancer', 'cold', 'cough', 'diabetes', 'drugs and its uses', 'fever', 'goodbye', 'greetings', 'headache', 'heart disease', 'history', 'joints pain', 'pregnancy', 'red eyes', 'running nose', 'side effects of ayurveda', 'sour throat', 'stomachache', 'thanks', 'wet cough', 'why ayurveda']
131 unique lemmatized words ['&', "'s", '(', ')', ',', '-', ':', 'a', 'about', 'advancement', 'advantage', 'all', 'and', 'anjana', 'application', 'are', 'aristab', 'arka', 'arthritis', 'asava', 'avaleha', 'ayurveda', 'ayurvedic', 'bad', 'better', 'between', 'bhasma', 'bodypain', 'bye', 'c', 'cancer', 'chuma', 'churna', 'churna/kashayam', 'cold', 'cough', 'd', 'describe', 'diabetes', 'difference', 'disadvantage', 'disease', 'drug', 'dry', 'e', 'effect', 'eye', 'f', 'for', 'g', 'ghrita', 'goodbye', 'guggulu', 'gutika', 'h', 'have', 'headache', 'heart', 'hello', 'hi', '

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/shreyaschandrashekar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/shreyaschandrashekar/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 712us/step - accuracy: 0.0464 - loss: 3.3395    
Epoch 2/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 570us/step - accuracy: 0.0491 - loss: 3.2468  
Epoch 3/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 521us/step - accuracy: 0.0766 - loss: 3.2632  
Epoch 4/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 537us/step - accuracy: 0.0755 - loss: 3.2588  
Epoch 5/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 499us/step - accuracy: 0.1156 - loss: 3.1569
Epoch 6/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459us/step - accuracy: 0.1214 - loss: 3.1852  
Epoch 7/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 461us/step - accuracy: 0.2117 - loss: 3.1036
Epoch 8/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 425us/step - accuracy: 0.2283 - loss: 3.0405
Epoch 9/200
[1m16/16[0m [32m━



model created


In [4]:
import nltk
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import pickle
import numpy as np

#from keras.models import load_model
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import CategoricalAccuracy
# Restore the network written to 'chatbot_model.h5' and compile it with an
# explicit accuracy metric.
model = load_model('chatbot_model.h5')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[CategoricalAccuracy()])
import json
import random
# Context managers close each file as soon as it has been read.
with open('data.json', encoding="utf8") as intents_file:
    intents = json.load(intents_file)
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# produced by a trusted run of the training cell.
with open('words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)


def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens lowercased and lemmatized.

    Uses ``nltk.word_tokenize`` for splitting and the notebook-level
    ``lemmatizer`` for normalization. Returns a list of strings.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(sentence)]

# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence

def bow(sentence, words, show_details=True):
    """Encode ``sentence`` as a 0/1 bag-of-words vector over ``words``.

    Args:
        sentence: Raw text; it is normalized with ``clean_up_sentence``.
        words: Vocabulary sequence; position ``i`` of the result refers to ``words[i]``.
        show_details: When true, print every vocabulary word that was matched.

    Returns:
        A NumPy array of length ``len(words)`` holding 1 where the vocabulary
        word occurs in the sentence and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # Mark the vocabulary slot for this token as present
            bag[position] = 1
            if show_details:
                print ("found in bag: %s" % vocab_word)
    return np.array(bag)

def predict_class(sentence, model):
    """Return the intents predicted for ``sentence``, strongest first.

    The sentence is encoded with ``bow`` against the notebook-level ``words``
    vocabulary and passed to ``model.predict``. Classes scoring at or below
    the 0.25 threshold are dropped.

    Returns:
        A list of ``{"intent": <class name>, "probability": <str>}`` dicts
        sorted by descending score; it is empty when no class passes the
        threshold.
    """
    ERROR_THRESHOLD = 0.25
    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]
    # Keep only confident classes, ordered from most to least likely
    ranked = sorted(
        ((idx, score) for idx, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [{"intent": classes[idx], "probability": str(score)} for idx, score in ranked]

def getResponse(ints, intents_json, default_response="Sorry, I didn't understand that. Could you rephrase?"):
    """Pick a random response for the top predicted intent.

    Args:
        ints: Predictions as returned by ``predict_class``; only the first
            entry's ``'intent'`` value is used. May be empty.
        intents_json: Dict with an ``'intents'`` list whose items carry a
            ``'tag'`` and a list of ``'responses'``.
        default_response: Text returned when there is no prediction, when no
            intent carries the predicted tag, or when the matching intent has
            no responses.

    Returns:
        One response string chosen with ``random.choice``, or
        ``default_response`` in the fallback cases above.
    """
    # No class passed the confidence threshold: nothing to look up
    if not ints:
        return default_response

    tag = ints[0]['intent']
    for intent in intents_json['intents']:
        if intent['tag'] == tag:
            responses = intent['responses']
            # The first intent with a matching tag decides the answer
            return random.choice(responses) if responses else default_response

    # The predicted tag is not present in the intents file
    return default_response

def chatbot_response(msg):
    """Return the bot's reply to ``msg``.

    Classifies the message with ``predict_class`` using the notebook-level
    ``model`` and resolves the reply with ``getResponse`` against ``intents``.
    """
    return getResponse(predict_class(msg, model), intents)



In [7]:
import tensorflow as tf
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
# Load the Keras model stored in 'chatbot_model.h5' and rebind the
# notebook-level name `model` to it.
model = tf.keras.models.load_model('chatbot_model.h5')



In [9]:
import streamlit as st
import nltk
from nltk.stem import WordNetLemmatizer
import pickle
import numpy as np
import json
import random
from tensorflow.keras.models import load_model
from tensorflow.keras.metrics import CategoricalAccuracy

# Load pre-trained model and other necessary data
model = load_model('chatbot_model.h5')
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=[CategoricalAccuracy()])
# Context managers close each file as soon as it has been read.
with open('data.json', encoding="utf8") as intents_file:
    intents = json.load(intents_file)
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files
# produced by a trusted training run.
with open('words.pkl', 'rb') as words_file:
    words = pickle.load(words_file)
with open('classes.pkl', 'rb') as classes_file:
    classes = pickle.load(classes_file)

# Shared lemmatizer used by clean_up_sentence
lemmatizer = WordNetLemmatizer()

def clean_up_sentence(sentence):
    """Split ``sentence`` into tokens and normalize each one.

    Every token produced by ``nltk.word_tokenize`` is lowercased and then
    lemmatized with the module-level ``lemmatizer``. Returns a list of strings.
    """
    normalized = []
    for raw_token in nltk.word_tokenize(sentence):
        normalized.append(lemmatizer.lemmatize(raw_token.lower()))
    return normalized

def bow(sentence, words, show_details=True):
    """Build the bag-of-words vector of ``sentence`` over the vocabulary ``words``.

    Args:
        sentence: Raw text, normalized through ``clean_up_sentence``.
        words: Vocabulary sequence defining the position of each feature.
        show_details: When true, print each vocabulary word found in the sentence.

    Returns:
        NumPy array with one 0/1 entry per vocabulary word.
    """
    presence = [0 for _ in words]
    for token in clean_up_sentence(sentence):
        # Every vocabulary position equal to this token, in vocabulary order
        hits = [idx for idx, entry in enumerate(words) if entry == token]
        for idx in hits:
            presence[idx] = 1
            if show_details:
                print("found in bag: %s" % words[idx])
    return np.array(presence)

def predict_class(sentence, model):
    """Classify ``sentence`` and list the intents scoring above 0.25.

    The sentence is vectorized with ``bow`` over the module-level ``words``
    and scored by ``model.predict``.

    Returns:
        List of ``{"intent": ..., "probability": ...}`` dicts (probability as
        a string), ordered from highest to lowest score. It may be empty.
    """
    threshold = 0.25
    vector = bow(sentence, words, show_details=False)
    probabilities = model.predict(np.array([vector]))[0]

    # Collect (probability, class index) for every confident class
    candidates = []
    for class_idx, prob in enumerate(probabilities):
        if prob > threshold:
            candidates.append((prob, class_idx))
    candidates.sort(key=lambda candidate: candidate[0], reverse=True)

    return [
        {"intent": classes[class_idx], "probability": str(prob)}
        for prob, class_idx in candidates
    ]

def getResponse(ints, intents_json, default_response="Sorry, I didn't understand that. Could you rephrase?"):
    """Choose a random response for the highest-ranked predicted intent.

    Args:
        ints: Predictions from ``predict_class``; only ``ints[0]['intent']``
            is consulted. May be an empty list.
        intents_json: Dict containing an ``'intents'`` list of items with
            ``'tag'`` and ``'responses'`` entries.
        default_response: Reply used when there is no prediction, the tag is
            unknown, or the matching intent lists no responses.

    Returns:
        A response string picked with ``random.choice``, or
        ``default_response`` in the fallback cases.
    """
    # Nothing exceeded the confidence threshold
    if not ints:
        return default_response

    tag = ints[0]['intent']
    # First intent whose tag matches, or None when the tag is unknown
    match = next((item for item in intents_json['intents'] if item['tag'] == tag), None)
    if match is None or not match['responses']:
        return default_response
    return random.choice(match['responses'])

def chatbot_response(msg):
    """Produce the chatbot's answer for the user message ``msg``.

    Runs ``predict_class`` with the module-level ``model`` and passes the
    predictions to ``getResponse`` together with the loaded ``intents``.
    """
    predictions = predict_class(msg, model)
    return getResponse(predictions, intents)

# Streamlit UI
st.title("Chatbot")

user_input = st.text_input("You: ")

if st.button("Send"):
    if user_input.strip():
        response = chatbot_response(user_input)
        st.text_area("Bot:", value=response, height=100, max_chars=None, key=None)
    else:
        # A blank message becomes an all-zero bag of words, so ask for input
        # instead of sending it to the model.
        st.warning("Please type a message first.")


2024-04-27 19:31:54.716 
  command:

    streamlit run /Users/shreyaschandrashekar/Library/Python/3.11/lib/python/site-packages/ipykernel_launcher.py [ARGUMENTS]
