#**Training the Model using Naive-Base Algo**


In [1]:
# Loading the necessary Libraries
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import pandas as pd
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
import joblib
import re


Using Google Drive access

In [3]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


### Model Training
- Training the model based on the MultinomialNB Module from the Scikit Learn Pipeline
- Test the accuracy of the model using console result & Heatmap Confusion Matrix visualization

In [22]:
import pandas as pd
import seaborn as sns
import json
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import joblib
import matplotlib.pyplot as plt

# Setting seaborn style
sns.set()

# Define the file path for the dataset
data_path = '/content/drive/MyDrive/Dev/Chatbot/dataTraining.json'

# Loading the dataset from the JSON file
with open(data_path, 'r') as file:
    data = json.load(file)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(data)

# Debugging: Check the first few rows of the DataFrame
print("First few rows of the DataFrame:")
print(df.head())

# Removing the 'FIELD134' column if it exists
if 'FIELD134' in df.columns:
    df = df.drop('FIELD134', axis=1)

# Separating features and target variable
X = df.drop('prognosis', axis=1)
y = df['prognosis']

# Handling columns with lists or sequences
for col in X.columns:
    if X[col].apply(lambda x: isinstance(x, list)).any():
        X[col] = X[col].apply(lambda x: sum(x) if isinstance(x, list) else x)

# Checking for missing values
print("Checking for missing values:")
print(X.isnull().sum())

# Splitting the data into 80% training and 20% testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initializing the Multinomial Naive Bayes model
model = MultinomialNB()

# Training the model with the training data
model.fit(X_train, y_train)

# Predicting the prognosis for the test data
y_pred = model.predict(X_test)

# Calculating accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Creating confusion matrix
conf_mat = confusion_matrix(y_test, y_pred, labels=model.classes_)

# Plotting the heatmap of the confusion matrix
plt.figure(figsize=(10,7))
sns.heatmap(conf_mat, annot=True, fmt='d', xticklabels=model.classes_, yticklabels=model.classes_, cmap='Blues')
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Define the file path where you want to save the model
model_file_path = '/content/drive/MyDrive/Dev/Chatbot/naive_bayes_model.pkl'

# Save the model using Joblib
joblib.dump(model, model_file_path)
print(f"Model saved successfully at {model_file_path}")


First few rows of the DataFrame:
   itching  skin_rash  nodal_skin_eruptions  continuous_sneezing  shivering  \
0        1          1                     1                    0          0   
1        0          1                     1                    0          0   
2        1          0                     1                    0          0   
3        1          1                     0                    0          0   
4        1          1                     1                    0          0   

   chills  joint_pain  stomach_pain  acidity  ulcers_on_tongue  ...  scarring  \
0       0           0             0        0                 0  ...         0   
1       0           0             0        0                 0  ...         0   
2       0           0             0        0                 0  ...         0   
3       0           0             0        0                 0  ...         0   
4       0           0             0        0                 0  ...         0   

   sk

ValueError: empty vocabulary; perhaps the documents only contain stop words

### Use Case Test: Function
- Use case 1 - Change the User symptom variable via function

In [13]:

def prognosis(model, symptoms, symptom_list):
    """
    Predict the prognosis based on the provided symptoms.

    :param model: Trained Multinomial Naive Bayes model.
    :param symptoms: List of symptoms provided by the user.
    :param symptom_list: List of all possible symptoms from the dataset.
    :return: Predicted prognosis (disease).
    """
    # Initialize an empty array for the input symptoms
    input_symptoms = np.zeros(len(symptom_list))

    # Set the corresponding symptom values to 1 based on user input
    for symptom in symptoms:
        if symptom in symptom_list:
            index = symptom_list.index(symptom)
            input_symptoms[index] = 1

    # Convert input_symptoms to a DataFrame with correct column names
    input_symptoms_df = pd.DataFrame([input_symptoms], columns=symptom_list)

    # Predict the prognosis based on the input symptoms
    prediction = model.predict(input_symptoms_df)

    # Return the predicted prognosis directly
    return prediction[0]

# Example usage:
# Extract symptom names from the DataFrame columns
symptom_list = X.columns.tolist()

# Example user input symptoms
user_symptoms = ['fatigue', 'family_history', 'mucoid_sputum', 'breathlessness']  # Replace with actual symptom names from your dataset

# Predict the disease based on the provided symptoms
predicted_disease = prognosis(model, user_symptoms, symptom_list)
print(f"Predicted Disease: {predicted_disease}")


Predicted Disease: Bronchial Asthma


#**Chatbot Feature:**
Training the TFID Vectorizer module on the disease prediction dataset

### Chatbot Vector Training (Pleasantries)
- Training Pleasantries for the vector training & model utilization during user & chatbot interaction.
- This time, we are using the NLTK library with pre-trained words for ease of use. This is just semi-unsupervised training

In [None]:
import random
import json
import pickle
import numpy as np
import tensorflow as tf

import nltk
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
nltk.download('punkt')
nltk.download('wordnet')
intents = json.loads(open('/content/drive/MyDrive/Dev/Chatbot/pleasantries.json').read())
med =

words = []
classes = []
documents = []
ignoreLetters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        wordList = nltk.word_tokenize(pattern)
        words.extend(wordList)
        documents.append((wordList, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

words = [lemmatizer.lemmatize(word) for word in words if word not in ignoreLetters]
words = sorted(set(words))

classes = sorted(set(classes))

pickle.dump(words, open('/content/drive/MyDrive/Dev/Chatbot/words.pkl', 'wb'))
pickle.dump(classes, open('/content/drive/MyDrive/Dev/Chatbot/classes.pkl', 'wb'))

training = []
outputEmpty = [0] * len(classes)

for document in documents:
    bag = []
    wordPatterns = document[0]
    wordPatterns = [lemmatizer.lemmatize(word.lower()) for word in wordPatterns]
    for word in words:
        bag.append(1) if word in wordPatterns else bag.append(0)

    outputRow = list(outputEmpty)
    outputRow[classes.index(document[1])] = 1
    training.append(bag + outputRow)

random.shuffle(training)
training = np.array(training)

trainX = training[:, :len(words)]
trainY = training[:, len(words):]


model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, input_shape=(len(trainX[0]),), activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(64, activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(len(trainY[0]), activation='softmax'))

sgd = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(trainX), np.array(trainY), epochs=200, batch_size=5, verbose=1)
model.save('/content/drive/MyDrive/Dev/Chatbot/chatbot_model.h5', hist)
print('Done')





[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Epoch 1/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0785 - loss: 2.5816    
Epoch 2/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1449 - loss: 2.5759 
Epoch 3/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1657 - loss: 2.5404 
Epoch 4/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2805 - loss: 2.3648  
Epoch 5/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2932 - loss: 2.2434  
Epoch 6/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2517 - loss: 2.2932  
Epoch 7/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3847 - loss: 2.0797  
Epoch 8/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.3584 - loss: 2.0949  
Epoch 9/200
[1m10/10[0m [32m━

KeyboardInterrupt: 

Usage of the chatbot pleasantries model

###**Chatbot Code build 1:**
- Here is where the chatbot magic happens

In [None]:
#QWorking nlp + naïve bayes model

import re
import joblib
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB

from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
import nltk

# Download required NLTK data
nltk.download('wordnet')
nltk.download('omw-1.4')
#An NLTK interface for WordNet. WordNet is a lexical database of English. Using synsets, helps find conceptual relationships between words such as hypernyms, hyponyms, synonyms, antonyms etc.

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

from nltk.stem import WordNetLemmatizer
import json


# Load the trained model
model_file_path = '/content/drive/MyDrive/Dev/Chatbot/naive_bayes_model.pkl'
model = joblib.load(model_file_path)

# Loading the dataset
data_path = '/content/drive/MyDrive/Dev/Chatbot/dataTraining.json'
with open(data_path, 'r') as file:
    data = json.load(file)

df = pd.DataFrame(data)
X = df.drop('prognosis', axis=1)
y = df['prognosis']

# Initialize the TF-IDF Vectorizer and transform the dataset
symptom_list = X.columns.tolist()
tfidf_vectorizer = TfidfVectorizer(vocabulary=symptom_list, tokenizer=lambda x: x.split(','))
X_tfidf = tfidf_vectorizer.fit_transform(X.apply(lambda row: ','.join([symptom for symptom, present in zip(symptom_list, row) if present == 1]), axis=1))

# Initialize lemmatizer
lemmatizer = WordNetLemmatizer()

def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts."""
    tag = wordnet.synsets(word)
    return tag[0].pos() if tag else wordnet.NOUN

def prognosis(model, symptoms, symptom_list):
    """
    Predict the prognosis based on the provided symptoms.
    """
    # Ensure the symptoms are in the same order as the symptom_list
    input_symptoms = np.zeros(len(symptom_list))
    for symptom in symptoms:
        if symptom in symptom_list:
            index = symptom_list.index(symptom)
            input_symptoms[index] = 1

    # Transform the symptoms into the TF-IDF representation
    input_symptoms_str = ','.join(symptoms)
    input_symptoms_tfidf = tfidf_vectorizer.transform([input_symptoms_str])

    # Predict the prognosis based on the input symptoms
    prediction = model.predict(input_symptoms_tfidf)
    return prediction[0]

def extract_symptoms_from_text(text, symptom_list):
    """
    Extract symptoms from the user input text using lemmatization and word similarity.
    """
    symptoms_found = []
    words = text.lower().split()

    for word in words:
        lemma = lemmatizer.lemmatize(word, get_wordnet_pos(word))
        for symptom in symptom_list:
            if re.search(r'\b' + re.escape(symptom.lower()) + r'\b', lemma):
                symptoms_found.append(symptom)
            # Additional check for similarity
            elif symptom.lower().startswith(lemma) or lemma.startswith(symptom.lower()):
                symptoms_found.append(symptom)

    return symptoms_found

def chatbot():
    """
    Interactive chatbot for diagnosing diseases based on user symptoms.
    """
    print("Hello! Welcome to your personal health assistant. I'm here to help you identify potential health concerns based on your symptoms.")
    print("Please write down a list of at least 3 symptoms in your chat for me to provide a prognosis.\nTo exit simply type 'done' or 'q' to finish)")

    n_sym_count = 0  # Count of times user input is invalid
    while True:
        user_input = input("You: ").strip()

        if user_input.lower() in ["quit", "exit", "done", "q"]:
            print("Thank you for chatting with me. Take care and stay healthy!")
            break

        # Extract symptoms from user input
        symptoms = extract_symptoms_from_text(user_input, symptom_list)

        # Check how many symptoms were input by the user
        sym_in_count = len(symptoms)

        # Ensure the user inputs at least 3 symptoms
        if sym_in_count < 3:
            print("Sorry, due to the lack of symptom input, I cannot give an accurate prediction of what disease you might have.\nPlease input a list of symptoms in one chat, identifying at least 3 symptoms for me to provide a prognosis.")
            continue  # Prompt the user to try again

        # If no symptoms were extracted
        if not symptoms:
            n_sym_count += 1
            print("I didn't catch any symptoms in that. Could you describe your symptoms again? For a more accurate prognosis, list at least 4 symptoms.")
            if n_sym_count >= 3:
                print("Here's a list of symptoms I can recognize:")
                print(", ".join(symptom_list))
        else:
            print(f"Okay, based on your symptoms, my knowledge of the nearest symptoms match are: {', '.join(symptoms)}")
            predicted_disease = prognosis(model, symptoms, symptom_list)
            print(f"I think you might have: {predicted_disease}.\Please Consult your nearest doctor for verification.\nThank you for your input! (Type 'done' or 'q' to finish)")



[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [None]:
chatbot()

###**Chatbot Code build 2:**
- Added the pleasantries & the prognosis model

####**Call Chatbot**

In [None]:
import random
import json
import pickle
import numpy as np
import nltk

from nltk.stem import WordNetLemmatizer
from keras.models import load_model

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Load intents from your pleasantries JSON file
intents = json.loads(open('/content/drive/MyDrive/Dev/Chatbot/pleasantries.json').read())

# Load the pre-trained words and classes
words = pickle.load(open('/content/drive/MyDrive/Dev/Chatbot/words.pkl', 'rb'))
classes = pickle.load(open('/content/drive/MyDrive/Dev/Chatbot/classes.pkl', 'rb'))

# Load the trained model
model = load_model('/content/drive/MyDrive/Dev/Chatbot/chatbot_model.h5')

def clean_up_sentence(sentence):
    """Tokenizes and lemmatizes the input sentence."""
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

def bag_of_words(sentence):
    """Converts a sentence into a bag-of-words array."""
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for w in sentence_words:
        for i, word in enumerate(words):
            if word == w:
                bag[i] = 1
    return np.array(bag)

def predict_class(sentence):
    """Predicts the class (intent) of the input sentence."""
    bow = bag_of_words(sentence)
    res = model.predict(np.array([bow]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]

    # Sort by probability of the prediction
    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({'intent': classes[r[0]], 'probability': str(r[1])})
    return return_list

def get_response(intents_list, intents_json):
    """Generates a response based on the predicted intent."""
    tag = intents_list[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if i['tag'] == tag:
            result = random.choice(i['responses'])
            break
    return result

# Start the chatbot
print("GO! Bot is running!")

while True:
    message = input("")
    ints = predict_class(message)
    res = get_response(ints, intents)
    print(res)




GO! Bot is running!
good morning
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Howdy-do, my pleasure, toodle-oo!
hmmmmm ... what are you?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
I'm a med chatbot, the best of both worlds.
where are you by the way?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Can't complain, another day in paradise.
where?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Not much, just chilling.
hmmm so what do you do?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Let's just say your data goes on a thrilling adventure through cyberspace. It might even come back with some wild stories to tell!
how old are you?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Let's just say I'm vintage enough to remember when 'floppy disk' wasn't a save icon.
who made you?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

KeyboardInterrupt: Interrupted by user

In [None]:
# Usage:
chatbot()

Hello! Welcome to your personal health assistant. I'm here to help you with your health concerns or just have a friendly chat.
You can ask about symptoms or just chat with me. To exit, simply type 'done' or 'q'.
You: hey chat, I am experiencing fatigue, high fever & cough. Sometimes i'm also feeling breathless
Bot: cant complain another day in paradise
You: q
Thank you for chatting with me. Take care and stay healthy!


###**Chatbot Code build 3:**
- Added the pleasantries & the prognosis model
- Different Training Algorithm using the keras pipeline

Training using the tensorflow library

In [5]:
import json

# Load the pleasantries intents dataset
pleasantries_intents = json.loads(open('/content/drive/MyDrive/Dev/Chatbot/pleasantries.json').read())

# Initialize a new dictionary to hold the combined intents
intents = {'intents': []}

# Add pleasantries intents to the combined intents
intents['intents'].extend(pleasantries_intents['intents'])

# Load the symptoms and prognosis data
with open('/content/drive/MyDrive/Dev/Chatbot/dataTraining.json', 'r') as file:
    symptom_data = json.load(file)

# Convert symptom data into intents
for item in symptom_data:
    symptoms = [symptom for symptom, value in item.items() if value == 1 and symptom != "prognosis" and symptom != "FIELD134"]
    prognosis = item.get('prognosis', None)

    if prognosis and symptoms:
        intent = {
            'tag': prognosis,
            'patterns': [' '.join(symptoms)],  # Combine symptoms into a single pattern (phrase)
            'responses': [f"The diagnosis is likely {prognosis}."]
        }
        intents['intents'].append(intent)

# Save the combined intents as a new JSON file (optional)
with open('/content/drive/MyDrive/Dev/Chatbot/combined_intents.json', 'w') as outfile:
    json.dump(intents, outfile, indent=4)


In [14]:
import random
import json
import pickle
import numpy as np
import tensorflow as tf
import joblib

import nltk
from nltk.stem import WordNetLemmatizer

# Download the 'punkt' package
nltk.download('punkt')
nltk.download('wordnet')

# Load Naive Bayes model (symptom recognition)
model_file_path = '/content/drive/MyDrive/Dev/Chatbot/naive_bayes_model.pkl'
naive_bayes_model = joblib.load(model_file_path)

# Load pleasantries intents dataset
lemmatizer = WordNetLemmatizer()

# Load pleasantries data
pleasantries_intents = json.loads(open('/content/drive/MyDrive/Dev/Chatbot/pleasantries.json').read())

# Assuming we can merge the datasets in memory
# Create a combined dataset including symptoms and pleasantries
intents = {'intents': []}
# Add pleasantries intents
intents['intents'].extend(pleasantries_intents['intents'])

# The Naive Bayes model doesn't have an intent file, so it has symptoms as X and prognosis as y.
# We'll add this information to the combined intents
# Ensure you have the variables `X` and `y` for symptoms and prognosis data
X = [...]  # List of symptoms
y = [...]  # List of corresponding prognoses

naive_bayes_intents = {
    'intents': [
        {
            'tag': prognosis,  # Each prognosis is treated as a "tag"
            'patterns': [symptom]  # Patterns are symptoms
        }
        for symptom, prognosis in zip(X, y)
    ]
}

# Add the intents from the Naive Bayes model's data
intents['intents'].extend(naive_bayes_intents['intents'])

words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Ensure pattern is a string
        if isinstance(pattern, list):
            # Flatten the list to a single string
            pattern = ' '.join(pattern)
        elif not isinstance(pattern, str):
            # If it's not a string or list, skip this pattern
            continue

        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))

        if intent['tag'] not in classes:
            classes.append(intent['tag'])


words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))

classes = sorted(set(classes))

pickle.dump(words, open('/content/drive/MyDrive/Dev/Chatbot/words.pkl', 'wb'))
pickle.dump(classes, open('/content/drive/MyDrive/Dev/Chatbot/classes.pkl', 'wb'))

training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)

    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append(bag + output_row)

random.shuffle(training)
training = np.array(training)

train_x = training[:, :len(words)]
train_y = training[:, len(words):]

# TensorFlow model definition and training
tf_model = tf.keras.Sequential()
tf_model.add(tf.keras.layers.Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
tf_model.add(tf.keras.layers.Dropout(0.5))
tf_model.add(tf.keras.layers.Dense(64, activation='relu'))
tf_model.add(tf.keras.layers.Dropout(0.5))
tf_model.add(tf.keras.layers.Dense(len(train_y[0]), activation='softmax'))

sgd = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
tf_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = tf_model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
tf_model.save('/content/drive/MyDrive/Dev/Chatbot/chatbot_model.h5', hist)
print('Done')


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.0336 - loss: 2.6693    
Epoch 2/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0330 - loss: 2.5411      
Epoch 3/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0360 - loss: 2.5414     
Epoch 4/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1385 - loss: 2.4617
Epoch 5/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2148 - loss: 2.3542  
Epoch 6/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1981 - loss: 2.4444      
Epoch 7/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2360 - loss: 2.2464 
Epoch 8/200
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1907 - loss: 2.2571     
Epoch 9/200
[1m10/



Done


In [16]:
import json
import pickle
import numpy as np
import nltk
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
import joblib

# Load the trained TensorFlow model
tf_model = tf.keras.models.load_model('/content/drive/MyDrive/Dev/Chatbot/chatbot_model.h5')

# Load the trained Naive Bayes model
naive_bayes_model = joblib.load('/content/drive/MyDrive/Dev/Chatbot/naive_bayes_model.pkl')

# Load the words and classes
words = pickle.load(open('/content/drive/MyDrive/Dev/Chatbot/words.pkl', 'rb'))
classes = pickle.load(open('/content/drive/MyDrive/Dev/Chatbot/classes.pkl', 'rb'))

# Load the intents file (which contains pleasantries and symptoms data)
intents = json.loads(open('/content/drive/MyDrive/Dev/Chatbot/pleasantries.json').read())

# Initialize the lemmatizer
lemmatizer = WordNetLemmatizer()

# Function to clean up user input sentence
def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

# Function to convert user input into a bag of words
def bow(sentence, words, show_details=True):
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
    return np.array(bag)

# Function to predict the class using the TensorFlow model
def predict_class(sentence, model):
    p = bow(sentence, words, show_details=False)
    res = model.predict(np.array([p]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]

    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
    return return_list

# Function to get the appropriate response from the intents file
def get_response(ints, intents_json):
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if i['tag'] == tag:
            result = i['responses']
            break
    return result

# Main function to handle chatbot response using the TensorFlow model
def chatbot_response(text):
    ints = predict_class(text, tf_model)  # Using TensorFlow model
    res = get_response(ints, intents)
    return res

# Function to handle chatbot response using the Naive Bayes model
def naive_bayes_response(text):
    bag_of_words = bow(text, words, show_details=False)
    prediction = naive_bayes_model.predict([bag_of_words])
    return prediction[0]

# Example function to interact with the chatbot
def interact_with_chatbot():
    print("Start talking with the bot (type 'quit' to stop)!")
    while True:
        message = input("")
        if message.lower() == "quit":
            break

        # Check response from TensorFlow model (pleasantries and symptoms)
        tf_response = chatbot_response(message)
        print(f"Chatbot (TF): {tf_response}")

        # Check response from Naive Bayes model (symptoms and prognosis)
        nb_response = naive_bayes_response(message)
        print(f"Chatbot (NB): {nb_response}")

# Start the chatbot interaction
interact_with_chatbot()




Start talking with the bot (type 'quit' to stop)!
i am vomiting, & have loss of  appetite, as well I have passage of gases, Im also internal itching
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 110ms/step
Chatbot (TF): ["Lay it on me, I'm an open book!", "Absolutely, fire away and I'll do my best to assist.", "Of course, I'm all ears. What can I help you with?"]




ValueError: X has 90 features, but MultinomialNB is expecting 131 features as input.