In [1]:
# Cell 1: Installations and Imports
# ---------------------------------------------------------------------------
# Install necessary packages for text-to-speech functionality in Colab
%pip install pyttsx3
!sudo apt-get -qq install espeak

# Import all required libraries
import pandas as pd
import numpy as np
import csv
import re
import pyttsx3
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier, _tree
from sklearn.svm import SVC
import warnings

# Suppress every UserWarning raised from sklearn modules (this includes the feature-names warning)
warnings.filterwarnings("ignore", category=UserWarning, module='sklearn')


# Cell 2: Initializations and Helper Functions
# ---------------------------------------------------------------------------

# --- Text-to-Speech Initialization ---
engine = pyttsx3.init()

def text_to_speech(text):
    """Speak *text* aloud using the module-level pyttsx3 ``engine``.

    The rate and volume properties are re-applied on every call, the text is
    queued with ``engine.say`` and then processed with ``engine.runAndWait``.
    """
    voice_settings = (('rate', 150), ('volume', 0.9))
    for prop_name, prop_value in voice_settings:
        engine.setProperty(prop_name, prop_value)
    engine.say(text)
    engine.runAndWait()

# --- Data Dictionaries ---
# First CSV column (symptom name) -> integer severity; filled by
# getSeverityDict() and read by calc_condition().
severityDictionary = {}
# A dict despite its name: first CSV column -> description text; filled by
# getDescription() and looked up by disease name in tree_to_code().
description_list = {}
# First CSV column -> list of four precaution strings; filled by
# getprecautionDict() and looked up by disease name in tree_to_code().
precautionDictionary = {}
# Feature column name -> positional index of that column in the training
# features; filled once the training data has been loaded.
symptoms_dict = {}

# --- Helper Functions to Load Data from CSVs ---
def getSeverityDict(path='/content/Symptom_severity.csv'):
    """Load symptom severity weights from a CSV file into ``severityDictionary``.

    Each usable row carries the symptom name in its first column and a
    non-negative integer severity in its second. Rows with fewer than two
    columns, or whose second column is not made up of digits (a header row,
    for instance), are skipped. Existing entries with the same key are
    overwritten; the dictionary is updated in place.

    Args:
        path: Location of the severity CSV. Defaults to the path this
            notebook has always used.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) < 2:
                continue
            # Tolerate padding such as "itching, 3" instead of silently
            # dropping the row.
            severity = row[1].strip()
            if severity.isdigit():
                severityDictionary[row[0]] = int(severity)

def getDescription(path='/content/symptom_Description.csv'):
    """Load description texts from a CSV file into ``description_list``.

    For every row with at least two columns, the first column becomes the
    key and the second column the description. Shorter rows are skipped.
    The dictionary is updated in place.

    Args:
        path: Location of the description CSV. Defaults to the path this
            notebook has always used.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' is required by the csv module so that quoted fields that
    # contain line breaks are parsed as a single field.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) > 1:
                description_list[row[0]] = row[1]

def getprecautionDict(path='/content/symptom_precaution.csv'):
    """Load precaution lists from a CSV file into ``precautionDictionary``.

    For every row with at least five columns, the first column becomes the
    key and columns two to five become a list of four precaution strings.
    Shorter rows are skipped. The dictionary is updated in place.

    Args:
        path: Location of the precaution CSV. Defaults to the path this
            notebook has always used.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires for file objects passed to csv.reader.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) > 4:
                # Exactly the four columns that follow the key.
                precautionDictionary[row[0]] = row[1:5]

# Cell 3: Data Loading and Model Training
# ---------------------------------------------------------------------------

# Load training and testing datasets.
# NOTE(review): `testing` is not referenced anywhere else in the visible code.
training = pd.read_csv('/content/Training.csv')
testing = pd.read_csv('/content/Testing.csv')

# Prepare data for modeling: every column except the last is used as a
# feature, and the 'prognosis' column is the target.
# NOTE(review): this assumes 'prognosis' is the last column -- confirm.
cols = training.columns[:-1]
x = training[cols]
y = training['prognosis']

# Encode the target variable (prognosis) as integers; `le` is used later to
# turn predicted class indices back into disease names.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

# Split the data. x_test / y_test are only referenced by the commented-out
# accuracy line below.
x_train, x_test, y_train, y_test = train_test_split(x, y_encoded, test_size=0.33, random_state=42)

# Group data by prognosis and take the per-column maximum, to identify
# related symptoms later. For 0/1 symptom flags a non-zero entry would mean
# the symptom appears in at least one training row for that disease
# (assumes binary indicator columns -- TODO confirm).
reduced_data = training.groupby(training['prognosis']).max()

# Populate the symptoms dictionary: feature column name -> column position.
for index, symptom in enumerate(x.columns):
    symptoms_dict[symptom] = index

# --- Primary Decision Tree Classifier ---
# Trained on the training split with integer-encoded labels.
# NOTE(review): no random_state is passed, so the fitted tree may differ
# between runs -- confirm whether reproducibility matters here.
clf = DecisionTreeClassifier()
clf.fit(x_train, y_train)
print("Primary Decision Tree Model trained.")
# print(f"Primary Model Accuracy: {clf.score(x_test, y_test)}")

# --- Secondary Decision Tree Classifier (for refined prediction) ---
# This model is trained once here to avoid retraining on every function call.
# Despite the `rf_` prefix it is a DecisionTreeClassifier, and it is fitted on
# the raw `y` labels (not `y_encoded`), so its predictions are label values
# rather than the integer codes the primary model produces.
rf_clf = DecisionTreeClassifier()
rf_clf.fit(x, y) # Train on the full dataset for better secondary prediction
print("Secondary Decision Tree Model trained.")


# Cell 4: Core Chatbot Logic
# ---------------------------------------------------------------------------

def check_pattern(dis_list, inp):
    """Find the names in ``dis_list`` that start with the user's input.

    Spaces in ``inp`` are replaced with underscores and the result is used as
    a regular expression anchored at the start of each candidate name.

    Args:
        dis_list: Iterable of candidate symptom names.
        inp: Raw text typed by the user; interpreted as a regex pattern.

    Returns:
        ``(1, matches)`` when at least one name matches, otherwise
        ``(0, [])``. Input that is not a valid regular expression is treated
        as "no match".
    """
    pattern = f"^{inp.replace(' ', '_')}"
    try:
        regexp = re.compile(pattern)
    except re.error:
        # For example an unbalanced "(" typed by the user: report "no match"
        # so the caller can re-prompt instead of crashing.
        return 0, []
    pred_list = [item for item in dis_list if regexp.search(item)]
    # The parentheses matter: without them the conditional expression binds
    # only to the second tuple element and the flag is always 1.
    return (1, pred_list) if pred_list else (0, [])

def sec_predict(symptoms_exp):
    """Predict a disease from the symptoms the user confirmed.

    Builds a one-row feature frame holding 1 for every confirmed symptom and
    0 everywhere else, then asks the secondary classifier ``rf_clf`` for a
    prediction.

    Args:
        symptoms_exp: Iterable of symptom names. Names that are not keys of
            ``symptoms_dict`` are ignored.

    Returns:
        Whatever ``rf_clf.predict`` returns for the one-row input.
    """
    input_vector = np.zeros(len(symptoms_dict))
    for item in symptoms_exp:
        if item in symptoms_dict:
            input_vector[symptoms_dict[item]] = 1
    # symptoms_dict is filled from the training feature columns in order, so
    # its keys reproduce the column names without re-reading Training.csv
    # from disk on every call. Named columns also avoid sklearn's
    # feature-name warning.
    input_df = pd.DataFrame([input_vector], columns=list(symptoms_dict))
    return rf_clf.predict(input_df)

def print_disease(node):
    """Turn a tree node's value array into a list of disease names.

    The first row of ``node`` is taken, the positions of its non-zero entries
    are decoded through the label encoder ``le``, and each resulting name is
    returned with surrounding whitespace removed.
    """
    class_row = node[0]
    class_ids = class_row.nonzero()[0]
    labels = le.inverse_transform(class_ids)
    return [label.strip() for label in list(labels)]

def calc_condition(exp, days):
    """Print and speak advice based on symptom severity and duration.

    The severities of the symptoms in ``exp`` (0 for unknown symptoms) are
    summed, multiplied by ``days`` and divided by ``len(exp) + 1``. A score
    above 13 advises seeing a doctor; anything else advises precautions.
    """
    severity_sum = 0
    for symptom in exp:
        severity_sum += severityDictionary.get(symptom, 0)

    score = (severity_sum * days) / (len(exp) + 1)
    if score > 13:
        advice = "You should take the consultation from a doctor."
    else:
        advice = "It might not be that bad but you should take precautions."

    print("\n" + advice)
    text_to_speech(advice)

def getInfo():
    """Show the banner, ask for the user's name and greet them.

    The greeting is both printed and spoken. Nothing is returned.
    """
    banner = "-----------------------------------HealthCare ChatBot-----------------------------------"
    print(banner)
    print("\nYour Name?\t\t\t\t\t->", end="")
    user_name = input("")
    greeting = f"Hello {user_name}"
    print(greeting)
    text_to_speech(greeting)

def tree_to_code(tree, feature_names):
    """Run the interactive diagnosis dialogue driven by a fitted decision tree.

    The user is asked for one symptom and for how many days they have had
    it. The tree is then walked with only that symptom treated as present.
    At the leaf that is reached, the user is asked about every symptom that
    ``reduced_data`` records for the predicted disease; a second prediction
    is made from the confirmed symptoms, and severity advice, descriptions
    and precautions are printed (prompts and results are also spoken).

    Args:
        tree: Fitted decision tree classifier exposing ``tree_``; its class
            indices are decoded through ``print_disease``.
        feature_names: Sequence of feature (symptom) names, indexable by the
            feature indices stored in the tree.

    Returns:
        None. All results are reported through ``print`` and text-to-speech.
    """
    tree_ = tree.tree_
    # Name of the feature tested at each node; leaves get a placeholder.
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]
    chk_dis = list(feature_names)

    # --- Step 1: get the primary symptom from the user ---
    while True:
        print("\nEnter the symptom you are experiencing\t\t->", end="")
        text_to_speech("Enter the symptom you are experiencing")
        conf, cnf_dis = check_pattern(chk_dis, input(""))
        if conf != 1:
            print("Enter a valid symptom.")
            text_to_speech("Enter a valid symptom.")
            continue

        print("Searches related to input:")
        for num, it in enumerate(cnf_dis):
            print(f"{num} ) {it}")

        conf_inp = 0
        if len(cnf_dis) > 1:
            try:
                conf_inp = int(input(f"Select the one you meant (0 - {len(cnf_dis)-1}): "))
            except ValueError:
                conf_inp = -1
            # Out-of-range numbers used to raise IndexError, and negative
            # ones silently indexed from the end of the list; both are now
            # handled like non-numeric input.
            if not 0 <= conf_inp < len(cnf_dis):
                print("Invalid input, selecting the first option.")
                conf_inp = 0

        disease_input = cnf_dis[conf_inp]
        break

    # --- Step 2: get the duration of the symptom ---
    while True:
        try:
            num_days = int(input("Okay. From how many days?: "))
        except ValueError:
            print("Enter a valid number of days.")
            continue
        if num_days >= 0:
            break
        # A negative duration would make the severity score meaningless.
        print("Enter a valid number of days.")

    # --- Step 3: walk the tree down to a leaf ---
    # Only the symptom chosen above counts as present (value 1); every other
    # feature is treated as absent (value 0).
    node = 0
    while tree_.feature[node] != _tree.TREE_UNDEFINED:
        val = 1 if feature_name[node] == disease_input else 0
        if val <= tree_.threshold[node]:
            node = tree_.children_left[node]
        else:
            node = tree_.children_right[node]

    # --- Step 4: ask about the symptoms recorded for the predicted disease ---
    present_disease = print_disease(tree_.value[node])
    red_cols = reduced_data.columns
    # NOTE(review): print_disease strips whitespace from the labels; if the
    # prognosis labels in the training data carry stray whitespace this
    # lookup would miss -- confirm against the data.
    symptoms_given = red_cols[reduced_data.loc[present_disease].values[0].nonzero()]

    print("\nAre you experiencing any of the following symptoms?")
    text_to_speech("Are you experiencing any of the following symptoms?")

    symptoms_exp = []
    for syms in list(symptoms_given):
        question = f"{syms.replace('_', ' ')}?"
        print(f"{question} : ", end='')
        text_to_speech(question)
        while True:
            inp = input("").lower()
            if inp in ["yes", "no"]:
                break
            print("Please provide a 'yes' or 'no' response: ", end="")
        if inp == "yes":
            symptoms_exp.append(syms)

    second_prediction = sec_predict(symptoms_exp)
    calc_condition(symptoms_exp, num_days)

    # --- Step 5: report the diagnosis ---
    # Explicit length checks instead of relying on array truthiness.
    if not present_disease or len(second_prediction) == 0:
        return

    primary = present_disease[0]
    # The primary name has already been stripped by print_disease; strip the
    # secondary one too so identical diseases compare equal.
    secondary = str(second_prediction[0]).strip()

    if primary == secondary:
        print(f"\nYou may have {primary}")
        text_to_speech(f"You may have {primary}")
        print(description_list.get(primary, "No description available."))
    else:
        print(f"\nYou may have {primary} or {secondary}")
        text_to_speech(f"You may have {primary} or {secondary}")
        print(description_list.get(primary, "No description available."))
        print(description_list.get(secondary, "No description available."))

    # Precautions are listed for the primary prediction only.
    precution_list = precautionDictionary.get(primary)
    if precution_list:
        print("\nTake the following measures:")
        text_to_speech("Take the following measures:")
        for i, j in enumerate(precution_list):
            print(f"{i+1} ) {j}")


# Cell 5: Main Execution Block
# ---------------------------------------------------------------------------

# Load all necessary data into dictionaries. These calls must run before the
# dialogue starts: calc_condition() and tree_to_code() read the dictionaries
# that the loaders fill.
getSeverityDict()
getDescription()
getprecautionDict()

# Start the chatbot interaction: greet the user, then run the symptom
# dialogue on the primary tree (clf) using the training feature columns (cols).
getInfo()
tree_to_code(clf, cols)

print("\n----------------------------------------------------------------------------------------")

Collecting pyttsx3
  Downloading pyttsx3-2.98-py3-none-any.whl.metadata (3.8 kB)
Downloading pyttsx3-2.98-py3-none-any.whl (34 kB)
Installing collected packages: pyttsx3
Successfully installed pyttsx3-2.98
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debconf/FrontEnd/Dialog.pm line 78, <> line 5.)
debconf: falling back to frontend: Readline
debconf: unable to initialize frontend: Readline
debconf: (This frontend requires a controlling tty.)
debconf: falling back to frontend: Teletype
dpkg-preconfigure: unable to re-open stdin: 
Selecting previously unselected package libportaudio2:amd64.
(Reading database ... 126308 files and directories currently installed.)
Preparing to unpack .../libportaudio2_19.6.0-1.1_amd64.deb ...
Unpacking libportaudio2:amd64 (19.6.0-1.1) ...
Selecting previously unselected package libsonic0:amd64.
Preparing to unpack .../libsonic0_0.2.0-11bu