In [1]:

import re
import pandas as pd
from sklearn.tree import DecisionTreeClassifier, _tree
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
import csv
import warnings

warnings.filterwarnings("ignore", category=DeprecationWarning)

In [2]:
# Name of the bot
bot_name = 'chatbot'


In [3]:
# Read the training table and the separate testing table from disk
training, testing = map(
    pd.read_csv,
    (
        '/home/rgukt/Desktop/majorproject2/Training.csv',
        '/home/rgukt/Desktop/majorproject2/Testing.csv',
    ),
)


In [4]:

# Feature columns are every column but the last one (presumably the dropped
# column is 'prognosis' -- confirm against the CSV); the label is 'prognosis'.
cols = training.columns[:-1]
x = training[cols]
# y1 keeps a second reference to the raw string labels
y = y1 = training['prognosis']


In [5]:
# Group data by prognosis
# Collapse the training rows to one row per prognosis, keeping the column-wise
# maximum within each group. tree_to_code() later looks a disease up in this
# table and asks the user about every column whose value is non-zero.
reduced_data = training.groupby(training['prognosis']).max()


In [6]:
# Encode labels
from sklearn.preprocessing import LabelEncoder

# Fit on y1 (the untouched string labels) rather than on y. y is overwritten
# with integer codes below, so fitting on y would make a second run of this
# cell learn the integers as classes; le.inverse_transform() would then return
# numbers and print_disease()'s .strip() call would fail.
le = LabelEncoder()
le.fit(y1)
y = le.transform(y1)

In [7]:
# Hold out 33% of the training table for validation (fixed seed for a stable split)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.33
)
# Features and encoded labels taken from the separate testing file
testx, testy = testing[cols], le.transform(testing['prognosis'])

In [8]:
# Initialize decision tree classifier and fit the model
# random_state is pinned because the tree picks among equally good splits at
# random; without a seed the fitted tree (and therefore the questions asked by
# tree_to_code) can change from one run of the notebook to the next.
clf1 = DecisionTreeClassifier(random_state=42)
clf = clf1.fit(x_train, y_train)


In [9]:
# Fit a support-vector classifier on the same split and report its held-out score
model = SVC().fit(x_train, y_train)
print("SVM Score:", model.score(x_test, y_test))


SVM Score: 1.0


In [10]:
# Extract feature importances
features = cols
importances = clf.feature_importances_
# Feature positions ordered from highest to lowest importance
indices = np.argsort(importances)[::-1]

In [11]:
# Define a function to read text
def read_text(text):
    """Write ``text`` to standard output followed by a newline."""
    print(text)

In [12]:
# Empty lookup tables, filled later by getSeverityDict(), getDescription()
# and getprecautionDict() respectively
severityDictionary, description_list, precautionDictionary = {}, {}, {}

In [13]:
# Symptom name -> column position; starts empty and is filled in the next cell
symptoms_dict = dict()


In [14]:
# Record the position of every feature column under its symptom name
symptoms_dict.update(
    (symptom, index) for index, symptom in enumerate(x.columns)
)

In [15]:
# Calculate the condition based on symptoms and duration
def calc_condition(symptoms, days):
    """Print advice derived from symptom severity and duration.

    The score is ``(total severity * days) / (number of symptoms + 1)``. A score
    above 13 prints the "seek consultation" message; anything else prints the
    milder "take precautions" message.

    Args:
        symptoms: iterable of symptom names, looked up in ``severityDictionary``.
        days: number of days the symptoms have lasted.

    Returns:
        None. The advice is printed.
    """
    symptoms = list(symptoms)
    # A symptom missing from the severity table contributes 0 instead of
    # aborting the whole session with a KeyError.
    total_severity = sum(severityDictionary.get(item, 0) for item in symptoms)
    # The +1 keeps the division defined when no symptom was confirmed.
    if (total_severity * days) / (len(symptoms) + 1) > 13:
        print("You should seek consultation from a doctor.")
    else:
        print("It might not be that bad, but you should take precautions.")

In [16]:
# Load symptom descriptions from a CSV file
def getDescription(path='/home/rgukt/Desktop/majorproject2/symptom_Description.csv'):
    """Fill ``description_list`` with ``key -> description`` pairs from a CSV file.

    Args:
        path: CSV file whose first column is the key and whose second column is
            the description text. Defaults to the original hard-coded location.

    Rows with fewer than two fields (for example blank lines) are skipped, the
    same way getSeverityDict() treats them. ``description_list`` is updated in
    place; nothing is returned.
    """
    # newline='' is what the csv module expects from the caller so that quoted
    # fields containing line breaks are read back intact.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) >= 2:
                description_list[row[0]] = row[1]

In [17]:
# Load symptom severity information from a CSV file
def getSeverityDict(path='/home/rgukt/Desktop/majorproject2/Symptom_severity.csv'):
    """Fill ``severityDictionary`` with ``symptom -> integer severity`` pairs.

    Args:
        path: CSV file whose first column is the symptom name and whose second
            column is its severity. Defaults to the original hard-coded location.

    Rows with fewer than two fields, or whose second field is not an integer
    (for example a header line), are skipped. ``severityDictionary`` is updated
    in place; nothing is returned.
    """
    # newline='' is what the csv module expects from the caller.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) < 2:
                continue
            try:
                severityDictionary[row[0]] = int(row[1])
            except ValueError:
                # Non-numeric severity: ignore the row rather than abort the load.
                continue

In [18]:

# Load symptom precautions from a CSV file
def getprecautionDict(path='/home/rgukt/Desktop/majorproject2/symptom_precaution.csv'):
    """Fill ``precautionDictionary`` with ``key -> [four precautions]`` entries.

    Args:
        path: CSV file whose first column is the key and whose next four
            columns are precaution texts. Defaults to the original hard-coded
            location.

    Empty rows are skipped. A row with fewer than four precaution fields is
    padded with empty strings so every entry still holds exactly four items
    instead of raising IndexError. ``precautionDictionary`` is updated in place;
    nothing is returned.
    """
    # newline='' is what the csv module expects from the caller.
    with open(path, newline='') as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if not row:
                continue
            precautions = (row[1:5] + [''] * 4)[:4]
            precautionDictionary[row[0]] = precautions


In [19]:
# Get user's name
def get_user_name():
    """Ask for the user's name on stdin and greet them; the name is not returned."""
    print("Hello! What's your name?")
    visitor = input()
    greeting = f"Hello, {visitor}!"
    print(greeting)


In [20]:
# Check if the input matches any symptom pattern
def check_pattern(dis_list, inp):
    """Find the entries of ``dis_list`` that contain the user's text.

    Surrounding whitespace is dropped and inner spaces become underscores, so
    "high fever" finds "high_fever". The text is matched literally as a
    substring: characters such as "(" or "+" no longer raise ``re.error`` or
    act as regular-expression operators.

    Args:
        dis_list: iterable of candidate names (strings).
        inp: raw text typed by the user.

    Returns:
        ``(1, matches)`` when at least one entry contains the text, otherwise
        ``(0, [])``. Blank input counts as no match rather than matching every
        entry.
    """
    query = inp.strip().replace(' ', '_')
    if not query:
        return 0, []
    pred_list = [item for item in dis_list if query in item]
    if pred_list:
        return 1, pred_list
    return 0, []

In [21]:
# Make a secondary prediction based on symptoms
def sec_predict(symptoms_exp):
    """Predict a prognosis from the symptoms the user confirmed.

    A separate decision tree is fitted on a 70% split of the training CSV and
    asked to classify a single row in which the confirmed symptoms are 1 and
    every other feature is 0.

    Args:
        symptoms_exp: iterable of symptom names; each must be a feature column
            of the training CSV.

    Returns:
        The classifier's prediction array, holding one prognosis label.

    Raises:
        KeyError: if a name in ``symptoms_exp`` is not a feature column.
    """
    # NOTE(review): the CSV is re-read and the tree re-fitted on every call.
    df = pd.read_csv('/home/rgukt/Desktop/majorproject2/Training.csv')
    X = df.iloc[:, :-1]
    y = df['prognosis']
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.3, random_state=20)

    # Seeded so that the same answers always produce the same second opinion.
    tree_clf = DecisionTreeClassifier(random_state=20)
    tree_clf.fit(X_train, y_train)

    symptoms_dict = {symptom: index for index, symptom in enumerate(X.columns)}
    input_vector = np.zeros(len(symptoms_dict))
    for item in symptoms_exp:
        input_vector[symptoms_dict[item]] = 1

    # Predict on a frame carrying the training column names; a bare array makes
    # scikit-learn warn that the input lacks the feature names seen during fit.
    query = pd.DataFrame([input_vector], columns=X.columns)
    return tree_clf.predict(query)

In [22]:
# Print the predicted disease
def print_disease(node):
    """Return the disease names for the non-zero classes of a tree node value.

    ``node[0]`` is taken as the per-class values of the node; the positions of
    its non-zero entries are decoded with the global label encoder ``le`` and
    each resulting name is stripped of surrounding whitespace. Despite the
    function's name, nothing is printed.
    """
    class_ids = node[0].nonzero()[0]
    return [label.strip() for label in le.inverse_transform(class_ids)]

In [23]:

# Convert the decision tree to code
def _ask_for_symptom(known_symptoms):
    """Prompt until the user picks a known symptom; return its exact feature name."""
    while True:
        print("Enter your symptom:")
        conf, cnf_dis = check_pattern(known_symptoms, input())
        if conf != 1:
            print("Enter a valid symptom.")
            continue

        print("Searches related to input:")
        for num, it in enumerate(cnf_dis):
            print(num, ")", it)

        last = len(cnf_dis) - 1
        if last == 0:
            # A single hit needs no menu, but the matched feature name must
            # still replace the raw text the user typed; otherwise the tree
            # walk compares feature names against e.g. "itch" and never
            # treats the symptom as present.
            return cnf_dis[0]

        # Keep asking until the reply is an integer inside the listed range,
        # instead of crashing on text or silently wrapping a negative index.
        while True:
            print(f"Select the one you meant (0 - {last}): ", end="")
            try:
                selected_index = int(input())
            except ValueError:
                selected_index = -1
            if 0 <= selected_index <= last:
                return cnf_dis[selected_index]
            print("Enter a valid number.")


def _ask_for_days():
    """Prompt until the user enters an integer number of days and return it."""
    while True:
        print("From how many days have you experienced this symptom? (Enter a count):")
        try:
            return int(input())
        except ValueError:
            # Only a non-numeric reply is retried; a bare except here would
            # also trap KeyboardInterrupt and make the prompt impossible to quit.
            print("Enter a valid number.")


def tree_to_code(tree, feature_names):
    """Run the interactive diagnosis dialogue driven by a fitted decision tree.

    The user names one symptom and its duration. The tree is then walked from
    the root with that symptom counted as present (1) and every other feature
    as absent (0). At the leaf, the user is asked about each symptom recorded
    for the leaf's disease in ``reduced_data``; the confirmed symptoms feed
    sec_predict() and calc_condition(), and the result, its description and
    its precautions are printed.

    Args:
        tree: fitted classifier exposing ``tree_`` (feature, threshold,
            children_left, children_right, value).
        feature_names: feature column names, indexable by the integers stored
            in ``tree.tree_.feature``.

    Returns:
        None. All results are printed.

    Raises:
        KeyError: if a predicted disease is missing from ``description_list``,
            ``precautionDictionary`` or ``reduced_data``.
    """
    tree_ = tree.tree_
    feature_name = [
        feature_names[i] if i != _tree.TREE_UNDEFINED else "undefined!"
        for i in tree_.feature
    ]

    disease_input = _ask_for_symptom(list(feature_names))
    num_days = _ask_for_days()

    def recurse(node):
        if tree_.feature[node] != _tree.TREE_UNDEFINED:
            # Internal node: only the symptom the user reported counts as present.
            val = 1 if feature_name[node] == disease_input else 0
            if val <= tree_.threshold[node]:
                recurse(tree_.children_left[node])
            else:
                recurse(tree_.children_right[node])
            return

        # Leaf node: decode the disease(s) stored at this leaf.
        present_disease = print_disease(tree_.value[node])

        # Columns that are non-zero in the disease's row of reduced_data.
        red_cols = reduced_data.columns
        symptoms_given = red_cols[reduced_data.loc[present_disease].values[0].nonzero()]

        print("Are you experiencing any of the following symptoms?")
        symptoms_exp = []
        for syms in list(symptoms_given):
            print(f"{syms}? (yes/no): ", end='')
            inp = input()
            while inp not in ["yes", "no"]:
                print("Please provide a valid answer (yes/no): ", end='')
                inp = input()
            if inp == "yes":
                symptoms_exp.append(syms)

        second_prediction = sec_predict(symptoms_exp)
        # print_disease() strips its names, so strip the second opinion too;
        # otherwise a label with stray whitespace is reported as a different
        # disease and looked up under a key the description table may not have.
        second_disease = str(second_prediction[0]).strip()

        calc_condition(symptoms_exp, num_days)
        if present_disease[0] == second_disease:
            print("You may have", present_disease[0])
            print(description_list[present_disease[0]])
        else:
            print("You may have", present_disease[0], "or", second_disease)
            print(description_list[present_disease[0]])
            print(description_list[second_disease])

        precution_list = precautionDictionary[present_disease[0]]
        print("Take the following measures:")
        for i, j in enumerate(precution_list):
            print(i + 1, ")", j)

    recurse(0)


In [24]:

# Fill the description, precaution and severity lookup tables from their CSV files
getDescription()
getprecautionDict()
getSeverityDict()

In [25]:
# Get user's name
# Interactive: prompts on stdin and echoes a greeting; the name is not stored.
get_user_name()

Hello! What's your name?
chandana
Hello, chandana!


In [26]:
# Perform diagnosis using the decision tree
# Starts the interactive session with the fitted decision tree (clf) and the
# feature column names (cols) it was trained on.
tree_to_code(clf, cols)

Enter your symptom:
fever
Searches related to input:
0 ) high_fever
1 ) mild_fever
Select the one you meant (0 - 1): 0
From how many days have you experienced this symptom? (Enter a count):
3
Are you experiencing any of the following symptoms?
muscle_weakness? (yes/no): yes
stiff_neck? (yes/no): no
swelling_joints? (yes/no): yes
movement_stiffness? (yes/no): no
painful_walking? (yes/no): yes
It might not be that bad, but you should take precautions.
You may have Arthritis
Arthritis is the swelling and tenderness of one or more of your joints. The main symptoms of arthritis are joint pain and stiffness, which typically worsen with age. The most common types of arthritis are osteoarthritis and rheumatoid arthritis.so prithvi mudra is the best solution for arithritis.First, sit in a relaxed position.Now, bring your ring finger and your thumb closer.Next, place your hands with palms facing upwards on the thighs or above the knees.Slowly fold your ring finger and tap the tip of the ring fin

In [None]:
# Display a thank you message
# NOTE(review): bot_name is set to "chatbot" earlier in the notebook, while this
# message says "MediBot" -- confirm which name is intended.
print("Thank you for using MediBot!")