In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
import warnings

In [2]:
warnings.filterwarnings("ignore")

In [6]:
# Load Dataset (Change path as needed)
df = pd.read_csv(r"C:\Users\ayesh\chatbot\disease.csv")  # Update the dataset file name if different

In [7]:
df.head()

Unnamed: 0,diseases,anxiety and nervousness,depression,shortness of breath,depressive or psychotic symptoms,sharp chest pain,dizziness,insomnia,abnormal involuntary movements,chest tightness,...,stuttering or stammering,problems with orgasm,nose deformity,lump over jaw,sore in nose,hip weakness,back swelling,ankle stiffness or tightness,ankle weakness,neck weakness
0,panic disorder,1,0,1,1,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
1,panic disorder,0,0,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,panic disorder,1,1,1,1,0,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,panic disorder,1,0,0,1,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
4,panic disorder,1,1,0,0,0,0,1,1,1,...,0,0,0,0,0,0,0,0,0,0


In [8]:
# Normalize column names
df.columns = df.columns.str.lower().str.strip()

In [17]:
# Extract Features and Labels
symptom_columns = df.columns[:-1]  # All columns except "diseases" are symptoms
y = df['diseases']  # Target variable

In [18]:

# Encode target labels
y_encoder = LabelEncoder()
y = y_encoder.fit_transform(y)

In [19]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(df[symptom_columns], y, test_size=0.2, random_state=42)

In [20]:
# Train Model
model = RandomForestClassifier(n_estimators=50, random_state=42)
model.fit(X_train, y_train)

In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 246945 entries, 0 to 246944
Columns: 378 entries, diseases to neck weakness
dtypes: int64(377), object(1)
memory usage: 712.2+ MB


In [15]:
print(df.dtypes)  # Check the data types of all columns
print(df.head())  # Print the first few rows of the dataset


diseases                            object
anxiety and nervousness              int64
depression                           int64
shortness of breath                  int64
depressive or psychotic symptoms     int64
                                     ...  
hip weakness                         int64
back swelling                        int64
ankle stiffness or tightness         int64
ankle weakness                       int64
neck weakness                        int64
Length: 378, dtype: object
         diseases  anxiety and nervousness  depression  shortness of breath  \
0  panic disorder                        1           0                    1   
1  panic disorder                        0           0                    1   
2  panic disorder                        1           1                    1   
3  panic disorder                        1           0                    0   
4  panic disorder                        1           1                    0   

   depressive or psychoti

In [16]:
# Handle missing values
df.fillna(0, inplace=True)  # Replace missing values with 0 (assuming 0 means absence of symptom)

# Ensure all symptom columns are numeric
for col in df.columns[:-1]:  # Exclude 'diseases'
    df[col] = df[col].apply(lambda x: 1 if x != 0 else 0)  # Convert symptoms to binary (1 if present, 0 if not)


In [21]:
# Function to Predict Disease
def predict_disease(symptom_list):
    """
    symptom_list: List of symptom names entered by the user.
    """
    print("Available Symptoms:", list(symptom_columns))  # Debugging
    print("User Symptoms:", symptom_list)  # Debugging
    
    symptom_list = [s.lower().strip() for s in symptom_list]  # Normalize user input
    
    user_symptoms = [1 if symptom in symptom_list else 0 for symptom in symptom_columns]
    symptom_array = np.array(user_symptoms).reshape(1, -1)
    prediction = model.predict(symptom_array)[0]
    return y_encoder.inverse_transform([prediction])[0]


In [22]:
# Chatbot Interaction
def chatbot():
    print("Hello! I am a healthcare chatbot. Please enter your symptoms as words (e.g., fever, headache).")
    while True:
        try:
            user_input = input("Enter symptoms as a comma-separated list: ")
            if user_input.lower() in ['exit', 'quit', 'stop']:
                print("Chatbot: Take care! Stay healthy.")
                break
            symptoms = [sym.strip().lower() for sym in user_input.split(',')]
            disease = predict_disease(symptoms)
            print(f"Chatbot: Based on your symptoms, you might have {disease}. Please consult a doctor.")
        except Exception as e:
            print("Chatbot: Error in processing input. Please enter valid symptom names.")

In [23]:
# Run Chatbot
if __name__ == "__main__":
    chatbot()

Hello! I am a healthcare chatbot. Please enter your symptoms as words (e.g., fever, headache).
Available Symptoms: ['diseases', 'anxiety and nervousness', 'depression', 'shortness of breath', 'depressive or psychotic symptoms', 'sharp chest pain', 'dizziness', 'insomnia', 'abnormal involuntary movements', 'chest tightness', 'palpitations', 'irregular heartbeat', 'breathing fast', 'hoarse voice', 'sore throat', 'difficulty speaking', 'cough', 'nasal congestion', 'throat swelling', 'diminished hearing', 'lump in throat', 'throat feels tight', 'difficulty in swallowing', 'skin swelling', 'retention of urine', 'groin mass', 'leg pain', 'hip pain', 'suprapubic pain', 'blood in stool', 'lack of growth', 'emotional symptoms', 'elbow weakness', 'back weakness', 'pus in sputum', 'symptoms of the scrotum and testes', 'swelling of scrotum', 'pain in testicles', 'flatulence', 'pus draining from ear', 'jaundice', 'mass in scrotum', 'white discharge from eye', 'irritable infant', 'abusing alcohol', 