# Medical Chatbot

In [None]:
import zipfile
import os

# Unpack the uploaded dataset archive into a local working folder
zip_path = "/content/Medical Chatbot Dataset (1).zip"
extract_folder = "medical_chatbot_dataset"
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_folder)

# Show which files the archive contained
extracted_files = os.listdir(extract_folder)
extracted_files


['Testing.csv', 'Training.csv']

In [None]:
import pandas as pd

# Read Training.csv from the extracted folder into a DataFrame
training_csv_path = os.path.join(extract_folder, "Training.csv")
training_data = pd.read_csv(training_csv_path)

# Preview the first five rows
training_data.head()


Unnamed: 0,itching,skin_rash,nodal_skin_eruptions,continuous_sneezing,shivering,chills,joint_pain,stomach_pain,acidity,ulcers_on_tongue,...,blackheads,scurring,skin_peeling,silver_like_dusting,small_dents_in_nails,inflammatory_nails,blister,red_sore_around_nose,yellow_crust_ooze,prognosis
0,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
1,0,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
2,1,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection
4,1,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Fungal infection


In [None]:
# Shape of the dataset as (rows, columns)
data_shape = training_data.shape
print("Dataset shape:", data_shape)

# Data types of each column
data_types = training_data.dtypes

# End the cell on the expression so the notebook actually renders the dtypes;
# a bare assignment produces no output.
data_types


In [None]:
# Count missing values per column
missing_values = training_data.isnull().sum()

# Show only the columns that contain missing values; an empty result means
# the dataset has none. Ending on the expression makes the notebook render it.
missing_values[missing_values > 0]


Disease Distribution

In [None]:
import plotly.express as px

# Disease distribution: number of training rows per prognosis label.
# value_counts().reset_index() names its columns differently across pandas
# versions ('index'/'prognosis' before 2.0, 'prognosis'/'count' from 2.0 on),
# so the columns are named explicitly to keep the plot version-independent.
disease_counts = (
    training_data['prognosis']
    .value_counts()
    .rename_axis('Disease')
    .reset_index(name='Count')
)

fig1 = px.bar(disease_counts,
              x='Disease', y='Count',
              title='Disease Distribution in Training Data')
fig1.show()


Symptom Frequency

In [None]:
# Sum each symptom column over all rows, then order from most to least frequent
symptom_totals = training_data.drop('prognosis', axis=1).sum()
symptom_counts = symptom_totals.sort_values(ascending=False).reset_index()
symptom_counts.columns = ['Symptom', 'Frequency']

# One bar per symptom, tallest first
fig2 = px.bar(
    symptom_counts,
    x='Symptom',
    y='Frequency',
    title='Symptom Frequency across All Diseases',
)
fig2.show()


 Correlation Heatmap

In [None]:
import plotly.graph_objects as go

# Pairwise correlation between all symptom columns (label column excluded)
symptom_features = training_data.drop('prognosis', axis=1)
correlation_matrix = symptom_features.corr()

# Render the full matrix as a heatmap with symptoms on both axes
full_heatmap = go.Heatmap(
    z=correlation_matrix.values,
    x=correlation_matrix.columns,
    y=correlation_matrix.columns,
    colorscale='Viridis',
    hoverongaps=False,
)
fig3 = go.Figure(data=full_heatmap)
fig3.update_layout(title='Symptom Correlation Heatmap')
fig3.show()


Correlations Between the Top 20 Most Frequent Symptoms

In [None]:
# symptom_counts is already sorted by frequency, so its first 20 rows are the top 20
top_symptoms = symptom_counts['Symptom'].head(20)

# Restrict both axes of the correlation matrix to those symptoms
filtered_corr_matrix = correlation_matrix.loc[top_symptoms, top_symptoms]

# Heatmap of the reduced matrix
top_heatmap = go.Heatmap(
    z=filtered_corr_matrix.values,
    x=filtered_corr_matrix.columns,
    y=filtered_corr_matrix.columns,
    colorscale='Viridis',
    hoverongaps=False,
)
fig_filtered = go.Figure(data=top_heatmap)
fig_filtered.update_layout(title='Correlation Heatmap for Top 20 Symptoms')
fig_filtered.show()


In [None]:
import plotly.express as px

# Take the five most frequent prognosis labels and keep only their rows
disease_frequencies = training_data['prognosis'].value_counts()
top_diseases = disease_frequencies.index[:5]
is_top_disease = training_data['prognosis'].isin(top_diseases)
subset_data = training_data[is_top_disease]

# Reshape to long form: one row per (prognosis, symptom) pair with its 0/1 value
melted_data = subset_data.melt(
    id_vars=['prognosis'],
    var_name='Symptom',
    value_name='Presence',
)
# Drop the pairs where the symptom is absent
symptom_present = melted_data['Presence'] == 1
melted_data = melted_data[symptom_present]

# Bar chart with one bar per disease, coloured by symptom
fig_grouped_bar = px.bar(
    melted_data,
    x='prognosis',
    color='Symptom',
    title='Symptoms Associated with Top 5 Diseases',
)
fig_grouped_bar.show()


In [None]:
from sklearn.model_selection import train_test_split

# Separate the disease label (target) from the symptom columns (features)
y = training_data['prognosis']
X = training_data.drop(columns='prognosis')

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with 100 trees; the fixed seed keeps training reproducible
forest_params = {'n_estimators': 100, 'random_state': 42}
rf_classifier = RandomForestClassifier(**forest_params)

# Fit on the training split (the fitted estimator is the cell's output)
rf_classifier.fit(X_train, y_train)


In [None]:
# Mean accuracy of the fitted forest on the held-out 20% split
accuracy = rf_classifier.score(X=X_test, y=y_test)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [None]:
!pip install streamlit


Collecting streamlit
  Downloading streamlit-1.27.2-py2.py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m48.1 MB/s[0m eta [36m0:00:00[0m
Collecting validators<1,>=0.2 (from streamlit)
  Downloading validators-0.22.0-py3-none-any.whl (26 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.40-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m21.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.8.1b0-py2.py3-none-any.whl (4.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m90.6 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog>=2.1.5 (from streamlit)
  Downloading watchdog-3.0.0-py3-none-manylinux2014_x86_64.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m11.4 MB/s[0m eta [36m0:0

In [None]:
from joblib import dump, load

# Persist the fitted classifier to disk, then read it back to confirm the
# saved file can be loaded
model_path = 'random_forest_model.joblib'
dump(rf_classifier, model_path)

loaded_model = load(model_path)


In [None]:
# Sample data (this is just an example; you should replace it with actual symptom values)
# One 0/1 entry per feature the model was fitted on, all zeros = no symptoms.
# The length comes from the fitted model instead of a hard-coded count, so it
# stays correct if the feature set changes.
feature_names = list(loaded_model.feature_names_in_)
sample_data = [0] * len(feature_names)

# Adjust the values in sample_data to represent the symptoms you want to test

# The model was fitted on a DataFrame, so the sample is passed as a one-row
# DataFrame with the same column names; a bare list makes scikit-learn warn
# "X does not have valid feature names".
sample_frame = pd.DataFrame([sample_data], columns=feature_names)
prediction = loaded_model.predict(sample_frame)[0]
print(f"Predicted Disease: {prediction}")


Predicted Disease: Arthritis



X does not have valid feature names, but RandomForestClassifier was fitted with feature names



In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Run the reloaded model over the held-out split
y_pred = loaded_model.predict(X_test)

# Overall fraction of correct predictions
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# Per-class precision, recall, F1 and support
report_text = classification_report(y_test, y_pred)
print(report_text)


Accuracy: 1.0
                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        18
                                   AIDS       1.00      1.00      1.00        30
                                   Acne       1.00      1.00      1.00        24
                    Alcoholic hepatitis       1.00      1.00      1.00        25
                                Allergy       1.00      1.00      1.00        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      1.00      1.00        23
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        15
                            Common Cold       1.00      1.00      1.00        23
             

In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Evaluation on the separate Testing.csv file.
# The preceding evaluation cell already reports these metrics on the 20% split
# of Training.csv; instead of repeating it, this cell scores the model on the
# second file extracted from the archive.
# NOTE(review): assumes Testing.csv has the same symptom columns as
# Training.csv plus a 'prognosis' label column — confirm against the file.
testing_data = pd.read_csv(os.path.join(extract_folder, "Testing.csv"))

# Select the features by name, in the order the model was fitted on
X_holdout = testing_data[list(loaded_model.feature_names_in_)]
y_holdout = testing_data['prognosis']

# Predictions on the held-out file
y_holdout_pred = loaded_model.predict(X_holdout)

# Accuracy
print("Accuracy:", accuracy_score(y_holdout, y_holdout_pred))

# Detailed classification report
print(classification_report(y_holdout, y_holdout_pred))


Accuracy: 1.0
                                         precision    recall  f1-score   support

(vertigo) Paroymsal  Positional Vertigo       1.00      1.00      1.00        18
                                   AIDS       1.00      1.00      1.00        30
                                   Acne       1.00      1.00      1.00        24
                    Alcoholic hepatitis       1.00      1.00      1.00        25
                                Allergy       1.00      1.00      1.00        24
                              Arthritis       1.00      1.00      1.00        23
                       Bronchial Asthma       1.00      1.00      1.00        33
                   Cervical spondylosis       1.00      1.00      1.00        23
                            Chicken pox       1.00      1.00      1.00        21
                    Chronic cholestasis       1.00      1.00      1.00        15
                            Common Cold       1.00      1.00      1.00        23
             

In [None]:
import ipywidgets as widgets
from IPython.display import display, clear_output

# Interactive symptom checker: one checkbox per symptom plus a Predict button.
# Uses `loaded_model`, `training_data` and `pd` from earlier cells.

# Create a button for prediction
predict_button = widgets.Button(description="Predict")

# Every column except the 'prognosis' label is a symptom feature. The label is
# dropped by name so the list does not depend on the label being the last column.
symptoms_list = list(training_data.columns.drop('prognosis'))

# Create checkboxes for symptoms
checkboxes = {symptom: widgets.Checkbox(value=False, description=symptom) for symptom in symptoms_list}

# Output widget to display prediction
output = widgets.Output()

def on_predict_button_clicked(button):
    """Predict a disease from the ticked symptom checkboxes and display it.

    Args:
        button: The widget that fired the click event. Unused; it is part of
            the callback signature that ``Button.on_click`` invokes.
    """
    # Convert user input into one 0/1 value per symptom, in symptoms_list order
    input_data = [int(checkboxes[symptom].value) for symptom in symptoms_list]

    with output:
        clear_output(wait=True)

        # With nothing ticked the model would still return a disease name for
        # the all-zero row, so ask for input instead of reporting a diagnosis.
        if not any(input_data):
            print("Please select at least one symptom before predicting.")
            return

        # Pass a one-row DataFrame with named columns: the model was fitted on
        # a DataFrame, and a bare list makes scikit-learn warn
        # "X does not have valid feature names".
        input_frame = pd.DataFrame([input_data], columns=symptoms_list)

        # Predict disease
        prediction = loaded_model.predict(input_frame)[0]

        # Display the prediction
        print(f"The predicted disease based on the symptoms is: {prediction}")
        print("\nDisclaimer: This prediction is based on a trained model and is not a substitute for professional medical advice.")

predict_button.on_click(on_predict_button_clicked)

# Display the checkboxes and the button
for checkbox in checkboxes.values():
    display(checkbox)
display(predict_button, output)


Checkbox(value=False, description='itching')

Checkbox(value=False, description='skin_rash')

Checkbox(value=False, description='nodal_skin_eruptions')

Checkbox(value=False, description='continuous_sneezing')

Checkbox(value=False, description='shivering')

Checkbox(value=False, description='chills')

Checkbox(value=False, description='joint_pain')

Checkbox(value=False, description='stomach_pain')

Checkbox(value=False, description='acidity')

Checkbox(value=False, description='ulcers_on_tongue')

Checkbox(value=False, description='muscle_wasting')

Checkbox(value=False, description='vomiting')

Checkbox(value=False, description='burning_micturition')

Checkbox(value=False, description='spotting_ urination')

Checkbox(value=False, description='fatigue')

Checkbox(value=False, description='weight_gain')

Checkbox(value=False, description='anxiety')

Checkbox(value=False, description='cold_hands_and_feets')

Checkbox(value=False, description='mood_swings')

Checkbox(value=False, description='weight_loss')

Checkbox(value=False, description='restlessness')

Checkbox(value=False, description='lethargy')

Checkbox(value=False, description='patches_in_throat')

Checkbox(value=False, description='irregular_sugar_level')

Checkbox(value=False, description='cough')

Checkbox(value=False, description='high_fever')

Checkbox(value=False, description='sunken_eyes')

Checkbox(value=False, description='breathlessness')

Checkbox(value=False, description='sweating')

Checkbox(value=False, description='dehydration')

Checkbox(value=False, description='indigestion')

Checkbox(value=False, description='headache')

Checkbox(value=False, description='yellowish_skin')

Checkbox(value=False, description='dark_urine')

Checkbox(value=False, description='nausea')

Checkbox(value=False, description='loss_of_appetite')

Checkbox(value=False, description='pain_behind_the_eyes')

Checkbox(value=False, description='back_pain')

Checkbox(value=False, description='constipation')

Checkbox(value=False, description='abdominal_pain')

Checkbox(value=False, description='diarrhoea')

Checkbox(value=False, description='mild_fever')

Checkbox(value=False, description='yellow_urine')

Checkbox(value=False, description='yellowing_of_eyes')

Checkbox(value=False, description='acute_liver_failure')

Checkbox(value=False, description='fluid_overload')

Checkbox(value=False, description='swelling_of_stomach')

Checkbox(value=False, description='swelled_lymph_nodes')

Checkbox(value=False, description='malaise')

Checkbox(value=False, description='blurred_and_distorted_vision')

Checkbox(value=False, description='phlegm')

Checkbox(value=False, description='throat_irritation')

Checkbox(value=False, description='redness_of_eyes')

Checkbox(value=False, description='sinus_pressure')

Checkbox(value=False, description='runny_nose')

Checkbox(value=False, description='congestion')

Checkbox(value=False, description='chest_pain')

Checkbox(value=False, description='weakness_in_limbs')

Checkbox(value=False, description='fast_heart_rate')

Checkbox(value=False, description='pain_during_bowel_movements')

Checkbox(value=False, description='pain_in_anal_region')

Checkbox(value=False, description='bloody_stool')

Checkbox(value=False, description='irritation_in_anus')

Checkbox(value=False, description='neck_pain')

Checkbox(value=False, description='dizziness')

Checkbox(value=False, description='cramps')

Checkbox(value=False, description='bruising')

Checkbox(value=False, description='obesity')

Checkbox(value=False, description='swollen_legs')

Checkbox(value=False, description='swollen_blood_vessels')

Checkbox(value=False, description='puffy_face_and_eyes')

Checkbox(value=False, description='enlarged_thyroid')

Checkbox(value=False, description='brittle_nails')

Checkbox(value=False, description='swollen_extremeties')

Checkbox(value=False, description='excessive_hunger')

Checkbox(value=False, description='extra_marital_contacts')

Checkbox(value=False, description='drying_and_tingling_lips')

Checkbox(value=False, description='slurred_speech')

Checkbox(value=False, description='knee_pain')

Checkbox(value=False, description='hip_joint_pain')

Checkbox(value=False, description='muscle_weakness')

Checkbox(value=False, description='stiff_neck')

Checkbox(value=False, description='swelling_joints')

Checkbox(value=False, description='movement_stiffness')

Checkbox(value=False, description='spinning_movements')

Checkbox(value=False, description='loss_of_balance')

Checkbox(value=False, description='unsteadiness')

Checkbox(value=False, description='weakness_of_one_body_side')

Checkbox(value=False, description='loss_of_smell')

Checkbox(value=False, description='bladder_discomfort')

Checkbox(value=False, description='foul_smell_of urine')

Checkbox(value=False, description='continuous_feel_of_urine')

Checkbox(value=False, description='passage_of_gases')

Checkbox(value=False, description='internal_itching')

Checkbox(value=False, description='toxic_look_(typhos)')

Checkbox(value=False, description='depression')

Checkbox(value=False, description='irritability')

Checkbox(value=False, description='muscle_pain')

Checkbox(value=False, description='altered_sensorium')

Checkbox(value=False, description='red_spots_over_body')

Checkbox(value=False, description='belly_pain')

Checkbox(value=False, description='abnormal_menstruation')

Checkbox(value=False, description='dischromic _patches')

Checkbox(value=False, description='watering_from_eyes')

Checkbox(value=False, description='increased_appetite')

Checkbox(value=False, description='polyuria')

Checkbox(value=False, description='family_history')

Checkbox(value=False, description='mucoid_sputum')

Checkbox(value=False, description='rusty_sputum')

Checkbox(value=False, description='lack_of_concentration')

Checkbox(value=False, description='visual_disturbances')

Checkbox(value=False, description='receiving_blood_transfusion')

Checkbox(value=False, description='receiving_unsterile_injections')

Checkbox(value=False, description='coma')

Checkbox(value=False, description='stomach_bleeding')

Checkbox(value=False, description='distention_of_abdomen')

Checkbox(value=False, description='history_of_alcohol_consumption')

Checkbox(value=False, description='fluid_overload.1')

Checkbox(value=False, description='blood_in_sputum')

Checkbox(value=False, description='prominent_veins_on_calf')

Checkbox(value=False, description='palpitations')

Checkbox(value=False, description='painful_walking')

Checkbox(value=False, description='pus_filled_pimples')

Checkbox(value=False, description='blackheads')

Checkbox(value=False, description='scurring')

Checkbox(value=False, description='skin_peeling')

Checkbox(value=False, description='silver_like_dusting')

Checkbox(value=False, description='small_dents_in_nails')

Checkbox(value=False, description='inflammatory_nails')

Checkbox(value=False, description='blister')

Checkbox(value=False, description='red_sore_around_nose')

Checkbox(value=False, description='yellow_crust_ooze')

Button(description='Predict', style=ButtonStyle())

Output()


X does not have valid feature names, but RandomForestClassifier was fitted with feature names


X does not have valid feature names, but RandomForestClassifier was fitted with feature names

