<a href="https://colab.research.google.com/github/Hamzah1507/Medical-Diagnosis-AI/blob/main/Medical_Diagnosis_AI_Assistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install necessary libraries
# Note: sqlite3 is a built-in library, so we remove it from the pip install list.
# PyTorch is needed for TabNet, which requires a specific installation for the Colab environment/GPU
!pip install torch torchvision torchaudio
!pip install pytorch-tabnet shap lime streamlit scikit-learn xgboost pandas numpy

# Import core libraries
import pandas as pd
import numpy as np
import sqlite3 # Built-in, no install needed
import streamlit as st
import torch # Explicitly import torch

# ML/DL Libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from pytorch_tabnet.tab_model import TabNetClassifier

# Explainability
import shap
import lime
from lime import lime_tabular

# Utilities
import os
import pickle
import warnings
warnings.filterwarnings('ignore')

print("All required libraries installed and imported successfully!")

# Check PyTorch installation
print(f"PyTorch version: {torch.__version__}")

All required libraries installed and imported successfully!
PyTorch version: 2.8.0+cu126


In [None]:
# --- SIMULATE THE DATASET ---

# Define a list of 41 synthetic disease names
# We'll generate 40 names plus one 'Healthy' or 'No Disease' category
# --- SIMULATE THE DATASET (MODIFIED FOR REAL NAMES) ---

# Define a list of 41 synthetic disease names (40 real-world examples + No Disease)
disease_names = [
    'Influenza', 'Common_Cold', 'COVID-19', 'Strep_Throat', 'Pneumonia',
    'Bronchitis', 'Allergies', 'Migraine', 'Tension_Headache', 'Sinusitis',
    'Gastroenteritis', 'IBS', 'Appendicitis', 'UTI', 'Kidney_Stones',
    'Arthritis', 'Gout', 'Lupus', 'Dengue_Fever', 'Malaria',
    'Tuberculosis', 'Hepatitis_A', 'Hepatitis_B', 'HIV', 'Anemia',
    'Diabetes', 'Hypertension', 'Hypothyroidism', 'Hyperthyroidism', 'Asthma',
    'Eczema', 'Psoriasis', 'Acne', 'Anxiety_Disorder', 'Depression',
    'Bipolar_Disorder', 'Schizophrenia', 'Insomnia', 'Sleep_Apnea', 'Obesity',
    'No_Disease' # 41st category
]
N_DISEASES = len(disease_names) # 41
# Define a set of 50 common synthetic symptoms/features
N_FEATURES = 50
feature_names = [f'Symptom_{i+1}' for i in range(N_FEATURES)]

# Number of patient records
N_SAMPLES = 10000

# Generate synthetic data
# Features will be binary (0/1) indicating presence or absence of a symptom
np.random.seed(42) # for reproducibility

# Create the features (X)
# Generate random integers (0 or 1) for symptoms
X_data = np.random.randint(0, 2, size=(N_SAMPLES, N_FEATURES))
X_df = pd.DataFrame(X_data, columns=feature_names)

# Create the target (y)
# Randomly assign one of the 41 diseases
y_data = np.random.choice(disease_names, size=N_SAMPLES)
y_df = pd.DataFrame(y_data, columns=['Target_Disease'])

# Combine features and target
df = pd.concat([X_df, y_df], axis=1)

# Save the synthetic dataset to a CSV file (to simulate a real-world file load)
DATA_FILE = 'medical_diagnosis_data.csv'
df.to_csv(DATA_FILE, index=False)

# --- LOAD THE DATASET ---

# Load the data back from the generated CSV
data = pd.read_csv(DATA_FILE)

# Display the first few rows and summary
print(f"Data successfully created and loaded: '{DATA_FILE}'")
print(f"Shape of the dataset: {data.shape}")
print("\nFirst 5 rows of the dataset:")
print(data.head())
print("\nDistribution of the target disease (Top 5):")
print(data['Target_Disease'].value_counts().head())

Data successfully created and loaded: 'medical_diagnosis_data.csv'
Shape of the dataset: (10000, 51)

First 5 rows of the dataset:
   Symptom_1  Symptom_2  Symptom_3  Symptom_4  Symptom_5  Symptom_6  \
0          0          1          0          0          0          1   
1          0          1          0          1          0          1   
2          0          1          1          1          1          1   
3          0          0          0          0          0          1   
4          0          1          0          0          1          0   

   Symptom_7  Symptom_8  Symptom_9  Symptom_10  ...  Symptom_42  Symptom_43  \
0          0          0          0           1  ...           0           1   
1          1          0          0           0  ...           1           1   
2          1          1          1           0  ...           0           1   
3          0          1          0           1  ...           0           0   
4          1          1          1           0 

In [None]:
# --- 1. Separate Features (X) and Target (y) ---

# The last column is 'Target_Disease'
X = data.iloc[:, :-1]
y = data['Target_Disease']

# Convert all feature names (Symptom_1, etc.) to strings,
# as TabNet expects them for feature selection and indexing.
X.columns = X.columns.astype(str)

print(f"Features (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}")

# --- 2. Encode the Target Variable ---

# Initialize the LabelEncoder
le = LabelEncoder()

# Fit and transform the categorical disease names into numerical labels (0 to 40)
y_encoded = le.fit_transform(y)

# Store the encoder for later use (to map predictions back to disease names)
ENCODER_FILE = 'label_encoder.pkl'
with open(ENCODER_FILE, 'wb') as f:
    pickle.dump(le, f)

print(f"Target encoded. Total classes: {len(le.classes_)}")
print(f"Label Encoder saved to: {ENCODER_FILE}")

# --- 3. Split the Data ---

# Split the data into training (80%) and testing (20%) sets
# Use the encoded target y_encoded
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Convert all data splits to NumPy arrays as required by TabNet
X_train = X_train.values.astype(np.float32)
X_test = X_test.values.astype(np.float32)
y_train = y_train.astype(np.int64)
y_test = y_test.astype(np.int64)

print("\nData Splitting Complete:")
print(f"X_train shape: {X_train.shape} | y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape} | y_test shape: {y_test.shape}")

Features (X) shape: (10000, 50)
Target (y) shape: (10000,)
Target encoded. Total classes: 41
Label Encoder saved to: label_encoder.pkl

Data Splitting Complete:
X_train shape: (8000, 50) | y_train shape: (8000,)
X_test shape: (2000, 50) | y_test shape: (2000,)


In [None]:
# Re-import necessary components for this cell
import torch
import pickle
from pytorch_tabnet.tab_model import TabNetClassifier
from xgboost import XGBClassifier
import numpy as np
# Assuming X_train, y_train, X_test, y_test, le are still defined from Step 3

# --- 1. Train TabNet Classifier (Revised Save Path) ---

print("Starting TabNet training...")
tabnet_model = TabNetClassifier(
    n_a=8, n_d=8,
    optimizer_fn=torch.optim.Adam,
    optimizer_params=dict(lr=2e-2),
    scheduler_params={"step_size":50, "gamma":0.9},
    scheduler_fn=torch.optim.lr_scheduler.StepLR,
    mask_type='sparsemax',
    seed=42
)

tabnet_model.fit(
    X_train=X_train, y_train=y_train,
    eval_set=[(X_test, y_test)],
    eval_name=['test'],
    max_epochs=20,
    patience=5,
    batch_size=1024,
    virtual_batch_size=128,
    num_workers=0,
    weights=0,
)

# Save the TabNet model weights with an explicit .zip extension for clarity
TABNET_MODEL_FILE = 'tabnet_model.zip'
# When saving with an explicit zip name, it often saves to the root directory without a folder
tabnet_model.save_model(TABNET_MODEL_FILE.replace('.zip', '')) # save_model only takes the name
print(f"TabNet Model saved as file: {TABNET_MODEL_FILE}")

# --- 2. Train XGBoost Classifier (Same as before) ---

print("\nStarting XGBoost training...")
xgb_model = XGBClassifier(
    objective='multi:softmax',
    num_class=len(le.classes_),
    use_label_encoder=False,
    eval_metric='merror',
    n_estimators=100,
    random_state=42,
    tree_method='hist'
)

xgb_model.fit(X_train, y_train)

# Save the XGBoost model
XGB_MODEL_FILE = 'xgb_model.pkl'
with open(XGB_MODEL_FILE, 'wb') as f:
    pickle.dump(xgb_model, f)

print(f"XGBoost Model saved to: {XGB_MODEL_FILE}")
print("\nModel training complete. Now ready for Step 5.")

Starting TabNet training...
epoch 0  | loss: 4.23227 | test_accuracy: 0.0235  |  0:00:01s
epoch 1  | loss: 3.75726 | test_accuracy: 0.0225  |  0:00:02s
epoch 2  | loss: 3.71584 | test_accuracy: 0.025   |  0:00:03s
epoch 3  | loss: 3.71309 | test_accuracy: 0.026   |  0:00:04s
epoch 4  | loss: 3.71248 | test_accuracy: 0.024   |  0:00:05s
epoch 5  | loss: 3.71132 | test_accuracy: 0.025   |  0:00:05s
epoch 6  | loss: 3.70971 | test_accuracy: 0.024   |  0:00:06s
epoch 7  | loss: 3.70894 | test_accuracy: 0.0235  |  0:00:06s
epoch 8  | loss: 3.70782 | test_accuracy: 0.024   |  0:00:07s

Early stopping occurred at epoch 8 with best_epoch = 3 and best_test_accuracy = 0.026
Successfully saved model at tabnet_model.zip
TabNet Model saved as file: tabnet_model.zip

Starting XGBoost training...
XGBoost Model saved to: xgb_model.pkl

Model training complete. Now ready for Step 5.


In [None]:
# Re-import necessary components for this cell
import numpy as np
import pickle
import torch
from pytorch_tabnet.tab_model import TabNetClassifier
from sklearn.metrics import accuracy_score

# --- 1. Load Models and Encoder ---

# Re-initialize TabNet model structure
tabnet_model_loaded = TabNetClassifier(n_a=8, n_d=8)
TABNET_MODEL_FILE = 'tabnet_model.zip'

# Load the weights from the file saved in the revised Step 4
# We use the file name without the .zip extension in the load_model function
TABNET_MODEL_LOAD_NAME = TABNET_MODEL_FILE.replace('.zip', '')
print(f"Attempting to load TabNet model from: {TABNET_MODEL_FILE}")
try:
    tabnet_model_loaded.load_model(TABNET_MODEL_LOAD_NAME)
except Exception as e:
    print(f"TabNet loading error: {e}")
    # Fallback to the explicit .zip path if the name without extension fails
    tabnet_model_loaded.load_model(f'{TABNET_MODEL_LOAD_NAME}.zip')

# Load XGBoost model
XGB_MODEL_FILE = 'xgb_model.pkl'
print(f"Loading XGBoost model from: {XGB_MODEL_FILE}")
with open(XGB_MODEL_FILE, 'rb') as f:
    xgb_model_loaded = pickle.load(f)

# Load Label Encoder
ENCODER_FILE = 'label_encoder.pkl'
print(f"Loading Label Encoder from: {ENCODER_FILE}")
with open(ENCODER_FILE, 'rb') as f:
    le_loaded = pickle.load(f)

# Assuming X_test, y_test are available from the environment

print("\nModels and Label Encoder loaded successfully.")

# --- 2. Define Ensemble Prediction Function ---

def ensemble_predict_proba(X):
    """Generates combined prediction probabilities using soft voting."""
    tabnet_proba = tabnet_model_loaded.predict_proba(X.astype(np.float32))
    xgb_proba = xgb_model_loaded.predict_proba(X)
    ensemble_proba = (tabnet_proba + xgb_proba) / 2
    return ensemble_proba

def ensemble_predict(X):
    """Generates the final class prediction based on ensemble probability."""
    ensemble_proba = ensemble_predict_proba(X)
    predictions = np.argmax(ensemble_proba, axis=1)
    return predictions


# --- 3. Evaluate Ensemble Model ---

print("\nEvaluating Ensemble Model on Test Set...")
y_pred_ensemble = ensemble_predict(X_test)
accuracy = accuracy_score(y_test, y_pred_ensemble)
print(f"TabNet + XGBoost Ensemble Accuracy: {accuracy:.4f}")

# --- 4. Get Example Prediction (for verification) ---

example_predictions = le_loaded.inverse_transform(y_pred_ensemble[:5])
example_true_labels = le_loaded.inverse_transform(y_test[:5])

print("\nExample Ensemble Predictions (True vs. Predicted):")
print(f"True Labels:   {example_true_labels}")
print(f"Predictions: {example_predictions}")

Attempting to load TabNet model from: tabnet_model.zip
TabNet loading error: [Errno 2] No such file or directory: 'tabnet_model'
Loading XGBoost model from: xgb_model.pkl
Loading Label Encoder from: label_encoder.pkl

Models and Label Encoder loaded successfully.

Evaluating Ensemble Model on Test Set...
TabNet + XGBoost Ensemble Accuracy: 0.0270

Example Ensemble Predictions (True vs. Predicted):
True Labels:   ['Migraine' 'Obesity' 'COVID-19' 'HIV' 'Dengue_Fever']
Predictions: ['Common_Cold' 'Hypothyroidism' 'Pneumonia' 'Sleep_Apnea' 'Depression']


In [None]:
# Re-import necessary components for this cell
import numpy as np
import shap
import lime
from lime import lime_tabular
import pandas as pd
import torch

# Assuming X_test, y_test, ensemble_predict, ensemble_predict_proba, le_loaded, X_train, and data are available

# --- SHAP Prediction Wrapper (Necessary to ensure 2D output) ---
def shap_predict_proba(X):
    """
    Wrapper for ensemble_predict_proba to ensure SHAP Explainer gets the 2D output it needs.
    """
    if X.ndim == 1:
        X = X.reshape(1, -1)

    proba = ensemble_predict_proba(X)
    return proba.reshape(X.shape[0], -1)


# Get a single patient's data for local explanation
EXPLAIN_INDEX = 100
X_sample = X_test[EXPLAIN_INDEX:EXPLAIN_INDEX+1]
y_sample_true_index = y_test[EXPLAIN_INDEX]
y_sample_true_label = le_loaded.inverse_transform([y_sample_true_index])[0]
y_sample_pred_index = ensemble_predict(X_sample)[0]
y_sample_pred_label = le_loaded.inverse_transform([y_sample_pred_index])[0]
y_sample_pred_proba = ensemble_predict_proba(X_sample)[0]

print(f"--- Explaining Patient Index {EXPLAIN_INDEX} ---")
print(f"True Label: {y_sample_true_label} | Predicted Label: {y_sample_pred_label}")
print(f"Predicted Class Index: {y_sample_pred_index}")
print("-" * 40)

# --- 1. SHAP Implementation ---

print("Generating SHAP Explanation...")

# SHAP needs a background dataset (subsampled X_train)
X_train_df = pd.DataFrame(X_train, columns=data.columns[:-1])

# Create a SHAP Explainer using the wrapper function
explainer = shap.KernelExplainer(
    shap_predict_proba,
    X_train_df.iloc[np.random.choice(X_train_df.shape[0], 100, replace=False)]
)

# Calculate SHAP values
X_sample_df = pd.DataFrame(X_sample, columns=data.columns[:-1])
shap_values = explainer.shap_values(X_sample_df)

print("SHAP values calculated successfully.")
print(f"SHAP values list size: {len(shap_values)}")


# --- FIX SHAP INDEXING ERROR ---
# If the explainer returns a list of size 1 (the error you received),
# we must assume that single element contains the correct SHAP values and ignore the class index.
if len(shap_values) == 1:
    # Use the single element in the list, which contains the values for the predicted class
    # shap_values[0] is the array of SHAP values for the single sample
    predicted_class_shap = shap_values[0][0]
else:
    # If it's a list of size 41, use the standard indexing
    predicted_class_shap = shap_values[y_sample_pred_index][0]

# Print the top 5 SHAP values for the predicted class
print(f"Top 5 SHAP features for predicted class: {y_sample_pred_label}")
shap_explanation = list(zip(X_sample_df.columns, predicted_class_shap))
shap_explanation.sort(key=lambda x: abs(x[1]), reverse=True)
print(shap_explanation[:5])

# --- 2. LIME Implementation (Kept successful logic) ---

print("\nGenerating LIME Explanation...")

# Create a LIME Explainer
explainer_lime = lime_tabular.LimeTabularExplainer(
    training_data=X_train,
    feature_names=data.columns[:-1].tolist(),
    class_names=le_loaded.classes_.tolist(),
    mode='classification',
    kernel_width=0.75,
    random_state=42
)

# Generate LIME explanation for the sample instance (X_sample[0])
explanation_lime = explainer_lime.explain_instance(
    data_row=X_sample[0],
    predict_fn=ensemble_predict_proba,
    num_features=5,
    top_labels=1
)

# Access the explanation using the predicted class index
print(f"LIME explanation for predicted class: {y_sample_pred_label}")
print(explanation_lime.as_list(label=y_sample_pred_index))

--- Explaining Patient Index 100 ---
True Label: No_Disease | Predicted Label: Arthritis
Predicted Class Index: 5
----------------------------------------
Generating SHAP Explanation...


  0%|          | 0/1 [00:00<?, ?it/s]

SHAP values calculated successfully.
SHAP values list size: 1
Top 5 SHAP features for predicted class: Arthritis
[('Symptom_18', np.float64(0.00449904964688879)), ('Symptom_20', np.float64(-0.0029454833398634564)), ('Symptom_37', np.float64(0.002325935336828263)), ('Symptom_1', np.float64(-0.0021669542773371008)), ('Symptom_7', np.float64(-0.0021506998635250265))]

Generating LIME Explanation...
LIME explanation for predicted class: Arthritis
[('Symptom_3 <= 0.00', 5.858082830970666e-06), ('0.00 < Symptom_8 <= 1.00', 5.652746402195897e-06), ('Symptom_1 <= 0.00', 2.953924574117862e-06), ('0.00 < Symptom_4 <= 1.00', 2.328269858710304e-06), ('0.00 < Symptom_37 <= 1.00', 2.3081918967291226e-06)]


In [None]:
# Create the app.py file
STREAMLIT_APP_FILE = 'app.py'

# Define the Streamlit code as a list of lines.
streamlit_lines = [
    "import streamlit as st",
    "import pandas as pd",
    "import numpy as np",
    "import pickle",
    "import shap",
    "import lime",
    "from lime import lime_tabular",
    "from pytorch_tabnet.tab_model import TabNetClassifier",
    "import torch",
    "import warnings",
    "warnings.filterwarnings('ignore')",
    "",
    "# --- 1. CONFIGURATION AND MODEL LOADING ---",
    'st.set_page_config(page_title="Medical Diagnosis AI Assistant", layout="wide")',
    "",
    "# Constants from training",
    "N_FEATURES = 50",
    "# FINAL FIX: Descriptive symptom names defined in one line and correctly formatted.",
    "FEATURE_NAMES = ['Fever', 'Cough', 'Headache', 'Fatigue', 'Sore_Throat', 'Difficulty_Breathing', 'Chest_Pain', 'Nausea', 'Vomiting', 'Diarrhea', 'Abdominal_Pain', 'Rash', 'Joint_Pain', 'Muscle_Aches', 'Loss_of_Smell', 'Loss_of_Taste', 'Chills', 'Runny_Nose', 'Body_Ache', 'Dizziness', 'Lightheadedness', 'Weakness', 'Loss_of_Appetite', 'Weight_Loss', 'Swollen_Glands', 'Blurred_Vision', 'Ear_Ache', 'Tingling_in_Limbs', 'Numbness', 'Anxiety', 'Depression', 'Insomnia', 'Hives', 'Itching', 'Jaundice', 'Dark_Urine', 'Pale_Stool', 'Tiredness', 'Night_Sweats', 'Constipation', 'Heart_Palpitations', 'Shortness_of_Breath', 'Frequent_Urination', 'Burning_Urination', 'Bloating', 'Back_Pain', 'Neck_Stiffness', 'Confusion', 'Fainting', 'Tremor']",
    "TABNET_MODEL_LOAD_NAME = 'tabnet_model'",
    "XGB_MODEL_FILE = 'xgb_model.pkl'",
    "ENCODER_FILE = 'label_encoder.pkl'",
    "",
    "# Helper function to load all assets",
    "@st.cache_resource",
    "def load_assets():",
    "    # Load Label Encoder",
    "    with open(ENCODER_FILE, 'rb') as f:",
    "        le_loaded = pickle.load(f)",
    "",
    "    # Load XGBoost model",
    "    with open(XGB_MODEL_FILE, 'rb') as f:",
    "        xgb_model_loaded = pickle.load(f)",
    "",
    "    # Load TabNet model",
    "    tabnet_model_loaded = TabNetClassifier(n_a=8, n_d=8)",
    "    try:",
    "        tabnet_model_loaded.load_model(TABNET_MODEL_LOAD_NAME)",
    "    except:",
    "        # Fallback load if the first method fails",
    "        tabnet_model_loaded.load_model(f'{TABNET_MODEL_LOAD_NAME}.zip')",
    "        ",
    "    # Dummy X_train data for LIME explainer",
    "    N_SAMPLES_LIME = 1000",
    "    X_train_dummy = np.random.randint(0, 2, size=(N_SAMPLES_LIME, N_FEATURES))",
    "    ",
    "    return le_loaded, xgb_model_loaded, tabnet_model_loaded, X_train_dummy",
    "",
    "le, xgb_model, tabnet_model, X_train_for_lime = load_assets()",
    "",
    "# --- 2. ENSEMBLE PREDICTION FUNCTIONS ---",
    "def ensemble_predict_proba(X):",
    '    """Generates combined prediction probabilities using soft voting."""',
    "    tabnet_proba = tabnet_model.predict_proba(X.astype(np.float32))",
    "    xgb_proba = xgb_model.predict_proba(X)",
    "    ensemble_proba = (tabnet_proba + xgb_proba) / 2",
    "    return ensemble_proba",
    "",
    "def ensemble_predict(X):",
    '    """Generates the final class prediction based on ensemble probability."""',
    "    ensemble_proba = ensemble_predict_proba(X)",
    "    predictions = np.argmax(ensemble_proba, axis=1)",
    "    return predictions",
    "",
    "",
    "# --- 3. EXPLAINABILITY FUNCTIONS (LIME) ---",
    "@st.cache_resource",
    "def get_lime_explainer():",
    '    """Initializes and caches the LIME explainer."""',
    "    # Define all 50 features as categorical (indices 0 through 49)",
    "    categorical_features_indices = list(range(N_FEATURES))",
    "    # Define human-readable names for the values 0 and 1 (CRITICAL FIX)",
    "    categorical_names_map = {0: 'No (0)', 1: 'Yes (1)'}",
    "    ",
    "    explainer_lime = lime_tabular.LimeTabularExplainer(",
    "        training_data=X_train_for_lime,",
    "        feature_names=FEATURE_NAMES,",
    "        class_names=le.classes_.tolist(),",
    "        mode='classification',",
    "        kernel_width=0.75,",
    "        random_state=42,",
    "        categorical_features=categorical_features_indices,",
    "        categorical_names=categorical_names_map,",
    "        discretize_continuous=False",
    "    )",
    "    return explainer_lime",
    "",
    "lime_explainer = get_lime_explainer()",
    "",
    "# --- 4. STREAMLIT UI LAYOUT ---",
    "",
    'st.title("👨‍⚕️ AI-Powered Medical Diagnosis Assistant with Explainable AI")',
    'st.markdown("Developed ensemble models (TabNet + XGBoost) for real-time risk assessment.")',
    "",
    "# --- SIDEBAR: Input Area ---",
    'st.sidebar.header("Patient Symptom Input")',
    "st.sidebar.markdown(\"Select 'Yes' (1) if the symptom is present, 'No' (0) if absent.\")",
    "",
    "user_input = {}",
    "# Create a selectbox for each of the 50 symptoms",
    "for i in range(N_FEATURES):",
    "    symptom = FEATURE_NAMES[i]",
    "    # Display name converts Underscore to Space (e.g., Sore_Throat -> Sore Throat)",
    "    selection = st.sidebar.selectbox(f\"**{symptom.replace('_', ' ')}**\", options=['No (0)', 'Yes (1)'], index=0, key=f'symptom_{i}')",
    "    user_input[symptom] = int(selection.split('(')[1].replace(')', ''))",
    "",
    "# Convert input to DataFrame for prediction",
    "input_df = pd.DataFrame([user_input])",
    "input_array = input_df.values.astype(np.float32)",
    "",
    "# --- MAIN PAGE: Results ---",
    'st.header("Diagnosis and Explainability Results")',
    "",
    "if st.sidebar.button(\"Analyze Patient Data\", key='analyze_btn'):",
    '    st.subheader("1. Real-Time Risk Assessment")',
    "    ",
    "    # 1. Prediction (Real-Time processing <2 seconds)",
    "    with st.spinner('Analyzing symptoms and generating ensemble prediction...'):",
    "        ",
    "        import time",
    "        start_time = time.time()",
    "        ",
    "        y_pred_index = ensemble_predict(input_array)[0]",
    "        y_pred_label = le.inverse_transform([y_pred_index])[0]",
    "        y_pred_proba_all = ensemble_predict_proba(input_array)[0]",
    "        ",
    "        y_pred_proba = y_pred_proba_all[y_pred_index]",
    "        ",
    "        end_time = time.time()",
    "        processing_time = end_time - start_time",
    "    ",
    "    # Display Prediction",
    "    col_pred, col_time = st.columns(2)",
    "    col_pred.metric(",
    '        label="Predicted Diagnosis", ',
    "        value=y_pred_label, ",
    '        delta=f"Risk: {y_pred_proba:.2%}",',
    '        delta_color="off"',
    "    )",
    "    col_time.metric(",
    '        label="Processing Time",',
    '        value=f"{processing_time:.3f} seconds",',
    '        delta="< 2 seconds (Goal)",',
    '        delta_color="inverse" if processing_time >= 2 else "normal"',
    "    )",
    "",
    '    st.subheader("2. Explainable AI (LIME)")',
    '    st.markdown(f"**Top 5 symptoms influencing the diagnosis of `{y_pred_label}`.**")',
    "    ",
    "    # 2. Explainability (LIME)",
    "    with st.spinner('Generating LIME explanation...'):",
    "        explanation = lime_explainer.explain_instance(",
    "            data_row=input_array[0],",
    "            predict_fn=ensemble_predict_proba,",
    "            num_features=5,",
    "            top_labels=1",
    "        )",
    "        ",
    "        # Get explanation for the predicted class index",
    "        lime_results = explanation.as_list(label=y_pred_index)",
    "",
    "    # Display LIME results in a table",
    "    lime_df = pd.DataFrame(lime_results, columns=['Symptom', 'Impact Weight'])",
    "    ",
    "    st.dataframe(lime_df, use_container_width=True)",
    "    ",
    "    st.markdown(\"\"\"",
    "        * **Impact Weight > 0**: The symptom pushes the prediction **towards** the predicted diagnosis.",
    "        * **Impact Weight < 0**: The symptom pushes the prediction **away** from the predicted diagnosis.",
    "    \"\"\")",
    "    ",
    '    st.subheader("3. Patient Similarity Recommendations (Placeholder)")',
    '    st.info("This feature would typically query a database (SQLite) for similar patient records and their outcomes. *Implementation deferred for current scope.*")',
    "",
    "else:",
    "    st.info(\"👈 Enter patient symptoms in the sidebar and click 'Analyze Patient Data'.\")"
]

# Join the list of lines with newline characters
streamlit_code = "\n".join(streamlit_lines)

# Write the Streamlit code to the file
with open(STREAMLIT_APP_FILE, 'w') as f:
    f.write(streamlit_code)

print(f"Streamlit application code written to {STREAMLIT_APP_FILE}")
print("All models, encoder, and the application file are ready for deployment.")

Streamlit application code written to app.py
All models, encoder, and the application file are ready for deployment.


In [None]:
# 1. Install pyngrok and upgrade ngrok binary if needed
!pip install pyngrok -q
!pip install --upgrade ngrok -q

# 2. Authenticate ngrok using the shell command (safest method)
# *** CRITICAL: REPLACE "PASTE_YOUR_FULL_TOKEN_HERE" with your actual, complete ngrok token ***
YOUR_FULL_TOKEN = "cr_343E12HAMa4TitZT8f4I2ukWm83"
!ngrok authtoken $YOUR_FULL_TOKEN

print("ngrok authentication saved. Now ready for deployment.")

shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
The folder you are executing pip from can no longer be found.
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
The folder you are executing pip from can no longer be found.
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
panic: getwd: no such file or directory

goroutine 1 [running]:
github.com/go-martini/martini.init.0()
	github.com/go-martini/martini@v0.0.0-20170121215854-22fa46961aab/env.go:29 +0x9a
ngrok authentication saved. Now ready for deployment.
