In [15]:
!pip install -q xgboost shap pandas scikit-learn gradio uvicorn python-multipart

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.1/323.1 kB[0m [31m17.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m95.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [9]:
# Load dataset
import pandas as pd
from sklearn.datasets import load_breast_cancer

# Load dataset using scikit-learn
cancer = load_breast_cancer()
X = pd.DataFrame(cancer.data, columns=cancer.feature_names)
y = pd.Series(cancer.target, name="target")

# Split train-test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
# Train XGBoost Model

import xgboost as xgb
from sklearn.metrics import accuracy_score

# Train model
model = xgb.XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    early_stopping_rounds=10,
    n_estimators=100,
)
model.fit(X_train, y_train, eval_set=[(X_test, y_test)])

# Evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy * 100:.2f}%")  # Expected ~88%

[0]	validation_0-logloss:0.46755
[1]	validation_0-logloss:0.35439
[2]	validation_0-logloss:0.28176
[3]	validation_0-logloss:0.24011
[4]	validation_0-logloss:0.20182
[5]	validation_0-logloss:0.17846
[6]	validation_0-logloss:0.16165
[7]	validation_0-logloss:0.15280
[8]	validation_0-logloss:0.14094
[9]	validation_0-logloss:0.13805
[10]	validation_0-logloss:0.13877
[11]	validation_0-logloss:0.13706
[12]	validation_0-logloss:0.13745
[13]	validation_0-logloss:0.13874
[14]	validation_0-logloss:0.14041
[15]	validation_0-logloss:0.13797
[16]	validation_0-logloss:0.13544
[17]	validation_0-logloss:0.13698
[18]	validation_0-logloss:0.13659
[19]	validation_0-logloss:0.13539
[20]	validation_0-logloss:0.13409
[21]	validation_0-logloss:0.13405
[22]	validation_0-logloss:0.13553
[23]	validation_0-logloss:0.13206
[24]	validation_0-logloss:0.13219
[25]	validation_0-logloss:0.13554
[26]	validation_0-logloss:0.13597
[27]	validation_0-logloss:0.13352
[28]	validation_0-logloss:0.13400
[29]	validation_0-loglos

In [17]:
import gradio as gr
import shap
import numpy as np
import matplotlib.pyplot as plt

# Initialize SHAP explainer
explainer = shap.TreeExplainer(model)

def predict_and_explain(*features):
    # Convert input to numpy array
    input_data = np.array(features).reshape(1, -1)

    # Make prediction
    prediction = model.predict(input_data)[0]
    confidence = model.predict_proba(input_data)[0][1]

    # Generate SHAP explanation
    shap_values = explainer.shap_values(input_data)

    # Create force plot
    plt.figure()
    shap.force_plot(
        explainer.expected_value,
        shap_values[0],
        input_data[0],
        feature_names=cancer.feature_names, # Changed from data.feature_names
        matplotlib=True,
        show=False
    )

    # Get top contributing features
    top_features = {
        cancer.feature_names[i]: float(shap_values[0][i]) # Changed from data.feature_names
        for i in np.argsort(-np.abs(shap_values[0]))[:3]
    }

    # Format output
    diagnosis = "Malignant (High Risk)" if prediction == 1 else "Benign (Low Risk)"

    # Save plot to file
    plt.savefig('shap_plot.png', bbox_inches='tight')
    plt.close()

    return {
        "Diagnosis": diagnosis,
        "Confidence Score": f"{confidence*100:.1f}%",
        "Top Contributing Factors": top_features,
        "Explanation Plot": 'shap_plot.png'
    }

# Create input components
inputs = []
for feature in cancer.feature_names: # Changed from data.feature_names
    inputs.append(
        gr.Number(label=feature, value=X_test.iloc[0][feature])
    )

# Create Gradio interface
demo = gr.Interface(
    fn=predict_and_explain,
    inputs=inputs,
    outputs=[
        gr.Label(label="Diagnosis"),
        gr.Label(label="Confidence Score"),
        gr.Label(label="Top Contributing Factors"),
        gr.Image(label="Explanation Plot")
    ],
    title="Early Disease Detection System",
    description="Predicts breast cancer risk with interpretability features",
    examples=[X_test.iloc[i].tolist() for i in range(3)]
)

# Launch the interface
demo.launch()

  return self.config_dict[name]


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://46e926a55ea32bde19.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


