# Project 3

In [1]:
!pip install explainerdashboard
!pip install pandas
!pip install numpy



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, roc_curve

## Dataset

The dataset consists of 278 alternatives, each described by 7 continuous criteria that range from 0 to 1, and a class label that is either 1 or 2.

In [3]:
# Seven criteria columns followed by the target column
column_names = [*'ABCDEFG', 'class']

# The file has no header row, so column names are supplied explicitly;
# the label is shifted down by one while the frame is being built.
df = (
    pd.read_csv('./breast_cancer.csv', header=None, names=column_names)
    .assign(**{'class': lambda frame: frame['class'] - 1})
)
df

Unnamed: 0,A,B,C,D,E,F,G,class
0,1.0,0.500000,0.000,1.0,1.0,1.0,0.0,1.0
1,0.5,0.500000,0.000,0.0,0.0,1.0,0.0,0.0
2,0.5,0.833333,0.000,0.0,0.5,0.0,0.0,1.0
3,1.0,0.833333,0.000,1.0,1.0,1.0,1.0,0.0
4,1.0,1.000000,0.125,1.0,0.5,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...
273,0.5,1.000000,0.250,1.0,0.5,0.0,0.0,0.0
274,1.0,0.833333,0.125,1.0,0.5,0.0,1.0,0.0
275,1.0,1.000000,0.250,1.0,0.5,1.0,0.0,0.0
276,1.0,0.500000,0.000,0.0,0.5,1.0,0.0,0.0


## Logistic Regression Model

In [4]:
# Every column except the last is a feature; 'class' is the target
X, y = df.iloc[:, :-1], df['class']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Fit a logistic regression with default settings on the training split
# (fit() returns the estimator itself, so the call can be chained)
model = LogisticRegression().fit(X_train, y_train)

# Hard class predictions for the held-out rows
y_pred = model.predict(X_test)

In [6]:
# Compute the metrics first, then print them
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("Accuracy:", round(accuracy, 4))
print("Classification Report:\n", report)

Accuracy: 0.7679
Classification Report:
               precision    recall  f1-score   support

         0.0       0.81      0.90      0.85        42
         1.0       0.56      0.36      0.43        14

    accuracy                           0.77        56
   macro avg       0.68      0.63      0.64        56
weighted avg       0.75      0.77      0.75        56



In [7]:
# Second column of predict_proba: probability of class 1
y_proba = model.predict_proba(X_test)[:, 1]

auc = roc_auc_score(y_test, y_proba)
print("AUC Score:", round(auc, 4))

AUC Score: 0.7721


In [8]:
from explainerdashboard import ClassifierExplainer, ExplainerDashboard
# Wrap the fitted model together with the held-out data for the dashboard
explainer = ClassifierExplainer(model, X=X_test, y=y_test)

Note: model_output='probability' is currently not supported for linear classifiers models with shap. So defaulting to model_output='logodds' If you really need probability outputs use shap='kernel' instead.
Note: shap values for shap='linear' get calculated against X_background, but paramater X_background=None, so using X instead...
Generating self.shap_explainer = shap.LinearExplainer(model, X)...


In [9]:
def patched_run(self, port=8050, host='127.0.0.1', use_waitress=False, mode=None, **kwargs):
    """Start the dashboard server; intended as a replacement for ``ExplainerDashboard.run``.

    Args:
        port: Port the server listens on.
        host: Address the server binds to.
        use_waitress: If true, serve ``self.app.server`` through waitress
            instead of the app's own run method.
        mode: When not None, stored on ``self.mode`` before the server starts.
        **kwargs: Forwarded to the app's run method; not used when
            ``use_waitress`` is true.

    Returns:
        None.
    """
    if mode is not None:
        self.mode = mode
    app = self.app
    if use_waitress:
        # Imported lazily so waitress is only required when actually requested
        from waitress import serve
        serve(app.server, host=host, port=port)
        return
    # Prefer ``app.run``; fall back to ``app.run_server`` for Dash versions
    # that do not provide ``run`` so the patch works with either API.
    launch = getattr(app, "run", None)
    if launch is None:
        launch = app.run_server
    launch(host=host, port=port, **kwargs)

# Install the replacement on the class so every dashboard instance uses it
setattr(ExplainerDashboard, "run", patched_run)

In [10]:
import dash_bootstrap_components as dbc

# Patch only once. Re-running this cell would otherwise store the already
# patched __init__ as the "original"; because the wrapper looks that name up
# at call time, it would then call itself until a RecursionError is raised.
if not getattr(dbc.DropdownMenu.__init__, "_right_alias_patched", False):
    # Store original constructor
    _original_dropdownmenu_init = dbc.DropdownMenu.__init__

    # Define patched version
    def _patched_dropdownmenu_init(self, *args, **kwargs):
        """Pass a ``right`` keyword on as ``align_end``, then call the original constructor."""
        if "right" in kwargs:
            kwargs["align_end"] = kwargs.pop("right")
        _original_dropdownmenu_init(self, *args, **kwargs)

    # Marker read by the guard above to detect an existing patch
    _patched_dropdownmenu_init._right_alias_patched = True

    # Apply patch
    dbc.DropdownMenu.__init__ = _patched_dropdownmenu_init

In [11]:
# Build the dashboard from the explainer, then start serving it
dashboard = ExplainerDashboard(explainer)
dashboard.run()

Building ExplainerDashboard..
Detected google colab environment, setting mode='external'
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...



JupyterDash is deprecated, use Dash instead.
See https://dash.plotly.com/dash-in-jupyter for more details.



Calculating prediction probabilities...
Calculating metrics...
Calculating confusion matrices...
Calculating classification_dfs...
Calculating roc auc curves...
Calculating pr auc curves...
Calculating liftcurve_dfs...
Calculating dependencies...
Calculating permutation importances (if slow, try setting n_jobs parameter)...
Calculating predictions...
Calculating pred_percentiles...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...


<IPython.core.display.Javascript object>