<a href="https://colab.research.google.com/github/AkshataKishore/IML/blob/master/PCOS_Interpret.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Attach Google Drive to the Colab VM so files stored on Drive are reachable
# from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Once Drive has been mounted by the cell above, its files are available under
# "/content/drive/My Drive". List that folder, then make it the working
# directory so relative paths used later (e.g. the CSV file name) resolve there.
!ls "/content/drive/My Drive"
%cd /content/drive/My Drive/

In [None]:
import time,os,json
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm_notebook as tqdm
import matplotlib.pyplot as plt

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
plt.style.use('ggplot') # random state
SEED=100
time_start_notebook = time.time()
home = os.path.expanduser('~')
[(x.__name__,x.__version__) for x in [np,pd,sns]]

[('numpy', '1.18.5'), ('pandas', '1.1.3'), ('seaborn', '0.11.0')]

In [None]:
%%capture
# %%capture swallows everything this cell would print (pip logs and the
# print() call below), so nothing from this cell shows up in the notebook.

import os
import sys
# True when the google.colab module has already been imported in this process.
ENV_COLAB = 'google.colab' in sys.modules

if ENV_COLAB:
    # Upgrade third-party packages on the Colab VM (original note: model evaluation).
    !pip install -U watermark
    !pip install -U xgboost
    !pip install -U eli5
    !pip install -U shap
    !pip install -U pdpbox
    !pip install -U yellowbrick
    !pip install -U lime

    # Status message; not displayed because of %%capture above.
    print('Environment: Google Colaboratory.')

# NOTE: If we update modules in gcolab, we need to restart runtime.

In [None]:
from sklearn.model_selection import train_test_split

# Label column; every other column of the CSV is treated as a feature.
target = 'PCOS (Y/N)'
df = pd.read_csv('PCOS_Data.csv')

# Seeded 80/20 split, stratified on the label.
df_Xtrain, df_Xtest, ser_ytrain, ser_ytest = train_test_split(
    df.drop(columns=target),
    df[target],
    test_size=0.2,
    random_state=SEED,
    stratify=df[target],
)

# Flat numpy copies of the labels, alongside the pandas Series versions.
ytrain = ser_ytrain.to_numpy().reshape(-1)
ytest = ser_ytest.to_numpy().reshape(-1)

# Cell output: the feature column names, in training-frame order.
features_train = list(df_Xtrain.columns)
features_train

[' Age (yrs)',
 'Weight (Kg)',
 'Height(Cm) ',
 'BMI',
 'Blood Group',
 'Pulse rate(bpm) ',
 'RR (breaths/min)',
 'Hb(g/dl)',
 'Cycle(R/I)',
 'Cycle length(days)',
 'Marraige Status (Yrs)',
 'Pregnant(Y/N)',
 'No. of aborptions',
 'FSH(mIU/mL)',
 'LH(mIU/mL)',
 'Hip(inch)',
 'Waist(inch)',
 'TSH (mIU/L)',
 'AMH(ng/mL)',
 'PRL(ng/mL)',
 'Vit D3 (ng/mL)',
 'PRG(ng/mL)',
 'RBS(mg/dl)',
 'Weight gain(Y/N)',
 'hair growth(Y/N)',
 'Skin darkening (Y/N)',
 'Hair loss(Y/N)',
 'Pimples(Y/N)',
 'Fast food (Y/N)',
 'Reg.Exercise(Y/N)',
 'BP _Systolic (mmHg)',
 'BP _Diastolic (mmHg)',
 'Follicle No. (L)',
 'Follicle No. (R)',
 'Avg. F size (L) (mm)',
 'Avg. F size (R) (mm)',
 'Endometrium (mm)']

In [None]:
!pip install interpret
from interpret.glassbox import ExplainableBoostingClassifier

ebm = ExplainableBoostingClassifier()
ebm.fit(df_Xtrain, ser_ytrain)

# or substitute with LogisticRegression, DecisionTreeClassifier, RuleListClassifier, ...
# EBM supports pandas dataframes, numpy arrays, and handles "string" data natively.



ExplainableBoostingClassifier(binning='quantile', early_stopping_rounds=50,
                              early_stopping_tolerance=0.0001,
                              feature_names=[' Age (yrs)', 'Weight (Kg)',
                                             'Height(Cm) ', 'BMI',
                                             'Blood Group', 'Pulse rate(bpm) ',
                                             'RR (breaths/min)', 'Hb(g/dl)',
                                             'Cycle(R/I)', 'Cycle length(days)',
                                             'Marraige Status (Yrs)',
                                             'Pregnant(Y/N)',
                                             'No. of aborptions', 'FSH(mIU/mL)',
                                             'LH(mIU/mL)', 'Hip(inch)'...
                                             'continuous', 'continuous',
                                             'continuous', 'continuous',
                                             'con

In [None]:
from interpret import show

# Build the model-wide (global) explanation of the fitted EBM and render it
# with interpret's show().
ebm_global = ebm.explain_global()
show(ebm_global)

  detected_envs


In [None]:
# Per-sample (local) explanations for the whole test split; the true labels
# are supplied so they accompany each explanation.
ebm_local = ebm.explain_local(df_Xtest, y=ytest)
show(ebm_local)

In [None]:
# Recompute the global explanation, this time passing name='EBM', and rebind
# `ebm_global` to the named version before rendering it.
ebm_global = ebm.explain_global(name='EBM')
show(ebm_global)

In [None]:
# Import here so the cell runs top-to-bottom on a fresh kernel: the class was
# previously used before any cell had imported it.
from interpret.glassbox import ClassificationTree

# Fit the tree on the training split created by train_test_split. The earlier
# `X_train_enc` / `y_train` names are never defined in this notebook.
tree = ClassificationTree()
tree.fit(df_Xtrain, ytrain)

In [None]:
# Local explanations restricted to the first five test rows, labelled 'EBM'.
first_rows = df_Xtest.head(5)
first_labels = ytest[:5]
ebm_local = ebm.explain_local(first_rows, y=first_labels, name='EBM')
show(ebm_local)

In [None]:
from interpret.perf import ROC

# ROC performance explanation built from the EBM's predict_proba on the test split.
ebm_roc = ROC(ebm.predict_proba)
ebm_perf = ebm_roc.explain_perf(df_Xtest, ytest, name='EBM')
show(ebm_perf)

In [None]:
from interpret.glassbox import LogisticRegression, ClassificationTree

# Train interpret's ClassificationTree on the training split. fit() hands back
# the fitted tree, which is bound to `tree` and echoed as the cell output.
tree = ClassificationTree().fit(df_Xtrain, ytrain)
tree

<interpret.glassbox.decisiontree.ClassificationTree at 0x7fec00bca6d8>

In [None]:
# ROC performance explanation for the classification tree on the test split.
tree_roc = ROC(tree.predict_proba)
tree_perf = tree_roc.explain_perf(df_Xtest, ytest, name='Classification Tree')
show(tree_perf)

In [None]:
from interpret.data import ClassHistogram

# Build the two explanations this dashboard needs that no earlier cell has
# produced: the training-data histogram and the tree's global explanation.
# Without them the show() call below fails with NameError on a fresh run.
hist = ClassHistogram().explain_data(df_Xtrain, ytrain, name='Train Data')
tree_global = tree.explain_global(name='Classification Tree')

# Single dashboard combining the data, tree and EBM explanations.
show([hist,  tree_global, tree_perf, ebm_global, ebm_perf], share_tables=True)

In [None]:
from interpret.data import ClassHistogram

# Data explanation of the training split via ClassHistogram, labelled 'Train Data'.
hist_explainer = ClassHistogram()
hist = hist_explainer.explain_data(df_Xtrain, ytrain, name='Train Data')
show(hist)

In [None]:
from interpret.blackbox import LimeTabular
from interpret import show

In [None]:
from interpret.blackbox import LimeTabular
from interpret import show

# Blackbox explainers need a predict function, and optionally a dataset.
# Both are passed positionally: `predict_fn=` is a version-specific keyword
# (newer interpret releases appear to name this parameter `model`), and the
# notebook installs interpret without a version pin.
lime = LimeTabular(tree.predict_proba, df_Xtrain, random_state=1)

# Pick the instances to explain, optionally pass in labels if you have them.
lime_local = lime.explain_local(df_Xtest[:5], ytest[:5], name='LIME')

show(lime_local)


In [None]:
from interpret.blackbox import ShapKernel
import numpy as np

features_train = df_Xtrain.columns.to_list()

# Background data for the SHAP explainer: a single row holding the per-column
# median of the training features.
background_val = np.median(df_Xtrain, axis=0).reshape(1, -1)

# The predict function and background data are passed positionally:
# `predict_fn=` is a version-specific keyword (newer interpret releases appear
# to name this parameter `model`). Feature names are given explicitly because
# the background is a bare numpy array.
shap = ShapKernel(tree.predict_proba, background_val, feature_names=features_train)

# Explain the first five test rows, with their true labels attached.
shap_local = shap.explain_local(df_Xtest[:5], ytest[:5], name='SHAP')
show(shap_local)


HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




In [None]:
from interpret.blackbox import MorrisSensitivity

# The predict function and data are passed positionally: `predict_fn=` is a
# version-specific keyword (newer interpret releases appear to name this
# parameter `model`).
sensitivity = MorrisSensitivity(tree.predict_proba, df_Xtrain)
sensitivity_global = sensitivity.explain_global(name="Global Sensitivity")

show(sensitivity_global)

In [None]:
from interpret.blackbox import PartialDependence

# The predict function and data are passed positionally: `predict_fn=` is a
# version-specific keyword (newer interpret releases appear to name this
# parameter `model`).
pdp = PartialDependence(tree.predict_proba, df_Xtrain)
pdp_global = pdp.explain_global(name='Partial Dependence')

show(pdp_global)

In [None]:
# Render the SHAP local explanations held in `shap_local` once more.
show(shap_local)