# Notebook for running tests in the `ciu_tests` directory.

This notebook runs tests written in "pure Python" for different datasets. The current test functions can also be used elsewhere as a shortcut for loading the data, training a model, creating a CIU object and getting a test instance. 

Basic imports.

In [1]:
import pandas as pd
import numpy as np
import ciu_tests as ciu_tests

## Iris

In [2]:
from ciu_tests import iris_lda

# Fix NumPy's global seed so that every run gives the same model.
# NOTE(review): the module/variable names (`iris_lda`, `iris_lda_model`) suggest
# an LDA model rather than a Random Forest -- confirm.
np.random.seed(24)
CIU_iris, iris_lda_model, instance = iris_lda.get_iris_test()

# One CIU table per output: first the default call (setosa in the saved output),
# then output indices 1 and 2 (versicolor and virginica in the saved output).
CIUres_iris = CIU_iris.explain(instance)
display(CIUres_iris)
for class_ind in (1, 2):
    CIUres_iris = CIU_iris.explain(instance, output_inds=class_ind)
    display(CIUres_iris)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.9887112,-4.425673e-31,-0.4943556,setosa,3.25415e-17,s_length,3.25415e-17,0.9887112,[0],[2.0],0.5,,
s_width,1.547326e-09,2.10308e-08,-7.736628e-10,setosa,3.25415e-17,s_width,1.3321279999999999e-24,1.547326e-09,[1],[3.2],0.5,,
p_length,5.536589e-10,5.877536e-08,-2.768294e-10,setosa,3.25415e-17,p_length,3.896831e-79,5.536589e-10,[2],[1.8],0.5,,
p_width,0.999997,2.875276e-17,-0.4999985,setosa,3.25415e-17,p_width,3.78883e-18,0.999997,[3],[2.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.98782,0.813969,0.310145,versicolor,0.816231,s_length,0.01217566,0.999995,[0],[2.0],0.5,,
s_width,0.881239,0.795451,0.260363,versicolor,0.816231,s_width,0.1152488,0.996487,[1],[3.2],0.5,,
p_length,0.999546,0.816601,0.316458,versicolor,0.816231,p_length,3.465495e-17,0.999546,[2],[1.8],0.5,,
p_width,0.999997,0.816231,0.31623,versicolor,0.816231,p_width,3.002736e-06,1.0,[3],[2.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.183769,1.0,0.091885,virginica,0.183769,s_length,1.152639e-11,0.183769,[0],[2.0],0.5,,
s_width,0.881485,0.204771,-0.26024,virginica,0.183769,s_width,0.003266355,0.884751,[1],[3.2],0.5,,
p_length,0.999546,0.183399,-0.316458,virginica,0.183769,p_length,0.0004537308,1.0,[2],[1.8],0.5,,
p_width,0.356344,0.515707,0.005597,virginica,0.183769,p_width,2.65873e-20,0.356344,[3],[2.4],0.5,,


## Boston housing

In [3]:
# Boston housing: fetch a CIU object, the trained model and a test instance,
# then display the CIU explanation of that instance.
from ciu_tests import boston_gbm

np.random.seed(26) # Fixed seed: we want to always get the same model here.
# NOTE(review): the names (`boston_gbm`, `boston_xgb_model`) suggest a gradient-boosting
# model rather than a Random Forest -- confirm.
# NOTE(review): `CIU`, `CIUres` and `instance` are generic names; `instance` replaces the
# Iris one here and the Ames cell later rebinds `CIU` and `CIUres`.
CIU, boston_xgb_model, instance = boston_gbm.get_boston_gbm_test()
CIUres = CIU.explain(instance)
display(CIUres)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
CRIM,0.029835,1.0,0.014917,Price,18.028278,CRIM,16.685719,18.028278,[0],[0.05735],0.5,,
ZN,0.0,0.0,-0.0,Price,18.028278,ZN,18.028278,18.028278,[1],[0.0],0.5,,
INDUS,0.049869,0.61105,0.005538,Price,18.028278,INDUS,16.657005,18.901133,[2],[4.49],0.5,,
CHAS,0.0,0.0,-0.0,Price,18.028278,CHAS,18.028278,18.028278,[3],[0.0],0.5,,
NOX,0.025628,1.0,0.012814,Price,18.028278,NOX,16.875021,18.028278,[4],[0.449],0.5,,
RM,0.088204,0.278436,-0.019543,Price,18.028278,RM,16.923109,20.892307,[5],[6.63],0.5,,
AGE,0.02741,1.0,0.013705,Price,18.028278,AGE,16.794827,18.028278,[6],[56.1],0.5,,
DIS,0.039394,1.0,0.019697,Price,18.028278,DIS,16.25555,18.028278,[7],[4.4377],0.5,,
RAD,0.00682,1.0,0.00341,Price,18.028278,RAD,17.721357,18.028278,[8],[3.0],0.5,,
TAX,0.006239,1.0,0.00312,Price,18.028278,TAX,17.747513,18.028278,[9],[247.0],0.5,,


## Titanic

In [4]:
# Titanic: fetch a CIU object, the trained model and a test instance,
# then display the CIU explanation of that instance (one row per input feature).
from ciu_tests import titanic_rf

np.random.seed(26) # We want to always get the same Random Forest model here.
CIU_titanic, titanic_model, titanic_instance = titanic_rf.get_titanic_rf()
# No `output_inds` is given; in the saved output the explained output is "No".
CIUres_titanic = CIU_titanic.explain(titanic_instance)
display(CIUres_titanic)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Pclass,0.0,0.0,-0.0,No,0.4,Pclass,0.4,0.4,[0],[1],0.5,,
Sex,0.309333,1.0,0.154667,No,0.4,Sex,0.090667,0.4,[1],[1],0.5,,
Age,0.63,0.047619,-0.285,No,0.4,Age,0.37,1.0,[2],[8],0.5,,
SibSp,0.16,0.1875,-0.05,No,0.4,SibSp,0.37,0.53,[3],[0],0.5,,
Parch,0.12,1.0,0.06,No,0.4,Parch,0.28,0.4,[4],[0],0.5,,
Fare,0.116667,0.292857,-0.024167,No,0.4,Fare,0.365833,0.4825,[5],[72],0.5,,
Embarked,0.03,1.0,0.015,No,0.4,Embarked,0.37,0.4,[6],[1],0.5,,


In [5]:
# Explain the same Titanic instance with `explain_voc`; in the saved output the rows
# include grouped concepts (e.g. Wealth = inputs [0, 5], Family = inputs [3, 4])
# next to single features. Presumably "voc" refers to a concept vocabulary held by
# the CIU object -- TODO confirm. Requires the Titanic cell above to have been run.
CIUres_voc_top_titanic = CIU_titanic.explain_voc(titanic_instance, nsamples=1000)
display(CIUres_voc_top_titanic)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Wealth,0.26,0.576923,0.02,No,0.4,Wealth,0.25,0.51,"[0, 5]","[1, 72]",0.5,,
Family,0.3,0.566667,0.02,No,0.4,Family,0.23,0.53,"[3, 4]","[0, 0]",0.5,,
Sex,0.309333,1.0,0.154667,No,0.4,Sex,0.090667,0.4,[1],[1],0.5,,
Age,0.63,0.047619,-0.285,No,0.4,Age,0.37,1.0,[2],[8],0.5,,
Embarked,0.03,1.0,0.015,No,0.4,Embarked,0.37,0.4,[6],[1],0.5,,


## Ames housing

In [6]:
# Ames housing: fetch a CIU object, the trained model and a test instance,
# then display the CIU explanation of that instance.
from ciu_tests import ames_housing_gbm

np.random.seed(26) # Fixed seed: we want to always get the same model here.
# NOTE(review): the names (`ames_housing_gbm`, `ames_xgb_model`) suggest a gradient-boosting
# model rather than a Random Forest -- confirm.
# NOTE(review): this rebinds `CIU` and `CIUres`, which the Boston housing cell also assigns.
CIU, ames_xgb_model, ames_instance = ames_housing_gbm.get_ames_gbm_test()
CIUres = CIU.explain(ames_instance)
display(CIUres)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Order,0.051470,1.000000,0.025735,Price,733.174316,Order,680.108276,733.174316,[0],[1561],0.5,,
PID,0.021009,0.211230,-0.006067,Price,733.174316,PID,728.598938,750.259583,[1],[2700],0.5,,
MSSubClass,0.011317,1.000000,0.005658,Price,733.174316,MSSubClass,721.506775,733.174316,[2],[11],0.5,,
MSZoning,0.015653,0.000000,-0.007827,Price,733.174316,MSZoning,733.174316,749.312622,[3],[6],0.5,,
LotFrontage,0.038355,0.428175,-0.002755,Price,733.174316,LotFrontage,716.242676,755.786438,[4],[20],0.5,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
MiscVal,0.000000,0.000000,-0.000000,Price,733.174316,MiscVal,733.174316,733.174316,[76],[0],0.5,,
MoSold,0.018343,0.326110,-0.003190,Price,733.174316,MoSold,727.007141,745.918457,[77],[6],0.5,,
YrSold,0.010276,0.976934,0.004901,Price,733.174316,YrSold,722.823730,733.418701,[78],[2],0.5,,
SaleType,0.000273,0.980273,0.000131,Price,733.174316,SaleType,732.898315,733.179871,[79],[9],0.5,,


In [7]:
# Explain the Ames instance with `explain_voc`; in the saved output the rows include
# grouped concepts (e.g. Garage, Basement, Lot) that each cover several input indices.
# NOTE(review): `CIU` is bound by both the Boston and the Ames cells; since this call
# passes `ames_instance` it presumably needs the Ames object, so run the Ames cell first.
CIUres_voc_top = CIU.explain_voc(ames_instance, nsamples=1000)
display(CIUres_voc_top)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Garage,0.120656,0.798777,0.036049,Price,733.174316,Garage,633.809265,758.20575,"[59, 60, 61, 62, 63, 64, 65]","[1, 103, 0, 2, 224, 5, 5]",0.5,,
Basement,0.174889,0.968412,0.08192,Price,733.174316,Basement,558.559692,738.869995,"[31, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49]","[0, 5, 1, 2, 941, 6, 0, 81, 847, 1, 1]",0.5,,
Lot,0.079071,0.524314,0.001923,Price,733.174316,Lot,690.43103,771.953308,"[3, 4, 7, 8, 9, 10, 11]","[6, 20, 1, 0, 1, 0, 4]",0.5,,
Access,0.074935,0.893491,0.029486,Price,733.174316,Access,664.145142,741.402954,"[13, 14]","[6, 2]",0.5,,
House_type,0.143347,0.897362,0.056961,Price,733.174316,House_type,600.552795,748.343262,"[1, 15, 16, 21]","[2700, 2, 4, 55]",0.5,,
House_aesthetics,0.038237,0.958274,0.017523,Price,733.174316,House_aesthetics,695.396729,734.819275,"[22, 23, 24, 25, 26]","[3, 1, 5, 5, 4]",0.5,,
House_condition,0.504237,0.839262,0.171069,Price,733.174316,House_condition,296.868561,816.736633,"[20, 18, 21, 28, 19, 29]","[111, 6, 55, 2, 4, 4]",0.5,,
Electrical,0.015586,1.0,0.007793,Price,733.174316,Electrical,717.105469,733.174316,[43],[4],0.5,,
GrLivArea,0.117737,0.564275,0.007567,Price,733.174316,GrLivArea,664.679077,786.06543,[47],[711],0.5,,


## Heart disease

The target variable has been restricted to only two classes, "no disease" and "disease". The original data classifies diseases into four different classes.

In [8]:
# Heart disease: fetch a CIU object, the trained model and the test instance selected
# by `inst_ind`, print the instance and its predicted probabilities, then display the
# CIU explanation for each of the two outputs.
from ciu_tests import heart_disease_rf

np.random.seed(26) # We want to always get the same Random Forest model here.
inst_ind = 2 # Instance 0 has no disease, instance 2 (for instance) has higher probability of disease than no disease
CIU_hd, hd_model, hd_instance = heart_disease_rf.get_heart_disease_rf(inst_ind)
print(hd_instance)
# One probability per class; the saved output is [[0.41 0.59]].
print(hd_model.predict_proba(hd_instance))
# Default call: the explained output is "No" in the saved output.
CIUres_hd = CIU_hd.explain(hd_instance)
display(CIUres_hd)
# output_inds=1: the explained output is "Yes" in the saved output.
CIUres_hd = CIU_hd.explain(hd_instance, output_inds=1)
display(CIUres_hd)

      age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
106  57.0  1.0  3.0     128.0  229.0  0.0      2.0    150.0    0.0      0.4   

     slope   ca  thal  
106    2.0  1.0   7.0  
[[0.41 0.59]]


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
age,0.1,0.2,-0.03,No,0.41,age,0.39,0.49,[0],[57.0],0.5,,
sex,0.03,0.0,-0.015,No,0.41,sex,0.41,0.44,[1],[1.0],0.5,,
cp,0.16,1.0,0.08,No,0.41,cp,0.25,0.41,[2],[3.0],0.5,,
trestbps,0.09,0.888889,0.035,No,0.41,trestbps,0.33,0.42,[3],[128.0],0.5,,
chol,0.11,0.181818,-0.035,No,0.41,chol,0.39,0.5,[4],[229.0],0.5,,
fbs,0.07,0.0,-0.035,No,0.41,fbs,0.41,0.48,[5],[0.0],0.5,,
restecg,0.09,0.0,-0.045,No,0.41,restecg,0.41,0.5,[6],[2.0],0.5,,
thalach,0.07,1.0,0.035,No,0.41,thalach,0.34,0.41,[7],[150.0],0.5,,
exang,0.07,0.0,-0.035,No,0.41,exang,0.41,0.48,[8],[0.0],0.5,,
oldpeak,0.25,0.92,0.105,No,0.41,oldpeak,0.18,0.43,[9],[0.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
age,0.1,0.8,0.03,Yes,0.59,age,0.51,0.61,[0],[57.0],0.5,,
sex,0.03,1.0,0.015,Yes,0.59,sex,0.56,0.59,[1],[1.0],0.5,,
cp,0.16,0.0,-0.08,Yes,0.59,cp,0.59,0.75,[2],[3.0],0.5,,
trestbps,0.09,0.111111,-0.035,Yes,0.59,trestbps,0.58,0.67,[3],[128.0],0.5,,
chol,0.12,0.833333,0.04,Yes,0.59,chol,0.49,0.61,[4],[229.0],0.5,,
fbs,0.07,1.0,0.035,Yes,0.59,fbs,0.52,0.59,[5],[0.0],0.5,,
restecg,0.09,1.0,0.045,Yes,0.59,restecg,0.5,0.59,[6],[2.0],0.5,,
thalach,0.07,0.0,-0.035,Yes,0.59,thalach,0.59,0.66,[7],[150.0],0.5,,
exang,0.07,1.0,0.035,Yes,0.59,exang,0.52,0.59,[8],[0.0],0.5,,
oldpeak,0.25,0.08,-0.105,Yes,0.59,oldpeak,0.57,0.82,[9],[0.4],0.5,,
