# Notebook for running tests in the `ciu_tests` directory.

This notebook runs tests written in "pure Python" for different datasets. The current test functions can also be used elsewhere as a shortcut for loading the data, training a model, creating a CIU object and getting a test instance. 

Basic imports.

In [1]:
import pandas as pd
import numpy as np
import ciu_tests as ciu_tests

## Iris

In [2]:
from ciu_tests import iris_lda

# Fix NumPy's global seed so that repeated runs are meant to produce the same model.
# NOTE(review): the helper module and the returned model are named "lda", so this is
# presumably an LDA classifier (not a Random Forest) -- confirm in ciu_tests.
np.random.seed(24)
CIU_iris, iris_lda_model, instance = iris_lda.get_iris_test()

# Explain the same instance three times: first with explain()'s default output, then
# with output_inds=1 and output_inds=2 (labelled setosa, versicolor and virginica in
# the recorded output). CIUres_iris ends up holding the last result.
for explain_kwargs in ({}, {"output_inds": 1}, {"output_inds": 2}):
    CIUres_iris = CIU_iris.explain(instance, **explain_kwargs)
    display(CIUres_iris)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.9887474,2.119259e-31,-0.4943737,setosa,3.25415e-17,s_length,3.25415e-17,0.9887474,[0],[2.0],0.5,,
s_width,2.782717e-09,1.169415e-08,-1.391358e-09,setosa,3.25415e-17,s_width,1.3321279999999999e-24,2.782717e-09,[1],[3.2],0.5,,
p_length,5.536589e-10,5.877536e-08,-2.768294e-10,setosa,3.25415e-17,p_length,3.381699e-79,5.536589e-10,[2],[1.8],0.5,,
p_width,0.999997,3.015588e-17,-0.4999985,setosa,3.25415e-17,p_width,2.385707e-18,0.999997,[3],[2.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.988743,0.814143,0.310607,versicolor,0.816231,s_length,0.0112526,0.999995,[0],[2.0],0.5,,
s_width,0.881502,0.795213,0.260231,versicolor,0.816231,s_width,0.1152488,0.996751,[1],[3.2],0.5,,
p_length,0.999546,0.816601,0.316458,versicolor,0.816231,p_length,2.910413e-17,0.999546,[2],[1.8],0.5,,
p_width,0.999997,0.816231,0.31623,versicolor,0.816231,p_width,3.002736e-06,1.0,[3],[2.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
s_length,0.183769,1.0,0.091885,virginica,0.183769,s_length,8.883252e-12,0.183769,[0],[2.0],0.5,,
s_width,0.881502,0.204787,-0.260231,virginica,0.183769,s_width,0.003249199,0.884751,[1],[3.2],0.5,,
p_length,0.999546,0.183399,-0.316458,virginica,0.183769,p_length,0.0004537308,1.0,[2],[1.8],0.5,,
p_width,0.462824,0.397061,-0.047643,virginica,0.183769,p_width,2.65873e-20,0.462824,[3],[2.4],0.5,,


## Boston housing

In [3]:
# Boston housing test: obtain the test setup from `boston_gbm` and explain one instance.
from ciu_tests import boston_gbm

# Fix NumPy's global seed so that repeated runs are meant to produce the same model.
# NOTE(review): the helper and the returned model are named gbm/xgb, so this is
# presumably a gradient-boosting model (not a Random Forest) -- confirm in ciu_tests.
np.random.seed(26)
# Rebinds `instance` (also assigned by the Iris cell). `CIU` and `CIUres` are assigned
# again by the Ames housing cell further down, so cell execution order matters.
CIU, boston_xgb_model, instance = boston_gbm.get_boston_gbm_test()
CIUres = CIU.explain(instance)
display(CIUres)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
CRIM,0.001405,0.0,-0.000702,Price,25.574661,CRIM,25.574661,25.637869,[0],[0.05735],0.5,,
ZN,0.021048,0.0,-0.010524,Price,25.574661,ZN,25.574661,26.521812,[1],[0.0],0.5,,
INDUS,0.046109,0.138218,-0.016681,Price,25.574661,INDUS,25.28787,27.362782,[2],[4.49],0.5,,
CHAS,0.0,0.0,-0.0,Price,25.574661,CHAS,25.574661,25.574661,[3],[0.0],0.5,,
NOX,0.02534,1.0,0.01267,Price,25.574661,NOX,24.434345,25.574661,[4],[0.449],0.5,,
RM,0.074985,0.259624,-0.018025,Price,25.574661,RM,24.698601,28.072943,[5],[6.63],0.5,,
AGE,0.013894,1.0,0.006947,Price,25.574661,AGE,24.949446,25.574661,[6],[56.1],0.5,,
DIS,0.021946,0.568736,0.001509,Price,25.574661,DIS,25.012987,26.00057,[7],[4.4377],0.5,,
RAD,0.0,0.0,-0.0,Price,25.574661,RAD,25.574661,25.574661,[8],[3.0],0.5,,
TAX,0.034447,1.0,0.017224,Price,25.574661,TAX,24.024534,25.574661,[9],[247.0],0.5,,


## Titanic

In [4]:
# Titanic test: obtain the test setup from `titanic_rf` and explain one instance.
from ciu_tests import titanic_rf

# Fix NumPy's global seed so that repeated runs are meant to produce the same
# Random Forest model.
np.random.seed(26)
# Going by the variable names, the helper returns (CIU object, model, test instance).
CIU_titanic, titanic_model, titanic_instance = titanic_rf.get_titanic_rf()
# Explain with explain()'s default output; the recorded output labels it "No".
CIUres_titanic = CIU_titanic.explain(titanic_instance)
display(CIUres_titanic)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Pclass,0.0,0.0,-0.0,No,0.4,Pclass,0.4,0.4,[0],[1.0],0.5,,
Sex,0.309333,1.0,0.154667,No,0.4,Sex,0.090667,0.4,[1],[1.0],0.5,,
Age,0.63,0.047619,-0.285,No,0.4,Age,0.37,1.0,[2],[8.0],0.5,,
SibSp,0.16,0.1875,-0.05,No,0.4,SibSp,0.37,0.53,[3],[0.0],0.5,,
Parch,0.12,1.0,0.06,No,0.4,Parch,0.28,0.4,[4],[0.0],0.5,,
Fare,0.116667,0.292857,-0.024167,No,0.4,Fare,0.365833,0.4825,[5],[72.0],0.5,,
Embarked,0.03,1.0,0.015,No,0.4,Embarked,0.37,0.4,[6],[1.0],0.5,,


In [5]:
# Explain the same Titanic instance through `explain_voc`, with nsamples=1000.
# Relies on CIU_titanic and titanic_instance created by the preceding Titanic cell.
# In the recorded output, rows such as Wealth and Family span several input indices,
# while Sex, Age and Embarked remain single-input rows.
CIUres_voc_top_titanic = CIU_titanic.explain_voc(titanic_instance, nsamples=1000)
display(CIUres_voc_top_titanic)

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Wealth,0.26,0.576923,0.02,No,0.4,Wealth,0.25,0.51,"[0, 5]","[1.0, 72.0]",0.5,,
Family,0.3,0.566667,0.02,No,0.4,Family,0.23,0.53,"[3, 4]","[0.0, 0.0]",0.5,,
Sex,0.309333,1.0,0.154667,No,0.4,Sex,0.090667,0.4,[1],[1.0],0.5,,
Age,0.63,0.047619,-0.285,No,0.4,Age,0.37,1.0,[2],[8.0],0.5,,
Embarked,0.03,1.0,0.015,No,0.4,Embarked,0.37,0.4,[6],[1.0],0.5,,


## Ames housing

In [6]:
# Ames housing test: obtain the test setup from `ames_housing_gbm` and explain one instance.
from ciu_tests import ames_housing_gbm

# Fix NumPy's global seed so that repeated runs are meant to produce the same model.
# NOTE(review): the helper and the returned model are named gbm/xgb, so this is
# presumably a gradient-boosting model (not a Random Forest) -- confirm in ciu_tests.
np.random.seed(26)
# Rebinds `CIU` (and, on the next line, `CIUres`), which the Boston housing cell also
# assigns; the following explain_voc cell expects `CIU` to be this Ames object.
CIU, ames_xgb_model, ames_instance = ames_housing_gbm.get_ames_gbm_test()
# NOTE(review): the recorded output of this cell contains repeated pandas warnings
# ("has dtype incompatible with int64") pointing at
# `samples.iloc[:,numeric_indices] = numvals`; that statement is not in this notebook,
# so it presumably lives in the CIU library -- worth fixing there.
CIUres = CIU.explain(ames_instance)
display(CIUres)

1     2929.000000
2      901.941476
3     1521.297643
4     2250.343851
         ...     
94    1793.264166
95      39.343832
96    1119.645553
97    1990.607704
98    2675.715373
Name: 0, Length: 99, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,numeric_indices] = numvals
1     2929.000000
2      752.835823
3      303.764511
4     2442.366993
         ...     
94    1742.420851
95     349.078810
96     388.215255
97     587.315982
98    2206.380098
Name: 0, Length: 99, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,numeric_indices] = numvals
1     15.000000
2      1.763509
3      8.669379
4      6.910103
        ...    
94     2.228925
95     9.374908
96     7.223115
97    12.102343
98     0.814329
Name: 0, Length: 99, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,num

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Order,0.031089,0.975402,0.014780,Price,740.222046,Order,708.957336,741.010498,[0],[1561],0.5,,
PID,0.030742,0.807310,0.009447,Price,740.222046,PID,714.634338,746.329346,[1],[2700],0.5,,
MSSubClass,0.000000,0.000000,-0.000000,Price,740.222046,MSSubClass,740.222046,740.222046,[2],[11],0.5,,
MSZoning,0.003970,1.000000,0.001985,Price,740.222046,MSZoning,736.128540,740.222046,[3],[6],0.5,,
LotFrontage,0.035820,0.737216,0.008497,Price,740.222046,LotFrontage,712.996338,749.926758,[4],[20],0.5,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
MiscVal,0.000339,1.000000,0.000169,Price,740.222046,MiscVal,739.872620,740.222046,[76],[0],0.5,,
MoSold,0.022351,0.537845,0.000846,Price,740.222046,MoSold,727.828247,750.871704,[77],[6],0.5,,
YrSold,0.010858,1.000000,0.005429,Price,740.222046,YrSold,729.027527,740.222046,[78],[2],0.5,,
SaleType,0.042302,1.000000,0.021151,Price,740.222046,SaleType,696.609192,740.222046,[79],[9],0.5,,


In [7]:
# Explain the Ames instance through `explain_voc`, with nsamples=1000.
# Relies on `CIU` and `ames_instance` as bound by the Ames housing cell; `CIU` is also
# assigned by the Boston housing cell, so running cells out of order would explain
# with the wrong object.
# In the recorded output, rows such as Garage, Basement and Lot span several input indices.
CIUres_voc_top = CIU.explain_voc(ames_instance, nsamples=1000)
display(CIUres_voc_top)

1      0.000000
2      0.000000
3      0.000000
4      0.000000
         ...   
994    2.345969
995    5.298054
996    4.315336
997    4.131902
998    0.636644
Name: 0, Length: 999, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,numeric_indices] = numvals
1       0.000000
2       0.000000
3       0.000000
4       0.000000
         ...    
994    46.944248
995    36.545344
996    68.060531
997    11.452274
998    44.734004
Name: 1, Length: 999, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,numeric_indices] = numvals
1      0.000000
2      0.000000
3      0.000000
4      0.000000
         ...   
994    2.161661
995    2.997343
996    1.388873
997    0.253652
998    0.085650
Name: 2, Length: 999, dtype: float64' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  samples.iloc[:,numeric_indices] = numvals
1  

Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
Garage,0.064993,0.822862,0.020984,Price,740.222046,Garage,685.084045,752.091675,"[59, 60, 61, 62, 63, 64, 65]","[1, 103, 0, 2, 224, 5, 5]",0.5,,
Basement,0.199268,1.0,0.099634,Price,740.222046,Basement,534.776672,740.222046,"[31, 32, 33, 34, 35, 36, 37, 38, 39, 48, 49]","[0, 5, 1, 2, 941, 6, 0, 81, 847, 1, 1]",0.5,,
Lot,0.062837,0.798825,0.018777,Price,740.222046,Lot,688.470215,753.255127,"[3, 4, 7, 8, 9, 10, 11]","[6, 20, 1, 0, 1, 0, 4]",0.5,,
Access,0.066235,0.998431,0.033013,Price,740.222046,Access,672.041138,740.329163,"[13, 14]","[6, 2]",0.5,,
House_type,0.071751,0.700295,0.014371,Price,740.222046,House_type,688.417786,762.3927,"[1, 15, 16, 21]","[2700, 2, 4, 55]",0.5,,
House_aesthetics,0.056205,0.93865,0.024654,Price,740.222046,House_aesthetics,685.829773,743.7771,"[22, 23, 24, 25, 26]","[3, 1, 5, 5, 4]",0.5,,
House_condition,0.445799,0.770961,0.120794,Price,740.222046,House_condition,385.873962,845.492798,"[20, 18, 21, 28, 19, 29]","[111, 6, 55, 2, 4, 4]",0.5,,
Electrical,0.000295,1.0,0.000147,Price,740.222046,Electrical,739.918396,740.222046,[43],[4],0.5,,
GrLivArea,0.149149,0.894286,0.058807,Price,740.222046,GrLivArea,602.7052,756.477905,[47],[711],0.5,,


## Heart disease

The target variable has been restricted to two classes: "no disease" and "disease". The original data classifies disease into four different classes.

In [8]:
from ciu_tests import heart_disease_rf

# Fix NumPy's global seed so that repeated runs are meant to produce the same
# Random Forest model.
np.random.seed(26)

# Index of the instance to explain. Instance 0 has no disease; instance 2 (for
# instance) has a higher probability of disease than of no disease.
inst_ind = 2
CIU_hd, hd_model, hd_instance = heart_disease_rf.get_heart_disease_rf(inst_ind)

# Show the selected instance, then the model's class probabilities for it.
print(hd_instance)
print(hd_model.predict_proba(hd_instance))

# Explain twice: first with explain()'s default output, then with output_inds=1
# (labelled "No" and "Yes" respectively in the recorded output). CIUres_hd ends up
# holding the last result.
for explain_kwargs in ({}, {"output_inds": 1}):
    CIUres_hd = CIU_hd.explain(hd_instance, **explain_kwargs)
    display(CIUres_hd)

      age  sex   cp  trestbps   chol  fbs  restecg  thalach  exang  oldpeak  \
106  57.0  1.0  3.0     128.0  229.0  0.0      2.0    150.0    0.0      0.4   

     slope   ca  thal  
106    2.0  1.0   7.0  
[[0.41 0.59]]


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
age,0.1,0.2,-0.03,No,0.41,age,0.39,0.49,[0],[57.0],0.5,,
sex,0.03,0.0,-0.015,No,0.41,sex,0.41,0.44,[1],[1.0],0.5,,
cp,0.16,1.0,0.08,No,0.41,cp,0.25,0.41,[2],[3.0],0.5,,
trestbps,0.09,0.888889,0.035,No,0.41,trestbps,0.33,0.42,[3],[128.0],0.5,,
chol,0.11,0.181818,-0.035,No,0.41,chol,0.39,0.5,[4],[229.0],0.5,,
fbs,0.07,0.0,-0.035,No,0.41,fbs,0.41,0.48,[5],[0.0],0.5,,
restecg,0.09,0.0,-0.045,No,0.41,restecg,0.41,0.5,[6],[2.0],0.5,,
thalach,0.07,1.0,0.035,No,0.41,thalach,0.34,0.41,[7],[150.0],0.5,,
exang,0.07,0.0,-0.035,No,0.41,exang,0.41,0.48,[8],[0.0],0.5,,
oldpeak,0.25,0.92,0.105,No,0.41,oldpeak,0.18,0.43,[9],[0.4],0.5,,


Unnamed: 0,CI,CU,Cinfl,outname,outval,feature,ymin,ymax,inputs,invals,neutralCU,target_concept,target_inputs
age,0.1,0.8,0.03,Yes,0.59,age,0.51,0.61,[0],[57.0],0.5,,
sex,0.03,1.0,0.015,Yes,0.59,sex,0.56,0.59,[1],[1.0],0.5,,
cp,0.16,0.0,-0.08,Yes,0.59,cp,0.59,0.75,[2],[3.0],0.5,,
trestbps,0.09,0.111111,-0.035,Yes,0.59,trestbps,0.58,0.67,[3],[128.0],0.5,,
chol,0.12,0.833333,0.04,Yes,0.59,chol,0.49,0.61,[4],[229.0],0.5,,
fbs,0.07,1.0,0.035,Yes,0.59,fbs,0.52,0.59,[5],[0.0],0.5,,
restecg,0.09,1.0,0.045,Yes,0.59,restecg,0.5,0.59,[6],[2.0],0.5,,
thalach,0.07,0.0,-0.035,Yes,0.59,thalach,0.59,0.66,[7],[150.0],0.5,,
exang,0.07,1.0,0.035,Yes,0.59,exang,0.52,0.59,[8],[0.0],0.5,,
oldpeak,0.25,0.08,-0.105,Yes,0.59,oldpeak,0.57,0.82,[9],[0.4],0.5,,
