Assess Spaceship Titanic with CredoAI - Lens
============================================

In [1]:
import pandas as pd
import joblib

import warnings
warnings.filterwarnings('ignore')

from credoai.lens import Lens
from credoai.artifacts import ClassificationModel, TabularData
from credoai.evaluators import ModelFairness, Performance

# For this import to work, the Spaceship_Titanic_data notebook must first be exported as a
# script via "File / Save and export notebook as... / Executable Script".
import Spaceship_Titanic_data

(train) Number of rows = 8693 and Number of cols = 14
(test) Number of rows = 4277 and Number of cols = 13


Load trained model
------------------


In [9]:
# Re-export the helper from the companion module under a short notebook-level name.
feature_engineering = Spaceship_Titanic_data.feature_engineering

# Deserialize the persisted pipeline. joblib uses pickle under the hood, so
# only load model files that come from a trusted source.
with open('model.jlb', 'rb') as model_file:
    model = joblib.load(model_file)

# Pull the first three pipeline steps apart; each entry of `model.steps` is a
# (name, estimator) pair, so index 1 selects the estimator itself.
# NOTE(review): assumes the step order is processor -> target dropper ->
# classifier - confirm against the training notebook.
processor, drop_target, classifier = (model.steps[position][1] for position in range(3))

Run a battery of evaluators
---------------------------

Prepare the training data for evaluation

In [13]:
raw_train = Spaceship_Titanic_data.train_data

# Run the training frame through the first two pipeline steps.
# NOTE(review): fit_transform re-fits steps taken from the already-trained
# pipeline; confirm whether a plain transform was intended here.
transformed_data = processor.fit_transform(raw_train)
dropped_data = drop_target.fit_transform(transformed_data)

# Build a separate feature-engineered + imputed copy of the training frame,
# used below only to read the Age column.
engineered_data = Spaceship_Titanic_data.fe_eng.fit_transform(raw_train)
imputed_data = Spaceship_Titanic_data.imputer.fit_transform(engineered_data)

# Bucket ages into 10 quantile-based bins.
ages = pd.qcut(imputed_data['Age'], q=10)

Wrap both the model and the data in CredoAI - Lens structures

In [4]:
# Lens wrapper around the classifier step extracted from the pipeline.
credo_model = ClassificationModel(
    name="titanic_default_classifier",
    model_like=classifier,
)

# Lens wrapper around the evaluation data: features without the target,
# the target column itself, and the binned ages as the sensitive feature.
credo_data = TabularData(
    name="titanic-default",
    X=dropped_data,
    y=transformed_data['Transported'],
    sensitive_features=ages,
)

Create a Lens and execute evaluators

In [12]:
lens = Lens(model=credo_model, assessment_data=credo_data)

# Precision score: share of predicted positives that are true positives, TP / (TP + FP)
#   https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#precision-score
# Recall score (true positive rate): share of actual positives predicted positive, TP / (TP + FN)
#   https://credoai-lens.readthedocs.io/en/latest/pages/metrics.html#true-positive-rate
performance_metrics = ['precision_score', 'recall_score']

# `equal_opportunity` is only meaningful for the fairness evaluator: the
# Performance evaluator's results contain just the two performance metrics,
# so it is kept out of the list handed to Performance.
metrics = performance_metrics + ['equal_opportunity']

lens.add(ModelFairness(metrics=metrics))
lens.add(Performance(metrics=performance_metrics))
lens.run()

2023-04-19 10:52:53,416 - lens - INFO - Evaluator ModelFairness added to pipeline. Sensitive feature: Age
2023-04-19 10:52:53,492 - lens - INFO - Evaluator Performance added to pipeline. 
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.1s finished


<credoai.lens.lens.Lens at 0x7efeeca63880>

In [21]:
# Take the first result set produced by the ModelFairness evaluator.
fairness_results = lens.get_results(evaluator_name='ModelFairness')[0]

# First table: overall fairness/parity values; second table: metrics
# broken down per Age bin.
display(fairness_results['results'][0], fairness_results['results'][1])

Unnamed: 0,type,value
0,equal_opportunity,0.197538
0,precision_score_parity,0.090087
1,recall_score_parity,0.197538


Unnamed: 0,Age,type,value
0,"(-0.001, 13.0]",precision_score,0.741531
1,"(13.0, 18.0]",precision_score,0.804067
2,"(18.0, 21.0]",precision_score,0.740648
3,"(21.0, 24.0]",precision_score,0.771208
4,"(24.0, 27.0]",precision_score,0.792593
5,"(27.0, 30.0]",precision_score,0.830735
6,"(30.0, 35.0]",precision_score,0.797674
7,"(35.0, 40.0]",precision_score,0.827298
8,"(40.0, 49.0]",precision_score,0.822323
9,"(49.0, 79.0]",precision_score,0.822581


In [8]:
# Take the first result set produced by the Performance evaluator.
performance_results = lens.get_results(evaluator_name='Performance')[0]

# Table 0 holds the overall metrics; table 1 is the confusion matrix in long form.
display(performance_results['results'][0], performance_results['results'][1])

Unnamed: 0,type,value
0,precision_score,0.790921
1,recall_score,0.827775


Unnamed: 0,true_label,predicted_label,value
0,False,False,0.777984
1,True,False,0.172225
2,False,True,0.222016
3,True,True,0.827775
