In [3]:
import pandas as pd
import numpy as np
import pickle

# transparentai package : https://github.com/Nathanlauga/transparentai
from transparentai.datasets import load_adult, load_boston

# transparentai.__SAVEPLOT__ = True

In [6]:
with open('model.pkl', 'rb') as file:
    model = pickle.load(file)
file.close()

In [12]:
training_metrics = {
    'performance':{
        'accuracy':0.0,
        'f1':0.0
    },
    'bias':{
        'protected_attr':{
            
        }
        'dataset':{

        },
        'model':{

        }
    }
}

bad_behavior = {
    'same'
}

new_data = {
    'X':None,
    'y_pred':None,
    'timestamp':None,
    'y_real':None
}

In [11]:
training_metrics

{'performance': {'accuracy': 0.0, 'f1': 0.0},
 'bias': {'dataset': {}, 'model': {}}}

# Monitoring

I need to answer the following questions :
- Is my model doing well since training ?
- Are new data biased ?

Checklist new data : 
1. **New X ==> shape (m,n)** [MANDATORY]
2. **predictions ==> shape (m,1)** [MANDATORY]
3. [optional] y_real ==> shape (m,1) [RECOMANDED]
5. Define protected_attr if not retrieve from training [RECOMANDED]
6. [optional] Define bad_behavior for alert

```
[1] Dataset Bias (with/without comparison) [need protected_attr]
[2a] Model Bias without y_real (with/without comparison) [need protected_attr]
[2b] Model (with/without comparison) [need protected_attr]
[3] perf (with/without comparison) 
[4] perf evolution 
```

# I have originals metrics

1. I don't have timestamp and y_real ==> [1][2a] with comparison 
2. I have only y_real ==> [1][2b][3] with comparison 
3. I have both ==> [1][2b][3][4] with comparison 


# I don't have originals metrics but original X_test & y_test

Autogenerate training metrics (perf & bias)

1. I don't have timestamp and y_real ==> [1][2a] with comparison 
2. I have only y_real ==> [1][2b][3] with comparison 
3. I have both ==> [1][2b][3][4] with comparison 


# I don't have originals metrics and not original X_test & y_test

1. I don't have timestamp and y_real ==> [1][2a] without comparison 
2. I have only y_real ==> [1][2b][3] without comparison 
3. I have both ==> [1][2b][3][4] without comparison 


In [1]:
import numpy as np
import pandas as pd
import warnings

from transparentai.models import ClassificationModel, RegressionModel
from transparentai.datasets import StructuredDataset
from transparentai.fairness import DatasetBiasMetric, ModelBiasMetric

class Monitoring():
    """
    """
    
    def __init__(self, X, y_preds, y_real=None, model_type='classification',
                 orig_metrics=None, privileged_groups=None, alert_threshold=None):
        """
        """
        if model_type not in ['classification','regression']:
            raise ValueError('Only regression and classification are handled  for model_type.')
        if type(X) not in [pd.DataFrame]:
            raise TypeError('X has to be a pandas dataframe')
        if len(X) != len(y_preds):
            raise ValueError('y_preds and X must have the same length')
        if y_real is not None:
            if len(X) != len(y_real):
                raise ValueError('y_real and X must have the same length')
        if orig_metrics is not None:
            metrics = ['performance','bias_dataset','bias_model']
        
        self.X = X
        self.y_preds = y_preds
        self.y_real = y_real
        self.model_type = model_type
        self.orig_metrics = orig_metrics
        self.privileged_groups = privileged_groups
        self.alert_threshold = alert_threshold
        
        df = X.copy()
        df['target'] = y_preds if y_real is None else y_real
        self.dataset = StructuredDataset(df=df, target='target')
        
        self._compute_new_metrics()
        
        if orig_metrics is not None:
            self._check_orig_and_new_metrics()
        
        
    def compute_orig_metrics(self, X_orig, y_orig):
        """
        Only if you don't have original metrics already stored
        """
        # Todo
        orig_metrics = {}
        
        self.orig_metrics = orig_metrics
        
    def _compute_new_metrics(self):
        """
        """
        new_metrics = {}
        # handle only 2 first ?
#         model_bias_metrics = ['Disparate impact', 'Statistical parity difference']
#         model_bias_metrics += ['Equal opportunity difference', 'Average abs odds difference', 'Theil index']
        
        # I have y_real ==> compute model perf & define model bias to compute
        if (self.y_real is not None):
            if self.model_type == 'classification':
                model_obj = ClassificationModel
            elif self.model_type == 'regression':
                model_obj = RegressionModel
                
            model = model_obj(X=self.X, y=self.y_real, y_preds=y_preds)
            new_metrics['performance'] = model.scores_to_json()
            
        
        # I have protected attr ==> compute dataset bias & model bias
        if (self.privileged_groups is not None):
            
            bias = DatasetBiasMetric(self.dataset, privileged_groups)
            new_metrics['bias_dataset'] = bias.metrics_to_json()
            
            if self.y_real is not None:
                bias = ModelBiasMetric(self.dataset, self.y_preds, self.privileged_groups)
                new_metrics['bias_model'] = bias.metrics_to_json()
            
        self.new_metrics = new_metrics
        
    def _check_orig_and_new_metrics(self):
        """
        """
        # Check performance
        
        
#         else:
        # WARNING only n metrics can be compare
        print()

    def plot_perfomance(self):
        """
        """
        if self.orig_metrics is None:
            print()
            # plot_bar_performance(self.new_metrics, alert_threshold=alert_threshold)
        else:
#             plot_bar_performance_comparison(self.new_metrics, 
#                                             self.orig_metrics,
#                                             alert_threshold=alert_threshold)
            print()
    
    def plot_bias(self):
        """
        """
        if self.orig_metrics is None:
            print()
            # plot_gauge_bias(self.new_metrics, alert_threshold=alert_threshold)
        else:
#             plot_gauge_bias_comparison(self.new_metrics, 
#                                        self.orig_metrics,
#                                        alert_threshold=alert_threshold)
            print()

    def insight(self):
        """
        """
        # TODO
        print()
        
    

In [2]:
from sklearn.linear_model import LinearRegression

from transparentai.datasets import load_adult, load_boston

boston = load_boston()
boston['age category'] = np.where(boston['AGE'] < 26, 0,
                                 np.where(boston['AGE'] < 61, 1, 2))
target='MEDV'
privileged_groups = {
    'age category': [1]
}          

data = boston.copy()
# data['age category'] = data['age category'].replace({'Young':0, 'Adult':1, 'Elder':2})

X_reg, y_reg = data.drop(columns=target), data[target]
reg = LinearRegression().fit(X_reg, y_reg)
y_preds = reg.predict(X_reg)

dataset = StructuredDataset(df=boston, target='MEDV')
dataset_bias = DatasetBiasMetric(dataset=dataset, privileged_groups=privileged_groups)
model = RegressionModel(model=reg)
model.compute_scores(X=X_reg, y=y_reg)
model_bias = ModelBiasMetric(dataset=dataset, preds=y_preds,
                             privileged_groups=privileged_groups)

metrics_bias_data = dataset_bias.metrics_to_json()
metrics_bias_model = model_bias.metrics_to_json()
metrics_performance = model.scores_to_json()

orig_metrics = {
    'performance':metrics_performance,
    'bias_dataset':metrics_bias_data,
    'bias_model':metrics_bias_model
}

new_X = X_reg.sample(50)
new_y = y_reg.loc[new_X.index]
y_preds = reg.predict(new_X)

alert_threshold = {
    'MAE':5.,
    'MSE':50.,
    'RMSE':45.,
    'R2':0.6
}

test = Monitoring(X=new_X, y_preds=y_preds, y_real=new_y, privileged_groups=privileged_groups,
                  model_type='regression', alert_threshold=alert_threshold)
test.new_metrics

{'performance': {'MAE': 3.5300412243731283,
  'MSE': 27.840428958079606,
  'RMSE': 27.840428958079606,
  'R2': 0.6077531114662875},
 'bias_dataset': {'age category': {'<=22.68': '{"Disparate impact":3.6315789474,"Statistical parity difference":0.4385964912}',
   '>22.68': '{"Disparate impact":0.4736842105,"Statistical parity difference":-0.4385964912}'}},
 'bias_model': {'age category': {'<=22.68': '{"Disparate impact":2.1052631579,"Statistical parity difference":0.2763157895,"Equal opportunity difference":-0.2173913043,"Average abs odds difference":0.1253623188,"Theil index":0.1274653921}',
   '>22.68': '{"Disparate impact":0.6315789474,"Statistical parity difference":-0.2763157895,"Equal opportunity difference":-0.0333333333,"Average abs odds difference":0.1253623188,"Theil index":0.0940768216}'}}}

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from transparentai.utils import encode_categorical_vars, labelencoder_to_dict

adult = load_adult()
adult['age category'] = np.where(adult['age'] < 26, 'Young',
                                 np.where(adult['age'] < 61, 'Adult','Elder'))
target='income'
privileged_groups = {
    'age category': ['Adult'],
    'marital-status': ['Married-civ-spouse','Married-AF-spouse'],
    'race': ['White'],
    'gender': ['Male']
}   

target_value = {'>50K':1, '<=50K':0}
adult[target] = adult[target].replace(target_value)
adult, encoders = encode_categorical_vars(adult)
X, y = adult.drop(columns=target), adult[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf = RandomForestClassifier(random_state=0)
clf.fit(X_train, y_train)


def decode_data(X):
    df = X.copy()
    for feature, encoder in encoders.items():
        df[feature] = encoder.inverse_transform(df[feature])
    return df 

X = decode_data(X_test)

df = X.copy()
target_value = {0:'<=50K', 1:'>50K'}
df['income'] = y_test.replace(target_value)
dataset = StructuredDataset(df=df,target='income')

favorable_label = '>50K'
dataset_bias = DatasetBiasMetric(dataset=dataset, privileged_groups=privileged_groups,
                                 favorable_label=favorable_label)

model = ClassificationModel(model=clf)
model.compute_scores(X=X_test, y=y_test)

preds = pd.Series(clf.predict(X_test)).replace(target_value)
model_bias = ModelBiasMetric(dataset=dataset, preds=preds,
                             privileged_groups=privileged_groups,
                             favorable_label=favorable_label)

metrics_bias_data = dataset_bias.metrics_to_json()
metrics_bias_model = model_bias.metrics_to_json()
metrics_performance = model.scores_to_json()

orig_metrics = {
    'performance':metrics_performance,
    'bias_dataset':metrics_bias_data,
    'bias_model':metrics_bias_model
}


sample = X_test.sample(50)
X = decode_data(sample)

new_X = X
new_y = y_test.loc[new_X.index].replace(target_value)
y_preds = pd.Series(clf.predict(sample)).replace(target_value)

alert_threshold = {
}

test = Monitoring(X=new_X, y_preds=y_preds, y_real=new_y, privileged_groups=privileged_groups,
                  model_type='classification')
test.new_metrics

{'performance': {'accuracy': 0.84,
  'f1': 0.84,
  'precision': 0.84,
  'recall': 0.84},
 'bias_dataset': {'age category': {'<=50K': '{"Disparate impact":0.9807692308,"Statistical parity difference":-0.0147058824}',
   '>50K': '{"Disparate impact":1.0625,"Statistical parity difference":0.0147058824}'},
  'marital-status': {'<=50K': '{"Disparate impact":2.2983870968,"Statistical parity difference":0.5466893039}',
   '>50K': '{"Disparate impact":0.0557184751,"Statistical parity difference":-0.5466893039}'},
  'race': {'<=50K': '{"Disparate impact":1.0588235294,"Statistical parity difference":0.0444444444}',
   '>50K': '{"Disparate impact":0.8181818182,"Statistical parity difference":-0.0444444444}'},
  'gender': {'<=50K': '{"Disparate impact":1.4684210526,"Statistical parity difference":0.3022071307}',
   '>50K': '{"Disparate impact":0.1483253589,"Statistical parity difference":-0.3022071307}'}},
 'bias_model': {'age category': {'<=50K': '{"Disparate impact":1.105,"Statistical parity dif

In [4]:
orig_metrics

{'performance': {'accuracy': 0.864313190222112,
  'f1': 0.8609861500652922,
  'precision': 0.8597211166057749,
  'recall': 0.864313190222112,
  'roc_auc': [0.9104387547348203]},
 'bias_dataset': {'age category': {'>50K': '{"Disparate impact":0.2513138549,"Statistical parity difference":-0.2218542129}'},
  'marital-status': {'>50K': '{"Disparate impact":0.1419849266,"Statistical parity difference":-0.382217429}'},
  'race': {'>50K': '{"Disparate impact":0.5474878574,"Statistical parity difference":-0.1143289004}'},
  'gender': {'>50K': '{"Disparate impact":0.3544886573,"Statistical parity difference":-0.1933251261}'}},
 'bias_model': {'age category': {'>50K': '{"Disparate impact":0.2399307681,"Statistical parity difference":-0.1926260559,"Equal opportunity difference":-0.0866979077,"Average abs odds difference":0.0770036441,"Theil index":0.1077434355}'},
  'marital-status': {'>50K': '{"Disparate impact":0.0794715624,"Statistical parity difference":-0.373522116,"Equal opportunity differe

In [6]:
t = np.array([1,2,2,2])
e = np.array([2,2,2,3])
matrix = pd.crosstab(t, e)

print(matrix)

for val in matrix.columns.values:
    if val not in matrix.index.values:
        matrix.loc[val, :] = 0
        
for val in matrix.index.values:
    if val not in matrix.columns.values:
        matrix.loc[:, val] = 0
        
matrix

col_0  2  3
row_0      
1      1  0
2      2  1


col_0,2,3,1
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1.0,0.0,0
2,2.0,1.0,0
3,0.0,0.0,0
