In [1]:
from counterfactuals.explainers import Fimap
from counterfactuals.constraints import ValueMonotonicity, ValueNominal, Freeze
from data import AdultData
from tensorflow import keras
import random
import numpy as np
import tensorflow as tf
import os
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

def reset_random_seeds(seed=42):
    """Seed the global RNGs of `random`, NumPy and TensorFlow with ``seed``.

    Also stores ``seed`` (as a string) in the ``PYTHONHASHSEED`` environment
    variable.

    Args:
        seed: Value handed to every seeding call. Defaults to 42.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)
    # os.environ only accepts strings, hence the explicit conversion.
    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed every RNG once, with the default seed, before any later cell loads data or trains a model.
reset_random_seeds()

In [2]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Load the Adult dataset wrapper; the cells below read its train/test splits.
adult_data = AdultData('data/datasets/adult.csv')

# Black-box classifier: a 10-tree random forest fitted on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
rf = RandomForestClassifier(n_estimators=10).fit(
    adult_data.X_train,
    adult_data.y_train,
)
# `model` is a second name for the same forest object.
model = rf

# Per-class precision/recall/F1 of the forest on the test split.
print(
    classification_report(
        y_true=adult_data.y_test,
        y_pred=rf.predict(adult_data.X_test),
    )
)

              precision    recall  f1-score   support

           0       0.90      0.75      0.82      4533
           1       0.50      0.76      0.60      1500

    accuracy                           0.75      6033
   macro avg       0.70      0.75      0.71      6033
weighted avg       0.80      0.75      0.77      6033



In [3]:
# Disabled alternative setup: load a saved Keras model and threshold its
# predictions at 0.5. The code sits inside a string literal, so none of it runs;
# because the string is the cell's only expression, the notebook echoes it back
# as the cell output.
"""
adult_data = AdultData('data/datasets/adult.csv')
model = keras.models.load_model('models/model_adult')
model_predictions = model.predict(adult_data.X_train)
model_predictions[model_predictions > 0.5] = 1
model_predictions[model_predictions <= 0.5] = 0
"""

"\nadult_data = AdultData('data/datasets/adult.csv')\nmodel = keras.models.load_model('models/model_adult')\nmodel_predictions = model.predict(adult_data.X_train)\nmodel_predictions[model_predictions > 0.5] = 1\nmodel_predictions[model_predictions <= 0.5] = 0\n"

In [4]:
# Short aliases for the dataset's raw_* train/test splits.
train_X = adult_data.raw_X_train
test_X = adult_data.raw_X_test
train_y = adult_data.raw_y_train
test_y = adult_data.raw_y_test

# Labels the random forest assigns to the training rows.
model_predictions = rf.predict(adult_data.X_train)

In [5]:
def compare(i, fimap):
    """Stack test instance ``i`` on top of its Fimap counterfactual.

    Args:
        i: Positional row index into the test split (used with ``.iloc``).
        fimap: Explainer object providing ``generate(x)`` and, once that has
            been called, the ``_s_prediction`` attribute.

    Returns:
        A DataFrame whose first row is the original instance and whose
        remaining rows are whatever ``fimap.generate`` returned. Both parts
        get an extra ``"income"`` column: the black-box model's prediction for
        the original row, and ``fimap._s_prediction`` for the counterfactual.
    """
    # Predict on the same split the explained row is taken from. Previously the
    # label came from X_train.iloc[i] while the row came from test_X.iloc[i],
    # so the "income" on the original row belonged to an unrelated instance.
    # NOTE(review): assumes adult_data.X_test and raw_X_test (test_X) are
    # row-aligned, as X_train / raw_X_train already must be for fitting — confirm.
    # iloc[[i]] yields a one-row DataFrame directly, keeping column dtypes.
    original_class = model.predict(adult_data.X_test.iloc[[i]])[0]

    # Work on a copy so that adding "income" below never touches test_X.
    x = test_X.iloc[i].copy()

    # The explainer must see the row before the "income" label is attached.
    cf = fimap.generate(x)
    x["income"] = original_class

    # NOTE(review): private attribute read right after generate(); presumably the
    # surrogate's predicted class for the counterfactual — confirm with Fimap.
    surrogate_class = fimap._s_prediction
    cf["income"] = surrogate_class

    return pd.concat([x.to_frame().T, cf])

In [6]:
    
# Experiment: a single ValueNominal constraint over the categorical columns.
fimap = Fimap(
    constraints=[ValueNominal(adult_data.categorical_columns)],
)
# The fit targets are the classifier's predictions held in `model_predictions`.
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)

# One original/counterfactual frame per row position 0..199, stacked and saved.
dfs = [compare(row_idx, fimap) for row_idx in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_nominal_constraints.csv")


Training s
Training loss (for one batch): 0.5308 
Training accuracy 0.69288886
Training loss (for one batch): 0.5038 
Training accuracy 0.72575366
Training loss (for one batch): 0.4884 
Training accuracy 0.7407558
Training loss (for one batch): 0.4828 
Training accuracy 0.7497989
Training loss (for one batch): 0.4786 
Training accuracy 0.7558369
Training loss (for one batch): 0.4717 
Training accuracy 0.76061434
Training loss (for one batch): 0.4660 
Training accuracy 0.76433724
Training loss (for one batch): 0.4639 
Training accuracy 0.76759064
Training loss (for one batch): 0.4562 
Training accuracy 0.7704118
Training loss (for one batch): 0.4549 
Training accuracy 0.77283984

Training g
Training loss (for one batch): 1.1825 
Training accuracy 0.47075087
Training loss (for one batch): 1.1029 
Training accuracy 0.48439524
Training loss (for one batch): 1.0474 
Training accuracy 0.49620244
Training loss (for one batch): 1.0273 
Training accuracy 0.5114921
Training loss (for one batch)

In [7]:
# Show which constraints the dataset object ships with.
for constraint in adult_data.constraints:
    print(constraint)

# Experiment: exactly the dataset's own constraint list.
fimap = Fimap(
    constraints=adult_data.constraints,
)
# The fit targets are the classifier's predictions held in `model_predictions`.
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)

# One original/counterfactual frame per row position 0..199, stacked and saved.
dfs = [compare(row_idx, fimap) for row_idx in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_normal_constraints.csv")

ValueNominal(columns=['workclass', 'marital.status', 'occupation', 'race', 'sex'], values=[])
Freeze(columns=['race', 'sex'])

Training s
Training loss (for one batch): 0.5283 
Training accuracy 0.70511377
Training loss (for one batch): 0.4976 
Training accuracy 0.7348483
Training loss (for one batch): 0.4825 
Training accuracy 0.7484742
Training loss (for one batch): 0.4752 
Training accuracy 0.7566303
Training loss (for one batch): 0.4687 
Training accuracy 0.7624265
Training loss (for one batch): 0.4623 
Training accuracy 0.76677454
Training loss (for one batch): 0.4576 
Training accuracy 0.7703625
Training loss (for one batch): 0.4549 
Training accuracy 0.77332395
Training loss (for one batch): 0.4504 
Training accuracy 0.77585846
Training loss (for one batch): 0.4482 
Training accuracy 0.77807933

Training g
Training loss (for one batch): 1.2467 
Training accuracy 0.45516923
Training loss (for one batch): 1.1159 
Training accuracy 0.47285
Training loss (for one batch): 1.0666 
Tra

In [8]:
# Show which constraints the dataset object ships with.
for constraint in adult_data.constraints:
    print(constraint)

# Experiment: the dataset's constraints plus an 'increasing' ValueMonotonicity
# constraint on 'age'. List `+` builds a new list, so adult_data.constraints
# itself is not modified.
fimap = Fimap(
    constraints=adult_data.constraints + [ValueMonotonicity(['age'], 'increasing')],
)
# The fit targets are the classifier's predictions held in `model_predictions`.
fimap.fit(adult_data.raw_X_train, model_predictions, epochs=400)

# One original/counterfactual frame per row position 0..199, stacked and saved.
dfs = [compare(row_idx, fimap) for row_idx in range(200)]
concat_df = pd.concat(dfs)
concat_df.to_csv("results_normal_constraints_age_increasing.csv")

ValueNominal(columns=['workclass', 'marital.status', 'occupation', 'race', 'sex'], values=[])
Freeze(columns=['race', 'sex'])

Training s
Training loss (for one batch): 0.5215 
Training accuracy 0.681376
Training loss (for one batch): 0.4931 
Training accuracy 0.7238441
Training loss (for one batch): 0.4800 
Training accuracy 0.7419364
Training loss (for one batch): 0.4747 
Training accuracy 0.7520714
Training loss (for one batch): 0.4687 
Training accuracy 0.75899947
Training loss (for one batch): 0.4645 
Training accuracy 0.76409745
Training loss (for one batch): 0.4599 
Training accuracy 0.76818544
Training loss (for one batch): 0.4550 
Training accuracy 0.7716432
Training loss (for one batch): 0.4491 
Training accuracy 0.7744878
Training loss (for one batch): 0.4467 
Training accuracy 0.77689105

Training g
Training loss (for one batch): 1.1825 
Training accuracy 0.4538099
Training loss (for one batch): 1.0890 
Training accuracy 0.47320604
Training loss (for one batch): 1.0952 
Tra