In [1]:
# Source repository for the counterfactual_xai package imported below: github.com/lsch0lz/counterfactuals.git

In [None]:
import torch
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from counterfactual_xai.utils.lsat_dataloader.lsat_dataloader import LsatDataloader
from counterfactual_xai.utils.datafeed import DataFeed
from counterfactual_xai.utils.clue.gaussian_mlp import GaussianMLP
from counterfactual_xai.utils.clue.bnn.gaussian_bnn import GaussianBNN
from counterfactual_xai.utils.clue.bnn.train_regression import train_BNN_regression
from counterfactual_xai.utils.mimic_dataloader import MimiDataLoader


In [3]:
# Data preparation
# Read the CSV and relabel the rows 1..N (the default index would be 0..N-1).
df_clean = pd.read_csv('brfss13.csv')
df_clean.index = pd.RangeIndex(start=1, stop=len(df_clean) + 1)

# Remove the "Unnamed: 0" column (presumably an index column written out when
# the CSV was saved — TODO confirm) and binarise the target: Yes -> 1, No -> 0.
df_clean = df_clean.drop(columns=["Unnamed: 0"])
df_clean = df_clean.assign(Heartdis=df_clean['Heartdis'].map({'Yes': 1, 'No': 0}))

In [4]:
# Binary encoding: the Yes/No health indicators become 1/0 and Gender becomes
# Male -> 0, Female -> 1. Values outside these mappings turn into NaN.
df_clean = df_clean.assign(
    **{
        column: df_clean[column].map({'Yes': 1, 'No': 0})
        for column in ('Diabetes', 'Kidney', 'Stroke')
    },
    Gender=df_clean['Gender'].map({'Male': 0, 'Female': 1}),
)

# One-hot encode the multi-level columns; every level is kept (drop_first=False),
# with the Smoking indicator columns placed before the Age ones.
df_clean = pd.get_dummies(df_clean, columns=['Smoking', 'Age'], drop_first=False)

In [5]:
# Prediction features (everything except the target) and the target itself
y = df_clean['Heartdis']
x = df_clean.drop(columns='Heartdis')

# 80/20 hold-out split with a fixed random_state so the partition is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.20, random_state=42
)

# Cast both feature frames to float32.
x_train, x_test = (features.astype(np.float32) for features in (x_train, x_test))

# Mean and standard deviation of the training target; handed to the trainer
# later as y_mu / y_std.
y_means, y_stds = y_train.mean(), y_train.std()

In [6]:
# Wrap the raw NumPy arrays of each split in a DataFeed:
# the training split becomes `trainset`, the held-out split becomes `valset`.
trainset, valset = (
    DataFeed(features.values, targets.values)
    for features, targets in ((x_train, y_train), (x_test, y_test))
)

In [7]:
# Seed the global NumPy and PyTorch RNGs before any weights are created, so a
# Restart & Run All repeats the same initialisation and training noise.
# NOTE(review): assumes GaussianMLP and the BNN trainer draw from these global
# generators — confirm against the counterfactual_xai implementation.
SEED = 42  # same value as random_state in the train/test split
np.random.seed(SEED)
torch.manual_seed(SEED)

# Network with one input per feature column and a single output;
# width, depth and flatten_image are passed through to GaussianMLP unchanged.
model = GaussianMLP(
    input_dim=x_train.shape[1],
    width=200,
    depth=2,
    output_dim=1,
    flatten_image=False,
)

In [8]:
# Bayesian model initialization
log_interval = 1
cuda = False              # forwarded to GaussianBNN and to the trainer
N_train = len(x_train)    # number of training rows

# Wrap the network in the Bayesian model object.
model_bnn = GaussianBNN(model, N_train, lr=0.01, cuda=cuda)

Total params: 0.05M


In [None]:
# Settings forwarded to train_BNN_regression.
save_dir = "/Results1/"
nb_epochs = 2200
batch_size = 512
burn_in = 120
sim_steps = 20
N_saves = 100
resample_its = 10
resample_prior_its = 50  # 45 can be chosen to better control overfitting
re_burn = 1e7

# Training: returns the train cost, dev cost, dev RMS and dev log-likelihood.
cost_train, cost_dev, rms_dev, ll_dev = train_BNN_regression(
    model_bnn,
    save_dir,
    batch_size,
    nb_epochs,
    trainset,
    valset,
    cuda,
    burn_in,
    sim_steps,
    N_saves,
    resample_its,
    resample_prior_its,
    re_burn,
    flat_ims=False,
    nb_its_dev=10,
    y_mu=y_means,
    y_std=y_stds,
)

In [None]:
# CLUE with the trained model
# NOTE(review): `Clue` is not imported in any cell visible in this notebook, so
# this line raises NameError on a fresh kernel — add the appropriate import.
clue = Clue(model=model_bnn, datafeed=trainset)

# Example individual to explain (Sick == 1): the first test row labelled 1.
idx = y_test.loc[y_test.eq(1)].index[0]
x_orig = x_test.loc[idx].values

# Counterfactual
cf_result = clue.explain(x_orig)
print("Contrefait :", cf_result)


In [None]:
import os

# List the entries of the CLUE utilities directory (path is relative to the
# current working directory).
clue_dir_entries = os.listdir("counterfactuals/counterfactual_xai/utils/clue")
print(clue_dir_entries)