In [1]:
from CERTIFAI import CERTIFAI
import numpy as np
import torch.nn as nn
import torch
import os
import pandas as pd 
from pickle import dump , load

In [2]:
def load_data(data):
    """Read an array from ``data`` using ``np.load``.

    Args:
        data: Source handed straight to ``np.load`` (the notebook passes
            ``.npy`` file paths).

    Returns:
        The object produced by ``np.load`` for that source.
    """
    loaded = np.load(data)
    return loaded


class AdultClassifier(nn.Module):
    """Fully connected network mapping 13 input features to one sigmoid output.

    Layer widths are 13 -> 64 -> 128 -> 32 -> 16 -> 1. Each hidden layer is
    followed by ReLU and a shared ``Dropout(0.1)``; the final layer is followed
    by a sigmoid, so ``forward`` returns values in (0, 1).
    """

    def __init__(self):
        super().__init__()
        # Submodule names and creation order are left exactly as they were:
        # saved checkpoints refer to these attributes by name.
        self.hidden1 = nn.Linear(13, 64)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(64, 128)
        self.act2 = nn.ReLU()
        self.hidden3 = nn.Linear(128, 32)
        self.act3 = nn.ReLU()
        self.hidden4 = nn.Linear(32, 16)
        self.act4 = nn.ReLU()
        self.output = nn.Linear(16, 1)
        self.act_output = nn.Sigmoid()
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Run ``x`` through the four hidden stages and the sigmoid head."""
        hidden_stages = (
            (self.hidden1, self.act1),
            (self.hidden2, self.act2),
            (self.hidden3, self.act3),
            (self.hidden4, self.act4),
        )
        for linear, activation in hidden_stages:
            # linear -> ReLU -> dropout, in that order, for every hidden stage.
            x = self.dropout(activation(linear(x)))
        return self.act_output(self.output(x))
    
    
def load_model(model_path='./../model_training/adult_credit__model'):
    """Load a saved model with ``torch.load`` and switch it to eval mode.

    Args:
        model_path: Location of the saved model. Defaults to the path this
            notebook has always used, so ``load_model()`` behaves as before.

    Returns:
        The object returned by ``torch.load`` after ``.eval()`` was called on it.
    """
    # SECURITY: torch.load unpickles the file, which can execute arbitrary
    # code. Only point this at checkpoints you produced or trust.
    model = torch.load(model_path)
    model.eval()
    return model

def eval_model(model, input, scaler, columns_to_standardize):
    """Standardize ``input`` and return the model output for its first row.

    Args:
        model: Callable module; ``.eval()`` is invoked on it before inference.
        input: Object exposing ``reshape``; it is reshaped to ``(-1, 13)``.
        scaler: Object whose ``transform`` is applied to the selected columns.
        columns_to_standardize: Column indices passed through ``scaler.transform``.

    Returns:
        Element ``[0][0]`` of the model output converted with ``tolist()``;
        any further rows of a batched input are ignored.
    """
    model.eval()
    # torch.tensor copies its argument, so the caller's array is not modified
    # by the in-place column assignment below.
    features = torch.tensor(input.reshape(-1, 13), dtype=torch.float32).numpy()
    standardized = scaler.transform(features[:, columns_to_standardize])
    features[:, columns_to_standardize] = standardized
    batch = torch.from_numpy(features).type(torch.float)
    with torch.no_grad():
        prob = model(batch)
    first_row = prob.tolist()[0]
    return first_row[0]

def load_scaler(scaler_loc):
    """Unpickle and return the object stored at ``scaler_loc``.

    Args:
        scaler_loc: Path of the pickle file to read.

    Returns:
        The unpickled object (the notebook stores a fitted scaler there).
    """
    # SECURITY: pickle can execute arbitrary code while loading; only use
    # files that come from a trusted source.
    # The context manager closes the file handle, which the previous
    # ``load(open(...))`` form left open.
    with open(scaler_loc, 'rb') as scaler_file:
        return load(scaler_file)


In [3]:
def scale_input(scaler, input, columns_to_standardize):
    """Return a float32 ``(-1, 13)`` copy of ``input`` with selected columns scaled.

    Args:
        scaler: Object whose ``transform`` is applied to the selected columns.
        input: Object exposing ``reshape``; it is reshaped to ``(-1, 13)``.
        columns_to_standardize: Column indices replaced by ``scaler.transform`` output.

    Returns:
        A NumPy float32 array; columns not listed are only cast, not scaled.
    """
    # Going through torch.tensor yields an independent float32 copy.
    scaled = torch.tensor(input.reshape(-1, 13), dtype=torch.float32).numpy()
    selected = scaled[:, columns_to_standardize]
    scaled[:, columns_to_standardize] = scaler.transform(selected)
    return scaled


In [15]:
# Load the saved classifier and the pickled scaler.
model = load_model()
scaler = load_scaler('./../model_training/StandardScaler.pkl')

# (low, high) pair for each of the 13 input columns.
bounds = [
    (17, 90),
    (0, 8),
    (12285, 1455436),
    (1, 16),
    (0, 6),
    (0, 14),
    (0, 5),
    (0, 4),
    (0, 1),
    (0, 99999),
    (0, 4365),
    (1, 99),
    (0, 41),
]
# Lower and upper bounds as arrays, taken from ``bounds`` so they cannot drift apart.
min_array = np.array([low for low, _ in bounds])
max_array = np.array([high for _, high in bounds])

# Every column except index 8 goes through the scaler.
columns_to_standardize = [index for index in range(13) if index != 8]

scaled_min = scale_input(scaler, min_array, columns_to_standardize)
scaled_max = scale_input(scaler, max_array, columns_to_standardize)

In [16]:
def inverse_transform(df_scaled, scaler, columns_to_standardize):
    """Return a new DataFrame with the selected columns mapped back by the scaler.

    Args:
        df_scaled: DataFrame holding scaled values; it is not modified.
        scaler: Object whose ``inverse_transform`` is applied to the selected columns.
        columns_to_standardize: Positional column indices to convert.

    Returns:
        A DataFrame with the same column labels and a fresh default index.
    """
    # Work on a private copy: ``.values`` may be a view of the frame (so writing
    # to it would alter the caller's data) or a read-only array.
    df_values = df_scaled.values.copy()
    # An integer array would silently truncate the scaler's float output.
    if np.issubdtype(df_values.dtype, np.integer):
        df_values = df_values.astype(float)
    print(df_values.shape)
    df_values[:, columns_to_standardize] = scaler.inverse_transform(df_values[:, columns_to_standardize])
    df_original = pd.DataFrame(df_values, columns=df_scaled.columns)
    return df_original

def transform(df, scaler, columns_to_standardize):
    """Return a new DataFrame with the selected columns passed through the scaler.

    Args:
        df: DataFrame holding unscaled values; it is not modified.
        scaler: Object whose ``transform`` is applied to the selected columns.
        columns_to_standardize: Positional column indices to scale.

    Returns:
        A DataFrame with the same column labels and a fresh default index.
    """
    # Work on a private copy: ``.values`` may be a view of the frame (so writing
    # to it would alter the caller's data) or a read-only array.
    df_values = df.values.copy()
    # An integer array would silently truncate the scaler's float output.
    if np.issubdtype(df_values.dtype, np.integer):
        df_values = df_values.astype(float)
    print(df_values.shape)
    df_values[:, columns_to_standardize] = scaler.transform(df_values[:, columns_to_standardize])
    df_original = pd.DataFrame(df_values, columns=df.columns)
    return df_original

def create_pd_dataframe(input_folder, scaler, columns_to_standardize):
    """Collect every ``input_data.npy`` under ``input_folder`` into a scaled DataFrame.

    Each sub-directory of ``input_folder`` contributes one row, loaded from
    ``<sub-directory>/input_data.npy``. Two extra rows holding the per-feature
    minimum and maximum are appended after the loaded rows, and the whole frame
    is then passed through ``transform``.

    Args:
        input_folder: Directory whose sub-directories each hold an ``input_data.npy``.
        scaler: Scaler forwarded to ``transform``.
        columns_to_standardize: Column indices forwarded to ``transform``.

    Returns:
        ``(data_frame, order_list)`` where ``order_list`` holds the
        sub-directory names in the same order as the first rows of ``data_frame``.
    """
    features = ['age','workclass','fnlwgt','education-num','marital-status','occupation','relationship','race','sex',
            'capital-gain','capital-loss','hours-per-week','native-country']
    # Named so the builtins ``max`` / ``min`` are not shadowed.
    feature_max = [90, 8, 1455436, 16, 6, 14, 5, 4, 1, 99999, 4365, 99, 41]
    feature_min = [17, 0, 12285, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
    input_data_list = []
    order_list = []
    # os.listdir gives no ordering guarantee; sorting makes the row order
    # (and therefore order_list) reproducible between runs and machines.
    for entry_name in sorted(os.listdir(input_folder)):
        entry_dir = os.path.join(input_folder, entry_name)
        # Skip stray files such as '.DS_Store'; only directories hold samples.
        if not os.path.isdir(entry_dir):
            continue
        input_array_location = os.path.join(entry_dir, 'input_data.npy')
        input_data_list.append(load_data(input_array_location))
        order_list.append(entry_name)
    input_data_list.append(feature_min)
    input_data_list.append(feature_max)
    data_frame = pd.DataFrame(data=input_data_list, columns=features)
    data_frame = transform(data_frame, scaler, columns_to_standardize)
    return data_frame, order_list

# Build the scaled input table from the NSGA-III output folders and write it
# to the CSV file that CERTIFAI reads.
nsga_output_dir = "./../output/NSGAIII_multi/"
input_df, order_list = create_pd_dataframe(nsga_output_dir, scaler, columns_to_standardize)
input_df.to_csv('certif_input_df.csv', index=False)

(32, 13)


In [17]:
# Create the CERTIFAI instance from the CSV and show which container type it
# uses for the tabular data.
certifai_instance = CERTIFAI.from_csv('certif_input_df.csv')
tab_dataset_type = type(certifai_instance.tab_dataset)
print(tab_dataset_type)

<class 'pandas.core.frame.DataFrame'>


In [18]:
# Generate counterfactuals for every row of the CSV.
# NOTE(review): the recorded run of this cell stopped with a ValueError on the
# last sample (progress 31/32) -- confirm whether the two bound rows appended
# to the input are meant to be explained as well.
certifai_instance.fit(
    model,
    generations=200,
    verbose=True,
    final_k=9,
    classification=False,
    experiment=True,
    distance='L2',
)

32


Generating counterfactual(s) for sample 31:  97%|█████████▋| 31/32 [02:30<00:04,  4.84s/it]


ValueError: Found array with 0 sample(s) (shape=(0, 13)) while a minimum of 1 is required by check_pairwise_arrays.

In [19]:
def save_certif_counterfactuals(certifai_instance_results, order_list, scaler, columns_to_standardize,
                                certif_save_loc='Certif_cf'):
    """Write one CSV of unscaled counterfactuals per entry of ``order_list``.

    For position ``i`` in ``order_list`` the rows in
    ``certifai_instance_results[i][1]`` are turned into a DataFrame, mapped back
    with ``inverse_transform`` and saved as ``<certif_save_loc>/<order_list[i]>.csv``.

    Args:
        certifai_instance_results: Indexable results; element ``[i][1]`` is read
            as the counterfactual rows for sample ``i``.
        order_list: Names used for the output files, aligned with the results by position.
        scaler: Scaler forwarded to ``inverse_transform``.
        columns_to_standardize: Column indices forwarded to ``inverse_transform``.
        certif_save_loc: Output directory; created if missing. Defaults to the
            directory name this notebook has always used.
    """
    features =['age','workclass','fnlwgt','education-num','marital-status','occupation','relationship','race','sex',
            'capital-gain','capital-loss','hours-per-week','native-country']

    # Create the output directory once, instead of re-checking on every iteration.
    os.makedirs(certif_save_loc, exist_ok=True)

    for i, sample_name in enumerate(order_list):
        cf_list = certifai_instance_results[i][1]

        certif_df = pd.DataFrame(data=cf_list, columns=features)
        certif_df = inverse_transform(certif_df, scaler, columns_to_standardize)
        certif_df.to_csv(os.path.join(certif_save_loc, sample_name + '.csv'), index=False)
    

In [20]:
# Export the counterfactuals found by CERTIFAI, one CSV per input sample.
save_certif_counterfactuals(
    certifai_instance_results=certifai_instance.results,
    order_list=order_list,
    scaler=scaler,
    columns_to_standardize=columns_to_standardize,
)

(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
(9, 13)
