In [4]:
from CERTIFAI import CERTIFAI
import numpy as np
import torch.nn as nn
import torch
import os
import pandas as pd 
from pickle import dump , load

In [5]:
def load_data(data):
    """Load a saved NumPy array.

    Args:
        data: Anything ``np.load`` accepts (a path or an open binary file object).

    Returns:
        Whatever ``np.load`` returns for ``data``.
    """
    loaded = np.load(data)
    return loaded


class TaiwaneseCreditClassifier(nn.Module):
    """Fully connected network mapping 23 input features to one sigmoid output.

    Layer widths are 23 -> 64 -> 128 -> 32 -> 16 -> 1. Each hidden layer is
    followed by ReLU and then a single shared ``Dropout(p=0.1)`` module.

    The attribute names below become the ``state_dict`` keys (and the pickled
    module layout), so they must stay stable for previously saved models to
    keep loading.
    """

    def __init__(self):
        super().__init__()
        # Hidden stack: Linear + ReLU pairs, registered in forward order.
        self.hidden1 = nn.Linear(23, 64)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(64, 128)
        self.act2 = nn.ReLU()
        self.hidden3 = nn.Linear(128, 32)
        self.act3 = nn.ReLU()
        self.hidden4 = nn.Linear(32, 16)
        self.act4 = nn.ReLU()
        # Output head: one unit squashed into (0, 1).
        self.output = nn.Linear(16, 1)
        self.act_output = nn.Sigmoid()
        # One dropout module reused after every hidden activation.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Return the sigmoid output for a batch ``x`` whose last dimension is 23."""
        hidden_stack = (
            (self.hidden1, self.act1),
            (self.hidden2, self.act2),
            (self.hidden3, self.act3),
            (self.hidden4, self.act4),
        )
        for linear, activation in hidden_stack:
            x = self.dropout(activation(linear(x)))
        return self.act_output(self.output(x))
    
    
def load_model():
    """Load the saved credit model from disk and switch it to eval mode.

    Returns:
        The object stored at ``./../model_training/Taiwanese_credit_model``,
        after ``.eval()`` has been called on it.
    """
    # NOTE(review): the loaded object is used directly as a module, so the file
    # presumably holds a fully pickled model rather than a state_dict; that
    # requires TaiwaneseCreditClassifier to be defined before loading and the
    # file to be trusted -- confirm.
    checkpoint_path = './../model_training/Taiwanese_credit_model'
    classifier = torch.load(checkpoint_path)
    classifier.eval()
    return classifier

def eval_model(model, input, scaler, columns_to_standardize):
    """Score one raw (unscaled) sample with ``model``.

    Args:
        model: Module to evaluate; it is put in eval mode first.
        input: Array-like with a ``reshape`` method holding a multiple of 23 values.
        scaler: Object whose ``transform`` is applied to the selected columns.
        columns_to_standardize: Column indices passed through ``scaler``.

    Returns:
        The model output for the first row, first column, as a Python float.
        Any further rows in ``input`` are evaluated but not returned.
    """
    model.eval()
    # torch.tensor copies, so the caller's array is not modified by the scaling below.
    features = torch.tensor(input.reshape(-1, 23), dtype=torch.float32).numpy()
    features[:, columns_to_standardize] = scaler.transform(features[:, columns_to_standardize])
    batch = torch.from_numpy(features).type(torch.float)
    with torch.no_grad():
        prob = model(batch)
    return prob.tolist()[0][0]

def load_scaler(scaler_loc):
    """Unpickle and return the object stored at ``scaler_loc``.

    Args:
        scaler_loc: Path to a pickle file.

    Returns:
        The unpickled object.
    """
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    # The context manager closes the handle even if unpickling raises.
    with open(scaler_loc, 'rb') as scaler_file:
        return load(scaler_file)


In [6]:
def scale_input(scaler, input, columns_to_standardize):
    """Return ``input`` as float32 rows of 23 features with selected columns scaled.

    Args:
        scaler: Object whose ``transform`` is applied to the selected columns.
        input: Array-like with a ``reshape`` method holding a multiple of 23 values.
        columns_to_standardize: Column indices passed through ``scaler``.

    Returns:
        A new float32 NumPy array of shape ``(-1, 23)``.
    """
    # torch.tensor copies, so the caller's array is left untouched.
    as_rows = torch.tensor(input.reshape(-1, 23), dtype=torch.float32).numpy()
    as_rows[:, columns_to_standardize] = scaler.transform(as_rows[:, columns_to_standardize])
    return as_rows


In [7]:
# Trained classifier and pickled scaler, both read from ../model_training.
model = load_model()
scaler = load_scaler('./../model_training/StandardScaler.pkl')

# All 23 feature columns go through the scaler.
columns_to_standardize = list(range(23))

# (lower, upper) bound per feature, listed in X1..X23 order.
# NOTE(review): X16's lower bound (53007) is positive while the neighbouring
# X12-X15 and X17 lower bounds are negative -- confirm this is intended.
bounds = [
    (10000, 1000000),    # X1
    (0, 1),              # X2
    (0, 6),              # X3
    (0, 3),              # X4
    (20, 80),            # X5
    (-2, 8),             # X6
    (-2, 8),             # X7
    (-2, 8),             # X8
    (-2, 8),             # X9
    (-2, 8),             # X10
    (-2, 8),             # X11
    (-154973, 964511),   # X12
    (-67526, 983931),    # X13
    (-61506, 855086),    # X14
    (-81334, 891586),    # X15
    (53007, 927171),     # X16
    (-339603, 961664),   # X17
    (0, 873552),         # X18
    (0, 1215471),        # X19
    (0, 846040),         # X20
    (0, 621000),         # X21
    (0, 417990),         # X22
    (0, 403500),         # X23
]

# Split the bound pairs into one vector of lower and one of upper limits.
min_array = np.array([lower for lower, _ in bounds])
max_array = np.array([upper for _, upper in bounds])

# Column labels 'X1' .. 'X23'.
feature = [f'X{idx}' for idx in range(1, 24)]

# The same bounds expressed in the scaler's output space.
scaled_min = scale_input(scaler, min_array, columns_to_standardize)
scaled_max = scale_input(scaler, max_array, columns_to_standardize)

In [8]:
def inverse_transform(df_scaled, scaler, columns_to_standardize):
    """Undo the scaler on selected columns and return a new DataFrame.

    Args:
        df_scaled: DataFrame holding scaled values; it is not modified.
        scaler: Object whose ``inverse_transform`` is applied to the selected columns.
        columns_to_standardize: Positional column indices to inverse-transform.

    Returns:
        A new DataFrame with the same column labels and a fresh default index.
    """
    # Work on an explicit copy: ``.values`` may alias the frame's own buffer
    # (mutating the caller's data) or be read-only under copy-on-write.
    df_values = df_scaled.to_numpy(copy=True)
    if df_values.dtype.kind in 'iu':
        # An integer buffer would silently truncate the float results.
        df_values = df_values.astype(float)
    # Shape trace kept so the notebook's recorded output stays the same.
    print(df_values.shape)
    df_values[:, columns_to_standardize] = scaler.inverse_transform(df_values[:, columns_to_standardize])
    df_original = pd.DataFrame(df_values, columns=df_scaled.columns)
    return df_original

def transform(df, scaler, columns_to_standardize):
    """Apply the scaler to selected columns and return a new DataFrame.

    Args:
        df: DataFrame holding raw values; it is not modified.
        scaler: Object whose ``transform`` is applied to the selected columns.
        columns_to_standardize: Positional column indices to scale.

    Returns:
        A new DataFrame with the same column labels and a fresh default index.
    """
    # Work on an explicit copy: ``.values`` may alias the frame's own buffer
    # (mutating the caller's data) or be read-only under copy-on-write.
    df_values = df.to_numpy(copy=True)
    if df_values.dtype.kind in 'iu':
        # An integer buffer would silently truncate the scaled floats.
        df_values = df_values.astype(float)
    # Shape trace kept so the notebook's recorded output stays the same.
    print(df_values.shape)
    df_values[:, columns_to_standardize] = scaler.transform(df_values[:, columns_to_standardize])
    df_original = pd.DataFrame(df_values, columns=df.columns)
    return df_original

def create_pd_dataframe(input_folder, scaler, columns_to_standardize, bounds):
    """Collect every run's ``input_data.npy`` into one scaled DataFrame.

    Each sub-directory of ``input_folder`` is expected to contain an
    ``input_data.npy`` file; its array becomes one row. Two extra rows are
    appended at the end: the lower bounds and then the upper bounds.

    Args:
        input_folder: Directory whose sub-directories each hold one run.
        scaler: Passed through to ``transform``.
        columns_to_standardize: Positional column indices to scale.
        bounds: Sequence of ``(lower, upper)`` pairs, one per feature.

    Returns:
        ``(data_frame, order_list)``: the scaled frame, and the sub-directory
        names in the same order as the first ``len(order_list)`` rows.
    """
    features = ['X%d' % k for k in range(1, 24)]
    lower_bounds = [pair[0] for pair in bounds]
    upper_bounds = [pair[1] for pair in bounds]

    input_data_list = []
    order_list = []
    # os.listdir returns entries in arbitrary order; sorting (lexicographic)
    # makes the row order reproducible between runs and machines.
    for entry in sorted(os.listdir(input_folder)):
        run_dir = os.path.join(input_folder, entry)
        # Skip '.DS_Store' and any other stray file that is not a run directory.
        if entry == '.DS_Store' or not os.path.isdir(run_dir):
            continue
        input_data_list.append(load_data(os.path.join(run_dir, 'input_data.npy')))
        order_list.append(entry)

    # Bounds go last so they never shift the rows described by order_list.
    input_data_list.append(lower_bounds)
    input_data_list.append(upper_bounds)
    data_frame = pd.DataFrame(data=input_data_list, columns=features)
    data_frame = transform(data_frame, scaler, columns_to_standardize)
    return data_frame, order_list

# Gather the NSGA-III inputs (plus the two bound rows), scale them, and write
# the table to the CSV that CERTIFAI reads.
input_df, order_list = create_pd_dataframe(
    input_folder="./../output/NSGAIII_multi/",
    scaler=scaler,
    columns_to_standardize=columns_to_standardize,
    bounds=bounds,
)
input_df.to_csv('certif_input_df.csv', index=False)

(32, 23)


In [9]:
# Build the CERTIFAI instance from the CSV written by the previous cell.
certifai_instance = CERTIFAI.from_csv('certif_input_df.csv')
# Sanity check: show the type of the table CERTIFAI holds
# (a pandas DataFrame in the recorded output).
print(type(certifai_instance.tab_dataset))

<class 'pandas.core.frame.DataFrame'>


In [11]:
# Generate counterfactuals for every row of the CSV; the recorded run took
# about 9 minutes for 32 samples.
# NOTE(review): presumably final_k=196 is the number of counterfactuals kept per
# sample (the saved frames are (196, 23)); the meaning of classification=False
# and experiment=True depends on the local CERTIFAI module -- confirm.
# NOTE(review): no random seed is set before this call; if the search is
# stochastic, results will differ between runs -- consider seeding first.
certifai_instance.fit(model, generations=100, verbose=True, final_k= 196, classification=False, experiment=True, distance='L2')

Generating counterfactual(s) for sample 31: 100%|██████████| 32/32 [09:21<00:00, 17.55s/it]


In [12]:
def save_certif_counterfactuals(certifai_instance_results, order_list, scaler, columns_to_standardize):
    """Write each sample's counterfactuals to ``Certif_cf/<name>.csv`` in original units.

    For position ``i`` in ``order_list`` the counterfactuals are taken from
    ``certifai_instance_results[i][1]``, inverse-transformed, and saved under
    the name ``order_list[i]``. Results beyond ``len(order_list)`` are ignored.

    Args:
        certifai_instance_results: Indexable results; element ``[i][1]`` holds
            the rows of 23 feature values for sample ``i``.
        order_list: Output file stems, aligned with the results by position.
        scaler: Passed through to ``inverse_transform``.
        columns_to_standardize: Positional column indices to inverse-transform.
    """
    certif_save_loc = 'Certif_cf'
    features = ['X%d' % k for k in range(1, 24)]

    # Create the output directory once, up front; exist_ok avoids the
    # check-then-create race of a separate os.path.exists test.
    os.makedirs(certif_save_loc, exist_ok=True)

    for i, sample_name in enumerate(order_list):
        cf_list = certifai_instance_results[i][1]
        certif_df = pd.DataFrame(data=cf_list, columns=features)
        certif_df = inverse_transform(certif_df, scaler, columns_to_standardize)
        certif_df.to_csv(os.path.join(certif_save_loc, sample_name + '.csv'), index=False)
    

In [13]:
# Persist the counterfactuals of every NSGA-III sample, one CSV per sample.
save_certif_counterfactuals(
    certifai_instance_results=certifai_instance.results,
    order_list=order_list,
    scaler=scaler,
    columns_to_standardize=columns_to_standardize,
)

(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
(196, 23)
