In [1]:
from CERTIFAI import CERTIFAI
import numpy as np
import torch.nn as nn
import torch
import os
import pandas as pd 

In [2]:
def load_data(data):
    """Read a NumPy ``.npy`` file and return its contents.

    Parameters
    ----------
    data : str or path-like
        Location of the file, passed straight through to ``np.load``.

    Returns
    -------
    The object produced by ``np.load(data)``.
    """
    loaded = np.load(data)
    return loaded

class PimaClassifier(nn.Module):
    """Fully connected network: 8 inputs -> 32 -> 64 -> 16 -> 1 output.

    The three hidden layers are each followed by a ReLU; the single output
    unit is passed through a sigmoid, so every output lies in (0, 1).

    The attribute names (``hidden1`` ... ``act_output``) and the order in
    which they are created are kept stable because they define the module's
    ``state_dict`` keys.
    """

    def __init__(self):
        super().__init__()
        # Each pair is (linear layer, activation applied to its output).
        self.hidden1, self.act1 = nn.Linear(8, 32), nn.ReLU()
        self.hidden2, self.act2 = nn.Linear(32, 64), nn.ReLU()
        self.hidden3, self.act3 = nn.Linear(64, 16), nn.ReLU()
        self.output, self.act_output = nn.Linear(16, 1), nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through every (linear, activation) stage in order."""
        stages = (
            (self.hidden1, self.act1),
            (self.hidden2, self.act2),
            (self.hidden3, self.act3),
            (self.output, self.act_output),
        )
        out = x
        for linear, activation in stages:
            out = activation(linear(out))
        return out
    
    
def load_model(input_folder = 'model_training/model'):
    """Load a saved model with ``torch.load`` and put it in evaluation mode.

    Parameters
    ----------
    input_folder : str
        Path handed directly to ``torch.load``. Despite the name it is used
        as the path of the saved object itself, not of a directory to scan.

    Returns
    -------
    The object returned by ``torch.load`` after ``.eval()`` has been called
    on it.
    """
    # NOTE(review): torch.load unpickles the file, so only point this at
    # checkpoints from a trusted source. Presumably the file holds a whole
    # pickled module (e.g. PimaClassifier) rather than a state_dict, since
    # .eval() is called on the result - confirm against the training code.
    loaded_model = torch.load(input_folder)
    loaded_model.eval()
    return loaded_model



In [3]:
# Load the input array saved for run "1" and the saved model. Both paths are
# relative, so they resolve against the notebook's current working directory.
file_path = "./../output/NSGAIII/1/input_data.npy"
input_array = load_data(file_path)
# `model` is passed to certifai_instance.fit(...) in a later cell.
model = load_model(input_folder = './../model_training/model')

In [9]:
def create_pd_dataframe(input_folder):
    """Gather every sample's ``input_data.npy`` under ``input_folder`` into one DataFrame.

    Each sub-directory of ``input_folder`` is read as one sample: its
    ``input_data.npy`` is loaded with ``load_data`` and becomes one row.
    After all samples, two extra rows are appended: the hard-coded
    per-feature minimum values, then the hard-coded maximum values.
    (Presumably these let the downstream tool see the full feature range -
    confirm against how the CSV is consumed.)

    Parameters
    ----------
    input_folder : str
        Directory whose sub-directories each contain ``input_data.npy``.
        Entries that are not directories (e.g. ``.DS_Store``) are skipped.

    Returns
    -------
    data_frame : pandas.DataFrame
        One row per sample in ``order_list`` order, followed by the minimum
        row and the maximum row; columns are the eight feature names.
    order_list : list of str
        Sub-directory names, sorted, aligned with the sample rows.

    Raises
    ------
    Whatever ``load_data`` raises when a sub-directory has no readable
    ``input_data.npy``; such directories are not silently skipped.
    """
    features = ["Pregnancies",'Glucose','Blood Pressure','Skin Thickness','Insulin', 'BMI', 'DiabetesPedigreeFunction','Age']
    # Named feature_max/feature_min so the builtins max/min are not shadowed.
    feature_max = [17, 200, 122, 99, 846, 67, 2.5000, 100]
    feature_min = [0, 25, 20, 0, 0, 10, 0.060, 0]
    input_data_list = []
    order_list = []
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and therefore order_list) reproducible across runs and machines.
    for entry in sorted(os.listdir(input_folder)):
        sample_dir = os.path.join(input_folder, entry)
        # Only sub-directories hold samples; stray files are ignored.
        if not os.path.isdir(sample_dir):
            continue
        input_data_list.append(load_data(os.path.join(sample_dir, 'input_data.npy')))
        order_list.append(entry)
    input_data_list.append(feature_min)
    input_data_list.append(feature_max)
    data_frame = pd.DataFrame(data=input_data_list, columns=features)
    return data_frame, order_list

# Build the combined input table from every run folder under the NSGAIII
# output directory, and remember the folder order in `order_list`.
input_df, order_list = create_pd_dataframe(input_folder="./../output/NSGAIII/")
# Written without the index so the CSV contains only the feature columns;
# this file is read back by CERTIFAI.from_csv in the next cell.
input_df.to_csv('certif_input_df.csv', index=False)

In [10]:
# Create the CERTIFAI instance from the CSV written in the previous cell.
certifai_instance = CERTIFAI.from_csv('certif_input_df.csv')
# Sanity check: show what type the instance stores in `tab_dataset`.
print(type(certifai_instance.tab_dataset))

<class 'pandas.core.frame.DataFrame'>


In [11]:
# Run CERTIFAI's fit against the loaded model; the outcome is read from
# certifai_instance.results in a later cell.
# NOTE(review): the keyword semantics (generations, final_k, classification,
# experiment, distance) are defined by the CERTIFAI module - confirm there.
# NOTE(review): no random seed is set in the visible cells; if the search is
# stochastic, results will differ between runs - confirm and seed if needed.
certifai_instance.fit(model, generations=100, verbose=True, final_k= 20, classification=False, experiment=True, distance='L2')

32


Generating counterfactual(s) for sample 31: 100%|██████████| 32/32 [00:23<00:00,  1.37it/s]


In [12]:
def save_certif_counterfactuals(certifai_instance_results, order_list, certif_save_loc='Certif_cf'):
    """Write one CSV per sample from the CERTIFAI results.

    For position ``i`` in ``order_list``, the element at index 1 of
    ``certifai_instance_results[i]`` is turned into a DataFrame with the
    eight feature columns and saved as ``<certif_save_loc>/<order_list[i]>.csv``.

    Parameters
    ----------
    certifai_instance_results : indexable
        Indexed by integer position; each item must itself be indexable,
        with the rows to save at index 1 (presumably the generated
        counterfactuals - confirm against ``CERTIFAI.results``).
    order_list : list of str
        Sample names used as file stems. Only the first ``len(order_list)``
        results are written, so any trailing results beyond that (such as
        the min/max rows that ``create_pd_dataframe`` appends to the input)
        are ignored.
    certif_save_loc : str, optional
        Output directory. Defaults to ``'Certif_cf'`` (the previously
        hard-coded location), created if it does not exist.

    Returns
    -------
    None
    """
    features = ["Pregnancies",'Glucose','Blood Pressure','Skin Thickness','Insulin', 'BMI', 'DiabetesPedigreeFunction','Age']

    # Create the directory once, up front. exist_ok avoids the separate
    # exists() check (and its race) and makes re-running the cell safe.
    os.makedirs(certif_save_loc, exist_ok=True)

    for i, sample_name in enumerate(order_list):
        cf_list = certifai_instance_results[i][1]
        certif_df = pd.DataFrame(data=cf_list, columns=features)
        certif_df.to_csv(os.path.join(certif_save_loc, sample_name + '.csv'), index=False)
    

In [13]:
# Write one CSV per sample folder, pairing certifai_instance.results[i] with
# order_list[i] by position.
save_certif_counterfactuals(certifai_instance.results, order_list)