In [107]:
import dice_ml

import pandas as pd

import torch.nn as nn
import torch

import copy

import os
import numpy as np

import seaborn as sns

In [108]:
class PimaClassifier(nn.Module):
    """Fully connected network mapping 8 input features to one sigmoid output.

    Layer widths are 8 -> 32 -> 64 -> 16 -> 1, with ReLU after each hidden
    layer and a sigmoid on the final layer, so every output lies in (0, 1).
    """

    def __init__(self):
        super().__init__()
        # Attribute names and their creation order are deliberately left
        # untouched: they define the module's parameter names, which any
        # previously saved model relies on.
        self.hidden1 = nn.Linear(8, 32)
        self.act1 = nn.ReLU()
        self.hidden2 = nn.Linear(32, 64)
        self.act2 = nn.ReLU()
        self.hidden3 = nn.Linear(64, 16)
        self.act3 = nn.ReLU()
        self.output = nn.Linear(16, 1)
        self.act_output = nn.Sigmoid()

    def forward(self, x):
        """Run ``x`` through each linear layer followed by its activation."""
        stages = (
            (self.hidden1, self.act1),
            (self.hidden2, self.act2),
            (self.hidden3, self.act3),
            (self.output, self.act_output),
        )
        for layer, activation in stages:
            x = activation(layer(x))
        return x
    
    
def load_model(input_folder = 'model_training/model'):
    """Load a serialized PyTorch model from disk and put it in eval mode.

    Args:
        input_folder: Path handed to ``torch.load``. Despite the name, this is
            the serialized model file itself rather than a directory.

    Returns:
        The deserialized object after ``eval()`` has been called on it.

    Note:
        The file is fully unpickled, which can execute arbitrary code. Only
        load model files from a trusted source. Unpickling a whole module also
        requires its class definition to be importable/defined at load time.
    """
    try:
        # PyTorch 2.6 changed the default to ``weights_only=True``, which
        # refuses to unpickle a complete ``nn.Module``. Request the full
        # unpickling explicitly so the call behaves the same on new releases.
        model = torch.load(input_folder, weights_only=False)
    except TypeError:
        # Older PyTorch releases do not know the ``weights_only`` keyword.
        model = torch.load(input_folder)
    model.eval()
    return model

In [109]:
# Load the trained model from the sibling ``model_training`` directory; the
# path is relative to the notebook's working directory.
# NOTE(review): ``model`` is not referenced by the cells visible below
# (create_dice_cf hands the model path to dice_ml itself) — confirm it is still needed.
model = load_model(input_folder = './../model_training/model')

In [110]:
def load_data(data):
    """Read a NumPy file and return whatever ``np.load`` yields for it.

    Args:
        data: Path (or file-like object) forwarded unchanged to ``np.load``.

    Returns:
        The object produced by ``np.load``.
    """
    loaded = np.load(data)
    return loaded


def create_dataframe_from_list_of_arrays(arrays, column_names=None):
    """Build a DataFrame with one row per array and one column per element.

    Args:
        arrays: Non-empty sequence of equally long 1-D sequences (lists or
            NumPy arrays). Each one becomes a row.
        column_names: Optional sequence of column labels, one per element of
            an array. When omitted, the positions ``0..length-1`` are used.

    Returns:
        A ``pandas.DataFrame`` with ``len(arrays)`` rows.

    Raises:
        ValueError: If ``arrays`` is empty, the arrays differ in length, or
            the number of column names does not match the array length.
    """
    # ``len(...) == 0`` instead of ``not arrays`` so a 2-D ndarray is accepted
    # too (its truth value is ambiguous).
    if arrays is None or len(arrays) == 0:
        raise ValueError("The list of arrays is empty")

    # Every array becomes one row, so all need the same number of elements.
    length = len(arrays[0])
    if not all(len(arr) == length for arr in arrays):
        raise ValueError("All arrays must have the same length")

    if column_names is None:
        # The declared default used to crash on ``len(None)``; fall back to
        # positional labels instead.
        column_names = list(range(length))
    elif len(column_names) != length:
        # Names label the elements of each array, not the arrays themselves.
        raise ValueError(
            "Number of column names (%d) must match the length of each array (%d)"
            % (len(column_names), length)
        )

    rows = [dict(zip(column_names, arr)) for arr in arrays]
    return pd.DataFrame(rows)


def create_data(folder_location, column_names):
    """Collect every run's ``input_data.npy`` under a folder into a DataFrame.

    Each sub-directory of ``folder_location`` is expected to contain an
    ``input_data.npy`` file; the loaded arrays become the rows of the result.

    Args:
        folder_location: Directory whose sub-directories hold the input files.
        column_names: Column labels, one per element of each loaded array.

    Returns:
        A ``pandas.DataFrame`` with one row per sub-directory, in sorted
        directory-name order.

    Raises:
        ValueError: Propagated from ``create_dataframe_from_list_of_arrays``,
            e.g. when no sub-directory was found.
    """
    input_list = []
    # os.listdir returns entries in arbitrary order; sort so the row order is
    # reproducible between runs and machines.
    for entry in sorted(os.listdir(folder_location)):
        run_dir = os.path.join(folder_location, entry)
        # Skip stray files such as '.DS_Store'; only directories hold runs.
        if not os.path.isdir(run_dir):
            continue
        input_list.append(load_data(os.path.join(run_dir, 'input_data.npy')))

    return create_dataframe_from_list_of_arrays(input_list, column_names)


In [113]:
def apply_phenotype(array, phenotype):
    """Execute a ';'-separated phenotype program against a copy of ``array``.

    The input is deep-copied into the local name ``x``, each non-empty
    statement in ``phenotype`` is run with ``exec``, and ``x`` is returned.
    ``array`` itself is never handed to the statements directly, but it is
    visible to them as a local name.

    Args:
        array: Object to copy; the copy is what gets returned.
        phenotype: String of Python statements separated by ';'.

    Returns:
        The deep copy ``x`` after all statements have been executed.

    SECURITY: ``exec`` runs arbitrary Python. The callers in this notebook
    pass text read from a CSV column, so only use files from a trusted source.
    """
    # Split the phenotype string into individual operations
    operations = phenotype.split(';')
    x = copy.deepcopy(array)
    # Iterate over each operation and execute it
    for operation in operations:
        # Strip any leading/trailing whitespace from the operation
        operation = operation.strip()
        
        # exec() is called without explicit namespaces, so the statement runs
        # against this function's globals and locals and can reach the copy
        # through the name ``x``. Empty fragments (e.g. after a trailing ';')
        # are skipped.
        # NOTE(review): presumably the statements mutate ``x`` in place
        # (item assignment); rebinding ``x`` inside exec would not be
        # reflected in the value returned below — confirm against the
        # grammar that produces the phenotypes.
        if operation:
            exec(operation)

    return x

def create_dice_cf(input_folder, model_path='./../model_training/model',
                   dice_output_dir='Dice_cf', ge_output_dir='Ge_cf'):
    """Export DiCE and phenotype-derived counterfactuals for every run folder.

    For each sub-directory ``<run>`` of ``input_folder`` this reads
    ``input_data.npy`` (the query instance) and ``final_gen.csv``, keeps the
    CSV rows whose ``o_1`` value is below 0.50, and then

    * asks DiCE for as many counterfactuals as rows were kept and writes them
      to ``<dice_output_dir>/<run>.csv``;
    * applies each kept row's ``Phenotype`` to the query instance via
      ``apply_phenotype`` and writes the results to
      ``<ge_output_dir>/<run>.csv``.

    Args:
        input_folder: Directory containing one sub-directory per run.
        model_path: Serialized PyTorch model handed to ``dice_ml.Model``.
        dice_output_dir: Directory receiving the DiCE CSV files.
        ge_output_dir: Directory receiving the phenotype-derived CSV files.

    Returns:
        None. Results are written to disk; the number of kept rows is printed
        per run.
    """
    features = ["Pregnancies",'Glucose','Blood Pressure','Skin Thickness','Insulin', 'BMI', 'DiabetesPedigreeFunction','Age']
    backend = 'PYT'  # needs pytorch installed
    m = dice_ml.Model(model_path=model_path, backend=backend)

    # Per-feature [min, max] ranges given to DiCE in place of a training set.
    d = dice_ml.Data(features={'Pregnancies': [0,17],
                           'Glucose' : [25, 200],
                           'Blood Pressure' : [20, 122],
                           'Skin Thickness' : [0, 99],
                           'Insulin' : [0, 846],
                           'BMI' : [10, 67],
                           'DiabetesPedigreeFunction': [0.060, 2.5000],
                           'Age' : [0, 100] },
                 outcome_name='outcome')

    exp = dice_ml.Dice(d, m)

    # Create both output directories once instead of re-checking per run.
    os.makedirs(dice_output_dir, exist_ok=True)
    os.makedirs(ge_output_dir, exist_ok=True)

    # Sorted for a reproducible processing order (os.listdir is unordered).
    for run_name in sorted(os.listdir(input_folder)):
        run_dir = os.path.join(input_folder, run_name)
        # Skip stray files such as '.DS_Store'; only directories hold runs.
        if not os.path.isdir(run_dir):
            continue

        query = load_data(os.path.join(run_dir, 'input_data.npy'))
        data = pd.read_csv(os.path.join(run_dir, 'final_gen.csv'))
        data = data.sort_values(by=['o_1'])
        data = data[data['o_1'] < 0.50]
        number_of_counterfactuals = data.shape[0]
        print(number_of_counterfactuals)

        if number_of_counterfactuals > 0:
            input_dice = pd.DataFrame(data=[query], columns=features)
            dice_exp = exp.generate_counterfactuals(
                input_dice,
                total_CFs=number_of_counterfactuals,
                desired_class="opposite",
            )
            dice_exp.cf_examples_list[0].final_cfs_df.to_csv(
                path_or_buf=os.path.join(dice_output_dir, str(run_name) + '.csv'),
                index=False,
            )
        else:
            # DiCE requires a positive total_CFs, so there is nothing to
            # request when no row passed the filter.
            print('No rows with o_1 < 0.50 for ' + str(run_name) + '; skipping DiCE generation')

        # Re-create each kept individual by replaying its phenotype on the
        # query instance.
        evolved_x = [
            apply_phenotype(query, phenotype)
            for phenotype in data['Phenotype'].tolist()
        ]
        GE_df = pd.DataFrame(data=evolved_x, columns=features)
        GE_df.to_csv(os.path.join(ge_output_dir, str(run_name) + '.csv'), index=False)
        
    
        
    

In [114]:
# Export DiCE and phenotype-derived counterfactuals for every run directory
# under the NSGA-III output folder (path relative to the working directory).
input_folder = './../output/NSGAIII/'
create_dice_cf(input_folder)

9


100%|██████████| 1/1 [00:00<00:00, 31.11it/s]


192


100%|██████████| 1/1 [00:00<00:00, 39.12it/s]


7


100%|██████████| 1/1 [00:00<00:00, 43.16it/s]


78


100%|██████████| 1/1 [00:00<00:00, 39.54it/s]


14


100%|██████████| 1/1 [00:00<00:00, 38.17it/s]


3


100%|██████████| 1/1 [00:00<00:00, 39.11it/s]


72


100%|██████████| 1/1 [00:00<00:00, 42.01it/s]


105


100%|██████████| 1/1 [00:00<00:00, 38.80it/s]


294


100%|██████████| 1/1 [00:00<00:00, 39.16it/s]


48


100%|██████████| 1/1 [00:00<00:00, 40.96it/s]


11


100%|██████████| 1/1 [00:00<00:00, 42.70it/s]


20


100%|██████████| 1/1 [00:00<00:00, 38.90it/s]


205


100%|██████████| 1/1 [00:00<00:00, 39.15it/s]


1


100%|██████████| 1/1 [00:00<00:00, 38.31it/s]


50


100%|██████████| 1/1 [00:00<00:00, 41.90it/s]


30


100%|██████████| 1/1 [00:00<00:00, 41.45it/s]


2


100%|██████████| 1/1 [00:00<00:00, 42.88it/s]


4


100%|██████████| 1/1 [00:00<00:00, 39.35it/s]


41


100%|██████████| 1/1 [00:00<00:00, 44.06it/s]


14


100%|██████████| 1/1 [00:00<00:00, 43.13it/s]


64


100%|██████████| 1/1 [00:00<00:00, 40.95it/s]


7


100%|██████████| 1/1 [00:00<00:00, 40.37it/s]


167


100%|██████████| 1/1 [00:00<00:00, 39.18it/s]


5


100%|██████████| 1/1 [00:00<00:00, 42.01it/s]


5


100%|██████████| 1/1 [00:00<00:00, 43.92it/s]


76


100%|██████████| 1/1 [00:00<00:00, 40.83it/s]


2


100%|██████████| 1/1 [00:00<00:00, 40.47it/s]


110


100%|██████████| 1/1 [00:00<00:00, 40.15it/s]


5


100%|██████████| 1/1 [00:00<00:00, 43.30it/s]


8


100%|██████████| 1/1 [00:00<00:00, 42.65it/s]


In [115]:
def create_NSGAII_cf(input_folder, output_dir='Ge_NSGAII_cf'):
    """Export phenotype-derived counterfactuals for every run folder.

    For each sub-directory ``<run>`` of ``input_folder`` this reads
    ``input_data.npy`` and ``final_gen.csv``, keeps the CSV rows whose ``o_1``
    value is below 0.50, applies each kept row's ``Phenotype`` to the input
    via ``apply_phenotype`` and writes the results to
    ``<output_dir>/<run>.csv``.

    Args:
        input_folder: Directory containing one sub-directory per run.
        output_dir: Directory receiving the CSV files.

    Returns:
        None. Results are written to disk.
    """
    features = ["Pregnancies",'Glucose','Blood Pressure','Skin Thickness','Insulin', 'BMI', 'DiabetesPedigreeFunction','Age']

    # Create the output directory once instead of re-checking per run.
    os.makedirs(output_dir, exist_ok=True)

    # Sorted for a reproducible processing order (os.listdir is unordered).
    for run_name in sorted(os.listdir(input_folder)):
        run_dir = os.path.join(input_folder, run_name)
        # Skip stray files such as '.DS_Store'; only directories hold runs.
        if not os.path.isdir(run_dir):
            continue

        query = load_data(os.path.join(run_dir, 'input_data.npy'))
        data = pd.read_csv(os.path.join(run_dir, 'final_gen.csv'))
        data = data.sort_values(by=['o_1'])
        data = data[data['o_1'] < 0.50]

        # Re-create each kept individual by replaying its phenotype on the
        # input instance.
        evolved_x = [
            apply_phenotype(query, phenotype)
            for phenotype in data['Phenotype'].tolist()
        ]
        GE_df = pd.DataFrame(data=evolved_x, columns=features)
        GE_df.to_csv(os.path.join(output_dir, str(run_name) + '.csv'), index=False)
        
        
# Export phenotype-derived counterfactuals for every run directory under the
# NSGA-II output folder. This rebinds the notebook-level ``input_folder``
# that an earlier cell pointed at the NSGA-III output.
input_folder = './../output/NSGAII_multi/'
create_NSGAII_cf(input_folder)
        