In [1]:
# pymoo dependencies
from pymoo.core.problem import ElementwiseProblem
from pymoo.algorithms.moo.unsga3 import UNSGA3
from pymoo.core.problem import Problem, ElementwiseProblem
from pymoo.operators.crossover.pntx import TwoPointCrossover
from pymoo.operators.mutation.bitflip import BitflipMutation
from pymoo.operators.sampling.rnd import BinaryRandomSampling
from pymoo.optimize import minimize
from pymoo.core.callback import Callback
from pymoo.util.ref_dirs import get_reference_directions

In [2]:
import numpy as np
import pandas as pd
import torch
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
import time
from botorch.utils.multi_objective.pareto import is_non_dominated
from botorch.utils.multi_objective.hypervolume import Hypervolume

In [3]:
from tensorflow.keras.models import load_model

In [4]:
import os
# NOTE: this changes the working directory of the whole kernel. Every relative
# path used in later cells (model/CSV files, "./results/...") is therefore
# resolved against ./sources. The call is not idempotent: re-running this cell
# without restarting the kernel looks for ./sources inside ./sources.
os.chdir('./sources')
# Project-local helper modules; presumably they live in ./sources and become
# importable because of the chdir above -- TODO confirm.
from Cal_SA import SA_score
from Cal_TC import pre_TC
from utility import Initial_X,cover_torch,cover_numpy,X_info

In [5]:
import warnings
# Installs a global "ignore" filter: from here on no warning of any category
# is shown in the notebook, including deprecation warnings from the libraries
# imported above.
warnings.filterwarnings("ignore")

In [6]:
def optimize_nsga3(Problem, N_BATCH, pop_size,  # must haves
                   ref_num=10,  # as a rule of thumb, pop_size>ref_num
                   random_state=None, verbose=False):
    """Run pymoo's UNSGA3 on a binary-encoded problem and track the hypervolume.

    Parameters
    ----------
    Problem : object
        Problem definition exposing ``n_var``, ``n_obj``, ``ref_point`` and an
        ``evaluate(X)`` callable. Note that this parameter shadows the pymoo
        ``Problem`` class imported at the top of the notebook.
        NOTE(review): ``evaluate`` is assumed to return a torch tensor of
        maximisation objectives with one row per design -- confirm.
    N_BATCH : int
        Number of generations after the initial population; the run is
        terminated after ``N_BATCH + 1`` generations.
    pop_size : int
        Population size handed to ``UNSGA3``.
    ref_num : int, default 10
        Number of points requested from
        ``get_reference_directions("energy", ...)``.
    random_state : int or None, default None
        Seed used for both the reference directions and ``minimize``. When
        ``None`` a random integer in ``[0, 1000000)`` is drawn at call time.
    verbose : bool, default False
        Forwarded to ``minimize``.

    Returns
    -------
    hvs : list
        ``N_BATCH + 1`` hypervolume values: entry ``k`` is computed from the
        non-dominated subset of all populations of generations ``0..k``.
    data : numpy.ndarray
        Horizontal stack of every stored decision vector and its objectives,
        generation after generation.
    """
    if random_state is None:
        # Draw the fallback seed per call. A default expression in the
        # signature is evaluated only once, when the function is defined, so
        # every call without an explicit seed would silently share one value.
        random_state = np.random.randint(0, 1000000, (1,)).item()

    print("Optimizing with NSGA-III")
    t0 = time.time()
    ref_point0 = Problem.ref_point
    # sets the hv based on problem, flip since BoTorch takes maximisation
    hv = Hypervolume(ref_point=-ref_point0)
    hvs = []  # one hypervolume score per generation of this run

    def _pareto_hypervolume(objectives):
        """Hypervolume of the non-dominated rows of ``objectives``."""
        pareto_mask = is_non_dominated(objectives)
        return hv.compute(objectives[pareto_mask])

    # define a pymoo problem class
    class MyProblem(ElementwiseProblem):
        def __init__(self):
            super().__init__(n_var=Problem.n_var, n_obj=Problem.n_obj,
                             xl=np.full(Problem.n_var, 0),
                             xu=np.full(Problem.n_var, 1))

        def _evaluate(self, X, out, *args, **kwargs):
            # base input/output from torch class
            X1 = np.array(X).astype(int).reshape(-1, Problem.n_var)
            res_ = Problem.evaluate(X1)
            # flip since botorch assumes maximisation vs pymoo minimization
            out["F"] = -1*cover_numpy(res_)

    # define a callback that records the population objectives per generation
    class MyCallback(Callback):
        def __init__(self) -> None:
            super().__init__()
            self.data["best"] = []
            self.data["full"] = []

        def notify(self, algorithm):
            self.data["best"].append(algorithm.pop.get("F").min())
            self.data["full"].append(algorithm.pop.get("F"))

    ##########
    pymooproblem = MyProblem()
    # create the reference directions to be used for the optimization
    ref_dirs = get_reference_directions("energy", Problem.n_obj, ref_num, seed=random_state)
    algorithm = UNSGA3(
        pop_size=pop_size,
        sampling=BinaryRandomSampling(),
        crossover=TwoPointCrossover(),
        mutation=BitflipMutation(),
        ref_dirs=ref_dirs,
        eliminate_duplicates=True)

    res = minimize(pymooproblem,
                   algorithm,
                   seed=random_state,
                   termination=('n_gen', N_BATCH+1),
                   callback=MyCallback(),
                   verbose=verbose,
                   save_history=True)
    ##########
    t1 = time.time()
    print(f"Time taken in total: {t1-t0:>4.2f}s.")

    # convert population data into tensor form
    # initial data; objectives are recomputed with Problem.evaluate rather
    # than read back from the stored (sign-flipped) pymoo "F" values
    train_x = torch.tensor(res.history[0].pop.get("X").tolist())
    train_obj = Problem.evaluate(train_x)
    hvs.append(_pareto_hypervolume(train_obj))
    # population at each iteration
    for i in range(1, N_BATCH+1):  # don't forget we did +1 for total iterations
        new_x = torch.tensor(res.history[i].pop.get("X").tolist())
        new_obj = Problem.evaluate(new_x)
        train_x = torch.cat([train_x, new_x])
        train_obj = torch.cat([train_obj, new_obj])
        # hypervolume of the Pareto front of everything evaluated so far,
        # measured against the original reference point
        hvs.append(_pareto_hypervolume(train_obj))
    ##########
    return hvs, torch.hstack([train_x, train_obj]).cpu().numpy()

In [7]:
# All paths below are relative to the current working directory, which an
# earlier cell changed to ./sources.
# Keras model loaded without compiling; it is passed to pre_TC as `model` later on.
TC_model=load_model(r"TC_DNN.h5",compile=False)
# smi.csv must provide a "chromosome" column.
df=pd.read_csv(r'smi.csv')
chromosome=df["chromosome"]
# select_keys.csv is read with its first column as index and must provide
# the columns "key" and "Index".
selected_cor=pd.read_csv(r'select_keys.csv',index_col=0)
selected_keys=np.array(selected_cor["key"])
# NOTE(review): selected_Corr_df is not referenced by any other visible cell.
selected_Corr_df=np.array(selected_cor["Index"])

In [8]:
class Problem(torch.nn.Module):
    """Two-objective problem definition consumed by ``optimize_nsga3``.

    The class is used as a plain namespace in this notebook: its attributes
    and ``evaluate`` are accessed on the class itself (``Problem.n_var``,
    ``Problem.evaluate(X)``).
    """
    # must define these!
    n_var = 25  # number of binary decision variables per design
    n_obj = 2  # number of objectives returned by evaluate()
    n_random=10  # forwarded to Initial_X below
    seed=0  # forwarded to Initial_X; also used as random_state in the optimisation call
    TC_model=TC_model
    selected_keys=selected_keys
    chromosome=chromosome
    Initial_Xdata=Initial_X(n_var,n_random,seed)
    # optimize_nsga3 negates this before handing it to BoTorch's Hypervolume
    ref_point = torch.tensor([0.0,10.0])

    @staticmethod
    def evaluate(X,n_var0=n_var):
        """Evaluate both objectives for every design in ``X``.

        Declared as a static method so that it behaves the same whether it is
        called on the class or on an instance.

        Parameters
        ----------
        X : array-like
            Designs to evaluate; converted with ``cover_numpy`` and reshaped
            to ``(-1, n_var0)``, so a single flat design is accepted too.
        n_var0 : int
            Number of variables per design (defaults to the class ``n_var``).

        Returns
        -------
        Result of ``cover_torch`` applied to an ``(n_designs, 2)`` array whose
        columns are ``tc[0]`` and ``-SA_score(tc[2])`` for each design, where
        ``tc`` is the output of ``pre_TC``.
        """
        X=cover_numpy(X).reshape(-1,n_var0)
        res=[]
        for x in X:
            # TC_model, selected_keys and chromosome resolve to the notebook
            # globals here: a function body cannot see class-level names.
            tc=pre_TC(x,model=TC_model,selected_keys=selected_keys,chromosome=chromosome)
            TC_=tc[0]
            # sign flipped so that, presumably, both objectives are maximised
            # -- TODO confirm against SA_score
            SA_=SA_score(tc[2])*(-1)
            res.extend((TC_,SA_))
        # Uniformity as a maximum problem: one (TC, -SA) row per design
        return cover_torch(np.array(res).reshape(-1,2))

In [9]:
# Run the evolutionary search: 50 generations after the initial one, with a
# population of 10, seeded from the problem definition.
hvs_pymoo, train_pymoo = optimize_nsga3(
    Problem=Problem,
    N_BATCH=50,
    pop_size=10,
    ref_num=10,
    random_state=Problem.seed,
    verbose=True,
)

Optimizing with NSGA-III
n_gen  |  n_eval  | n_nds  |      eps      |   indicator  
     1 |       10 |      2 |             - |             -
     2 |       20 |      2 |  0.9341317365 |         ideal
     3 |       30 |      3 |  0.2047619048 |         ideal
     4 |       40 |      3 |  0.000000E+00 |             f
     5 |       50 |      4 |  0.0558671918 |             f
     6 |       60 |      5 |  0.2164179104 |         ideal
     7 |       70 |      4 |  0.0716904308 |             f
     8 |       80 |      4 |  0.0323434149 |             f
     9 |       90 |      4 |  0.2617079890 |         ideal
    10 |      100 |      4 |  0.0953597094 |         ideal
    11 |      110 |      2 |  0.0345744681 |         ideal
    12 |      120 |      3 |  0.5909943339 |         ideal
    13 |      130 |      2 |  0.000000E+00 |             f
    14 |      140 |      2 |  0.4474797844 |         ideal
    15 |      150 |      2 |  0.0372428281 |         ideal
    16 |      160 |      3 |  0

In [10]:
# Generation label for every evaluated design: 51 generations (N_BATCH=50 plus
# the initial population) times 10 individuals (pop_size=10), in row order.
gen = [generation for generation in range(51) for _ in range(10)]
df_gen = pd.DataFrame(gen, columns=["Gen"])

In [11]:
# Decode each evaluated bit-string (every column except the two trailing
# objective columns) with X_info; presumably it yields a (serial, SMILES)
# pair per design, matching the column names assigned below -- TODO confirm.
res = [X_info(bits) for bits in train_pymoo[:, :-2]]

# Decoded info on the left, raw bits and objectives on the right.
df_res = pd.concat(
    [
        pd.DataFrame(np.array(res, dtype=object).reshape(-1, 2)),
        pd.DataFrame(train_pymoo),
    ],
    axis=1,
)
df_res.columns = ["Serial", "SMILES", *(f"Bit_{k}" for k in range(25)), "TC", "SA"]

# Attach the generation label, then keep the first occurrence of each SMILES.
df_res = df_res.join(df_gen)
df_dup = df_res.drop_duplicates(subset="SMILES")

# One hypervolume value per generation.
df_HV = pd.DataFrame(np.array(hvs_pymoo), columns=["HV"])

In [12]:
# Polymers designed by the MOEA (first occurrence of each unique SMILES)
df_dup

Unnamed: 0,Serial,SMILES,Bit_0,Bit_1,Bit_2,Bit_3,Bit_4,Bit_5,Bit_6,Bit_7,...,Bit_18,Bit_19,Bit_20,Bit_21,Bit_22,Bit_23,Bit_24,TC,SA,Gen
0,"[21, 3, 8, 13, 14]",O=C(Oc1ccc(c2ccc3oc([*])nc3c2)cc1)Nc1ccc(Nc2cc...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.366,-3.195528,0
1,"[23, 20, 11, 12, 5]",O=C(C=CC(=O)NC(=O)C(=O)NNC(=O)c1ccc(C(=O)c2ncc...,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.424,-3.575833,0
2,"[9, 21, 0, 26, 15]",O=C(C=Cc1ccc2oc(c3ccc([*])o3)nc2c1)c1ccc(Nc2cc...,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,...,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.363,-3.416657,0
3,"[13, 14, 1, 12, 8]",O=C(O[*])c1cnc(CCCCCCc2nc3cc(Nc4ccc(N[*])cc4)c...,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.328,-3.306609,0
4,"[4, 27, 30, 10, 23]",O=C(C=CC(=O)NC(=O)C=CC(=O)n1c(=O)c2cc3c(=O)n(N...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,...,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.409,-4.181535,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,"[28, 25, 29, 25, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)Nc3ccc(C(=O)N[*])c...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.897,-2.618636,44
474,"[28, 3, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.882,-2.366756,47
498,"[28, 13, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.873,-2.739115,49
499,"[28, 5, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.746,-2.618810,49


In [13]:
# Hypervolume (HV) improvement, one row per generation
df_HV

Unnamed: 0,HV
0,2.863039
1,4.013835
2,4.295658
3,4.295658
4,4.312231
5,4.705827
6,4.710222
7,4.712337
8,5.400717
9,5.40605


In [14]:
# Display the de-duplicated candidate table again.
df_dup

Unnamed: 0,Serial,SMILES,Bit_0,Bit_1,Bit_2,Bit_3,Bit_4,Bit_5,Bit_6,Bit_7,...,Bit_18,Bit_19,Bit_20,Bit_21,Bit_22,Bit_23,Bit_24,TC,SA,Gen
0,"[21, 3, 8, 13, 14]",O=C(Oc1ccc(c2ccc3oc([*])nc3c2)cc1)Nc1ccc(Nc2cc...,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,1.0,1.0,0.0,0.366,-3.195528,0
1,"[23, 20, 11, 12, 5]",O=C(C=CC(=O)NC(=O)C(=O)NNC(=O)c1ccc(C(=O)c2ncc...,1.0,0.0,1.0,1.0,1.0,1.0,0.0,1.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.424,-3.575833,0
2,"[9, 21, 0, 26, 15]",O=C(C=Cc1ccc2oc(c3ccc([*])o3)nc2c1)c1ccc(Nc2cc...,0.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0,...,1.0,0.0,0.0,1.0,1.0,1.0,1.0,0.363,-3.416657,0
3,"[13, 14, 1, 12, 8]",O=C(O[*])c1cnc(CCCCCCc2nc3cc(Nc4ccc(N[*])cc4)c...,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.328,-3.306609,0
4,"[4, 27, 30, 10, 23]",O=C(C=CC(=O)NC(=O)C=CC(=O)n1c(=O)c2cc3c(=O)n(N...,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,...,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.409,-4.181535,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
443,"[28, 25, 29, 25, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)Nc3ccc(C(=O)N[*])c...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.897,-2.618636,44
474,"[28, 3, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.882,-2.366756,47
498,"[28, 13, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,1.0,1.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.873,-2.739115,49
499,"[28, 5, 25, 29, 25]",O=C(Nc1ccc(NC(=O)c2ccc(C(=O)N[*])cc2)cc1)c1ccc...,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,1.0,1.0,0.0,0.0,1.0,0.746,-2.618810,49


In [23]:
# Create the output directory first: to_csv does not create missing parent
# directories and would fail otherwise. The path is relative to the current
# working directory, which an earlier cell changed to ./sources.
os.makedirs("./results", exist_ok=True)
df_HV.to_csv("./results/MOEA_HV.csv")
df_dup.to_csv("./results/MOEA_candidates.csv")