In [1]:
import os
# Hide every CUDA device from this process so that GPU-capable libraries
# imported afterwards fall back to the CPU.
# NOTE(review): presumably `mofnet` imports TensorFlow; these variables must be
# set before that import to take effect - confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# TensorFlow native log level 2: filter out INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = "2"

import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from mofnet import MofNet
from dataset import DataLoader

In [2]:
# Instantiate the network.
# NOTE(review): the meaning of the two constructor arguments (32, 32) is
# defined in mofnet.MofNet and is not visible here - confirm before changing.
mofnet = MofNet(32, 32)

# Build weights.
# The model is called once on minimal all-zero dummy inputs before the
# checkpoint is loaded.
# NOTE(review): presumably MofNet creates its variables lazily on first call,
# so this call must stay ahead of load_weights - confirm.
# NOTE(review): nt/nl/et are int32 inputs and st is a float32 input whose last
# axis has size 32; what each argument represents is an assumption to verify
# against MofNet's call signature.
nt = np.zeros(shape=[1, 1], dtype=np.int32)
nl = np.zeros(shape=[1, 1, 1], dtype=np.int32)
et = np.zeros(shape=[1, 1, 1], dtype=np.int32)
st = np.zeros(shape=[1, 1, 32], dtype=np.float32)
mofnet(nt, nl, et, st)

# Load trained weights.
mofnet.load_weights("MOF-50000-sa-1.h5")

# Load surface area data.
# Read the table, index it by the "structure" column and discard the
# "Unnamed: 0" column (presumably a leftover row index written with the CSV).
sa = (
    pd.read_csv("MOF-50000-sa.csv")
    .set_index("structure")
    .drop("Unnamed: 0", axis=1)
)

print("Load dataset")

# Map integer index -> MOF key string; the file holds one
# "<index> <key>" pair per line, separated by whitespace.
keys = {}
with open("MOF-50000-key.txt", "r") as f:
    for line in f:
        # Skip blank lines (e.g. a trailing empty line) instead of failing
        # on the two-value unpacking below.
        if not line.strip():
            continue
        i, key = line.split()
        keys[int(i)] = key

# NOTE(review): absolute, machine-specific path - consider making this
# configurable so the notebook runs on other machines.
topo_path = "/home/lsw/Workspace/MOF_MAKER/examples/topology.npz"

# The topology archive and both hash files store pickled Python objects
# (topology instances exposing `.name`, and dicts retrieved through `.item()`).
# NumPy >= 1.16.3 refuses to unpickle unless allow_pickle=True is passed.
# SECURITY: unpickling can execute arbitrary code - only load trusted files.
# The archive is opened in a `with` block so its file handle is closed.
with np.load(topo_path, allow_pickle=True) as topo_archive:
    topologies = {
        topo.name: topo for topo in topo_archive["topologies"]
    }
node_hash = np.load("MOF-50000-sa-node_hash.npy", allow_pickle=True).item()
edge_hash = np.load("MOF-50000-sa-edge_hash.npy", allow_pickle=True).item()
V = np.load("MOF-50000-sa-V.npy")

data_loader = DataLoader(topologies, node_hash, edge_hash, V)

##valid_keys = [keys[i] for i in sa.index]
##ys = sa["ASA_m^2/cm^3"] / 1000.0
#dataset = data_loader.make_dataset(valid_keys, ys)

Load dataset


In [6]:
# Assign a consecutive integer index to every topology that appears in `keys`
# and has at most 4 unique node types and at most 4 unique edge types.
# Indices follow the order in which topologies are first encountered.
topo_hash = {}
for mof_key in keys.values():
    topo_name = mof_key.split(":")[0]

    # Already indexed: nothing to do.
    if topo_name in topo_hash:
        continue

    # Check.
    topo = topologies[topo_name]
    fits_layout = (
        len(topo.unique_node_types) <= 4
        and len(topo.unique_edge_types) <= 4
    )
    if fits_layout:
        topo_hash[topo_name] = len(topo_hash)

count = len(topo_hash)
print("count:", count)

count: 822


In [8]:
def key2chromo(key):
    """Encode a colon-separated MOF key as a list of integer genes.

    Layout of the returned list:
      [0]    topology index, looked up in ``topo_hash``
      [1:5]  node indices from ``node_hash``, one per unique node type of the
             topology, padded with -1 up to 4 entries
      [5:9]  edge indices from ``edge_hash``, one per unique edge type of the
             topology, padded with -1 up to 4 entries

    Reads the module-level ``topologies``, ``topo_hash``, ``node_hash`` and
    ``edge_hash`` tables. Raises KeyError for an unknown topology/node/edge
    token and IndexError when the key has too few tokens.
    """
    parts = key.split(":")
    topo = topologies[parts[0]]
    genes = [topo_hash[topo.name]]

    # Position of the next unread token; token 0 was the topology name.
    cursor = 1

    for _ in topo.unique_node_types:
        genes.append(node_hash[parts[cursor]])
        cursor += 1
    # Pad the node section; a non-positive multiplier adds nothing.
    genes.extend([-1] * (1 + 4 - len(genes)))

    for _ in topo.unique_edge_types:
        genes.append(edge_hash[parts[cursor]])
        cursor += 1
    # Pad the edge section in the same way.
    genes.extend([-1] * (1 + 4 + 4 - len(genes)))

    return genes

In [11]:
# Make initial chromosome.
# Keep only the keys whose topology received an index in topo_hash.
valid_keys = {i: k for i, k in keys.items() if k.split(":")[0] in topo_hash}

n_chromos = 100

# Sample with a dedicated, seeded generator so that "Restart & Run All"
# reproduces the same initial population; the global `random` state is
# left untouched.
SAMPLE_SEED = 0
sample_rng = random.Random(SAMPLE_SEED)
sample_keys = sample_rng.sample(list(valid_keys.values()), n_chromos)

# Show each sampled key next to its chromosome encoding.
for key in sample_keys:
    print(key, key2chromo(key))

ecv:sym_8_mc_8:L_23 [237, 25, -1, -1, -1, 16, -1, -1, -1]
mmn:sym_3_mc_0:sym_6_mc_3:sym_4_mc_1:L_6:L_16:L_6 [204, 17, 15, 1, -1, 36, 37, 36, -1]
cml:sym_3_mc_0:sym_7_mc_4:L_20 [540, 17, 8, -1, -1, 23, -1, -1, -1]
raf:sym_3_mc_0:sym_3_on_1:L_17:L_4:L_4 [713, 17, 18, -1, -1, 10, 14, 14, -1]
wip:sym_6_mc_3:L_17 [124, 15, -1, -1, -1, 10, -1, -1, -1]
xaa:sym_3_on_1:sym_3_mc_0:L_25:L_46 [771, 18, 17, -1, -1, 34, 38, -1, -1]
edi:sym_4_mc_1:sym_4_on_8:L_16:L_28 [428, 1, 6, -1, -1, 37, 39, -1, -1]
cfd:sym_4_mc_1:sym_4_on_7:sym_4_mc_1:L_7:L_46:L_7:L_46 [19, 1, 2, 1, -1, 17, 38, 17, 38]
fel:sym_4_mc_1:sym_5_on_13:L_19:L_29:L_29 [616, 1, 11, -1, -1, 22, 41, 41, -1]
srd:sym_5_on_12:sym_3_mc_0:L_26:L_19 [743, 12, 17, -1, -1, 30, 22, -1, -1]
dia:sym_4_mc_1:L_31 [338, 1, -1, -1, -1, 31, -1, -1, -1]
lcv-e:sym_7_mc_4:L_20 [258, 8, -1, -1, -1, 23, -1, -1, -1]
eft:sym_5_on_13:sym_8_mc_8:sym_3_on_4:L_32:L_32 [268, 11, 25, 19, -1, 3, 3, -1, -1]
wne:sym_6_mc_3:L_38 [4, 15, -1, -1, -1, 11, -1, -1, -1]
vtx:sym

In [13]:
# Make reverse mapping.
# Each table is inverted (value -> key) so that gene indices can be turned
# back into key tokens.
hash_node = dict(zip(node_hash.values(), node_hash.keys()))
hash_edge = dict(zip(edge_hash.values(), edge_hash.keys()))
hash_topo = dict(zip(topo_hash.values(), topo_hash.keys()))

def chromo2key(chromo):
    """Decode a 9-gene chromosome back into a colon-separated key string.

    ``chromo[0]`` is a topology index, ``chromo[1..4]`` are node indices and
    ``chromo[5..8]`` are edge indices; entries equal to -1 are padding and are
    skipped. Reads the module-level ``hash_topo``, ``topologies``,
    ``hash_node`` and ``hash_edge`` tables.
    """
    topo = topologies[hash_topo[chromo[0]]]

    # Node genes occupy positions 1..4, edge genes positions 5..8.
    node_tokens = [
        hash_node[chromo[pos]] for pos in range(1, 4 + 1) if chromo[pos] != -1
    ]
    edge_tokens = [
        hash_edge[chromo[pos]] for pos in range(4 + 1, 8 + 1) if chromo[pos] != -1
    ]

    return ":".join([topo.name] + node_tokens + edge_tokens)
    
# Smoke check: decode a hand-written chromosome into its key string and pass
# that key to the data loader; the cell displays whatever key2data returns.
data_loader.key2data(chromo2key([161, 17, 5, 18, -1, 18, 18, -1, -1]))

(array([17., 17., 17., 17., 17., 17., 17., 17., 17., 17., 17., 17., 17.,
        17., 17., 17., 17., 17.,  5.,  5.,  5.,  5.,  5.,  5., 18., 18.,
        18., 18., 18., 18.]), array([[24, 20, 23, -1, -1, -1],
        [24, 20, 23, -1, -1, -1],
        [24, 20, 23, -1, -1, -1],
        [25, 21, 22, -1, -1, -1],
        [25, 21, 22, -1, -1, -1],
        [25, 21, 22, -1, -1, -1],
        [26, 19, 22, -1, -1, -1],
        [26, 19, 22, -1, -1, -1],
        [26, 19, 22, -1, -1, -1],
        [27, 18, 23, -1, -1, -1],
        [27, 18, 23, -1, -1, -1],
        [27, 18, 23, -1, -1, -1],
        [28, 18, 21, -1, -1, -1],
        [28, 18, 21, -1, -1, -1],
        [28, 18, 21, -1, -1, -1],
        [29, 19, 20, -1, -1, -1],
        [29, 19, 20, -1, -1, -1],
        [29, 19, 20, -1, -1, -1],
        [10, 11,  9, 13, 14, 12],
        [16, 17, 15,  7,  8,  6],
        [16, 17, 15,  1,  2,  0],
        [ 4,  5,  3, 13, 14, 12],
        [ 4,  5,  3,  7,  8,  6],
        [10, 11,  9,  1,  2,  0],
        [