In [1]:
import torch
import torch.nn as nn
from ZW_utils import *
from ZW_model import GPT
import torch.nn.functional as F

In [2]:
# --- Vocabulary -------------------------------------------------------------
# `std_classes` is not defined in this notebook; presumably it comes from the
# `ZW_utils` star import -- TODO confirm.
classes = std_classes
vocab_size = len(classes)

# --- Generator (GPT) architecture: passed to the GPT constructor below -------
block_size = 22
n_embd = 32
n_head = 4
n_layer = 2
dropout = 0.1

# --- Training-style settings (not referenced by the cells visible here) ------
data_split_ratio = 0.80
batch_size = 100
max_epochs = 30
learning_rate = 1e-3

# --- Sampling -----------------------------------------------------------------
# Number of sequences drawn by the generation loop.
N = 3000

class LSTM_packed(nn.Module):
    """Embedding -> stacked LSTM -> linear head giving one scalar per sequence.

    Variable-length batches are handled by packing the padded input, so the
    padded positions never reach the LSTM.

    Args:
        embd_size: Width of the token embedding vectors.
        hidden_size: Width of the LSTM hidden state.
        num_embeddings: Number of rows in the embedding table (default 13,
            the value that used to be hard-coded).
        num_layers: Number of stacked LSTM layers (default 2).
        dropout: Dropout applied between LSTM layers (default 0.1).

    The submodule names (`embedding`, `lstm`, `fc`) are part of the
    checkpoint format and must not be renamed.
    """

    def __init__(self, embd_size, hidden_size, num_embeddings=13, num_layers=2, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings, embd_size)
        self.lstm = nn.LSTM(
            embd_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x, lengths):
        """Score a batch of padded token sequences.

        Args:
            x: (batch, seq) tensor of token ids; any dtype that casts to long.
            lengths: True length of every sequence (list of ints or 1-D tensor).

        Returns:
            (batch, 1) tensor computed from the last layer's final hidden state.
        """
        embedded = self.embedding(x.long())
        # pack_padded_sequence requires a lengths tensor to live on the CPU,
        # even when the data itself is on an accelerator.
        if isinstance(lengths, torch.Tensor):
            lengths = lengths.cpu()
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )
        # Only the final hidden state is needed; per-step outputs are dropped.
        _, (hidden, _) = self.lstm(packed)
        return self.fc(hidden[-1])

# Generator (phi): GPT built from the hyperparameters defined above.
phi = GPT(vocab_size, n_embd, n_head, n_layer, block_size, dropout)

# Predictor (psi): packed LSTM with embedding size 128 and hidden size 1024.
psi = LSTM_packed(128, 1024)

# map_location="cpu" lets a checkpoint that was saved from a GPU load on a
# CPU-only machine; the rest of this notebook creates its tensors on the CPU.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
phi.load_state_dict(torch.load("GPT_NA/M2_model_0.pt", map_location="cpu"))
psi.load_state_dict(torch.load("psi_norm_128_1024.pt", map_location="cpu"))

<All keys matched successfully>

In [20]:
phi.eval()
psi.eval()

N = 3000  # number of sequences to sample
top_p = 0.9  # probability mass kept by nucleus (top-p) sampling
num_tokens = 12  # candidate next-token ids 0..11 scored at every step
max_new_tokens = 22  # upper bound on tokens appended after the start token
end_token = len(classes) - 1  # sampling this id terminates the sequence

int_to_char = dict(enumerate(classes))


def decode(token_ids):
    """Concatenate the class symbols for a sequence of token ids."""
    return "".join(int_to_char[t] for t in token_ids)


def sample_top_p(probs, top_p):
    """Sample one token id from the smallest top set whose mass reaches `top_p`.

    Args:
        probs: (1, n) tensor of probabilities.
        top_p: Cumulative probability threshold.

    Returns:
        (1, 1) long tensor holding the sampled token id.
    """
    sorted_probs, sorted_ids = probs.sort(dim=-1, descending=True)
    # Number of prefix sums still below the threshold, plus one, is the
    # smallest k whose top-k mass reaches top_p.
    below = sorted_probs.cumsum(dim=-1) < top_p
    k = min(int(below.sum()) + 1, probs.size(-1))
    nucleus = sorted_probs[:, :k]
    pick = torch.multinomial(nucleus / nucleus.sum(), 1)
    return sorted_ids[0][pick]


equipment_list = []
string_list = []

# One row per candidate next token; identical for every step, so build it once.
psi_token_stack = torch.arange(num_tokens).reshape(num_tokens, 1)

# Pure inference: skip autograd bookkeeping.
with torch.no_grad():
    # The loop variable name `i` is kept unchanged: it stays defined at
    # notebook scope after the loop finishes.
    for i in range(N):
        idx = torch.zeros((1, 1), dtype=torch.long)  # start token 0
        for _ in range(max_new_tokens):
            # Generator logits for the next position only.
            phi_logits = phi(idx)[:, -1, :]

            # Current prefix extended by each candidate token, scored by psi.
            idx_stack = torch.cat((idx.repeat(num_tokens, 1), psi_token_stack), dim=1)
            lengths = torch.full((num_tokens,), idx_stack.size(1), dtype=torch.long)
            psi_logits = psi(idx_stack, lengths)

            # Combine generator logits with (1 - predictor output) per candidate.
            combined = (phi_logits.flatten() + (1 - psi_logits.flatten())).reshape(1, num_tokens)
            probs = F.softmax(combined, dim=-1)

            idx_next = sample_top_p(probs, top_p)
            idx = torch.cat((idx, idx_next), dim=1)
            if idx_next.item() == end_token:
                break

        idx = idx.flatten().tolist()
        string_list.append(decode(idx))
        equipment_list.append(idx)

In [22]:
from thermo_validity import validity

cutoff = 143.957
save_path = "GPT_NA_psitest"
# NOTE(review): allow_pickle=True unpickles the file -- only load trusted data.
dataset = np.load(f"{save_path}/initial_10k_good_layouts.npy", allow_pickle=True)
generated_layouts = string_list

# Run the validity check once and reuse the result for every statistic below.
# Presumably it returns the layouts that pass the check -- TODO confirm.
valid_layouts = validity(generated_layouts)
unique_strings = np.unique(np.array(valid_layouts, dtype=object))

print("Number of generated layouts: ", len(generated_layouts))
print("Number of valid layouts: ", len(valid_layouts))
print("Number of unique valid layouts: ", len(unique_strings))

p_datalist = dataset
datalist = np.unique(np.concatenate((p_datalist, unique_strings), axis=0))
# Keep only the strings that are not already part of the previous dataset.
candidates = datalist[~np.isin(datalist, p_datalist)]
print("Number of unique valid new layouts: ", len(candidates))
# np.save(f"{save_path}/psiphi_generated_M2_0.npy", generated_layouts)

Number of generated layouts:  3000
Number of valid layouts:  2933
Number of unique valid layouts:  2457
Number of unique valid new layouts:  2441


In [None]:

from ZW_Transmain import optimization, optimization_filter

# Label the outputs explicitly rather than with a loop counter left over from
# an earlier cell (which would be N - 1 after the sampling loop).
# NOTE(review): 0 is chosen to match the loaded checkpoint
# "GPT_NA/M2_model_0.pt" -- confirm this is the intended run index.
iteration = 0

# Optimization of the new strings
candidates_results = optimization(
    candidates, classes, save_path, f"candidates_{iteration}"
)
# Filtering the results above the threshold
good_layouts, good_results = optimization_filter(
    candidates_results, candidates, cutoff, f"M2_{iteration}"
)