In [2]:
#Supervised training of policy-value networks to determine the best architecture
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from split_functions import classes,equipments_to_strings
from mcts_util import evaluation


layouts = np.load("M2_data_300_8_augmented_layouts.npy", allow_pickle=True)
results = np.load("M2_data_300_8_augmented_results.npy", allow_pickle=True)
data_copy = layouts.copy()
layouts = equipments_to_strings(layouts, classes)
results = 1 - (results - 125) / 175
indices = np.argsort(results)
sorted_results = np.array(results)[indices]
sorted_layouts = np.array(layouts)[indices]
unique, indices = np.unique(sorted_layouts, return_index=True)
unique_results = sorted_results[indices]
unique_layouts = sorted_layouts[indices]
print(len(unique_layouts), len(unique_results))
layouts = unique_layouts.tolist()
results = unique_results
new_layouts = []
new_results = []

# Calculate the value of data_copy
data_values = []
for layout in data_copy:
    value = evaluation(np.array(layout),new_layouts,new_results,layouts,results)
    data_values.append(value)

class LSTMemb(nn.Module):
    def __init__(self, hidden_size=32, num_layers=2, out_features=12, emb_size=16):
        super().__init__()
        self.embedding = nn.Embedding(13, embedding_dim=emb_size, padding_idx=12)
        self.lstm = nn.LSTM(
            input_size=emb_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.policyhead = nn.Linear(in_features=hidden_size, out_features=out_features)
        self.valuehead = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x, lengths):
        x = self.embedding(x.squeeze(-1).long())
        # x_packed = nn.utils.rnn.pack_padded_sequence(
        #     x, lengths, batch_first=True, enforce_sorted=False
        # )
        output, (hidden, _) = self.lstm(x)
        policy = self.policyhead(hidden[-1])
        value = self.valuehead(hidden[-1])
        return policy, value


67345 67345


In [3]:
data_values = np.array(data_values)
lengths = [len(data_copy[i]) for i in range(len(data_copy))]
chunks = []
chunk_targets = np.zeros((sum(lengths)-len(data_copy)), dtype=int)
chunk_lengths = np.zeros((sum(lengths)-len(data_copy)), dtype=int)
chunk_values = np.zeros((sum(lengths)-len(data_copy)), dtype=float)
tensor_chunks = np.ones((sum(lengths)-len(data_copy),22), dtype=int) * 12
j = 0
while j < sum(lengths)-len(data_copy):
    for i in range(len(data_copy)):
        for k in range(1,len(data_copy[i])):
            tensor_chunks[j][:k] = data_copy[i][:k]
            chunk_targets[j] = data_copy[i][k]
            chunk_lengths[j] = k
            chunk_values[j] = data_values[i]
            j += 1

In [4]:
print(tensor_chunks.shape, chunk_targets.shape, chunk_lengths.shape, chunk_values.shape)
print(tensor_chunks[-10:], chunk_targets[-10:], chunk_lengths[-10:], chunk_values[-10:])

(967054, 22) (967054,) (967054,) (967054,)
[[ 0  4  9  2  3  7  3  4  1  5  4 12 12 12 12 12 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1 12 12 12 12 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4 12 12 12 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7 12 12 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2 12 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2  3 12 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2  3  2 12 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2  3  2  4 12 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2  3  2  4  5 12 12 12]
 [ 0  4  9  2  3  7  3  4  1  5  4  1  4  7  2  3  2  4  5  2 12 12]] [ 1  4  7  2  3  2  4  5  2 11] [11 12 13 14 15 16 17 18 19 20] [0.93424008 0.93424008 0.93424008 0.93424008 0.93424008 0.93424008
 0.93424008 0.93424008 0.93424008 0.93424008]


In [5]:
for emb_size in [8, 16, 32,64]:
    for hidden_size in [32,64,128,256]:
        data = torch.tensor(tensor_chunks, dtype=torch.int64)
        targets = torch.tensor(chunk_targets, dtype=torch.int64)
        lengths = torch.tensor(chunk_lengths, dtype=torch.int64)
        values = torch.tensor(chunk_values, dtype=torch.float32)
        train_size = int(len(data) * 0.85)
        train_data = data[:train_size]
        train_targets = targets[:train_size]
        train_lengths = lengths[:train_size]
        train_values = values[:train_size]
        test_data = data[train_size:]
        test_targets = targets[train_size:]
        test_lengths = lengths[train_size:]
        test_values = values[train_size:]
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        criterion1 = nn.CrossEntropyLoss()
        criterion2 = nn.MSELoss()
        model = LSTMemb(hidden_size=hidden_size, emb_size=emb_size).to(device)
        optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
        epochs=10
        batch_size=64
        for epoch in range(epochs):
            model.train()
            for i in range(0, len(train_data), batch_size):
                optimizer.zero_grad()
                batch_data = train_data[i:i+batch_size].to(device)
                batch_targets = train_targets[i:i+batch_size].to(device)
                batch_lengths = train_lengths[i:i+batch_size].to(device)
                batch_values = train_values[i:i+batch_size].to(device)
                policy, value = model(batch_data, batch_lengths)
                loss1 = criterion1(policy, batch_targets)
                loss2 = criterion2(value.squeeze(), batch_values)
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()
            print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
            model.eval()
            with torch.no_grad():
                test_policy, test_value = model(test_data.to(device), test_lengths.to(device))
                test_loss1 = criterion1(test_policy, test_targets.to(device))
                test_loss2 = criterion2(test_value.squeeze(), test_values.to(device))
                test_loss = test_loss1 + test_loss2
                print(f"Test Loss: {test_loss.item():.4f}")
                test_policy = F.softmax(test_policy, dim=1)
                test_policy = torch.argmax(test_policy, dim=1)
                accuracy = (test_policy == test_targets.to(device)).float().mean()
                print(f"Test Accuracy: {accuracy.item():.4f}")

            indices = np.arange(len(train_data))
            np.random.shuffle(indices)
            train_data = train_data[indices]
            train_targets = train_targets[indices]
            train_lengths = train_lengths[indices]
            train_values = train_values[indices]


        torch.save(model.state_dict(), f"SFT_pvmodels_{hidden_size}_{emb_size}.pt")



Epoch 1/10, Loss: 1.5891
Test Loss: 1.8988
Test Accuracy: 0.3106
Epoch 2/10, Loss: 1.4048
Test Loss: 1.8974
Test Accuracy: 0.3375
Epoch 3/10, Loss: 1.4258
Test Loss: 1.7580
Test Accuracy: 0.3934
Epoch 4/10, Loss: 1.3809
Test Loss: 1.5701
Test Accuracy: 0.4406
Epoch 5/10, Loss: 1.2065
Test Loss: 1.4984
Test Accuracy: 0.4511
Epoch 6/10, Loss: 1.2658
Test Loss: 1.5246
Test Accuracy: 0.4463
Epoch 7/10, Loss: 1.0089
Test Loss: 1.4755
Test Accuracy: 0.4618
Epoch 8/10, Loss: 0.9102
Test Loss: 1.4330
Test Accuracy: 0.4669
Epoch 9/10, Loss: 1.2081
Test Loss: 1.5412
Test Accuracy: 0.4482
Epoch 10/10, Loss: 0.9824
Test Loss: 1.4366
Test Accuracy: 0.4702
Epoch 1/10, Loss: 1.5668
Test Loss: 1.6869
Test Accuracy: 0.3474
Epoch 2/10, Loss: 1.4997
Test Loss: 1.9685
Test Accuracy: 0.3453
Epoch 3/10, Loss: 1.4851
Test Loss: 1.7994
Test Accuracy: 0.4011
Epoch 4/10, Loss: 1.4410
Test Loss: 1.7904
Test Accuracy: 0.4147
Epoch 5/10, Loss: 1.4301
Test Loss: 1.7327
Test Accuracy: 0.4078
Epoch 6/10, Loss: 1.2096

In [None]:
#Supervised training of RL model
import torch
import torch.nn as nn
import numpy as np
from split_functions import string_to_equipment,equipments_to_strings
embd = True
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=32, num_layers=2, out_features=12):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.policyhead = nn.Linear(in_features=hidden_size, out_features=out_features)
        self.valuehead = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x, lengths):
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        output, (hidden, _) = self.lstm(x_packed.float())
        policy = self.policyhead(hidden[-1])
        value = self.valuehead(hidden[-1])
        return policy, value
if embd:
    class LSTM(nn.Module):
        def __init__(self, input_size=1, hidden_size=32, num_layers=2, out_features=12,emb_size=64):
            super(LSTM, self).__init__()
            self.embedding = nn.Embedding(13,embedding_dim=emb_size, padding_idx=12)
            self.lstm = nn.LSTM(
                input_size=emb_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
            )
            self.policyhead = nn.Linear(in_features=hidden_size, out_features=out_features)
            self.valuehead = nn.Linear(in_features=hidden_size, out_features=1)

        def forward(self, x, lengths):
            x = self.embedding(x.squeeze(-1).long())
            x_packed = nn.utils.rnn.pack_padded_sequence(
                x, lengths, batch_first=True, enforce_sorted=False
            )
            output, (hidden, _) = self.lstm(x)
            policy = self.policyhead(hidden[-1])
            value = self.valuehead(hidden[-1])
            return policy, value

equipment = np.load("M2_data_300_8_augmented_layouts.npy", allow_pickle=True)
#input output preparation
input_data = []
output = []
for eq in range(len(equipment)):
    for i in range(1,len(equipment[eq])):
        input_data.append(equipment[eq][:i])
        output.append(equipment[eq][i])
padded_input = np.array([a + [12] * (22 - len(a)) for a in input_data]).reshape(-1, 22, 1)
training_data = np.concatenate((padded_input, np.array(output).reshape(-1, 1, 1)), axis=1)
split = int(0.85 * len(training_data))
train_data = training_data[:split]
val_data = training_data[split:]
x_train = torch.tensor(train_data[:, :-1])
y_train = torch.tensor(train_data[:, -1]).reshape(-1).long()
x_val = torch.tensor(val_data[:, :-1])
y_val = torch.tensor(val_data[:, -1]).reshape(-1).long()
lengths_train = torch.tensor([len(a) for a in input_data[:split]])
lengths_val = torch.tensor([len(a) for a in input_data[split:]])
model = LSTM(emb_size=16)
model.train()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 10
batch_size = 100
best_model  = None
best_loss = np.inf
for epoch in range(epochs):
    epoch_loss = 0
    for i in range(0, len(x_train), batch_size):
        x_batch = x_train[i : i + batch_size]
        y_batch = y_train[i : i + batch_size]
        lengths_batch = lengths_train[i : i + batch_size]
        optimizer.zero_grad()
        policy, value = model(x_batch, lengths_batch)
        loss = criterion(policy, y_batch)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f"Epoch {epoch} Loss: {epoch_loss/len(x_train):.2f}")
    with torch.no_grad():
        policy, value = model(x_val, lengths_val)
        loss = criterion(policy, y_val)
        #prediction success
        correct = 0
        for i in range(len(policy)):
            if torch.argmax(policy[i]) == y_val[i]:
                correct += 1
        print(f"Validation Accuracy: {correct/len(x_val):.2f}")
        print(f"Validation Loss: {loss.item()/len(x_val):.2f}")
        if loss.item() < best_loss:
            best_loss = loss.item()
            best_model = model.state_dict()
            print("Best Model Updated epoch:", epoch)
    
    indices = np.random.permutation(len(x_train))
    x_train = x_train[indices]
    y_train = y_train[indices]
    lengths_train = lengths_train[indices]
   
    x = torch.tensor([0]).reshape(1,-1,1)
    lengths = torch.tensor(x.shape[1]).reshape(1)
    while x.shape[1] < 22:
        policy, value = model(x,lengths)
        next_token = torch.argmax(nn.Softmax(dim=-1)(policy),dim=-1)
        x = torch.cat((x,next_token.reshape(1,-1,1)),dim=1)
        if next_token.item() ==11:
            break
        lengths = torch.tensor(x.shape[1]).reshape(1)
    print(x.reshape(-1))
torch.save(best_model, "SFT_LSTM_model_x.pt")
torch.save(model.state_dict(), "SFT_LSTM_model_last.pt")

In [None]:
import torch
import torch.nn as nn
import numpy as np
from split_functions import string_to_equipment
embd = True
class LSTM(nn.Module):
    def __init__(self, input_size=1, hidden_size=32, num_layers=2, out_features=12):
        super(LSTM, self).__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.policyhead = nn.Linear(in_features=hidden_size, out_features=out_features)
        self.valuehead = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x, lengths):
        x_packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        output, (hidden, _) = self.lstm(x_packed.float())
        policy = self.policyhead(hidden[-1])
        value = self.valuehead(hidden[-1])
        return policy, value
if embd:
    class LSTM(nn.Module):
        def __init__(self, input_size=1, hidden_size=32, num_layers=2, out_features=12,emb_size=64):
            super(LSTM, self).__init__()
            self.embedding = nn.Embedding(13,embedding_dim=emb_size, padding_idx=12)
            self.lstm = nn.LSTM(
                input_size=emb_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
                batch_first=True,
            )
            self.policyhead = nn.Linear(in_features=hidden_size, out_features=out_features)
            self.valuehead = nn.Linear(in_features=hidden_size, out_features=1)

        def forward(self, x, lengths):
            x = self.embedding(x.squeeze(-1).long())
            x_packed = nn.utils.rnn.pack_padded_sequence(
                x, lengths, batch_first=True, enforce_sorted=False
            )
            output, (hidden, _) = self.lstm(x_packed)
            policy = self.policyhead(hidden[-1])
            value = self.valuehead(hidden[-1])
            return policy, value
model = LSTM(emb_size=16)
model.load_state_dict(torch.load("SFT_LSTM_model_x.pt"))
model.eval()
generated_designs = []
input_data = [[0]]
for N in range(1000):
    sample_x = torch.tensor(input_data[0]).reshape(1,-1,1)
    sample_lengths = torch.tensor(sample_x.shape[1]).reshape(1)
    while sample_x.shape[1] < 23:
        policy, value = model(sample_x,sample_lengths)
        next_token = torch.multinomial(nn.Softmax(dim=-1)(policy),1).reshape(-1)
        sample_x = torch.cat((sample_x,next_token.reshape(1,-1,1)),dim=1)
        if next_token.item() ==11:
            break
        sample_lengths = torch.tensor(sample_x.shape[1]).reshape(1)
    generated_designs.append(sample_x.reshape(-1).tolist())
from thermo_validity import validity
from split_functions import equipments_to_strings
strings = equipments_to_strings(generated_designs)
valid = validity(strings)
print(valid)
print("Valid/Generated:", len(valid),"/",len(generated_designs))