In [7]:
from ZW_utils import std_classes, dataloading
from ZW_dataset import PSI_Dataset
import numpy as np
from config import DATA_DIRECTORY
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
from split_functions import uniqueness_check


# --- vocabulary / data settings -------------------------------------------
classes = std_classes  # class list imported from ZW_utils
vocab_size = len(classes)
block_size = 22  # presumably the maximum sequence length -- TODO confirm
data_split_ratio = 0.85  # presumably the training fraction of the data -- TODO confirm

# --- optimisation settings --------------------------------------------------
batch_size = 8
max_epochs = 30
learning_rate = 1e-3
criterion = nn.MSELoss()  # mean-squared-error regression loss

# --- model-size settings ----------------------------------------------------
# The trailing numbers reproduce the author's own annotations on each value.
n_embd = 32  # 32
n_head = 4  # 4
n_layer = 2  # 2
dropout = 0.1  # 0.1

In [8]:
# Load layouts/results, drop out-of-range samples, and build the prefix-chunk
# dataset: every distinct prefix of every equipment sequence is paired with the
# minimum result over all sequences that start with that prefix.

# layouts = np.load(DATA_DIRECTORY/"v22DF_m2_sorted_layouts.npy", allow_pickle=True)
# results = np.load(DATA_DIRECTORY/"v22DF_m2_sorted_results.npy", allow_pickle=True)
# NOTE(security): allow_pickle=True unpickles arbitrary objects; only load trusted files.
layouts = np.load("GPT_NA_psitest/M2_data_8_layouts.npy", allow_pickle=True)
results = np.load("GPT_NA_psitest/M2_data_8_results.npy", allow_pickle=True)

# Keep only samples whose result lies strictly between 0 and the cutoff.
l2 = []
r2 = []
cutoff = 143.957
for i, r in enumerate(results):
    if 0 < r < cutoff:
        l2.append(layouts[i])
        r2.append(r)
layouts = np.asanyarray(l2)
results = np.asanyarray(r2)
print("layouts", layouts.shape)

# NOTE(review): the code below assumes equipments[k] belongs to results[k],
# i.e. that uniqueness_check keeps the order and length of `layouts` -- confirm.
designs, equipments = uniqueness_check(layouts)

# Sort the sequences by index so every sequence keeps its *own* result.
# (Looking results up with list.index() is quadratic and always returns the
# first occurrence, which mislabels duplicates.)
order = sorted(range(len(equipments)), key=lambda k: equipments[k])
sorted_equipments = [equipments[k] for k in order]
sorted_results = [results[k] for k in order]

# Dense zero-padded view of the sorted sequences and their results. These are
# no longer used for prefix matching (see below) but are still defined in case
# a later cell inspects them.
eq_array = np.zeros((len(sorted_equipments), 22))
for i, e in enumerate(sorted_equipments):
    eq_array[i, : len(e)] = e
re_array = np.array(sorted_results)

# Running minimum result for every prefix. Matching prefixes against the
# zero-padded eq_array is ambiguous because 0 is also a real token: a shorter
# sequence padded with zeros matches any longer prefix that ends in zeros.
# Keying on the exact prefix avoids that, and dict lookups replace the
# repeated scans of the chunk list. Insertion order keeps chunks in
# first-seen order, exactly as before.
prefix_min = {}
for equipment, result in zip(sorted_equipments, sorted_results):
    for end in range(1, len(equipment) + 1):
        key = tuple(equipment[:end])
        if key not in prefix_min or result < prefix_min[key]:
            prefix_min[key] = result
equipment_chunks = [list(key) for key in prefix_min]
results_chunks = list(prefix_min.values())
print(25,equipment_chunks[25], results_chunks[25])

layouts (3121,)
25 [0, 1, 2, 1, 2, 3, 5, 4, 1, 5, 4] 143.66786601745056


In [9]:
# Length of every prefix chunk; the longest one fixes the padded width.
lengths = torch.tensor([len(chunk) for chunk in equipment_chunks])
max_length = lengths.max()

# Right-pad every chunk with the value 12 so all rows share the same width.
padded = np.full((len(equipment_chunks), int(max_length)), 12.0)
for row, chunk in zip(padded, equipment_chunks):
    row[: len(chunk)] = chunk  # `row` is a view, so this writes into `padded`
input_data = torch.tensor(padded)

# Targets as a float32 column vector, one row per chunk.
target_data = torch.tensor(results_chunks).float().reshape(-1, 1)
print(input_data.shape, target_data.shape)

torch.Size([15773, 19]) torch.Size([15773, 1])


In [10]:
# Min-max scale the targets so they lie in [0, 1].
# print(target_data.min().item(), target_data.max().item())
target_min = target_data.min()
target_max = target_data.max()
target_data = (target_data - target_min) / (target_max - target_min)
# Alternative (disabled): standardise to zero mean and unit variance.
# target_data = (target_data - target_data.mean()) / target_data.std()


In [11]:
# Shuffle inputs, targets and lengths with one shared permutation so the rows
# stay aligned, then split into a training and a validation part.
indices = torch.randperm(len(input_data))
input_data = input_data[indices]
target_data = target_data[indices]
lengths = lengths[indices]

# Use the configured split ratio (previously 0.85 was hard-coded six times and
# the config value was ignored); compute the boundary once.
split_index = int(data_split_ratio * len(input_data))
train_data, val_data = input_data[:split_index], input_data[split_index:]
train_target, val_target = target_data[:split_index], target_data[split_index:]
train_lengths, val_lengths = lengths[:split_index], lengths[split_index:]
print(train_data[25], train_target[25], train_lengths[25])

tensor([ 0.,  1.,  2.,  3.,  7.,  5.,  4.,  1.,  7.,  1.,  5.,  2.,  3.,  9.,
        11., 12., 12., 12., 12.], dtype=torch.float64) tensor([0.5131]) tensor(15)


In [12]:
class MLP(nn.Module):
    """Three-layer fully connected regressor over a fixed-width padded input.

    Args:
        hidden_size: width of both hidden layers.
        input_size: number of input features per sample (default 21, the
            previously hard-coded value).
        pad_token: input value treated as padding and zeroed before the first
            layer (default 12, the previously hard-coded value).
    """

    def __init__(self, hidden_size, input_size=21, pad_token=12):
        super().__init__()
        self.pad_token = pad_token
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, x):
        """Zero out padded positions and return one prediction per row.

        Args:
            x: tensor whose last dimension has ``input_size`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1.
        """
        # Cast to the layer dtype so float64 or integer inputs do not trigger
        # a dtype mismatch inside nn.Linear.
        dtype = self.model[0].weight.dtype
        mask = (x != self.pad_token).to(dtype)
        # NOTE(review): padding becomes 0 here, which is indistinguishable
        # from a genuine 0 entry in the input -- confirm this is intended.
        x = x.to(dtype) * mask
        return self.model(x)


# LSTM regressor: entries equal to 12 (padding) are zeroed before the recurrent layers
class LSTM(nn.Module):
    """Two-layer LSTM followed by a linear head applied to every time step.

    Args:
        hidden_size: size of the LSTM hidden state.
    """

    def __init__(self, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=21, hidden_size=hidden_size, num_layers=2, batch_first=True
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=1)

    def forward(self, x):
        """Return the linear head's output for every step of the LSTM output.

        NOTE(review): the LSTM expects 21 features per step, and the head is
        applied to all steps rather than only the last one -- confirm that a
        per-step output is what callers want.
        """
        keep = x.ne(12).float()
        states, _ = self.lstm(x * keep)
        return self.fc(states)


class LSTM_packed(nn.Module):
    """Embedding + packed multi-layer LSTM regressor for padded token sequences.

    Args:
        embd_size: embedding dimension of each token.
        hidden_size: size of the LSTM hidden state.
        vocab_size: number of embedding rows (default 13, the previously
            hard-coded value).
        num_layers: number of stacked LSTM layers (default 2, as before).
        dropout: dropout between LSTM layers (default 0.1, as before).
    """

    def __init__(self, embd_size, hidden_size, vocab_size=13, num_layers=2, dropout=0.1):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embd_size)
        self.lstm = nn.LSTM(
            embd_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x, lengths):
        """Predict one value per sequence from its unpadded tokens.

        Args:
            x: (batch, seq) tensor of token ids; any dtype that can be cast
                to long.
            lengths: number of valid tokens in each row (tensor or sequence
                of ints).

        Returns:
            (batch, 1) tensor computed from the last layer's final hidden state.
        """
        x = self.embedding(x.long())
        # pack_padded_sequence requires lengths as a CPU int64 tensor, even
        # when the model and inputs live on another device.
        lengths = torch.as_tensor(lengths, dtype=torch.int64, device="cpu")
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths, batch_first=True, enforce_sorted=False
        )
        # Packing makes the LSTM skip padded positions, so `hidden` holds the
        # state after each sequence's last valid token.
        _, (hidden, _) = self.lstm(packed)
        return self.fc(hidden[-1])

In [13]:
def _run_epoch(model, data, target, lengths, batch_size, optimizer=None):
    """Run one pass over the data in mini-batches and return the per-sample mean loss.

    With an ``optimizer`` the model is put in training mode and updated after
    every batch; without one it is put in eval mode and gradients are disabled.
    Batch losses are weighted by batch size so a short final batch does not
    skew the average.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    with torch.set_grad_enabled(training):
        for start in range(0, len(data), batch_size):
            stop = start + batch_size
            input_batch = data[start:stop]
            target_batch = target[start:stop]
            lengths_batch = lengths[start:stop]
            if training:
                optimizer.zero_grad()
            output = model(input_batch, lengths_batch)
            loss = criterion(output, target_batch)
            if training:
                loss.backward()
                optimizer.step()
            total_loss += loss.item() * len(input_batch)
    return total_loss / len(data)


# Sweep over model sizes: train with early stopping on the validation loss,
# save the best checkpoint, then report its relative error on the validation set.
for embd_size in [128]:
    for hidden_size in [1024]:
        batch_size = 4  # NOTE: replaces the notebook-level batch_size from here on
        max_patience = 10
        patience = max_patience
        model = LSTM_packed(embd_size,hidden_size)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        best_loss = float("inf")
        # Reset per configuration so a checkpoint from an earlier run can never
        # be saved under this configuration's file name.
        best_model = None
        best_model_epoch = None
        # NOTE(review): range(max_epochs + 1) runs max_epochs + 1 epochs -- confirm intended.
        for epoch in range(max_epochs+1):
            epoch_loss = _run_epoch(
                model, train_data, train_target, train_lengths, batch_size, optimizer
            )

            # Reshuffle the training set for the next epoch.
            indices = torch.randperm(len(train_data))
            train_data = train_data[indices]
            train_target = train_target[indices]
            train_lengths = train_lengths[indices]

            val_loss = _run_epoch(model, val_data, val_target, val_lengths, batch_size)
            if val_loss < best_loss:
                best_model_epoch = epoch
                best_loss = val_loss
                # state_dict() returns references to the live parameters, so
                # clone them; otherwise later epochs overwrite the "best" weights.
                best_model = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
                patience = max_patience
            else:
                patience -= 1
            if patience == 0:
                break
            # print(f"Epoch {epoch} Training Loss: {epoch_loss:.2f} Validation Loss: {val_loss:.2f}")
        if best_model is None:
            raise RuntimeError(
                "validation loss never improved (is the loss NaN?); no checkpoint to save"
            )
        torch.save(best_model, f"psi_norm_min_{embd_size}_{hidden_size}_{batch_size}_{cutoff}_8.pt")
        # best model prediction and mean error
        model.load_state_dict(best_model)
        model.eval()
        print("Best Model Prediction",embd_size,hidden_size)
        print("batch_size",batch_size,"found epoch",best_model_epoch)
        relative_errors = []
        with torch.no_grad():
            for i in range(0, len(val_data)):
                target_value = val_target[i].item()
                if target_value == 0:
                    # Relative error is undefined for a zero target; skip it
                    # and leave it out of the average's denominator too.
                    continue
                input_batch = val_data[i].unsqueeze(0)
                lengths_batch = val_lengths[i].unsqueeze(0)
                output = model(input_batch, lengths_batch)
                relative_errors.append(abs(output.item() - target_value) / abs(target_value))
            if relative_errors:
                mean_error = sum(relative_errors) / len(relative_errors)
            else:
                mean_error = float("nan")
            print(f"Mean Error: {mean_error*100:.2f}%")
            # Print a few randomly chosen validation samples as a sanity check.
            for _ in range(5):
                random_index = np.random.randint(0, len(val_data))
                random_input = val_data[random_index]
                random_target = val_target[random_index]
                random_length = val_lengths[random_index]
                random_output = model(random_input.unsqueeze(0), random_length.unsqueeze(0))
                target_value = random_target.item()
                predicted_value = random_output.item()
                # Guard the division: a zero target has no relative error.
                if target_value != 0:
                    error_pct = abs((target_value - predicted_value) / target_value) * 100
                else:
                    error_pct = float("nan")
                print(f"Target: {target_value:.2f} Prediction: {predicted_value:.2f} Error: {error_pct:.2f}")


Best Model Prediction 128 1024
batch_size 4 found epoch 27
Mean Error: 5.25%
Target: 0.50 Prediction: 0.51 Error: 1.80
Target: 0.67 Prediction: 0.70 Error: 4.15
Target: 0.84 Prediction: 0.85 Error: 1.12
Target: 0.79 Prediction: 0.81 Error: 1.97
Target: 0.57 Prediction: 0.60 Error: 4.59


In [8]:
# # Disabled scratch cell (kept for reference): reload a saved checkpoint and report its mean relative error and R2
# embd_size = 128
# hidden_size = 1024
# batch_size = 100
# cutoff = 143.957
# model = LSTM_packed(embd_size,hidden_size)
# model.load_state_dict(torch.load(f"psi_norm_{embd_size}_{hidden_size}_100_1.pt"))
# model.eval()
# print("Best Model Prediction",embd_size,hidden_size)
# # print("batch_size",batch_size,"found epoch",best_model_epoch)
# mean_error = 0
# with torch.no_grad():
#     for i in range(0, len(input_data)):
#         input_batch = input_data
#         target_batch = target_data
#         lengths_batch = lengths
#         output = model(input_batch, lengths_batch)
#         if target_batch.item() == 0:
#             continue
#         mean_error += ((torch.abs(output - target_batch))/torch.abs(target_batch)).item()
#     mean_error /= len(input_data)
#     print(f"Mean Error: {mean_error*100:.2f}%")


#     mean_val = np.mean(val_target.numpy())
#     predicted = np.zeros(len(val_data))
#     ssres = 0 
#     for i in range(len(val_data)):
#         # random_index = np.random.randint(0, len(val_data))
#         random_index = i
#         random_input = val_data[random_index]
#         random_target = val_target[random_index]
#         random_length = val_lengths[random_index]
#         random_output = model(random_input.unsqueeze(0), random_length.unsqueeze(0))
#         predicted[i] = random_output.item()
#         ssres += (random_target - random_output)**2
#         # print(f"Target: {random_target.item():.2f} Prediction: {random_output.item():.2f} Error: {abs((random_target.item() - random_output.item())/random_target.item())*100:.2f}")
#     sstot = np.sum((val_target.numpy() - mean_val)**2)
#     r2 = 1 - ssres/sstot
#     print("R2",r2)