In [2]:
import numpy as np
import torch
from config import DATA_DIRECTORY
from torch.utils.data import Dataset
from split_functions import string_to_equipment
from ZW_utils import std_classes

# Load the stored results and layouts (in that order) from DATA_DIRECTORY.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code -- only point this at trusted files.
results, layouts = (
    np.load(DATA_DIRECTORY / file_name, allow_pickle=True)
    for file_name in (
        "v22DF_m2_sorted_results.npy",
        "v22DF_m2_sorted_layouts.npy",
    )
)

In [51]:
class PSI_Dataset(Dataset):
    """
    Dataset pairing padded design sequences with per-position result labels.

    Every design is converted with ``string_to_equipment`` and right-padded
    with ``pad_token`` up to ``block_size``. The scalar result of a design is
    repeated ``block_size`` times, so each label row has the same shape as its
    input row (padding positions carry the result value as well).

    Args:
        data: collection of designs, forwarded to ``string_to_equipment``
            (presumably yields one list of integer tokens per design --
            TODO confirm against ``split_functions``).
        results: one scalar result per design (numpy array or any sequence).
        classes: forwarded unchanged to ``string_to_equipment``.
        block_size: length of every padded sequence.
        training_type: ``"augmented"`` adds the rotated variants produced by
            ``augment_data``; any other value keeps the designs as they are.
        pad_token: token id used for right-padding. Defaults to 11, the value
            that was previously hard-coded.

    Attributes:
        base: the ``data`` argument as passed in.
        data: integer tensor of shape ``(n_designs, block_size)``.
        labels: float64 tensor with the same shape as ``data``.
        results: the results aligned with ``data`` (a list after augmentation).

    Raises:
        ValueError: if a design is longer than ``block_size`` or the number of
            results does not match the number of designs.
    """

    def __init__(self, data, results, classes, block_size,
                 training_type="standard", pad_token=11):
        self.base = data
        print("Designs in the dataset:", len(self.base))
        self.results = results
        self.classes = classes
        self.block_size = block_size
        self.pad_token = pad_token
        self.data = string_to_equipment(data, self.classes)
        if training_type == "augmented":
            self.data, self.results = self.augment_data(self.data, self.results)
            print("Data augmented:", len(self.data) - len(self.base))
        self.data = self._pad_designs(self.data, block_size, pad_token)
        self.labels = self.output_prep(self.data, self.results)
        print("Input shape:", self.data.shape, "Output shape:", self.labels.shape)

    def __len__(self):
        """Return the number of (possibly augmented) designs."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(padded design, label row)`` pair stored at ``idx``."""
        return self.data[idx], self.labels[idx]

    @staticmethod
    def _pad_designs(designs, block_size, pad_token):
        """Right-pad every design to ``block_size`` and stack them into one tensor."""
        padded = []
        for index, design in enumerate(designs):
            tokens = list(design)
            if len(tokens) > block_size:
                # Without this check the rows would be ragged (obscure torch
                # error) or the tensor would silently be wider than block_size.
                raise ValueError(
                    f"Design {index} has {len(tokens)} tokens, "
                    f"which exceeds block_size={block_size}"
                )
            padded.append(tokens + [pad_token] * (block_size - len(tokens)))
        # The explicit reshape keeps the (0, block_size) shape for an empty dataset.
        return torch.tensor(padded, dtype=torch.long).reshape(len(padded), block_size)

    def augment_data(self, designs, rs):
        """
        Extend the designs with every non-trivial cyclic rotation of their interior.

        The first and last token of a design stay in place; the tokens between
        them are rotated by 1 .. len(interior) - 1 positions. Each rotated copy
        inherits the result of the design it was derived from.

        Args:
            designs: sequence of token sequences.
            rs: sequence (list or numpy array) with one result per design.

        Returns:
            ``(all_designs, all_results)`` as two lists: the original entries
            followed by the augmented ones. The inputs are not modified.

        Raises:
            ValueError: if ``designs`` and ``rs`` differ in length.
        """
        designs = list(designs)
        rs = list(rs)
        if len(designs) != len(rs):
            raise ValueError(
                f"Got {len(designs)} designs but {len(rs)} results"
            )
        augmented = []
        augmented_results = []
        for design, result in zip(designs, rs):
            tokens = list(design)
            head, interior, tail = tokens[:1], tokens[1:-1], tokens[-1:]
            for shift in range(1, len(interior)):
                # Same element order as np.roll(interior, shift).
                rotated = interior[-shift:] + interior[:-shift]
                augmented.append(head + rotated + tail)
                augmented_results.append(result)
        return designs + augmented, rs + augmented_results

    def output_prep(self, data, results):
        """
        Build the label tensor by repeating each result across its design's row.

        Args:
            data: 2-D tensor of padded designs, shape ``(n_designs, block_size)``.
            results: one scalar result per row of ``data``.

        Returns:
            float64 tensor with the same shape as ``data``.

        Raises:
            ValueError: if ``results`` is not one-dimensional or its length
                differs from the number of rows in ``data``.
        """
        n_rows, width = data.shape
        # A fixed dtype keeps the labels independent of whether the individual
        # results are Python floats or numpy scalars.
        values = np.asarray(results, dtype=np.float64)
        if values.ndim != 1 or values.shape[0] != n_rows:
            raise ValueError(
                f"Expected {n_rows} scalar results, got an array of shape {values.shape}"
            )
        return torch.tensor(values).unsqueeze(1).repeat(1, width)
            
        

In [52]:
# Build the augmented dataset; every design is padded to 22 tokens.
a = PSI_Dataset(
    data=layouts,
    results=results,
    classes=std_classes,
    block_size=22,
    training_type="augmented",
)


Designs in the dataset: 2041
Data augmented: 23776
Input shape: torch.Size([25817, 22]) Output shape: torch.Size([25817, 22])


(tensor([ 0,  7,  4,  1,  5,  2,  3,  9,  5,  7,  4,  1,  4, 11, 11, 11, 11, 11,
         11, 11, 11, 11]),
 tensor([127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508,
         127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508,
         127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508, 127.6508,
         127.6508], dtype=torch.float64),
 torch.Size([22]),
 torch.Size([22]))

In [13]:
from ZW_dataset import GPTDataset
# Build the reference GPTDataset from the first two layouts only.
b = GPTDataset(
    layouts[:2],
    std_classes,
    22,
    training_type="augmented",
)
# Show the first input row, its label row and the input row's shape.
(b.data[0], b.labels[0], b.data[0].shape)

Designs in the dataset: 2
Data augmented: 21
Input shape: torch.Size([23, 21])


(tensor([ 0,  7,  4,  1,  5,  2,  3,  9,  5,  7,  4,  1,  4, 11, 11, 11, 11, 11,
         11, 11, 11]),
 tensor([ 7,  4,  1,  5,  2,  3,  9,  5,  7,  4,  1,  4, 11, 11, 11, 11, 11, 11,
         11, 11, 11]),
 torch.Size([21]))