In [None]:
import torch
import torch.nn as nn
import torchvision.models as models

# Define the encoder network using pre-trained VGG16
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        self.features = nn.Sequential(*list(vgg16.features.children()))
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.fc = nn.Linear(512 * 7 * 7, 4096)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Define the decoder network
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, x):
        x = self.fc(x)
        return x

# Create the encoder and decoder networks
encoder = Encoder()
decoders = []
params = ['param1', 'param2', 'param3']  # Replace with actual parameter names

for param in params:
    decoder = Decoder(4096, output_size)  # Replace output_size with the actual size of the parameter
    decoders.append(decoder)

# Load the input image
image_path = './datasets/test_dataset/images/image.jpg'  # Replace with the actual image path
image = torch.randn(1, 1, 224, 224)  # Replace with the actual image tensor

# Encode the image
embedding = encoder(image)

# Decode the parameters
decoded_params = []
for decoder in decoders:
    decoded_param = decoder(embedding)
    decoded_params.append(decoded_param)

# Print the decoded parameters
for param, decoded_param in zip(params, decoded_params):
    print(f'{param}: {decoded_param}')


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
import yaml
from sklearn.preprocessing import MinMaxScaler
from PIL import Image
import os

# Define your custom dataset class
class CustomDataset(Dataset):
    def __init__(self, image_folder, params_folder, normalization_ranges, transform=None):
        self.image_folder = image_folder
        self.params_folder = params_folder
        self.normalization_ranges = normalization_ranges
        self.transform = transform

        self.image_filenames = [filename for filename in os.listdir(image_folder) if filename.endswith(".png")]

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_filename = self.image_filenames[idx]

        # Load image
        img_path = os.path.join(self.image_folder, img_filename)
        img = Image.open(img_path).convert('L')  # Convert to grayscale
        if self.transform:
            img = self.transform(img)

        # Load corresponding parameters
        param_filename = os.path.splitext(img_filename)[0] + ".yml"
        param_path = os.path.join(self.params_folder, param_filename)
        with open(param_path, "r") as param_file:
            param_data = yaml.load(param_file, Loader=yaml.FullLoader)

        # Normalize parameters based on the provided ranges
        for key, value in param_data.items():
            if key in self.normalization_ranges:
                if self.normalization_ranges[key]['type'] == 'float':
                    param_data[key] = (value - self.normalization_ranges[key]['min']) / (self.normalization_ranges[key]['max'] - self.normalization_ranges[key]['min'])
                elif self.normalization_ranges[key]['type'] == 'int':
                    param_data[key] = (value - self.normalization_ranges[key]['min']) / (self.normalization_ranges[key]['max'] - self.normalization_ranges[key]['min'])
                elif self.normalization_ranges[key]['type'] == 'vector':
                    for dim in ['x', 'y', 'z']:
                        param_data[key][dim] = (value[dim] - self.normalization_ranges[key][f'{dim}min']) / (self.normalization_ranges[key][f'{dim}max'] - self.normalization_ranges[key][f'{dim}min'])
                elif self.normalization_ranges[key]['type'] == 'states':
                    # Convert states to one-hot encoding
                    states = self.normalization_ranges[key]['values']
                    param_data[key] = [1 if state == value else 0 for state in states]
                elif self.normalization_ranges[key]['type'] == 'bool':
                    # Convert bool to 0 or 1
                    param_data[key] = 1 if value else 0

        return img, param_data


# Define the custom decoder network
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Load dataset and normalization ranges
dataset_path = "./datasets/test_dataset"
image_folder = os.path.join(dataset_path, "images")
params_folder = os.path.join(dataset_path, "params")
ranges_file = os.path.join(dataset_path, "ranges.yml")

with open(ranges_file, "r") as file:
    normalization_ranges = yaml.load(file, Loader=yaml.FullLoader)

# Create dataset instance
transform = transforms.Compose([
    transforms.Resize((512, 512)),
    transforms.ToTensor()
])

custom_dataset = CustomDataset(image_folder, params_folder, normalization_ranges, transform)

# Create DataLoader
batch_size = 32
dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=True)

# Define VGG model as encoder
vgg_model = models.vgg16(pretrained=True)
vgg_model.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
encoder = vgg_model.features

# Create decoder instance
decoder_input_size = 512  # Adjust based on the output of the encoder
decoder_output_size = your_output_size  # Adjust based on your parameter output size
decoder = Decoder(decoder_input_size, decoder_output_size)

# Define your model
class YourModel(nn.Module):
    def __init__(self, encoder, decoder):
        super(YourModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)  # Flatten the output of the encoder
        x = self.decoder(x)
        return x

# Instantiate your model
model = YourModel(encoder, decoder)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 10

for epoch in range(epochs):
    for batch in dataloader:
        images, params = batch

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Compute the loss
        loss = criterion(outputs, params)

        # Backward pass
        loss.backward()

        # Update weights
        optimizer.step()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")

# Save your trained model if needed
torch.save(model.state_dict(), "your_model.pth")


In [12]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # Example VGG-like architecture for demonstration
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(), 
            nn.Linear(256 * 28 * 28, 4096)
        )

    def forward(self, x):
        x = self.features(x)
        return x
model = Encoder()
# check output size
x = torch.randn(1, 1, 224, 224)
output = model(x)
print(output.size())  # torch.Size([1, 256, 28, 28])

torch.Size([1, 4096])


In [1]:
# Encoder-Decoders (own)

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # Example VGG-like architecture for demonstration
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256 * 28 * 28, 4096)
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Complete model with one Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoder_count, decoder_output_size):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = nn.ModuleList([Decoder(4096, decoder_output_size) for _ in range(decoder_count)])

    def forward(self, x):
        x = self.encoder(x)
        decoder_outputs = [decoder(x) for decoder in self.decoders]
        return decoder_outputs

# Example usage
if __name__ == '__main__':
    # Create an instance of the EncoderDecoderModel
    encoder = Encoder()
    decoder_count = 3
    decoder_output_size = 10  # Change this to your desired output size
    model = EncoderDecoderModel(encoder, decoder_count, decoder_output_size)

    # Example input tensor with shape (batch_size, channels, height, width)
    input_tensor = torch.randn(2, 1, 224, 224)

    # Forward pass
    output = model(input_tensor)

    # Print the output shapes for each decoder
    for i, decoder_output in enumerate(output):
        print(f"Decoder {i + 1} output shape: {decoder_output.shape}")
        print(decoder_output)


Decoder 1 output shape: torch.Size([2, 10])
tensor([[ 0.0023, -0.0169,  0.0799,  0.0611, -0.0463, -0.0411,  0.0378, -0.0634,
          0.0316, -0.0387],
        [ 0.0023, -0.0129,  0.0778,  0.0571, -0.0494, -0.0460,  0.0412, -0.0643,
          0.0299, -0.0415]], grad_fn=<AddmmBackward0>)
Decoder 2 output shape: torch.Size([2, 10])
tensor([[ 0.0568,  0.0541,  0.0270, -0.0402,  0.0625, -0.0515,  0.0577, -0.0658,
          0.0035, -0.0482],
        [ 0.0597,  0.0535,  0.0264, -0.0321,  0.0657, -0.0503,  0.0589, -0.0611,
          0.0045, -0.0452]], grad_fn=<AddmmBackward0>)
Decoder 3 output shape: torch.Size([2, 10])
tensor([[ 0.0510,  0.0615, -0.0405,  0.0265,  0.0909, -0.0070, -0.0178, -0.0118,
         -0.0439,  0.0020],
        [ 0.0467,  0.0568, -0.0375,  0.0309,  0.0885, -0.0070, -0.0252, -0.0161,
         -0.0382,  0.0054]], grad_fn=<AddmmBackward0>)


In [19]:
# Encoder-decoders (VGG16 Encoder)


import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

# Define Encoder using a pre-trained VGG16
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        # Remove the fully connected layers to get features only
        self.features = nn.Sequential(*list(vgg16.features.children()))

        # Set parameters of the encoder to be non-trainable
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoder_count, decoder_output_size):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = nn.ModuleList([Decoder(512 * 16 * 16, decoder_output_size) for _ in range(decoder_count)])

    def forward(self, x):
        x = self.encoder(x)
        decoder_outputs = [decoder(x.view(x.size(0), -1)) for decoder in self.decoders]
        return decoder_outputs

# Example usage
if __name__ == '__main__':
    # Create an instance of the EncoderDecoderModel with a pre-trained encoder
    encoder = Encoder()
    decoder_count = 3
    decoder_output_size = 10  # Change this to your desired output size
    model = EncoderDecoderModel(encoder, decoder_count, decoder_output_size)

    # Example input tensor with shape (batch_size, channels, height, width)
    input_tensor = torch.randn(2, 3, 512, 512)

    # Forward pass
    output = model(input_tensor)

    # Print the output shapes for each decoder
    for i, decoder_output in enumerate(output):
        print(f"Decoder {i + 1} output shape: {decoder_output.shape}")


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /home/zsy/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:20<00:00, 26.5MB/s] 


Decoder 1 output shape: torch.Size([2, 10])
Decoder 2 output shape: torch.Size([2, 10])
Decoder 3 output shape: torch.Size([2, 10])


In [29]:
import torch
import torch.nn as nn

import torch.optim as optim
import torchvision.models as models

# Define Encoder using a pre-trained VGG16
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        # Remove the fully connected layers to get features only
        self.features = nn.Sequential(*list(vgg16.features.children()))

        # Set parameters of the encoder to be non-trainable
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.features(x)
        return x

class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
import yaml

with open('./datasets/test_dataset/ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding



# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(512, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        print(x.size())
        decoder_outputs = {param_name: decoder(x.view(x.size(0), -1))
                           for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage

# Create an instance of the EncoderDecoderModel with a pre-trained encoder
encoder = Encoder()  # Use your pre-trained encoder here
model = EncoderDecoderModel(encoder, decoders)

# Example input tensor with shape (batch_size, channels, height, width)
input_tensor = torch.randn(2, 3, 512, 512)

# Forward pass
output = model(input_tensor)

# Print the output shapes for each decoder
for param_name, decoder_output in output.items():
    print(f"{param_name} decoder output shape: {decoder_output.shape}")


torch.Size([2, 512, 16, 16])


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x131072 and 512x256)

In [34]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import yaml

# Define Encoder using a pre-trained VGG16
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        vgg16 = models.vgg16(pretrained=True)
        # Remove the fully connected layers to get features only
        self.features = nn.Sequential(*list(vgg16.features.children()))

        # Set parameters of the encoder to be non-trainable
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        x = self.features(x)
        return x

class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
with open('./datasets/test_dataset/ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(512, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage

# Create an instance of the EncoderDecoderModel with a pre-trained encoder
encoder = Encoder()  # Use your pre-trained encoder here
model = EncoderDecoderModel(encoder, decoders)

# Example input tensor with shape (batch_size, channels, height, width)
input_tensor = torch.randn(2, 3, 224, 224)

# Forward pass
output = model(input_tensor)

# Print the output shapes for each decoder
for param_name, decoder_output in output.items():
    print(f"{param_name} decoder output shape: {decoder_output.shape}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x25088 and 512x256)

In [None]:
# Encoder-Decoders (own)

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # Example VGG-like architecture for demonstration
        self.features = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Flatten(),
            nn.Linear(256 * 28 * 28, 4096)
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 512)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(512, 256)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(256, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Complete model with one Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoder_count, decoder_output_size):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = nn.ModuleList([Decoder(4096, decoder_output_size) for _ in range(decoder_count)])

    def forward(self, x):
        x = self.encoder(x)
        decoder_outputs = [decoder(x) for decoder in self.decoders]
        return decoder_outputs

# Example usage
if __name__ == '__main__':
    # Create an instance of the EncoderDecoderModel
    encoder = Encoder()
    decoder_count = 3
    decoder_output_size = 10  # Change this to your desired output size
    model = EncoderDecoderModel(encoder, decoder_count, decoder_output_size)

    # Example input tensor with shape (batch_size, channels, height, width)
    input_tensor = torch.randn(2, 1, 224, 224)

    # Forward pass
    output = model(input_tensor)

    # Print the output shapes for each decoder
    for i, decoder_output in enumerate(output):
        print(f"Decoder {i + 1} output shape: {decoder_output.shape}")
        print(decoder_output)


In [None]:
# Encoder-Decoders (own)

import yaml

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # Example VGG-like architecture for demonstration
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512x16
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256x16
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256x32
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128x32
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128x64
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64x64
            nn.Flatten(),  # size: 64x64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
with open('./ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(512, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        print(x.size())
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage
# Create an instance of the EncoderDecoderModel with a pre-trained encoder
encoder = Encoder()  # Use your pre-trained encoder here
model = EncoderDecoderModel(encoder, decoders)

# Example input tensor with shape (batch_size, channels, height, width)
input_tensor = torch.randn(2, 1, 512, 512)

# Forward pass
output = model(input_tensor)

# Print the output shapes for each decoder
for param_name, decoder_output in output.items():
    print(f"{param_name} decoder output shape: {decoder_output.shape}")


In [None]:
# Encoder-Decoders (own) working from colab

import yaml

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128 
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64
            nn.Flatten(),  # size: 64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
with open('./ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(4096, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        print(x.size())
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage
# Create an instance of the EncoderDecoderModel with a pre-trained encoder
encoder = Encoder()  # Use your pre-trained encoder here
model = EncoderDecoderModel(encoder, decoders)

# Example input tensor with shape (batch_size, channels, height, width)
input_tensor = torch.randn(2, 1, 512, 512)

# Forward pass
output = model(input_tensor)

# Print the output shapes for each decoder
for param_name, decoder_output in output.items():
    print(f"{param_name} decoder output shape: {decoder_output.shape}")


In [None]:
class DAGDataset(torch.utils.data.Dataset):
    def __init__(self, dataset_name: str, datasets_folder: str="./datasets", transform=None):
        self.dataset_name = dataset_name
        self.datasets_folder = datasets_folder
        self.dataset_path = os.path.join(self.datasets_folder, self.dataset_name)
        self.images_folder = os.path.join(self.dataset_path, "images")
        self.params_folder = os.path.join(self.dataset_path, "params")
        self.ranges_file_path = os.path.join(self.dataset_path, "ranges.yml")
        self.ranges = None
        self.transform = transforms.Compose(
            [transforms.Resize((512, 512)), transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
            ) if transform is None else transform
        self.data = self.load_data()

    def load_data(self):
        # Load the ranges from the YAML file
        with open(self.ranges_file_path, 'r') as file:
            self.ranges = yaml.safe_load(file)
        # read images and parameters
        data = []
        for image_name in os.listdir(self.images_folder):
            image_path = os.path.join(self.images_folder, image_name)
            param_path = os.path.join(self.params_folder, os.path.splitext(image_name)[0] + ".yml")
            with open(param_path, 'r') as file:
                param = yaml.safe_load(file)
            # normalize
            param = self.preprocess(param)
            data.append((image_path, param))
        return data

    def preprocess(self, param):
        processed_param = {}
        # for float and vector: normalize with min max
        # for states, bool: convert to one hot
        # for ints: treat as float, but round back to int when saving as param
        for param_name, param_spec in self.ranges.items():
            if param_spec['type'] == 'float' or param_spec['type'] == 'int' or param_spec['type'] == 'vector':
                processed_param[param_name] = self.normalize(param[param_name], param_spec)
            elif param_spec['type'] == 'states' or param_spec['type'] == 'bool':
                processed_param[param_name] = self.one_hot(param[param_name], param_spec)
            else: 
                raise ValueError(f"Unsupported parameter type: {param_spec['type']}")
        return processed_param
    
    def normalize(self, value, param_spec):
        if param_spec['type'] == 'float' or param_spec['type'] == 'int':
            return (value - param_spec['min']) / (param_spec['max'] - param_spec['min'])
        elif param_spec['type'] == 'vector':
            return [(value[i] - param_spec[f'{dim}min']) / (param_spec[f'{dim}max'] - param_spec[f'{dim}min']) for i, dim in enumerate(['x', 'y', 'z'])]
        else: 
            raise ValueError(f"Unsupported parameter type: {param_spec['type']}")

    def one_hot(self, value, param_spec):
        if param_spec['type'] == 'states':
            index = param_spec['values'].index(value)
            return [1 if i == index else 0 for i in range(len(param_spec['values']))]
        elif param_spec['type'] == 'bool':
            # make bools onehot too to make it consistent
            return [1, 0] if value else [0, 1]
        else:
            raise ValueError(f"Unsupported parameter type: {param_spec['type']}")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample, target = self.data[idx]
        sample = Image.open(sample).convert('L')

        if self.transform:
            sample = self.transform(sample)

        # convert target's values to tensor if it's not already
        for key, value in target.items():
            if not isinstance(value, torch.Tensor):
                target[key] = torch.tensor(value, dtype=torch.float32)

        return sample, target

In [None]:
# Create an instance of the DAGDataset
dataset = DAGDataset("test_dataset")

# split into train val and test
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Create DataLoader
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

# Define model
encoder = Encoder()
decoders = nn.ModuleDict({
    param_name: Decoder(4096, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})
model = EncoderDecoderModel(encoder, decoders)

# Define loss function and optimizer
# for regression, use MSELoss, for classification, use CrossEntropyLoss
class EncDecsLoss(nn.Module):
    def __init__(self, decoders):
        super(EncDecsLoss, self).__init__()
        self.decoders = decoders

    def forward(self, outputs, targets):
        loss = 0.0
        for param_name, decoder_output in outputs.items():
            decoder = self.decoders[param_name]
            loss += decoder_loss(decoder_output, targets[param_name])
        return loss

def decoder_loss(decoder_output, target):
    # Define your decoder-specific loss function here
    # For example, you can use mean squared error (MSE) loss for regression
    # or cross-entropy loss for classification
    if decoder_output.size(-1) == 1:
        # get rid of unnecessary dimension
        decoder_output = decoder_output.squeeze(-1)
        return nn.MSELoss()(decoder_output, target)
    else:
        return nn.CrossEntropyLoss()(decoder_output, target)

criterion = EncDecsLoss(decoders)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop with train and val
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, targets = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 2000:.3f}")
            running_loss = 0.0
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, targets = data
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
        print(f"Validation loss: {val_loss / len(val_loader)}")

print("Finished Training")

# Save your trained model if needed
torch.save(model.state_dict(), "encDecModel.pth")

# Test the model
model.eval()
test_loss = 0.0
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        inputs, targets = data
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
    print(f"Test loss: {test_loss / len(test_loader)}")
print("Finished Testing")