In [None]:
# Import necessary libraries
import os
import cv2
import numpy as np
import yaml
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.preprocessing import MinMaxScaler

# Load dataset and normalization ranges
dataset_path = "./datasets/test_dataset"
image_folder = os.path.join(dataset_path, "images")
params_folder = os.path.join(dataset_path, "params")
ranges_file = os.path.join(dataset_path, "ranges.yml")

with open(ranges_file, "r") as file:
    normalization_ranges = yaml.load(file, Loader=yaml.FullLoader)

# Function to load and preprocess the dataset
def load_dataset(image_folder, params_folder, normalization_ranges):
    images = []
    params = []

    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            # Load and preprocess images
            img_path = os.path.join(image_folder, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (512, 512))  # Resize to your desired input size
            img = img / 255.0  # Normalize to [0, 1]
            images.append(img)

            # Load corresponding parameters
            param_file_path = os.path.join(params_folder, f"{os.path.splitext(filename)[0]}.yml")
            with open(param_file_path, "r") as param_file:
                param_data = yaml.load(param_file, Loader=yaml.FullLoader)
                params.append(param_data)

    # Convert lists to numpy arrays
    images = np.array(images)
    params = np.array(params)

    # Normalize parameters using MinMaxScaler
    scaler = MinMaxScaler()
    params_normalized = scaler.fit_transform(params)

    return images, params_normalized

# Load the dataset
images, params_normalized = load_dataset(image_folder, params_folder, normalization_ranges)

# Define the VGG model as encoder
# You can load the pre-trained VGG model from Keras applications
from tensorflow.keras.applications import VGG16
vgg_model = VGG16(weights='imagenet', include_top=False, input_shape=(512, 512, 3))
encoder_output = vgg_model.output

# Add your own decoder layers
decoder_input = layers.Flatten()(encoder_output)
decoder_output = layers.Dense(units=your_output_units, activation='your_activation_function')(decoder_input)

# Create your model
model = models.Model(inputs=vgg_model.input, outputs=decoder_output)

# Compile your model with the appropriate loss and optimizer
model.compile(optimizer='your_optimizer', loss='your_loss_function')

# Train your model using images and normalized parameters
model.fit(images, params_normalized, epochs=your_epochs, batch_size=your_batch_size, validation_split=your_validation_split)


In [None]:
import yaml

import torch
import torch.nn as nn
import torch.optim as optim

In [None]:
# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        # Example VGG-like architecture for demonstration
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64
            nn.Flatten(),  # size: 64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

model = 1

In [None]:
# Encoder-Decoders (own)

import yaml

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64
            nn.Flatten(),  # size: 64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
with open('./ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(4096, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        print(x.size())
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage
# Create an instance of the EncoderDecoderModel with a pre-trained encoder
encoder = Encoder()  # Use your pre-trained encoder here
model = EncoderDecoderModel(encoder, decoders)

# Example input tensor with shape (batch_size, channels, height, width)
input_tensor = torch.randn(2, 1, 512, 512)

# Forward pass
output = model(input_tensor)

# Print the output shapes for each decoder
for param_name, decoder_output in output.items():
    print(f"{param_name} decoder output shape: {decoder_output.shape}")


torch.Size([2, 4096])
Bm Base Shape decoder output shape: torch.Size([2, 2])
Bm Size decoder output shape: torch.Size([2, 3])
Floor Ledge Extrusion X decoder output shape: torch.Size([2, 1])
Floor Ledge Extrusion Z decoder output shape: torch.Size([2, 1])
Floor Ledge Size X decoder output shape: torch.Size([2, 1])
Floor Ledge Size Z decoder output shape: torch.Size([2, 1])
Has Floor Ledge decoder output shape: torch.Size([2, 2])
Has Window Ledge decoder output shape: torch.Size([2, 2])
Num Floors decoder output shape: torch.Size([2, 1])
Num Windows Each Side decoder output shape: torch.Size([2, 1])
Rf Base Shape decoder output shape: torch.Size([2, 3])
Rf Size decoder output shape: torch.Size([2, 3])
Window Divided Horizontal decoder output shape: torch.Size([2, 2])
Window Divided Vertical decoder output shape: torch.Size([2, 2])
Window Interpanel Offset Percentage Y decoder output shape: torch.Size([2, 1])
Window Interpanel Offset Percentage Z decoder output shape: torch.Size([2, 1])


In [1]:
from google.colab import files
uploaded = files.upload()

Saving test_dataset0202.zip to test_dataset0202.zip


In [None]:
!unzip test_dataset0202.zip

In [None]:
!mkdir datasets

In [5]:
!mv test_dataset/ datasets/

In [6]:
# Encoder-Decoders

import yaml

import torch
import torch.nn as nn
import torch.optim as optim

# Define Encoder using VGG (you may replace it with your desired encoder)
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64
            nn.Flatten(),  # size: 64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

# Define Decoder using a simple 3-layer MLP
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

# Load the ranges from the YAML file
with open('./ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

# Create decoders based on the mapping
decoders = nn.ModuleDict({
    param_name: Decoder(4096, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})

# Complete model with one Pre-trained Encoder and multiple Decoders
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        print(x.size())
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

# Example usage
# Create an instance of the EncoderDecoderModel with a pre-trained encoder
# encoder = Encoder()  # Use your pre-trained encoder here
# model = EncoderDecoderModel(encoder, decoders)

# # Example input tensor with shape (batch_size, channels, height, width)
# input_tensor = torch.randn(2, 1, 512, 512)

# # Forward pass
# output = model(input_tensor)

# # Print the output shapes for each decoder
# for param_name, decoder_output in output.items():
#     print(f"{param_name} decoder output shape: {decoder_output.shape}")


KeyboardInterrupt: 

# Formal

In [1]:
import yaml
import os
import yaml
from PIL import Image

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms

In [2]:
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1),  # size: 512x512
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 256x256
            nn.Conv2d(16, 32, kernel_size=3, padding=1),  # size: 256x256
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 128x128
            nn.Conv2d(32, 64, kernel_size=3, padding=1),  # size: 128x128
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),  # size: 64x64
            nn.Flatten(),  # size: 64x64
            nn.Linear(64 * 64 * 64, 4096)  # size: 4096
        )

    def forward(self, x):
        x = self.features(x)
        return x

In [3]:
class Decoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(Decoder, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

In [4]:
class EncoderDecoderModel(nn.Module):
    def __init__(self, encoder, decoders):
        super(EncoderDecoderModel, self).__init__()
        self.encoder = encoder
        self.decoders = decoders

    def forward(self, x):
        x = self.encoder(x)
        batch_size = x.size(0)  # Get the batch size
        x = x.view(batch_size, -1)  # Flatten the feature tensor, considering the batch size
        decoder_outputs = {param_name: decoder(x) for param_name, decoder in self.decoders.items()}
        return decoder_outputs

In [5]:
class DAGDataset(torch.utils.data.Dataset):
    def __init__(self, dataset_name: str, datasets_folder: str="./datasets", transform=None):
        self.dataset_name = dataset_name
        self.datasets_folder = datasets_folder
        self.dataset_path = os.path.join(self.datasets_folder, self.dataset_name)
        self.images_folder = os.path.join(self.dataset_path, "images")
        self.params_folder = os.path.join(self.dataset_path, "params")
        self.ranges_file_path = os.path.join(self.dataset_path, "ranges.yml")
        self.ranges = None
        self.transform = transforms.Compose(
            [transforms.Resize((512, 512)), transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
            ) if transform is None else transform
        self.data = self.load_data()

    def load_data(self):
        # Load the ranges from the YAML file
        with open(self.ranges_file_path, 'r') as file:
            self.ranges = yaml.safe_load(file)
        # read images and parameters
        data = []
        for image_name in os.listdir(self.images_folder):
            image_path = os.path.join(self.images_folder, image_name)
            param_path = os.path.join(self.params_folder, os.path.splitext(image_name)[0] + ".yml")
            with open(param_path, 'r') as file:
                param = yaml.safe_load(file)
            # normalize
            param = self.preprocess(param)
            data.append((image_path, param))
        return data

    def preprocess(self, param):
        processed_param = {}
        # for float and vector: normalize with min max
        # for states, bool: convert to one hot
        # for ints: treat as float, but round back to int when saving as param
        for param_name, param_spec in self.ranges.items():
            if param_spec['type'] == 'float' or param_spec['type'] == 'int' or param_spec['type'] == 'vector':
                processed_param[param_name] = self.normalize(param[param_name], param_spec)
            elif param_spec['type'] == 'states' or param_spec['type'] == 'bool':
                processed_param[param_name] = self.one_hot(param[param_name], param_spec)
            else:
                raise ValueError(f"Unsupported parameter type: {param_spec['type']}")
        return processed_param

    def normalize(self, value, param_spec):
        if param_spec['type'] == 'float' or param_spec['type'] == 'int':
            return (value - param_spec['min']) / (param_spec['max'] - param_spec['min'])
        elif param_spec['type'] == 'vector':
            return [(value[i] - param_spec[f'{dim}min']) / (param_spec[f'{dim}max'] - param_spec[f'{dim}min']) for i, dim in enumerate(['x', 'y', 'z'])]
        else:
            raise ValueError(f"Unsupported parameter type: {param_spec['type']}")

    def one_hot(self, value, param_spec):
        if param_spec['type'] == 'states':
            index = param_spec['values'].index(value)
            return [1 if i == index else 0 for i in range(len(param_spec['values']))]
        elif param_spec['type'] == 'bool':
            # make bools onehot too to make it consistent
            return [1, 0] if value else [0, 1]
        else:
            raise ValueError(f"Unsupported parameter type: {param_spec['type']}")


    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        sample, target = self.data[idx]
        sample = Image.open(sample).convert('L')

        if self.transform:
            sample = self.transform(sample)

        # convert target's values to tensor
        for key, value in target.items():
            target[key] = torch.tensor(value, dtype=torch.float32)

        return sample, target

In [6]:
dataset = DAGDataset("test_dataset")

In [7]:
dataset.__getitem__(0)

(tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]]]),
 {'Bm Base Shape': tensor([0., 1.]),
  'Bm Size': tensor([0.2754, 0.3033, 0.5714]),
  'Floor Ledge Extrusion X': tensor(0.7033),
  'Floor Ledge Extrusion Z': tensor(0.8891),
  'Floor Ledge Size X': tensor(0.4263),
  'Floor Ledge Size Z': tensor(0.1645),
  'Has Floor Ledge': tensor([1., 0.]),
  'Has Window Ledge': tensor([1., 0.]),
  'Num Floors': tensor(0.7500),
  'Num Windows Each Side': tensor(0.2500),
  'Rf Base Shape': tensor([0., 1., 0.]),
  'Rf Size': tensor([0.5063, 0.8231, 0.9543]),
  'Window Divided Horizontal': tensor([1., 0.]),
  'Window Divided Vertical': tensor([0., 1.]),
  'Window Interpanel Offset Percentage Y': tensor(0.8371),
  'Window Interpanel Offset Percentage Z': tensor(0.9874),
  'Window Ledge Extr

In [8]:
# split into train val and test
train_size = int(0.8 * len(dataset))
val_size = int(0.1 * len(dataset))
test_size = len(dataset) - train_size - val_size
train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

# Create DataLoader
batch_size = 32
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [9]:
# Load the ranges from the YAML file
with open('./datasets/test_dataset/ranges.yml', 'r') as file:
    ranges = yaml.safe_load(file)

# Create a mapping between parameter names and output sizes
parameter_output_mapping = {}
for param_name, param_specs in ranges.items():
    if param_specs['type'] == 'float':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'int':
        parameter_output_mapping[param_name] = 1  # 1 for scalar
    elif param_specs['type'] == 'vector':
        parameter_output_mapping[param_name] = 3  # 3 for x, y, z
    elif param_specs['type'] == 'states':
        parameter_output_mapping[param_name] = len(param_specs['values'])
    elif param_specs['type'] == 'bool':
        parameter_output_mapping[param_name] = 2  # 2 for binary encoding

encoder = Encoder()
decoders = nn.ModuleDict({
    param_name: Decoder(4096, output_size)
    for param_name, output_size in parameter_output_mapping.items()
})
model = EncoderDecoderModel(encoder, decoders)

In [10]:
# Define loss function and optimizer
# for regression, use MSELoss, for classification, use CrossEntropyLoss
class EncDecsLoss(nn.Module):
    def __init__(self, decoders):
        super(EncDecsLoss, self).__init__()
        self.decoders = decoders

    def forward(self, outputs, targets):
        loss = 0.0
        for param_name, decoder_output in outputs.items():
            decoder = self.decoders[param_name]
            loss += decoder_loss(decoder_output, targets[param_name])
        return loss

def decoder_loss(decoder_output, target):
    # Define your decoder-specific loss function here
    # For example, you can use mean squared error (MSE) loss for regression
    # or cross-entropy loss for classification
    # print(target)
    if decoder_output.size(-1) == 1:
        return nn.MSELoss()(decoder_output, target)
    else:
        return nn.CrossEntropyLoss()(decoder_output, target)

criterion = EncDecsLoss(decoders)
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training loop with train and val
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for i, data in enumerate(train_loader, 0):
        inputs, targets = data
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print(f"[{epoch + 1}, {i + 1}] loss: {running_loss / 2000:.3f}")
            running_loss = 0.0
    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for i, data in enumerate(val_loader, 0):
            inputs, targets = data
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item()
        print(f"Validation loss: {val_loss / len(val_loader)}")

print("Finished Training")

# Save your trained model if needed
torch.save(model.state_dict(), "encDecModel.pth")

  target[key] = torch.tensor(value, dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)


In [None]:
# Test the model
model.eval()
test_loss = 0.0
with torch.no_grad():
    for i, data in enumerate(test_loader, 0):
        inputs, targets = data
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
    print(f"Test loss: {test_loss / len(test_loader)}")
print("Finished Testing")