In [1]:
import torchvision.datasets as datasets
import torch
from torch import nn
import ml_collections
from tqdm import tqdm

# Download dataset
train_data = datasets.Flowers102(root='./flower-102/train', split='train', download=True)
val_data = datasets.Flowers102(root='./flower-102/val', split='val', download=True)
test_data = datasets.Flowers102(root='./flower-102/test', split='test', download=True)

In [2]:
dtype = torch.cuda.FloatTensor
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Model Implementations:

In [3]:
# List of available pretrained resnets from pytorch
class Pretrains():
    resnet_versions = [
        'resnet18',
        'resnet34',
        'resnet50',
        'resnet101',
        'resnet152'
    ]
    vgg_versions = [
        'vgg11',
        'vgg11_bn',
        'vgg13',
        'vgg13_bn',
        'vgg16',
        'vgg16_bn',
        'vgg19',
        'vgg19_bn'
    ]

class PretrainBackbone(nn.Module):
    def __init__(self, config):
        super(ResNetBackbone, self).__init__()
        
        # Load pretrained ResNet/VGG backbone
        if config.pretrain in Pretrains.resnet_versions or config.resnet_version in Pretrains.vgg_versions:
            model = torch.hub.load('pytorch/vision:v0.10.0', config.pretrain, pretrained=True)
        else:
            raise ValueError('Invalid ResNet/VGG Version. Please select from: ' 
                             + ', '.join(Pretrains.resnet_versions + Pretrains.vgg_versions))
        
        # Segments out only the backbone layers as list, unpacks, and load into nn.Sequential
        backbone_layers = list(model.children())[:-1]
        self.backbone = nn.Sequential(*backbone_layers)
        
    def forward(self, x):
        x = self.backbone(x)
        return x

class ActivationFunction(nn.Module):
    def __init__(self, config):
        super(ActivationFunction, self).__init__()
        match config.type:
            case 'LeakyReLU':
                self.activation_func = nn.LeakyReLU(
                    config.negative_slope,
                    inplace = True
                )
            case 'ReLU':
                self.activation_func = nn.ReLU(inplace = True)
            case 'Softmax':
                self.activation_func = nn.Softmax(dim = self.dim)
            case _:
                raise ValueError('Invalid activation function or not implemented')
    def forward(self, x):
        return self.activation_func(x)

class Conv2dBlock(nn.Module):
    def __init__(self, config):
        super(Conv2dBlock, self).__init__()
        modules = []
        if config.layer_num < 1:
            raise ValueError('Number of layers cannot be less than 1')
        for layer_idx in range(config.layer_num):
            # Conv2d
            modules.append(nn.Conv2d(
                config.in_channels if not layer_idx else config.out_channels,
                config.out_channels,
                kernel_size = 3,
                padding = 1
            ))
            
            # Batch Normalization
            if config.use_batchnorm:
                modules.append(nn.BatchNorm2d(config.out_channels))
                
            # Activation function, skip this step if skip_last_activation is True
            if config.skip_last_activation and layer_idx == config.layer_num - 1:
                break   
            modules.append(ActivationFunction(config.activation_func))
        self.sequential = nn.Sequential(*modules)
        
    def forward(self, x):
        return self.sequential(x)

# Creates a mirrored Conv2dBlock
class RevConv2dBlock(nn.Module):
    def __init__(self, conv2d_block):
        super(RevConv2dBlock, self).__init__()
        
        # Reverses module from conv2d_block
        modules = list(conv2d_block.sequential)
        modules.reverse()
        module_iterator = iter(range(len(modules)))
        for idx in module_iterator:
            if isinstance(modules[idx], torch.nn.modules.batchnorm.BatchNorm2d):
                
                # Switch order of batch and conv2d
                modules[idx], modules[idx + 1] = modules[idx + 1], modules[idx]
                
                # Swap conv2d with convtranspose2d
                modules[idx] = nn.ConvTranspose2d(
                    modules[idx].out_channels,
                    modules[idx].in_channels,
                    kernel_size = modules[idx].kernel_size,
                    stride = modules[idx].stride,
                    padding = modules[idx].padding
                )
                
                modules[idx + 1] = nn.BatchNorm2d(modules[idx].out_channels)
                
                # Skip next index
                next(module_iterator)
            
        if isinstance(modules[0], ActivationFunction):
            activation_func = modules.pop(0)
            modules.append(activation_func)
            
        self.sequential = nn.Sequential(*modules)
        
    def forward(self, x):
        return self.sequential(x)
    
class VGGBackboneBlock(nn.Module):
    def __init__(self, config):
        super(VGGBackboneBlock, self).__init__()
        config.skip_last_activation = False
        
        # Conv2d
        self.conv2d_block = Conv2dBlock(config)
        
        # Maxpool
        self.maxpool = nn.MaxPool2d(
            kernel_size=config.compression_ratio, 
            stride=config.compression_ratio
        )
    
    def forward(self, x):
        out = self.conv2d_block(x)
        out = self.maxpool(out)
        return out
    
    def get_reverse(self):
        return RevVGGBackconeBlock(self)
    
class RevVGGBackconeBlock(nn.Module):
    def __init__(self, vgg_backbone_block):
        super(VGGBackboneBlock).__init__()
        # To be implemented
    def forward(self, x):
        # To be implemented
        return x
    
class ResidualBlock(nn.Module):
    def __init__(self, config):
        super(ResidualBlock, self).__init__()
        
        # Main Conv2d block
        main_block_config = config
        main_block_config.layer_num = config.main_layer_num
        main_block_config.skip_last_activation = True
        self.main_block = Conv2dBlock(main_block_config)
        
        # Shortcut Conv2d block, we leave self.shortcut_block as undefined if shortcut layer depth = 0
        if config.shortcut_layer_num:
            shortcut_block_config = config
            shortcut_block_config.layer_num = config.shortcut_layer_num
            shortcut_block_config.skip_last_activation = True
            self.shortcut_block = Conv2dBlock(shortcut_block_config)
            
        self.activation_func = ActivationFunction(config.activation_func)
        
        # Optional maxpooling layer if compression_ratio is set
        if hasattr(config, 'compression_ratio'):
            self.maxpool = nn.MaxPool2d(
                kernel_size=config.compression_ratio, 
                stride=config.compression_ratio
        )
    
    def forward(self, x):
        out = self.main_block(x)
        if hasattr(self, 'shortcut_block'):
            out += self.shortcut_block(x)
        else:
            out += x
            
        out = self.activation_func(out)
            
        if hasattr(self, 'maxpool'):
            out = self.maxpool(out)
            
        return out

    def get_reverse(self):
        # Get reversed version
        return RevResidualBlock(self)

class RevResidualBlock(nn.Module):
    def __init__(self, residual_block):
        super(RevResidualBlock, self).__init__()
        self.main_block = RevConv2dBlock(residual_block.main_block)
        
        if hasattr(residual_block, 'shortcut_block'):
            self.shortcut_block = RevConv2dBlock(residual_block.shortcut_block)
            
        if hasattr(residual_block, 'maxpool'):
            self.upsample = nn.Upsample(scale_factor=residual_block.maxpool.stride)
            
    def forward(self, x):
        if hasattr(self, 'upsample'):
            x = self.upsample(x)
        else:
            x = x
            
        out = self.main_block(x)
        
        if hasattr(self, 'shortcut_block'):
            out += self.shortcut_block(x)
        else:
            out += x
        return out
    
class Encoder(nn.Module):
    def __init__(self, config):
        super(Encoder, self).__init__()
        modules = []
        match config.type:
            case 'residual_blocks':
                for idx, block_feature in enumerate(config.features):
                    if not idx:
                        in_channels = config.in_channels
                        out_channels = block_feature
                    else:
                        in_channels = config.features[idx - 1]
                        out_channels = block_feature

                    block_config = ml_collections.ConfigDict({
                        'main_layer_num': config.main_layer_num,
                        'shortcut_layer_num': config.shortcut_layer_num,
                        'in_channels': in_channels,
                        'out_channels': out_channels,
                        'use_batchnorm': config.use_batchnorm,
                        'activation_func': config.activation_func,
                    })
                    if hasattr(config, 'compression_ratio'):
                        block_config.compression_ratio = config.compression_ratio

                    modules.append(ResidualBlock(block_config))
            case 'vgg_backbone_blocks':
                # To be implemented
                raise NotImplementedError('To be implemented')
                
        self.sequential = nn.Sequential(*modules)
    def forward(self, x):
        return self.sequential(x)
        
class Decoder(nn.Module):
    def __init__(self, arg):
        super(Decoder, self).__init__()
        # Initialize by mirroring encoder
        if isinstance(arg, Encoder):
            encoder = arg
            modules = list(encoder.sequential)
            modules.reverse()
            
            for idx in range(len(modules)):
                modules[idx] = modules[idx].get_reverse()
            self.sequential = nn.Sequential(*modules)
        # Initialize by config (not implemented since we are using mirrored encoder/decoder)
        else:
            raise NotImplementedError('This decoder class is only implemented to be initialized by mirroring an encoder class')
    def forward(self, x):
        return self.sequential(x)
    
class AutoEncoder(nn.Module):
    def __init__(self, config):
        super(AutoEncoder, self).__init__()
        # Encoder
        encoder_config = config.encoder_config
        encoder_config.in_channels = config.in_channels
        self.encoder = Encoder(encoder_config)
        
        # Check for bottleneck input size by passing dummy input to encoder
        dummy_input = torch.randn(1, config.in_channels, config.in_dimension[0], config.in_dimension[1])
        out = self.encoder.forward(dummy_input)
        out_dimension = list(out.size())
        in_bottleneck = out_dimension[1] * out_dimension[2] * out_dimension[3]
        
        # Bottleneck
        self.bottleneck = nn.Linear(in_bottleneck, config.bottleneck_width)
        
        # Bottleneck output reshaper
        self.decoder_in_shape = out_dimension
        self.bottleneck_reshape = nn.Conv2d(config.bottleneck_width, out_dimension[1], kernel_size=1)
        
        # Decoder
        self.decoder = Decoder(self.encoder)
        
    def forward(self, x):
        # Encoder
        out = self.encoder(x)
        
        # Reshape to fit bottleneck
        encoder_out_shape = out.size() 
        flatten = out.view(out.size(0), -1)
        
        # Bottleneck
        out = self.bottleneck(flatten)
        
        # Reshape to fit decoder
        out_reshaped = out.view(out.size()[0], out.size()[1], 1, 1)
        out = self.bottleneck_reshape(out_reshaped)
        out = nn.AdaptiveAvgPool2d((self.decoder_in_shape[2], self.decoder_in_shape[3]))(out)
        
        # Decoder
        out = self.decoder(out)
        
        return out

### Parameters

In [None]:
dimension = 128

### Test:

In [20]:
config_dict = {
    'in_dimension': (dimension, dimension),
    'in_channels': 3,
    'encoder_config': {
        'type': 'residual_blocks',
        'compression_ratio': 2,
        'features': [64, 128, 256, 512, 512, 512],
        'main_layer_num': 3,
        'shortcut_layer_num': 1,
        'use_batchnorm': True,
        'activation_func': {
            'type': 'LeakyReLU',
            'negative_slope': 0.1
        },
    },
    'decoder_config': {
        'mirror_encoder': True
    },
    'bottleneck_width': 256
}


test_config = ml_collections.ConfigDict(config_dict)

autoencoder = AutoEncoder(test_config).cuda()
autoencoder.forward(torch.randn(1, 3, 128, 128).type(dtype)).size()

torch.Size([1, 3, 128, 128])

In [5]:
train_data

Dataset Flowers102
    Number of datapoints: 1020
    Root location: ./flower-102/train
    split=train

### Hyperparameters

In [15]:
import copy

# All hyperparameters to be tuned for the autoencoder network
configs_dict = {
    'in_dimension': [(dimension, dimension)],
    'in_channels': [3],
    'encoder_config': {
        'type': ['residual_blocks'],
        'compression_ratio': [2],
        'features': [
            [64, 128, 256, 512, 512, 512],
            [64, 128, 256, 512, 512],
            [64, 128, 256],
            [32, 64, 128, 256, 512, 512, 512],
            [32, 64, 128, 256, 512, 512],
            [32, 64, 128, 256, 512],
            [16, 32, 64, 128, 256, 512, 512],
            [16, 32, 64, 128, 256, 512],
            [16, 32, 64, 128, 256],
        ],
        'main_layer_num': [3, 2],
        'shortcut_layer_num': [1],
        'use_batchnorm': [True, False],
        'activation_func':[
            {
                'type': 'LeakyReLU',
                'negative_slope': 0.1
            },
            {
                'type': 'LeakyReLU',
                'negative_slope': 0.2
            },
            {
                'type': 'ReLU',
            }
        ] 
            
    },
    'decoder_config': {
        'mirror_encoder': [True]
    },
    'bottleneck_width': [512, 1024, 2048]
}
configs = ml_collections.ConfigDict(configs_dict)

# Parse ConfigDict with hyperparameters to be tuned and output list of all ConfigDicts to be tested
def generate_configs(configs):
    config_list = [ml_collections.ConfigDict()]
    for key in configs:
        new_config_list = []
        
        current_key_configs = []
        if isinstance(configs[key], list):
            current_key_configs = configs[key]
        elif isinstance(configs[key], ml_collections.config_dict.config_dict.ConfigDict):
            current_key_configs = generate_configs(configs[key])
        else:
            raise TypeError(configs[key] + ' is neither a list nor an ml_collections.ConfigDict object')
#         print(key, current_key_configs)
        for key_config in current_key_configs:
            for prev_config in config_list:
                prev_config_copy = copy.deepcopy(prev_config)
                prev_config_copy[key] = key_config
                new_config_list.append(prev_config_copy)
        config_list = new_config_list
    return config_list

config_list = generate_configs(configs)        

### Dataset

In [24]:
import gc
import random
from torchvision.datasets import ImageFolder
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Subset

class AdaptiveAvgPool2dTransform:
    def __init__(self, output_size):
        self.to_tensor = transforms.ToTensor()
        self.pool = nn.AdaptiveAvgPool2d(output_size)

    def __call__(self, img):
        tensor = self.to_tensor(img)
        return self.pool(tensor)


def load_images(root_dir, transform=None, num_images_per_category=1, selection_mode='first'):
    if transform is None:
        transform = transforms.Compose([
            AdaptiveAvgPool2dTransform((dimension, dimension))
        ])
    
    # Load full dataset
    full_dataset = ImageFolder(root=root_dir, transform=transform)
    
    # Select first or random 20 images per category
    indices = []
    targets = [item[1] for item in full_dataset.imgs]
    unique_targets = set(targets)
    
    for target in unique_targets:
        target_indices = [i for i, t in enumerate(targets) if t == target]
        
        if selection_mode == 'first':
            selected_indices = target_indices[:num_images_per_category]
        elif selection_mode == 'random':
            selected_indices = random.sample(target_indices, min(num_images_per_category, len(target_indices)))
        else:
            raise ValueError(f"Invalid selection_mode: {selection_mode}. Choose 'first' or 'random'.")
        
        indices.extend(selected_indices)
    
    # Create a subset of the dataset based on the selected indices
    subset_dataset = Subset(full_dataset, indices)
    
    return subset_dataset

from sklearn.model_selection import KFold
import torch.optim as optim


# Define parameters
learning_rate = 0.001
num_folds = 5
epochs = 20
batch_size = 16

# Define loss criterion
criterion = torch.nn.MSELoss()
learning_rate = 0.001

# Define KFold
kfold = KFold(n_splits=num_folds, shuffle=True)

# Load data
root_dir = './Flowers299'
selected_dataset = load_images(root_dir, num_images_per_category=1)

# Store epoch losses for each config and fold
train_losses = {}
val_losses = {}

for config in tqdm(config_list, desc="Configurations", position=0):
    # Initialize a dict for current config
    train_losses[str(config)] = {}
    val_losses[str(config)] = {}

    for fold, (train_ids, val_ids) in enumerate(kfold.split(selected_dataset)):
        # Fold progress bar
        fold_pbar = tqdm(total=epochs, desc=f"Fold {fold+1}/{num_folds}", position=1, leave=False)
        
        # Sample elements randomly from a given list of ids, no replacement
        train_subsampler = Subset(selected_dataset, train_ids)
        val_subsampler = Subset(selected_dataset, val_ids)
        
        # Define data loaders for training and testing data for current fold
        trainloader = DataLoader(train_subsampler, batch_size=batch_size, shuffle=True)
        valloader = DataLoader(val_subsampler, batch_size=batch_size, shuffle=False)
        
        # Clear CUDA cache
        gc.collect()
        torch.cuda.empty_cache()
        
        # Initialize the AutoEncoder with current config
        model = AutoEncoder(config).cuda()
        
        # Define optimizer
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)
        
        # Store epoch losses for current fold
        train_losses_for_fold = []
        val_losses_for_fold = []

        for epoch in range(epochs):
            # Train model
            model.train()
            train_loss = 0.0
            for data in trainloader:
                inputs, _ = data  # Ignoring category labels
                inputs = inputs.type(dtype)
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, inputs)
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

            # Validate model
            model.eval()
            val_loss = 0.0
            with torch.no_grad():
                for data in valloader:
                    inputs, _ = data  # Ignoring category labels
                    inputs = inputs.type(dtype)
                    outputs = model(inputs)
                    loss = criterion(outputs, inputs)
                    val_loss += loss.item()

            # Append epoch loss
            train_losses_for_fold.append(train_loss / len(trainloader))
            val_losses_for_fold.append(val_loss / len(valloader))
            
            # Update fold progress bar
            fold_pbar.set_postfix({"Epoch Val Loss": val_loss / len(valloader)})
            fold_pbar.update(1)
        
        fold_pbar.close()

        # Update epoch losses for current fold
        train_losses[str(config)][fold] = train_losses_for_fold
        val_losses[str(config)][fold] = val_losses_for_fold

Configurations:   0%|                                                                          | 0/324 [00:00<?, ?it/s]
Fold 1/5:   0%|                                                                                 | 0/10 [00:00<?, ?it/s][A
                                                                                                                       [A
Fold 1/5:   0%|                                                               | 0/10 [00:07<?, ?it/s, Epoch Loss=0.363][A
Fold 1/5:  10%|█████▌                                                 | 1/10 [00:07<01:06,  7.35s/it, Epoch Loss=0.363][A
Fold 1/5:  10%|█████▌                                                 | 1/10 [00:13<01:06,  7.35s/it, Epoch Loss=0.206][A
Fold 1/5:  20%|███████████                                            | 2/10 [00:13<00:54,  6.86s/it, Epoch Loss=0.206][A
Fold 1/5:  20%|███████████                                            | 2/10 [00:20<00:54,  6.86s/it, Epoch Loss=0.168][A
Fold 1/5:  30%|████

KeyboardInterrupt: 

In [21]:
import time
import gc
num_batch = 128
batch_sizes = [4, 8, 16, 32, 64]
time_taken = []
for batch_size in tqdm(batch_sizes):
    torch.cuda.empty_cache()
    autoencoder = AutoEncoder(test_config).cuda()
    start_time = time.time()
    for batch in range(int(num_batch/batch_size)):
        autoencoder.forward(torch.randn(batch_size, 3, 128, 128).type(dtype))
    time_taken.append((batch_size, time.time() - start_time))

100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:10<00:00,  2.11s/it]


In [22]:
for data_point in time_taken:
    print(f'Batch Size {data_point[0]}: {data_point[1]}')

Batch Size 4: 0.6248235702514648
Batch Size 8: 0.47405552864074707
Batch Size 16: 0.43006157875061035
Batch Size 32: 1.019606113433838
Batch Size 64: 4.961598634719849


In [20]:
import gc
gc.collect()
torch.cuda.empty_cache()

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


### WIP Stuffs