# Dataloader
The dataloader needs to support the following:  
[x] Data Augmentations.  
[x] Load CIFAR10 dataset through numpy.   
[x] Bag creation procedures.  
    [x] ucc1
    [x] ucc1-4

In [None]:
import numpy as np
import torch

# Open the pre-split CIFAR-10 archive; the individual arrays are read by key below.
dataset = np.load("./Data/splitted_cifar10_dataset.npz")

# Wrap every split as a torch tensor, images first and then labels.
x_test = torch.from_numpy(dataset['x_test'])
x_val = torch.from_numpy(dataset['x_val'])
x_train = torch.from_numpy(dataset['x_train'])

y_test = torch.from_numpy(dataset['y_test'])
y_train = torch.from_numpy(dataset['y_train'])
y_val = torch.from_numpy(dataset['y_val'])

import torch
from torchvision.transforms import v2

# Image preprocessing pipeline: resize to 224x224, then convert to float32.
# NOTE(review): ToDtype is called without a `scale` argument, so pixel values
# are presumably cast without being rescaled -- confirm against torchvision docs.
transforms = v2.Compose(
    [
        v2.Resize(size=(224, 224)),
        v2.ToDtype(dtype=torch.float32),
    ]
)

from torch.utils.data import Dataset, DataLoader

class CIFAR10(Dataset):
    """Map-style dataset over pre-loaded image and label tensors.

    Args:
        image_tensors: Indexable collection of image tensors. Each item must
            have exactly three dimensions because ``__getitem__`` permutes
            its axes with ``(2, 1, 0)``.
        image_labels: Tensor of labels. Its length defines the dataset size
            and each item must support ``.item()``.
        transform: Optional callable applied to the permuted image.
        target_transform: Optional callable applied to the label.
    """

    def __init__(self, image_tensors, image_labels, transform=None, target_transform=None):
        self.image_tensors, self.image_labels = image_tensors, image_labels
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of labels, which is used as the dataset size."""
        return len(self.image_labels)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``."""
        # Reverse the axis order so that the last axis (channels) comes first.
        # NOTE(review): for an HWC input this yields CWH (height and width are
        # swapped), not CHW; permute(2, 0, 1) would give CHW. Left unchanged
        # because the plotting code in this file inverts it with the same
        # (2, 1, 0) permute, so both places must be changed together.
        sample = self.image_tensors[idx].permute(2, 1, 0)
        target = self.image_labels[idx].item()

        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target

# Wrap each split in a Dataset. Only the training split receives the
# preprocessing pipeline.
# NOTE(review): validation and test items are therefore not resized or cast to
# float32 like training items -- confirm this is intended.
train_dataset = CIFAR10(image_tensors=x_train, image_labels=y_train, transform=transforms)
val_dataset = CIFAR10(image_tensors=x_val, image_labels=y_val)
test_dataset = CIFAR10(image_tensors=x_test, image_labels=y_test)

# One shuffled loader per split, all using batches of 64.
train_loader, val_loader, test_loader = (
    DataLoader(split, batch_size=64, shuffle=True)
    for split in (train_dataset, val_dataset, test_dataset)
)

# Smoke test: pull a single training batch and report the image and label shapes.
train_image, train_label = next(iter(train_loader))
print(train_image.shape, train_label.shape, sep="\n")


In [None]:
import matplotlib.pyplot as plt

# Draw one training batch and report the image-batch and label-batch shapes.
train_image, train_label = next(iter(train_loader))
print(train_image.shape, train_label.shape, sep="\n")

# Show the first image of the batch: cast to uint8 for display and reverse the
# axis order, which undoes the (2, 1, 0) permute applied by the dataset.
print(train_image[0].shape)
plt.imshow(train_image[0].to(torch.uint8).permute(2, 1, 0))
plt.axis('off')

In [None]:
import random
from itertools import combinations

def choose_classes(all_labels, ucc_range, generator="random",
                   combination_min=1, combination_max=100):
    """Build a shuffled list of class tuples, one tuple per candidate bag.

    Args:
        all_labels: Tensor of labels; its unique values are the candidate
            classes.
        ucc_range: ``(low, high)`` pair; tuples are produced for every unique
            class count from ``low`` to ``high`` inclusive.
        generator: ``"random"`` draws, for each class count, a random number
            of tuples, each sampled without replacement from the classes.
            ``"combination"`` enumerates every combination of each size.
        combination_min: Smallest number of tuples drawn per class count in
            ``"random"`` mode (inclusive).
        combination_max: Largest number of tuples drawn per class count in
            ``"random"`` mode (inclusive).

    Returns:
        A list of tuples of class values, in random order.

    Raises:
        ValueError: If ``generator`` is not a supported name, or (from
            ``random.sample``) if a class count in ``"random"`` mode exceeds
            the number of distinct classes.
    """
    classes = torch.unique(all_labels).tolist()
    ucc_values = range(ucc_range[0], ucc_range[1] + 1)

    if generator == "random":
        chosen = []
        for ucc in ucc_values:
            num_combinations = random.randint(combination_min, combination_max)
            chosen.extend(
                tuple(random.sample(classes, ucc)) for _ in range(num_combinations)
            )
    elif generator == "combination":
        chosen = [combo for ucc in ucc_values for combo in combinations(classes, ucc)]
    else:
        # Fail loudly: an empty result here would only surface later as an
        # unrelated indexing error in the caller.
        raise ValueError(
            f"unknown generator {generator!r}; expected 'random' or 'combination'"
        )

    random.shuffle(chosen)
    return chosen

def choose_instances(grouped_indices, classes, generator="even", bag_size=100):
    """Pick the dataset indices that make up one bag.

    Args:
        grouped_indices: Sequence or mapping such that
            ``grouped_indices[c]`` is the list of dataset indices belonging
            to class ``c``.
        classes: Classes the bag must contain, each used as a key into
            ``grouped_indices``.
        generator: ``"even"`` splits ``bag_size`` as evenly as possible
            across the classes (earlier classes receive the remainder).
            ``"random"`` gives every class a random share of at least one
            instance; the last class takes whatever is left so the bag is
            always full.
        bag_size: Total number of instances in the bag. Defaults to 100,
            the value that used to be hard-coded.

    Returns:
        A list of ``bag_size`` dataset indices, grouped by class in the order
        of ``classes``. Instances are drawn with replacement, so an index may
        repeat. An empty ``classes`` yields an empty list.

    Raises:
        ValueError: If ``generator`` is unknown, if ``"random"`` mode is asked
            for more classes than ``bag_size`` can hold, or if a class that
            must contribute instances has none.
    """
    if generator not in ("random", "even"):
        raise ValueError(
            f"unknown generator {generator!r}; expected 'random' or 'even'"
        )
    if not classes:
        return []

    num_classes = len(classes)

    if generator == "random":
        if num_classes > bag_size:
            raise ValueError(
                f"cannot place {num_classes} classes in a bag of size {bag_size}"
            )
        counts = []
        remaining = bag_size
        for position in range(num_classes):
            classes_left = num_classes - position - 1
            if classes_left == 0:
                # The last class fills the bag so its size is exactly bag_size.
                count = remaining
            else:
                # Reserve one instance for each class still to be processed.
                count = random.randint(1, remaining - classes_left)
            counts.append(count)
            remaining -= count
    else:
        base, extra = divmod(bag_size, num_classes)
        counts = [base + 1 if i < extra else base for i in range(num_classes)]

    instances = []
    for class_index, count in zip(classes, counts):
        pool = grouped_indices[class_index]
        if count and not pool:
            raise ValueError(f"class {class_index!r} has no instances to sample from")
        instances.extend(random.choice(pool) for _ in range(count))

    return instances


def bag_loader(dataset, labels, ucc_range, num_bags, bag_size, batch_size=16, shuffle=True):
    """Create ``num_bags`` DataLoaders, each iterating over one bag of instances.

    Args:
        dataset: Indexable dataset yielding ``(sample, label)`` pairs.
        labels: Tensor of labels; its unique values define the classes bags
            may be drawn from.
        ucc_range: ``(low, high)`` inclusive range of the number of distinct
            classes per bag.
        num_bags: Number of bags to build.
        bag_size: Number of instances in every bag. This value is now
            forwarded to the instance sampler; it was previously ignored.
        batch_size: Batch size of each bag's DataLoader.
        shuffle: Whether each bag's DataLoader shuffles its instances.

    Returns:
        A list of ``num_bags`` ``torch.utils.data.DataLoader`` objects, each
        over a ``Subset`` of ``dataset``.

    Raises:
        ValueError: If bags are requested but ``ucc_range`` yields no class
            combinations, or if a bag cannot be filled (see
            ``choose_instances``).
    """
    # Group dataset indices by label value in a single pass over the dataset.
    # Keying by value (instead of position) keeps the lookup in
    # choose_instances correct even when labels are not 0..K-1.
    grouped_indices = {value: [] for value in torch.unique(labels).tolist()}
    for idx, (_, y) in enumerate(dataset):
        key = y.item() if isinstance(y, torch.Tensor) else y
        if key in grouped_indices:
            grouped_indices[key].append(idx)

    # Determine the combinations of classes to pick ahead of time.
    picked = choose_classes(labels, ucc_range, generator="combination")
    if num_bags > 0 and not picked:
        raise ValueError("ucc_range produced no class combinations to draw bags from")

    bags = []
    for i in range(num_bags):
        # Cycle through the combinations when more bags than combinations are
        # requested, instead of running off the end of the list.
        classes = picked[i % len(picked)]
        indices = choose_instances(
            grouped_indices, classes, generator="random", bag_size=bag_size
        )
        subset = torch.utils.data.Subset(dataset, indices)
        bags.append(
            torch.utils.data.DataLoader(subset, batch_size=batch_size, shuffle=shuffle)
        )

    return bags

# Smoke test: build 10 single-class bags from the validation split. The labels
# are taken from the same split as the dataset (y_val, not y_train) so that
# every class offered to the sampler actually occurs in val_dataset.
bag_loader(val_dataset, y_val, ucc_range=(1, 1), num_bags=10, bag_size=1, batch_size=7)

# Model
[x] Google Colab
[x] Autoencoder
    [x] Encoder
    [x] Decoder
    [x] ResNet
    [x] Wide ResNet


In [None]:
import numpy as np
import torch
import torch.nn as nn
from torchvision import datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler


# Device configuration: use CUDA when it is available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)

def data_loader(data_dir,
                batch_size,
                random_seed=42,
                valid_size=0.1,
                shuffle=True,
                test=False):
    """Build CIFAR-10 DataLoaders using torchvision's dataset.

    Args:
        data_dir: Directory the dataset is stored in (downloaded if missing).
        batch_size: Batch size for every returned loader.
        random_seed: Seed for the train/validation index shuffle. It is now
            honoured; previously the literal 42 was always used. The default
            keeps the original split.
        valid_size: Fraction of the training set held out for validation.
        shuffle: For the train/validation case, whether indices are shuffled
            before splitting. For the test case, passed to the DataLoader.
        test: When true, return only the test loader.

    Returns:
        The test ``DataLoader`` when ``test`` is true, otherwise a
        ``(train_loader, valid_loader)`` tuple whose loaders sample disjoint
        index subsets of the training set.
    """
    # Per-channel mean/std used for normalisation.
    # NOTE(review): presumably CIFAR-10 channel statistics -- confirm.
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )

    # define transforms
    transform = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            normalize,
    ])

    if test:
        test_dataset = datasets.CIFAR10(
          root=data_dir, train=False,
          download=True, transform=transform,
        )

        # Named test_loader rather than data_loader so the local variable
        # does not shadow this function's own name.
        test_loader = torch.utils.data.DataLoader(
            test_dataset, batch_size=batch_size, shuffle=shuffle
        )

        return test_loader

    # load the dataset
    train_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    valid_dataset = datasets.CIFAR10(
        root=data_dir, train=True,
        download=True, transform=transform,
    )

    num_train = len(train_dataset)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))

    if shuffle:
        # Seed from the argument so callers can actually vary the split.
        np.random.seed(random_seed)
        np.random.shuffle(indices)

    # The first `split` indices form the validation set, the rest the training set.
    train_idx, valid_idx = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, sampler=train_sampler)

    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=batch_size, sampler=valid_sampler)

    return (train_loader, valid_loader)


# CIFAR10 dataset: training/validation loaders first, then the test loader,
# all with a batch size of 64.
train_loader, val_loader = data_loader('./data', 64)
test_loader = data_loader('./data', 64, test=True)


In [None]:
import torch.nn as nn

class ResidualBlock(nn.Module):
    """Two 3x3 convolutions with batch norm, plus an additive shortcut.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        residual_function: Optional module applied to the input to produce
            the shortcut; when falsy the input itself is the shortcut, so its
            shape must match the output of the second convolution.
    """

    def __init__(self, in_channels, out_channels, stride = 1, residual_function = None):
        super().__init__()
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())

        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))

        self.residual_function = residual_function
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut(x))``."""
        out = self.conv2(self.conv1(x))
        shortcut = self.residual_function(x) if self.residual_function else x

        # Accumulate the shortcut into the main branch, then activate.
        out += shortcut
        return self.relu(out)

import torch.nn as nn

class Encoder(nn.Module):
    """Convolutional encoder that maps an image to a bottleneck vector.

    Args:
        block: Callable ``block(in_channels, out_channels[, stride,
            residual_function])`` that builds one residual block.
        layers: Sequence of four integers; ``layers[i]`` is the number of
            blocks in residual stage ``i``.
        bottleneck_size: Length of the output vector.
    """

    def __init__(self, block, layers, bottleneck_size = 256):
        super().__init__()
        self.in_channels = 16

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        stem_conv = nn.Conv2d(3, 16, kernel_size=7, stride=2, padding=3)
        self.conv1 = nn.Sequential(stem_conv, nn.BatchNorm2d(16), nn.ReLU())
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages (layer0..layer3); all but the first use stride 2.
        stage_specs = ((16, 1), (32, 2), (64, 2), (128, 2))
        for stage, (width, stride) in enumerate(stage_specs):
            setattr(
                self,
                f"layer{stage}",
                self._build_residual_layers(block, width, layers[stage], stride=stride),
            )

        # avgpool is registered but not used by forward().
        self.avgpool = nn.AvgPool2d(7, stride=1)
        self.flatten = nn.Flatten()
        # The linear layer expects a flattened 128 x 7 x 7 feature map.
        self.fc = nn.Linear(7 * 7 * 128, bottleneck_size)

    def _build_residual_layers(self, block, out_channels, num_blocks, stride=1):
        """Build one residual stage and update ``self.in_channels``.

        The first block uses ``stride``; when the stride is not 1 it also
        receives a 1x1 convolution + batch norm as its shortcut function. The
        remaining ``num_blocks - 1`` blocks use the block's default stride.
        """
        shortcut = None
        if stride != 1:
            shortcut = nn.Sequential(
                nn.Conv2d(self.in_channels, out_channels, kernel_size=1, stride=stride),
                nn.BatchNorm2d(out_channels),
            )
        stage = [block(self.in_channels, out_channels, stride, shortcut)]

        self.in_channels = out_channels

        # The rest of the blocks in the stage keep the channel count.
        stage.extend(block(self.in_channels, out_channels) for _ in range(1, num_blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Encode ``x`` through the stem, the four stages and the linear head."""
        x = self.maxpool(self.conv1(x))

        for stage in (self.layer0, self.layer1, self.layer2, self.layer3):
            x = stage(x)

        return self.fc(self.flatten(x))

class ReverseResidualBlock(nn.Module):
    """Transposed-convolution counterpart of a residual block.

    Args:
        in_channels: Channels of the input feature map.
        out_channels: Channels produced by both transposed convolutions.
        stride: Stride of the first transposed convolution; when it is not 1
            an ``output_padding`` of 1 is used on that layer.
        residual_function: Optional module applied to the input to produce
            the shortcut; when falsy the input itself is the shortcut, so its
            shape must match the main branch output.
    """

    def __init__(self, in_channels, out_channels, stride = 1, residual_function = None):
        super().__init__()
        output_padding = 1 if stride != 1 else 0

        first_deconv = nn.ConvTranspose2d(
            in_channels, out_channels,
            kernel_size=3, stride=stride, padding=1, output_padding=output_padding,
        )
        self.conv1 = nn.Sequential(first_deconv, nn.BatchNorm2d(out_channels), nn.ReLU())

        second_deconv = nn.ConvTranspose2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1,
        )
        self.conv2 = nn.Sequential(second_deconv, nn.BatchNorm2d(out_channels))

        self.residual_function = residual_function
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut(x))``."""
        out = self.conv2(self.conv1(x))
        shortcut = self.residual_function(x) if self.residual_function else x

        # Accumulate the shortcut into the main branch, then activate.
        out += shortcut
        return self.relu(out)
    
class Decoder(nn.Module):
    """Decoder that maps a bottleneck vector back to a 3-channel image.

    Args:
        block: Callable ``block(in_channels, out_channels[, stride,
            residual_function])`` that builds one (upsampling) residual block.
        layers: Sequence of four integers; ``layers[i]`` is the number of
            blocks in residual stage ``i``.
        bottleneck_size: Length of the input vector.
    """

    def __init__(self, block, layers, bottleneck_size = 256):
        super().__init__()
        self.in_channels = 128
        # Expands the bottleneck to a flat vector that forward() reshapes to 128 x 7 x 7.
        self.fc = nn.Linear(bottleneck_size, 7 * 7 * 128)

        # Four residual stages (layer0..layer3); all but the last use stride 2.
        stage_specs = ((64, 2), (32, 2), (16, 2), (16, 1))
        for stage, (width, stride) in enumerate(stage_specs):
            setattr(
                self,
                f"layer{stage}",
                self._build_residual_layers(block, width, layers[stage], stride=stride),
            )

        # upsample is registered but not used by forward().
        self.upsample = nn.Upsample(scale_factor=2, mode='nearest')

        # Output head: stride-4 transposed convolution, batch norm, sigmoid.
        head_deconv = nn.ConvTranspose2d(16, 3, kernel_size=6, stride=4, padding=1)
        self.conv1 = nn.Sequential(head_deconv, nn.BatchNorm2d(3), nn.Sigmoid())

    def _build_residual_layers(self, block, out_channels, num_blocks, stride=1):
        """Build one residual stage and update ``self.in_channels``.

        The first block uses ``stride``; when the stride is not 1 it also
        receives a 1x1 transposed convolution + batch norm as its shortcut
        function. The remaining ``num_blocks - 1`` blocks use the block's
        default stride.
        """
        shortcut = None
        if stride != 1:
            shortcut = nn.Sequential(
                nn.ConvTranspose2d(
                    self.in_channels, out_channels,
                    kernel_size=1, stride=stride, output_padding=1,
                ),
                nn.BatchNorm2d(out_channels),
            )
        stage = [block(self.in_channels, out_channels, stride, shortcut)]

        self.in_channels = out_channels

        # The rest of the blocks in the stage keep the channel count.
        stage.extend(block(self.in_channels, out_channels) for _ in range(1, num_blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Decode ``x`` through the linear layer, the four stages and the head."""
        x = self.fc(x)
        x = x.view(x.size(0), 128, 7, 7)

        for stage in (self.layer0, self.layer1, self.layer2, self.layer3):
            x = stage(x)

        return self.conv1(x)

class Autoencoder(nn.Module):
    """Encoder followed by decoder, each built with one block per stage."""

    def __init__(self):
        super().__init__()
        blocks_per_stage = (1, 1, 1, 1)
        self.encoder = Encoder(ResidualBlock, list(blocks_per_stage))
        self.decoder = Decoder(ReverseResidualBlock, list(blocks_per_stage))

    def forward(self, x):
        """Return the decoder's output for the encoder's output of ``x``."""
        return self.decoder(self.encoder(x))
    
# Training hyperparameters.
num_epochs, learning_rate = 1, 0.001

# Build the autoencoder and move it to the selected device.
model = Autoencoder().to(device)

# Mean-squared-error loss, optimised with Adam.
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [None]:
import matplotlib.pyplot as plt
import random

with torch.no_grad():
    test_image, test_label = next(iter(test_loader))
    images = test_image.to(device)

    idx = random.randint(0, len(test_image) - 1)
    outputs = model(images)
    # outputs = outputs.cpu()
    # result = model(test_image[idx].squeeze(0).to(device))
    # print(test_image[idx].permute(2, 1, 0))
    # print(outputs[idx].permute(2, 1, 0))
    
    fig, axs = plt.subplots(1, 2, figsize=(10, 5))
    axs[0].imshow(test_image[idx].permute(2, 1, 0))
    axs[0].axis('off')
    axs[1].imshow(outputs[idx].permute(2, 1, 0))
    axs[1].axis('off')

    plt.show()

    del test_image, test_label, outputs
    torch.cuda.empty_ca