In [56]:
import torch

from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt

from src.data.project1.dataloader import get_loaders
from src.utils import invertNormalization

## Get dataloaders

In [1]:
from tqdm import tqdm

import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder

from src.utils import set_seed

class HotdogDataset(Dataset):
    def __init__(self, subset, transform=None):
        self.subset = subset
        self.transform = transform
        
    def __getitem__(self, index):
        x, y = self.subset[index]
        if self.transform:
            x = self.transform(x)
        return x, y
        
    def __len__(self):
        return len(self.subset)
    
def get_normalization_constants(root: str, seed: int = 0):
    # Set seed for split control
    set_seed(seed)

    # Define transforms (only resize as we want to compute means...)
    normalization_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])
    trainvalset     = ImageFolder(f'{root}/train', transform=normalization_transform)

    # Get size of validation split
    N_trainval  = trainvalset.__len__()
    N_val       = int(0.2 * N_trainval)
    
    # Get trainset
    trainset    = torch.utils.data.Subset(trainvalset, range(N_val, N_trainval))     

    # Compute means and standard deviations from training set
    train_mean = torch.stack([t.mean(1).mean(1) for t, c in tqdm(trainset, desc='Computing mean of training split...')]).mean(0)
    train_std  = torch.stack([t.std(1).std(1) for t, c in tqdm(trainset, desc='Computing std. dev. of training split...')]).std(0)
    print(f"\nMean: {train_mean}\nStd. dev.: {train_std}")    
    return train_mean, train_std

def get_loaders(root: str = '/dtu/datasets1/02514/hotdog_nohotdog', batch_size: int = 64, seed: int = 0, train_transforms=None, test_transforms=None) -> dict:

    # Set seed for split control
    set_seed(seed)

    # Load images as datasets
    trainvalset = ImageFolder(f'{root}/train') #, transform=train_transforms)
    testset     = ImageFolder(f'{root}/test', transform=test_transforms)

    # Get validation set size
    N_trainval  = trainvalset.__len__()                                       # total training points
    N_val       = int(0.2 * N_trainval)                                       # take ~20% for validation

    # Split trainval dataset into train- and valset
    val_subset      = torch.utils.data.Subset(trainvalset, range(N_val))         
    train_subset    = torch.utils.data.Subset(trainvalset, range(N_val, N_trainval))     

    valset = HotdogDataset(val_subset, transform=test_transforms)
    trainset = HotdogDataset(train_subset, transform=train_transforms)

    # Get dataloaders
    trainloader = DataLoader(trainset,  batch_size=batch_size, shuffle=True, num_workers=1)
    valloader   = DataLoader(valset,    batch_size=batch_size, shuffle=True, num_workers=1)
    testloader  = DataLoader(testset,   batch_size=batch_size, shuffle=False, num_workers=1)

    # Return loaders in dictionary
    return {'train': trainloader, 'validation': valloader, 'test': testloader}
    

In [2]:
ROOT = '/dtu/datasets1/02514/hotdog_nothotdog'
SEED = 0

# Get normalization constants
train_mean, train_std = get_normalization_constants(root=ROOT, seed=SEED)

# Define transforms for training
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),         # flips "left-right"
    # transforms.RandomVerticalFlip(p=1.0),           # flips "upside-down"
    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.RandomRotation(degrees=(60, 70)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=train_mean, 
        std=train_std, 
    )
])

# Define transforms for test and validation
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=train_mean, 
        std=train_std, 
    )
])

# Get data loaders with applied transformations
loaders = get_loaders(root=ROOT, batch_size=64, seed=SEED, train_transforms=train_transforms, test_transforms=test_transforms)

Computing mean of training split...: 100%|█| 1638/1638 [00:05<00:00, 319.54
Computing std. dev. of training split...: 100%|█| 1638/1638 [00:06<00:00, 2


Mean: tensor([0.5132, 0.4369, 0.3576])
Std. dev.: tensor([0.0214, 0.0208, 0.0223])





In [3]:
# Extract loaders
trainloader = loaders['train']
valloader   = loaders['validation']
testloader  = loaders['test']

## Visualize examples

In [7]:
def visualize_examples(batch, targets, title, save_path, train_mean = [0.5132, 0.4369, 0.3576], train_std = [0.0214, 0.0208, 0.0223]):

    # "Re-normalize" data
    inverseNormalization = invertNormalization(train_mean=train_mean, train_std=train_std)
    data                 = inverseNormalization(batch).clamp(0, 1)

    # Get idx2class mapping
    class2idx = trainloader.dataset.subset.dataset.class_to_idx
    idx2class = {v: k for k, v in class2idx.items()}

    # Plot examples from batch
    fig, axs = plt.subplots(3, 10, figsize=(18, 5))
    for i in tqdm(range(10), desc=title):
        axs[0, i].imshow(data[i].permute(1,2,0).numpy())
        axs[1, i].imshow(data[i+10].permute(1,2,0).numpy())
        axs[2, i].imshow(data[i+20].permute(1,2,0).numpy())

        axs[0, i].set_title(idx2class[targets[i].item()])
        axs[1, i].set_title(idx2class[targets[i+10].item()])
        axs[2, i].set_title(idx2class[targets[i+20].item()])

        axs[0, i].axis('off')
        axs[1, i].axis('off')
        axs[2, i].axis('off')

    fig.suptitle(title)
    plt.tight_layout()
    plt.savefig(f"{save_path}/{title}.png")
    # plt.show()
    plt.close()

In [8]:
# Get example training batch
batch, targets = next(iter(trainloader)) 
visualize_examples(batch, targets, "Training set", save_path="/zhome/b8/5/147299/Desktop")

# Get example validation batch
batch, targets = next(iter(valloader)) 
visualize_examples(batch, targets, "Validation set", save_path="/zhome/b8/5/147299/Desktop")

# Get example test batch
batch, targets = next(iter(testloader)) 
visualize_examples(batch, targets, "Test set", save_path="/zhome/b8/5/147299/Desktop")

Training set: 100%|███████████████████████| 10/10 [00:00<00:00, 200.39it/s]
Validation set: 100%|█████████████████████| 10/10 [00:00<00:00, 209.55it/s]
Test set: 100%|███████████████████████████| 10/10 [00:00<00:00, 198.30it/s]
