In [2]:
import torch
import os
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
from torchvision.io import read_image, write_jpeg
from torchvision.utils import save_image
import pandas as pd

In [3]:
# Create new tensors on the GPU by default whenever CUDA is usable;
# otherwise the default device is left untouched.
cuda_usable = torch.cuda.is_available()
if cuda_usable:
    torch.set_default_device("cuda")

In this notebook we apply different transformations to our original samples in order to get a much higher number of possible samples for later classification.

# Gaussian Noise

A function to apply Gaussian Noise to the images

In [4]:
def gauss_noise_tensor(img, sigma=2.0):
    """Add zero-mean Gaussian noise to an image tensor.

    Args:
        img: ``torch.Tensor`` holding the image. Integer tensors are
            converted to ``float32`` for the computation and converted back
            to their original dtype afterwards.
        sigma: Standard deviation of the noise, expressed in the value scale
            of ``img``. Defaults to 2, the value previously hard-coded.

    Returns:
        A new tensor with the same shape and dtype as ``img``. For integer
        inputs the noisy values are rounded and clamped to the representable
        range of the dtype, so values pushed below the minimum or above the
        maximum saturate instead of wrapping around.

    Raises:
        AssertionError: If ``img`` is not a ``torch.Tensor``.
    """
    assert isinstance(img, torch.Tensor)
    dtype = img.dtype
    if not img.is_floating_point():
        img = img.to(torch.float32)

    out = img + sigma * torch.randn_like(img)

    if out.dtype != dtype:
        # Only integer dtypes have an iinfo range; bool/complex inputs keep
        # the plain cast they always had.
        if dtype != torch.bool and not dtype.is_complex:
            limits = torch.iinfo(dtype)
            # Casting an out-of-range float to an integer dtype wraps around
            # (e.g. -1 -> 255 for uint8), which would turn slightly-too-dark
            # pixels into bright speckles. Saturate before casting.
            out = out.round().clamp(limits.min, limits.max)
        out = out.to(dtype)

    return out

# Compose Transformation

We perform data augmentation on our original images in order to get more images overall. The operations performed are, in order:
- Random Cropping to a size of 400x400 (any higher would not be compatible with all images in the dataset)
- Apply Gaussian Noise
- A Horizontal Flip (50% probability)
- A Vertical Flip (50% probability)
- A random Erasing of parts of the image, from 2% up to 20% (50% probability)
- A random shift of Perspective of the image (50% probability)
- A random Affine transformation
- A random Elastic transformation

For each image we create 10 different augmented images, ending up with 11x our original sample size

In [5]:
data_folder = "./data/original"
destination_folder = "./data/augmented"

IMAGE_ITERATIONS = 10

WIDTH_RATIO = 0.3
HEIGHT_RATIO = 0.3

# Maps an image file name to its {"HEALTHY": ..., "AKU": ...} label entry.
labels_dict = {}

# Make sure the output directory exists so the save calls below do not fail
# on a fresh checkout.
os.makedirs(destination_folder, exist_ok=True)

# The pipeline does not depend on the image or the iteration, so it is built
# once; its random transforms draw new parameters on every call.
transform = v2.Compose([
    v2.RandomCrop(size=400),
    v2.Lambda(gauss_noise_tensor),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.5),
    v2.RandomErasing(p=0.5, scale=(0.02, 0.2)),
    v2.RandomPerspective(p=0.5, distortion_scale=0.3),
    v2.RandomAffine(degrees=40, scale=[0.8, 1.2], translate=[0.2, 0.4], shear=5),
    v2.ElasticTransform(alpha=90.0, sigma=9.0),
])

# sorted() gives a stable processing order (os.listdir order is arbitrary).
for filename in sorted(os.listdir(data_folder)):
    source_path = os.path.join(data_folder, filename)
    # Skip the Jupyter checkpoint folder and any other non-file entry.
    if filename == ".ipynb_checkpoints" or not os.path.isfile(source_path):
        continue

    img = read_image(source_path)
    # splitext strips only the final extension, so names containing extra
    # dots (e.g. "AKU.01.jpg") keep their full stem and stay unique.
    filename_no_extension = os.path.splitext(filename)[0]

    # The class is encoded in the file name; every augmented copy inherits it.
    label = {
        "HEALTHY": 1 if "HEALTHY" in filename else 0,
        "AKU": 1 if "AKU" in filename else 0,
    }
    labels_dict[filename] = label

    for x in range(IMAGE_ITERATIONS):
        out = transform(img)
        augmented_name = f"{filename_no_extension}_composed_{x}.jpg"
        write_jpeg(out, f"{destination_folder}/{augmented_name}")
        # Store a copy so entries never alias one another.
        labels_dict[augmented_name] = dict(label)

  return func(*args, **kwargs)


# CutMix & MixUp

Here we create a custom Dataset class to use with PyTorch's DataLoader object. The DataLoader loads the original images in batches of 4, and both a CutMix and a MixUp are computed on each batch.
- CutMix is an augmentation strategy that replaces part of an image with an identically sized part from another image
- MixUp is an augmentation strategy that blends both images into a single one, with a different level of transparency each time

For each of the two techniques we create as many images as are passed to the DataLoader object. These augmented images' labels may lie anywhere in the [0..1] range for both classes. The class with the higher value will be the one considered later on for classification purposes.

In [6]:
NUM_CLASSES = 2


class CustomImageDataset(Dataset):
    """Map-style dataset backed by an annotations CSV and an image folder.

    The first CSV column is read as the image file name (relative to
    ``img_dir``) and the second column as its label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotations table and remember the optional transforms.

        Args:
            annotations_file: Path of the CSV file passed to ``pd.read_csv``.
            img_dir: Directory that the file names in the CSV are joined to.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        """Return the number of rows in the annotations table."""
        annotations = self.img_labels
        return len(annotations)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at row ``idx``."""
        file_name = self.img_labels.iloc[idx, 0]
        sample = read_image(os.path.join(self.img_dir, file_name))
        target = self.img_labels.iloc[idx, 1]
        # Transforms are optional; a falsy value means "leave as loaded".
        sample = self.transform(sample) if self.transform else sample
        target = self.target_transform(target) if self.target_transform else target
        return sample, target
    
# Per-sample preprocessing: crop to 400x400 and convert to float32 scaled to [0, 1].
# NOTE(review): read_image already returns tensors, so PILToTensor is
# presumably a pass-through here - confirm before removing it.
prepoc = v2.Compose([v2.PILToTensor(), v2.RandomCrop(size=400), v2.ToDtype(torch.float32, scale=True)])
BATCH_SIZE = 4

# From here on, newly created tensors default to the CPU again.
torch.set_default_device('cpu')

aku_dataset = CustomImageDataset('./labels.csv', f'{data_folder}', transform=prepoc)
dataloader = DataLoader(aku_dataset, batch_size=BATCH_SIZE, shuffle=True)

cutmix = v2.CutMix(num_classes=NUM_CLASSES)
mixup = v2.MixUp(num_classes=NUM_CLASSES)
#cutmix_or_mixup = v2.RandomChoice([cutmix, mixup])

# Make sure the output directory exists before any image is written.
os.makedirs(destination_folder, exist_ok=True)


def _save_mixed_batch(prefix, mixed_images, mixed_labels, offset):
    """Write one augmented batch to disk and record its soft labels.

    Args:
        prefix: File-name prefix identifying the technique ("cutmix"/"mixup").
        mixed_images: Batch of augmented images, indexed along dimension 0.
        mixed_labels: Per-image label rows; column 0 is stored as "HEALTHY"
            and column 1 as "AKU".
            NOTE(review): assumes labels.csv encodes HEALTHY as class 0 and
            AKU as class 1 - confirm against the annotations file.
        offset: Running index of the first image of this batch, used to keep
            file names unique across batches.
    """
    for i in range(mixed_images.size(0)):
        filename = f"{prefix}_{offset + i}.jpg"
        save_image(mixed_images[i, :, :, :], f'{destination_folder}/{filename}')
        labels_dict[filename] = {
            "HEALTHY": mixed_labels[i][0].item(),
            "AKU": mixed_labels[i][1].item()
        }


x = 0
for images, labels in dataloader:
    # Both techniques are applied to the same batch, CutMix first.
    cutimages, cutlabels = cutmix(images, labels)
    _save_mixed_batch("cutmix", cutimages, cutlabels, x)
    miximages, mixlabels = mixup(images, labels)
    _save_mixed_batch("mixup", miximages, mixlabels, x)
    x += BATCH_SIZE

In [7]:
# Turn the {file name: {"HEALTHY": ..., "AKU": ...}} mapping into a table with
# one row per image (file name as index) and write it out as CSV.
df = pd.DataFrame.from_dict(
    labels_dict,
    orient="index",
)
df.to_csv(path_or_buf="./data/overall_labels.csv")