In [4]:
import torch
from PIL import Image
import os
import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import v2
from torchvision.io import read_image, write_jpeg
from torchvision.utils import save_image
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

#from mpmath.identification import transforms

# Gaussian Noise

In [5]:
def gauss_noise_tensor(img, sigma=2):
    """Add zero-mean Gaussian noise to an image tensor.

    Args:
        img: A ``torch.Tensor`` of any shape. Floating-point tensors are
            processed in their own dtype; all other dtypes are converted to
            ``float32`` for the addition and converted back afterwards.
        sigma: Standard deviation of the noise, expressed in the same units
            as the values of ``img``. Defaults to 2.

    Returns:
        A new tensor with the same shape and dtype as ``img``.

    Raises:
        AssertionError: If ``img`` is not a ``torch.Tensor``.
    """
    assert isinstance(img, torch.Tensor)
    dtype = img.dtype
    if not img.is_floating_point():
        img = img.to(torch.float32)

    out = img + sigma * torch.randn_like(img)

    if out.dtype != dtype:
        # The input was not floating point. Casting an out-of-range float to
        # an integer dtype wraps around (or is undefined), e.g. a black uint8
        # pixel pushed to -1 would come back as 255. Round and saturate to the
        # representable range first so noise can never flip dark <-> bright.
        if dtype != torch.bool and not dtype.is_complex:
            limits = torch.iinfo(dtype)
            out = out.round().clamp(limits.min, limits.max)
        out = out.to(dtype)

    return out

# Compose Transformation

In [6]:
# Input/output locations for the augmentation run.
data_folder = "./data/original"
destination_folder = "./data/augmented"

# Number of augmented variants written per source image.
IMAGE_ITERATIONS = 10

# Not referenced in this cell; kept in case other cells rely on them.
WIDTH_RATIO = 0.3
HEIGHT_RATIO = 0.3

# Maps each generated file name to its {"HEALTHY": ..., "AKU": ...} labels.
labels_dict = {}

# write_jpeg does not create missing directories.
os.makedirs(destination_folder, exist_ok=True)

# The pipeline is loop-invariant: each random transform draws fresh parameters
# on every call, so it is built once instead of once per output image.
transform = v2.Compose([
    # pad_if_needed avoids an error on images smaller than the crop size.
    v2.RandomCrop(size=400, pad_if_needed=True),
    v2.Lambda(gauss_noise_tensor),
    v2.RandomHorizontalFlip(p=0.5),
    v2.RandomVerticalFlip(p=0.5),
    v2.RandomErasing(p=0.5),
    v2.RandomPerspective(p=0.5),
    v2.RandomAffine(degrees=70, scale=[0.2, 2], translate=[0.2, 0.7], shear=5),
    v2.ElasticTransform(alpha=90.0, sigma=9.0),
])

# Sorted so the processing order does not depend on the file system.
for filename in sorted(os.listdir(data_folder)):
    source_path = os.path.join(data_folder, filename)
    # Skip the Jupyter checkpoint folder and any other non-file entry.
    if filename == ".ipynb_checkpoints" or not os.path.isfile(source_path):
        continue
    img = read_image(source_path)
    # splitext removes only the final extension; split(".")[0] would cut
    # "AKU.01.jpg" down to "AKU" and make differently named files overwrite
    # each other's outputs.
    filename_no_extension = os.path.splitext(filename)[0]

    # The class is encoded in the source file name.
    source_labels = {
        "HEALTHY": 1 if "HEALTHY" in filename else 0,
        "AKU": 1 if "AKU" in filename else 0
    }

    for x in range(IMAGE_ITERATIONS):
        out = transform(img)
        out_name = f"{filename_no_extension}_composed_{x}.jpg"
        path = f'{destination_folder}/{out_name}'
        write_jpeg(out, path)

        # Each output gets its own dict so later edits cannot alias.
        labels_dict[out_name] = dict(source_labels)

# CutMix & MixUp

In [7]:
# Number of classes handed to the CutMix / MixUp transforms in this cell; the
# per-class label values they return are stored under "HEALTHY" and "AKU".
NUM_CLASSES = 2

class CustomImageDataset(Dataset):
    """Dataset of images listed in a CSV annotations file.

    The CSV is loaded with ``pd.read_csv``. For every row, column 0 is taken
    as the image file name (joined onto ``img_dir``) and column 1 as the label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Load the annotations table and remember the directory and transforms.

        Args:
            annotations_file: Path of the CSV file with file names and labels.
            img_dir: Directory that the file names are relative to.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.target_transform = target_transform
        self.transform = transform
        self.img_dir = img_dir
        self.img_labels = pd.read_csv(annotations_file)

    def __len__(self):
        """Return the number of rows in the annotations table."""
        row_count = len(self.img_labels)
        return row_count

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair for row ``idx``."""
        annotations = self.img_labels
        relative_name = annotations.iloc[idx, 0]
        label = annotations.iloc[idx, 1]

        image = read_image(os.path.join(self.img_dir, relative_name))

        # Transforms are optional; a falsy value leaves the item untouched.
        image = self.transform(image) if self.transform else image
        label = self.target_transform(label) if self.target_transform else label
        return image, label
    
def _save_mixed_batches(loader, mix_transform, prefix, out_dir, labels_out):
    """Apply a batch-mixing transform to every batch and save the results.

    Args:
        loader: Iterable yielding ``(images, labels)`` batches.
        mix_transform: Callable such as ``v2.CutMix`` / ``v2.MixUp`` that maps
            ``(images, labels)`` to ``(mixed_images, mixed_labels)``.
        prefix: File name prefix; images are saved as ``{prefix}_{n}.jpg``
            with ``n`` counting from 0 across all batches.
        out_dir: Directory the images are written to.
        labels_out: Dict updated in place with one
            ``{"HEALTHY": ..., "AKU": ...}`` entry per saved file name.

    Returns:
        The number of images written.
    """
    saved = 0
    for images, labels in loader:
        mixed_images, mixed_labels = mix_transform(images, labels)
        for image, soft_label in zip(mixed_images, mixed_labels):
            filename = f"{prefix}_{saved}.jpg"
            save_image(image, f'{out_dir}/{filename}')

            # assumes class index 0 is HEALTHY and index 1 is AKU in
            # labels.csv - TODO confirm against the annotations file
            labels_out[filename] = {
                "HEALTHY": soft_label[0].item(),
                "AKU": soft_label[1].item()
            }

            saved += 1
    return saved


prepoc = v2.Compose([
    # NOTE(review): the dataset already yields tensors from read_image, so
    # PILToTensor is presumably a pass-through here - confirm before removing.
    v2.PILToTensor(),
    # pad_if_needed avoids an error on images smaller than the crop size.
    v2.RandomCrop(size=400, pad_if_needed=True),
    v2.ToDtype(torch.float32, scale=True),
])

aku_dataset = CustomImageDataset('./labels.csv', data_folder, transform=prepoc)
dataloader = DataLoader(aku_dataset, batch_size=4, shuffle=True)

cutmix = v2.CutMix(num_classes=NUM_CLASSES)
mixup = v2.MixUp(num_classes=NUM_CLASSES)

# save_image does not create missing directories.
os.makedirs(destination_folder, exist_ok=True)

# One full pass over the (reshuffled) dataset per mixing strategy.
_save_mixed_batches(dataloader, cutmix, "cutmix", destination_folder, labels_dict)
_save_mixed_batches(dataloader, mixup, "mixup", destination_folder, labels_dict)

In [12]:
# Turn the collected labels into a table with one row per generated image
# (row index = file name, one column per label key) and write it to disk.
df = pd.DataFrame.from_dict(data=labels_dict, orient="index")
df.to_csv(path_or_buf="./data/overall_labels.csv")