### Build in PyTorch
- Prepare the data: resize the images
- Custom dataset to apply augmentation
- DataLoader to train in batches and shuffle the dataset

# Offline Data Augmentation

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision import datasets
from torch.utils.data import DataLoader

import numpy as np
from tqdm import tqdm

import os
from PIL import Image

import matplotlib.pyplot as plt 

In [None]:
# Pick the compute device once: use the GPU when CUDA is usable, otherwise the CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ready else torch.device("cpu")
print("Device being used:", device)

# Report what was found so the recorded cell output documents the hardware used.
if not cuda_ready:
    print("CUDA is not available, using CPU")
else:
    print("CUDA is available!")
    print("GPU Device Name:", torch.cuda.get_device_name(0))
    print("Total GPU Count:", torch.cuda.device_count())

Device being used: cuda
CUDA is available!
GPU Device Name: NVIDIA GeForce RTX 3050 Laptop GPU
Total GPU Count: 1


In [None]:
# Offline augmentation: for every image in each class folder, write LIMIT
# randomly transformed copies next to the original file.

# Random transform pipeline; it is re-sampled for every generated copy.
aug_transform = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),  # Horizontal flip
    transforms.RandomVerticalFlip(p=0.3),    # Vertical flip
    transforms.RandomRotation(15),           # Rotate between -15° to 15°
    # Disabled options, kept for experimentation:
    # transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),  # Slight translation, no rotation here
    # transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.05),  # Mild color adjustments
    transforms.GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 1.0))  # Light blur
])

source_folder = "../data/train"
labels = ["NORMAL", "PNEUMONIA"]

LIMIT = 6  # Number of augmented copies written per source image


def _is_augmented_copy(filename, existing_names, limit):
    """Return True if *filename* looks like a copy this cell generated earlier.

    Generated copies are named ``<stem>_<i><ext>`` with ``0 <= i < limit`` and
    are saved beside ``<stem><ext>``. A file is therefore treated as generated
    only when it has such a suffix AND its parent file is present in
    *existing_names*; originals that merely end in ``_<digit>`` are kept.
    """
    stem, ext = os.path.splitext(filename)
    parent_stem, sep, index = stem.rpartition("_")
    if not sep or index not in {str(k) for k in range(limit)}:
        return False
    return f"{parent_stem}{ext}" in existing_names


for label in labels:
    print(f"Augmenting {label} images...")
    image_folder = os.path.join(source_folder, label)

    # Snapshot the folder before writing anything: outputs land in this same
    # folder, so without the filter below a re-run would augment its own
    # previous output and the dataset would grow on every execution.
    folder_entries = os.listdir(image_folder)
    existing_names = set(folder_entries)
    images = [
        img for img in folder_entries
        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
        and not _is_augmented_copy(img, existing_names, LIMIT)
    ]
    print(images)

    for img_path in images:
        print(img_path)
        # Close the source file deterministically; convert() returns an
        # independent in-memory RGB image that outlives the `with` block.
        with Image.open(os.path.join(image_folder, img_path)) as source_image:
            image = source_image.convert("RGB")
        base_filename, ext = os.path.splitext(img_path)
        for i in range(LIMIT):
            augmented_img = aug_transform(image)
            output_path = f"{image_folder}/{base_filename}_{i}{ext}"
            augmented_img.save(output_path)
            print(f"Saved {output_path}")


Augmenting NORMAL images...
['IM-0128-0001.jpeg', 'IM-0151-0001.jpeg', 'IM-0158-0001.jpeg', 'IM-0162-0001.jpeg', 'IM-0164-0001.jpeg', 'IM-0166-0001.jpeg', 'IM-0170-0001.jpeg', 'IM-0189-0001.jpeg', 'IM-0201-0001.jpeg', 'IM-0210-0001.jpeg', 'IM-0223-0001.jpeg', 'IM-0225-0001.jpeg', 'IM-0262-0001.jpeg', 'IM-0273-0001.jpeg', 'IM-0283-0001.jpeg', 'IM-0291-0001.jpeg', 'IM-0298-0001.jpeg', 'IM-0313-0001.jpeg', 'IM-0316-0001.jpeg', 'IM-0325-0001.jpeg', 'IM-0350-0001.jpeg', 'IM-0355-0001.jpeg', 'IM-0369-0001.jpeg', 'IM-0389-0001.jpeg', 'IM-0393-0001.jpeg', 'IM-0394-0001.jpeg', 'IM-0404-0001.jpeg', 'IM-0409-0001.jpeg', 'IM-0437-0001-0002.jpeg', 'IM-0453-0001.jpeg', 'IM-0464-0001.jpeg', 'IM-0474-0001.jpeg', 'IM-0487-0001.jpeg', 'IM-0497-0001-0001.jpeg', 'IM-0505-0001-0002.jpeg', 'IM-0507-0001.jpeg', 'IM-0516-0001.jpeg', 'IM-0517-0001-0001.jpeg', 'IM-0517-0001.jpeg', 'IM-0523-0001-0001.jpeg', 'IM-0523-0001.jpeg', 'IM-0536-0001.jpeg', 'IM-0551-0001.jpeg', 'IM-0562-0001.jpeg', 'IM-0584-0001.jpeg', '

In [None]:
# Summarise the dataset and print the resulting table.
# `pd` is not referenced directly in this cell.
import pandas as pd
# NOTE(review): `data_info`, `path` and `folders` are not defined in the visible
# cells — presumably they come from another cell or a helper module; confirm
# they are in scope before running this cell. `labels` is expected to be the
# class-name list (e.g. the one assigned in the augmentation cell).
df = data_info(path, folders, labels)
# The recorded output shows one row per label and train/val/test columns;
# presumably these are image counts per split — TODO confirm against data_info.
print(df)

           train  val  test
NORMAL       720   60    20
PNEUMONIA    720   60    20
