In [35]:
import os
import torch
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from PIL import Image
import random
import shutil

# Root folder that the cells below hand to ImageFolder; the bootstrap and
# augmentation output directories are created next to it (via "..").
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
source_dir = r'/run/media/magnusjsc/T7/Classification-and-3D-reconstruction-of-archaeological-artifacts_DATA/Splitted data by era - GAN project/test_iron'

In [61]:
# Bootstrap data - Randomly select images (with replacement) and copy them
image_size = 128
batch_size = 128
workers = (4 * 1)  # Multiply with the number of GPUs available

target_dir_boot = os.path.abspath(os.path.join(source_dir, "..", "bootstrap_iron"))
os.makedirs(target_dir_boot, exist_ok = True)

# Create a subdirectory within target_dir_boot to store the bootstrapped images
bootstrap_subdir = os.path.join(target_dir_boot, "bootstrapped_images")
os.makedirs(bootstrap_subdir, exist_ok = True)

transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# The dataset is only used here to enumerate the image files (dataset.samples);
# the files themselves are copied unchanged.
dataset = ImageFolder(root = source_dir, transform = transform)
# Image indices
indices = list(range(len(dataset)))
num_images_original = len(dataset)

print("Number of images in the original dataset:", num_images_original)

# n times the size of the original dataset
bootstrap_ratio = 4.0
num_bootstrap_images = int(bootstrap_ratio * num_images_original)

print("Intended number of bootstrap images:", num_bootstrap_images)

# Select a random sequence of images to bootstrap - With replacement.
# A dedicated, seeded generator makes the selection reproducible: re-running the
# cell on the same dataset regenerates the same file names (overwriting the old
# copies) instead of piling new, differently named copies on top of them.
bootstrap_seed = 42
bootstrap_rng = random.Random(bootstrap_seed)
bootstrap_indices = bootstrap_rng.choices(indices, k = num_bootstrap_images)
print("Bootstrap indicies: ", len(bootstrap_indices))

# Copy the selected files. The running number in front of the file name keeps
# repeated picks of the same source image from overwriting each other.
num_copied_images = 0
for copy_number, index in enumerate(bootstrap_indices):
    image_path, _ = dataset.samples[index]
    image_name = str(copy_number) + "_" + os.path.basename(image_path)
    target_image_path = os.path.join(bootstrap_subdir, image_name)  # Save in the subdirectory
    shutil.copy(image_path, target_image_path)
    num_copied_images += 1

# Report what this run actually copied rather than whatever is in the folder.
print("Number of images copied:", num_copied_images)

# Files left over from earlier runs (other seed, ratio or dataset) would be picked
# up by anything that later reads bootstrap_subdir, so make them visible.
num_files_in_subdir = len(os.listdir(bootstrap_subdir))
if num_files_in_subdir != num_copied_images:
    print(
        "Warning:", bootstrap_subdir, "contains", num_files_in_subdir,
        "entries but this run copied", num_copied_images,
        "- remove stale files from earlier runs before using the folder."
    )
print("Done bootstrapping the images! :D")

Number of images in the original dataset: 1779
Intended number of bootstrap images: 7116
Bootstrap indicies:  7116
Number of images copied: 7116
Done bootstrapping the images! :D


In [66]:
# Augment data - build the augmentation pipeline and reload the source images
target_dir_aug = os.path.abspath(os.path.join(source_dir, "..", "augmented_iron"))

# The augmented images are written into a sub-folder of target_dir_aug.
# os.makedirs creates the missing parent as well, so one call covers both levels.
augmented_subdir = os.path.join(target_dir_aug, "augmented_images")
os.makedirs(augmented_subdir, exist_ok=True)

# Affine step configured for random zoom only: no rotation, translation or shear.
random_zoom = transforms.RandomAffine(
    degrees=0,
    translate=None,
    scale=(0.8, 1.5),  # factor < 1 zooms out, > 1 zooms in, 1 keeps the original size
    shear=None,
    fill=0,  # value used for the area outside the transformed image
    center=None,
)

augment_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(90),  # random angle drawn from [-90, +90] degrees
    random_zoom,
    transforms.ToTensor(),
])

# Deterministic preprocessing applied when the images are read from disk
original_transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
])

# Load dataset
dataset = ImageFolder(source_dir, transform=original_transform)

def save_augmented_images(images, labels, batch_index):
    """Write every image of one augmented batch into `augmented_subdir` as a .jpg.

    Args:
        images: Indexable batch of image tensors that ToPILImage can convert.
        labels: Labels indexed in parallel with `images`; each label becomes
            part of the corresponding file name.
        batch_index: Position of the batch in the loader. Combined with the
            global `batch_size` it gives each image a running number.
    """
    to_pil = transforms.ToPILImage()  # one converter serves the whole batch
    first_number = batch_index * batch_size
    for offset in range(len(images)):
        picture = to_pil(images[offset])
        file_name = f"augmented_iron_{labels[offset]}_{first_number + offset}.jpg"
        # Save in the subdirectory
        picture.save(os.path.join(augmented_subdir, file_name))

# The coins in the iron folder will get the label 0 by default since it's the first folder
augmented_data_loader = DataLoader(
    dataset,
    batch_size,
    shuffle=True,
    num_workers=workers,
)

# The loader yields tensors, while augment_transform starts from PIL images,
# so every image is converted back to PIL before it is augmented.
tensor_to_pil = transforms.ToPILImage()
for batch_index, (images, labels) in enumerate(augmented_data_loader):
    augmented_images = torch.stack(
        [augment_transform(tensor_to_pil(image)) for image in images]
    )
    # Save the augmented images
    save_augmented_images(augmented_images, labels, batch_index)
    # Debug: put a `break` here to stop after the first batch

print("Done augmenting the images! :D")


Done augmenting the images! :D


In [67]:
# Augment the bootstrapped images
# Same source folder as in the first cell, declared again for this cell.
source_dir = r'/run/media/magnusjsc/T7/Classification-and-3D-reconstruction-of-archaeological-artifacts_DATA/Splitted data by era - GAN project/test_iron'
target_dir_aug_boot = os.path.abspath(
    os.path.join(source_dir, "..", "augmented_boot_iron")
)
os.makedirs(target_dir_aug_boot, exist_ok=True)

# Read the bootstrapped copies back from target_dir_boot with the plain
# (non-random) preprocessing; both names are defined in earlier cells.
dataset_aug_boot = ImageFolder(target_dir_boot, transform=original_transform)

loader_options = dict(batch_size=batch_size, shuffle=True, num_workers=workers)
bootstrap_data_loader = DataLoader(dataset_aug_boot, **loader_options)

def save_bootstrapped_augmented_images(images, labels, index):
    """Store one batch of augmented bootstrap images in `target_dir_aug_boot`.

    Args:
        images: Batch of image tensors that ToPILImage can convert.
        labels: Labels indexed in parallel with `images`; each label is
            embedded in the corresponding file name.
        index: Position of the batch in the loader; `index * batch_size`
            (global `batch_size`) is the running number of its first image.

    NOTE(review): files go directly into `target_dir_aug_boot`, whereas the
    other cells write into a sub-folder of their target directory - confirm
    that this difference is intended.
    """
    convert = transforms.ToPILImage()  # built once, reused for every image
    batch_start = index * batch_size
    for position, tensor in enumerate(images):
        picture = convert(tensor)
        file_name = f"bootstrap_augmented_iron_{labels[position]}_{batch_start + position}.jpg"
        picture.save(os.path.join(target_dir_aug_boot, file_name))


# augment_transform starts from PIL images, so each tensor coming out of the
# loader is converted back to PIL first.
pil_converter = transforms.ToPILImage()
for index, (images, labels) in enumerate(bootstrap_data_loader):
    # Apply augmentation, then gather the results into one batch tensor
    augmented_images = torch.stack(
        [augment_transform(pil_converter(image)) for image in images]
    )
    save_bootstrapped_augmented_images(augmented_images, labels, index)
    # Debug: put a `break` here to stop after the first batch


print("Done augmenting the bootstrapped images! :D")

Done augmenting the bootstrapped images! :D
