In [None]:
import torch
torch.manual_seed(17)
import random

from torchvision import transforms, datasets
from torchvision.utils import save_image
from sklearn.model_selection import train_test_split
import os

import matplotlib.pyplot as plt

In [None]:
# Project-local settings: `Config` is imported from this repository's `config` module
# and instantiated once for the notebook.
# NOTE(review): `config` is not referenced by any cell visible here — confirm it is still needed.
from config import Config

config = Config()

In [143]:
def imshow(inp, title=None):
    """Display an image tensor in a new 15x15 inch matplotlib figure.

    Args:
        inp: 3-D image tensor whose first axis is moved last before plotting
            (presumably channels-first, i.e. (C, H, W) -> (H, W, C)).
        title: Optional figure title; omitted when ``None``.
    """
    # Tensor.numpy() refuses tensors that require grad or live on an accelerator,
    # so drop the autograd link and move to host memory first. Both calls are
    # no-ops for a plain CPU tensor.
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))
    plt.figure(figsize=(15, 15))
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    # Short pause so the figure gets a chance to be drawn.
    plt.pause(0.001)

In [194]:
# Move the current train/val folders aside under a `_transforms_1` suffix.
archive_moves = (
    ('data/train', 'data/train_transforms_1'),
    ('data/val', 'data/val_transforms_1'),
)
for current_dir, archived_dir in archive_moves:
    os.rename(current_dir, archived_dir)

In [207]:
import shutil

# Delete the train/val folders (and everything inside them) so they can be rebuilt.
# The existence check keeps the cell runnable when a folder is already gone, e.g.
# when the notebook is executed top to bottom and the rename cell has just moved it;
# genuine failures (permissions, etc.) still raise.
for stale_dir in (r'data/train', r'data/val'):
    if os.path.isdir(stale_dir):
        shutil.rmtree(stale_dir)

In [208]:
# Create the train/val output folders.
# makedirs(exist_ok=True) also creates a missing `data/` parent and does not fail
# when the cell is re-run, unlike a bare os.mkdir.
for split_dir in (r'data/train', r'data/val'):
    os.makedirs(split_dir, exist_ok=True)

In [209]:
# Save train set and validation set in different folders
def _save_split(split, split_dir):
    """Write every image of one split to ``split_dir/<class folder>/<index>.png``.

    Args:
        split: Container that yields labels when iterated and returns the
            images of a label when indexed with it (presumably a dict of
            label -> sequence of image tensors — TODO confirm).
        split_dir: Root folder of the split, e.g. ``data/train``.

    The class folder name is looked up in the notebook-level ``label_to_rome``.
    """
    for label in split:
        class_dir = os.path.join(split_dir, label_to_rome[label])
        # exist_ok=True keeps the cell re-runnable; files are simply overwritten.
        os.makedirs(class_dir, exist_ok=True)
        for i, img in enumerate(split[label]):
            save_image(img, os.path.join(class_dir, f'{i}.png'))


_save_split(train, r'data/train')
_save_split(val, r'data/val')

# Transformations and Augmentations
Save 4 transformed versions of each image in the training set. We generate only 4 per image so that the total number of images stays below 10,000, as requested.

In [153]:
# Augmentation pipeline: random rotation (30), Gaussian blur, random resized crop to 64.
# NOTE(review): a later cell rebinds `transformations` with milder settings before any
# visible cell applies it, so this definition looks like a leftover experiment — confirm.
#
# The blur kernel size is drawn ONCE, here (an odd value from 1 to 9), and then reused
# for every image. It is drawn from a dedicated generator seeded like torch (17) so
# the pipeline is identical across kernel restarts; the global `random` module is
# never seeded in this notebook.
blur_kernel_size = random.Random(17).randrange(1, 10, 2)
transformations = torch.nn.Sequential(
    transforms.RandomRotation(30, fill=1),
    transforms.GaussianBlur(blur_kernel_size),
    transforms.RandomResizedCrop(64, scale=(0.6, 1))
)

In [210]:
# Augmentation pipeline applied to the training images:
# random rotation (15), Gaussian blur, random resized crop to 64 with scale 0.9-1.
#
# The blur kernel size is drawn ONCE, here (an odd value from 1 to 9), and then reused
# for every image. It is drawn from a dedicated generator seeded like torch (17) so
# the pipeline is identical across kernel restarts; the global `random` module is
# never seeded in this notebook.
blur_kernel_size = random.Random(17).randrange(1, 10, 2)
transformations = torch.nn.Sequential(
    transforms.RandomRotation(15, fill=1),
    transforms.GaussianBlur(blur_kernel_size),
    transforms.RandomResizedCrop(64, scale=(0.9, 1))
)

In [211]:
# Write 4 augmented copies of every training image next to the original,
# named `<index>_t<copy>.png` inside the image's class folder.
for label in train:
    class_dir = os.path.join(r'data/train', label_to_rome[label])
    for idx, original in enumerate(train[label]):
        for copy_no in range(4):
            target_file = os.path.join(class_dir, f'{idx}_t{copy_no}.png')
            save_image(transformations(original), target_file)

# Check data

In [212]:
# Locations of the train and validation image folders
data_root = "data"
train_dir, val_dir = (os.path.join(data_root, split) for split in ("train", "val"))

In [213]:
# Preprocessing applied when a sample is loaded: resize to 64x64, then convert to a tensor
resize_step = transforms.Resize([64, 64])
data_transforms = transforms.Compose([resize_step, transforms.ToTensor()])

# Build one pytorch dataset per image directory (train first, then validation)
train_dataset, val_dataset = (
    datasets.ImageFolder(split_dir, data_transforms)
    for split_dir in (train_dir, val_dir)
)

In [214]:
# Display the training dataset summary (number of datapoints, root location, transforms)
train_dataset

Dataset ImageFolder
    Number of datapoints: 8290
    Root location: data/train
    StandardTransform
Transform: Compose(
               Resize(size=[64, 64], interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
           )

In [215]:
# Display the validation dataset summary (number of datapoints, root location, transforms)
val_dataset

Dataset ImageFolder
    Number of datapoints: 419
    Root location: data/val
    StandardTransform
Transform: Compose(
               Resize(size=[64, 64], interpolation=bilinear, max_size=None, antialias=None)
               ToTensor()
           )