In [None]:
import os
# Root folder of the dataset; the loaders further down treat each sub-folder
# as one image class.
source_dir = "../Dataset_Willys_2020/ORGINAL/"
# Keep only real sub-directories and sort them: os.listdir returns entries in
# arbitrary order and also lists stray files (e.g. OS metadata files), which
# are not class folders.
target_dir = sorted(
    entry
    for entry in os.listdir(source_dir)
    if os.path.isdir(os.path.join(source_dir, entry))
)
print(target_dir)

In [None]:
from PIL import Image
from glob import glob
# Every image is expected to have exactly this (width, height).
# WARNING: files with any other size are permanently deleted from disk.
expected_size = (198, 198)

for cat in target_dir:
    temp_path = os.path.join(source_dir, cat)
    cat_img = sorted(glob(os.path.join(temp_path, "*.jpg")))
    for img in cat_img:
        # Image.open keeps the underlying file open; read the size inside a
        # context manager so the handle is closed before the file is removed
        # (deleting a file that is still open fails on Windows).
        with Image.open(img) as orig_img:
            img_size = orig_img.size
        if img_size == expected_size:
            continue
        print(img, '--', img_size)
        if os.path.isfile(img):
            os.remove(img)
        else:
            print("Error %s file not found" % img)
            

from torchvision.datasets import ImageFolder
from torchvision.transforms import AutoAugmentPolicy, AutoAugment,RandomApply   

# Imported here so this cell is self-contained; RandomChoice is the transform
# that accepts one weight per candidate transform.
from torchvision.transforms import RandomChoice

# One AutoAugment transform per predefined policy.
polices = [AutoAugmentPolicy.CIFAR10, AutoAugmentPolicy.IMAGENET, AutoAugmentPolicy.SVHN]
augments = [AutoAugment(policy) for policy in polices]

# RandomApply expects a single float `p` (the chance of applying the whole
# list), so handing it a list of three weights is not a valid argument.
# Per-transform weights are what RandomChoice takes: exactly one of the three
# policies is picked for each image, with probabilities 0.2 / 0.4 / 0.4.
applier = RandomChoice(transforms=augments, p=[0.2, 0.4, 0.4])
dataset = ImageFolder(root=source_dir, transform=applier)


In [None]:
#%%writefile load_data.py

from typing import Optional, Callable

from torchvision.transforms import AutoAugmentPolicy, AutoAugment, RandomApply, Compose, ToTensor
from torchvision.datasets import ImageFolder




def load_data_from_ImageFolder(root:str, transform:Optional[Callable]=None) -> "ImageFolder":
    """Build a torchvision ``ImageFolder`` dataset rooted at ``root``.

    When ``transform`` is not provided, a default augmentation pipeline is
    used: with probability 0.7 the AutoAugment policies for CIFAR10, IMAGENET
    and SVHN are applied one after another (otherwise the image is left
    untouched), and the result is then converted with ``ToTensor``.

    Args:
        root (str): Path to the directory where the images are separated
            into their classes by sub-folder.
        transform (Optional[Callable]): Transform applied to every loaded
            image to increase the variety of the data. It is used exactly as
            given, so it must include any tensor conversion the caller
            needs. Defaults to ``None``, which selects the pipeline
            described above.

    Returns:
        ImageFolder: Dataset reading images from ``root``.
    """
    if transform is None:
        policies = [
            AutoAugmentPolicy.CIFAR10,
            AutoAugmentPolicy.IMAGENET,
            AutoAugmentPolicy.SVHN,
        ]
        augments = [AutoAugment(policy) for policy in policies]
        # RandomApply applies the whole list in order, or nothing at all.
        applier = RandomApply(transforms=augments, p=0.7)
        transform = Compose([applier, ToTensor()])
    # A single variable carries either the caller's transform or the default,
    # so the caller-supplied branch no longer hits an unbound name.
    return ImageFolder(root=root, transform=transform)


In [None]:
# Build the dataset with the loader's default augmentation pipeline (no
# explicit transform is passed) and display how many samples it contains.
dataset = load_data_from_ImageFolder(source_dir)
len(dataset)


In [None]:
# Settings consumed by the split / DataLoader cells below.
valid_split = 0.15  # fraction of the dataset held out for validation
batch_size = 1  # samples per batch, used by both loaders
n_jobs = 12  # passed to the DataLoaders as num_workers
n_epochs = 50  # presumably the number of training epochs; not used in the visible cells

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import numpy as np

In [None]:

# Total number of samples (training + validation); the name is kept because
# later cells may refer to it.
num_train = len(dataset)
indices = list(range(num_train))
# Number of samples held out for validation.
split = int(valid_split * num_train)

# Shuffle the indices once with a fixed seed so the train/validation
# membership is identical on every run (Restart & Run All reproducible).
# The first `split` shuffled indices form the validation set, the rest the
# training set; both are plain lists of Python ints.
split_seed = 42
shuffled = np.random.default_rng(split_seed).permutation(num_train)
valid_idx = shuffled[:split].tolist()
train_idx = shuffled[split:].tolist()

# Each sampler draws only from its own index subset, in random order.
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# NOTE(review): both loaders wrap the same `dataset`, so whatever transform it
# carries (including random augmentation) is also applied to validation
# batches -- confirm this is intended.
train_loader = DataLoader(
    dataset,
    sampler=train_sampler,
    batch_size=batch_size,
    num_workers=n_jobs,
    pin_memory=True,
)
valid_loader = DataLoader(
    dataset,
    sampler=valid_sampler,
    batch_size=batch_size,
    num_workers=n_jobs,
    pin_memory=True,
)

In [None]:
# Fetch a single batch from the training loader and show the type of object
# the loader yields.
sample = next(iter(train_loader))
type(sample)

In [None]:
# Shape of the first element of the fetched batch (presumably the stacked
# image tensor -- confirm against the output of the previous cell).
sample[0].shape