In [9]:
from torch.utils.data import DataLoader
from torchvision import datasets
from imutils import paths
import numpy as np
import shutil
import os
import torch
import os

In [8]:
# Configuration
# location of the raw dataset and root folder that will hold the splits
dataset_path = 'flowers'
base_path = 'dataset'

# fraction of images held out for validation, and the directories that
# receive the separate train and validation splits
val_split = 0.1
train_path, val_path = (
    os.path.join(base_path, split_name) for split_name in ('train', 'val')
)

# ImageNet mean and standard deviation, plus the image size
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
image_size = 224

# device used for training and evaluation: CUDA when available, else CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# training hyperparameters
batch_size = 256
pred_batch_size = 4
epochs = 25
lr = 1e-4

# paths where the training plot and the trained model are stored
plot, model = (
    os.path.join('output', file_name)
    for file_name in ('train_plot.png', 'train_model.pth')
)

In [6]:
# build Dataset
def copy_images(image_paths, folder):
    """Copy images into ``folder``, grouped into one sub-folder per label.

    The label of an image is the name of the directory that directly
    contains it, so ``flowers/daisy/1.jpg`` is copied to
    ``<folder>/daisy/1.jpg``.

    Args:
        image_paths: iterable of image file paths laid out as
            ``.../<label>/<image name>``.
        folder: destination root directory; created if it does not exist.

    Raises:
        ValueError: if a path has no parent directory to use as its label.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(folder, exist_ok=True)

    # label folders already created during this call, so each is made once
    created_folders = set()

    # loop over the image paths
    for path in image_paths:
        # normalise first so redundant components ("a/./b", "a//b") and, on
        # Windows, forward slashes do not confuse the label/name extraction
        parent_dir, image_name = os.path.split(os.path.normpath(path))
        image_label = os.path.basename(parent_dir)
        if not image_label:
            raise ValueError(
                'cannot derive a label folder from image path {!r}'.format(path)
            )
        label_folder = os.path.join(folder, image_label)

        # create the label folder the first time this label is seen
        if label_folder not in created_folders:
            os.makedirs(label_folder, exist_ok=True)
            created_folders.add(label_folder)

        # construct the destination image path and copy the image
        shutil.copy(path, os.path.join(label_folder, image_name))
        
# load all the image paths; sort them so the split does not depend on the
# order in which the filesystem happens to list the files
print('[INFO] loading image paths...')
image_paths = sorted(paths.list_images(dataset_path))
if not image_paths:
    raise FileNotFoundError(
        'no images found under {!r}'.format(dataset_path)
    )

# shuffle with a fixed seed: re-running this cell on an unchanged dataset
# reproduces the same split, so a re-run does not scatter the same image
# into both the train and the validation folders
split_seed = 42
np.random.default_rng(split_seed).shuffle(image_paths)

# generate training and validation paths
val_path_len = int(len(image_paths) * val_split)
train_path_len = len(image_paths) - val_path_len
train_paths = image_paths[:train_path_len]
val_paths = image_paths[train_path_len:]

# copy training and validation images to their respective directories
print('[INFO] copying training and validation images...')
copy_images(train_paths, train_path)
copy_images(val_paths, val_path)

[INFO] loading image paths...
[INFO] copying training and validation images...


In [10]:
def get_data_loader(root_dir, transforms, batch_size, shuffle=True):
    """Build an ``ImageFolder`` dataset for ``root_dir`` and a loader over it.

    Args:
        root_dir: directory passed to ``datasets.ImageFolder`` as ``root``.
        transforms: passed to ``datasets.ImageFolder`` as ``transform``.
        batch_size: batch size of the returned ``DataLoader``.
        shuffle: whether the loader shuffles the data (default ``True``).

    Returns:
        Tuple ``(dataset, loader)``.
    """
    # create a dataset and use it to create a dataloader
    ds = datasets.ImageFolder(root=root_dir, transform=transforms)

    # os.cpu_count() returns None when the count cannot be determined; fall
    # back to 0 workers (load in the main process) instead of passing None
    num_workers = os.cpu_count() or 0

    loader = DataLoader(
        ds,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        # the module-level `device` decides whether pinned memory is requested
        pin_memory=(device == 'cuda'),
    )

    return (ds, loader)