# Import libraries

In [3]:
import os
from tqdm import tqdm as tqdm
import torchvision.datasets as datasets 
from torch.utils.data import WeightedRandomSampler, DataLoader              # IMPORT WeightedRandomSampler TO DEAL WITH IMBALANCED CLASSES.
                                                                            # THIS MAKES OUR DATA INPUT PIPELINE MORE GENERAL, SO IT SUITS ANY CASE.
                                                                            # IF SOME BREED HAS MORE EXAMPLE IMAGES THAN THE OTHERS, THIS DATA PIPELINE HANDLES IT.
                                                                            # IN OUR CASE DIFFERENT DOG BREEDS HAVE DIFFERENT NUMBERS OF EXAMPLE IMAGES, SO THIS IS NEEDED.
                                                                            # IF THE DATASET WERE PERFECTLY BALANCED, THIS WOULD BE UNNECESSARY.
import torchvision.transforms as transforms

# Preprocess images (resizing, cropping, horizontal and vertical flipping)
### Create a WeightedRandomSampler to deal with imbalanced classes.
### Load the train and validation data into DataLoaders.

In [4]:
def get_loaders(train_dir, dev_dir, batch_size, image_size):
    """Build the training and validation DataLoaders for class-per-folder image data.

    Both directories are read with ``datasets.ImageFolder`` (one sub-folder per
    class). Training images are augmented (resize, random crop, random flips)
    and drawn through a ``WeightedRandomSampler`` so that every class is
    sampled with roughly equal probability regardless of how many images it
    has. Validation images are only resized and converted to tensors.

    Args:
        train_dir: Root directory of the training images.
        dev_dir: Root directory of the validation images.
        batch_size: Number of samples per batch for both loaders.
        image_size: Side length (pixels) of the square images fed to the model.

    Returns:
        A ``(train_loader, val_loader)`` tuple of ``DataLoader`` objects.
    """
    print("getting loaders")

    # RandomCrop raises if the image is smaller than the crop, so never resize
    # below the requested crop size (300 is kept for the usual smaller sizes).
    resize_side = max(300, image_size)

    train_transforms = transforms.Compose([
        transforms.Resize((resize_side, resize_side)),
        transforms.RandomCrop((image_size, image_size)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.05),
        transforms.ToTensor(),
    ])

    # Validation must be deterministic: no random crops or flips.
    dev_transforms = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
    ])

    train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transforms)
    dev_dataset = datasets.ImageFolder(root=dev_dir, transform=dev_transforms)

    val_loader = DataLoader(dev_dataset, batch_size=batch_size, num_workers=2, pin_memory=True)

    # Count samples per class from the dataset itself, so the counts are indexed
    # by the same label ids ImageFolder assigned. Walking the directory tree
    # instead yields folders in arbitrary order and counts non-image files,
    # which can attach weights to the wrong class.
    labels = [label for _, label in train_dataset.imgs]
    class_counts = [0] * len(train_dataset.classes)
    for label in labels:
        class_counts[label] += 1

    # More examples in a class -> lower per-sample weight. A class with no
    # samples gets weight 0; it contributes nothing to sample_weights anyway.
    class_weights = [1 / count if count else 0.0 for count in class_counts]
    sample_weights = [class_weights[label] for label in labels]

    # Sampling with replacement lets minority-class images repeat in an epoch.
    sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

    # num_workers and pin_memory give slightly more efficient data loading.
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=2,
        pin_memory=True,
    )

    return train_loader, val_loader