In [14]:
# Importing Libraries
import os
import random
from PIL import Image
from torchvision import transforms

In [15]:
class DataSet():
    ''' A custom dataset class that loads a folder of images into memory.

        data_dir must contain one sub-directory per class; the name of each
        sub-directory is used as the label of every image stored inside it.

        Indexing returns a tuple (img_array, label): img_array is the output
        of self.transform (a tensor, since the transform is ToTensor) and
        label is the class-folder name as a string.
    '''

    def __init__(self, data_dir):
        ''' Remember the root folder and eagerly load every image below it.

            data_dir: path to the folder holding one sub-folder per class.
            Raises whatever os.listdir raises when data_dir cannot be listed.
        '''
        self.data_dir     = data_dir
        self.list_classes = []   # filled in by load_images()
        self.transform    = transforms.Compose([transforms.ToTensor()])
        self.images       = self.load_images()

    def __len__(self):
        ''' Number of (img_array, label) pairs that were loaded. '''
        return len(self.images)

    def __getitem__(self, idx):
        ''' Return the (img_array, label) pair stored at position idx. '''
        image, label = self.images[idx]
        return image, label

    def load_images(self):
        ''' Read every image of every class folder and return them as a list.

            Also refreshes self.list_classes with the class-folder names.
            Returns a list of (img_array, label) tuples, ordered by class
            name and then by file name.
        '''
        data_set = []

        # Only sub-directories are classes; stray files in the root folder
        # would otherwise make os.listdir(sub_dir) fail. Sorting gives a
        # reproducible order, because os.listdir returns names in arbitrary
        # order.
        self.list_classes = sorted(
            entry for entry in os.listdir(self.data_dir)
            if os.path.isdir(os.path.join(self.data_dir, entry))
        )

        for label in self.list_classes:
            sub_dir = os.path.join(self.data_dir, label)
            for file_name in sorted(os.listdir(sub_dir)):
                img_path = os.path.join(sub_dir, file_name)
                if os.path.isdir(img_path):
                    continue   # nested folders are not images
                # The context manager closes the file handle once the pixel
                # data has been converted, instead of leaving it open until
                # garbage collection.
                with Image.open(img_path) as img:
                    img_array = self.transform(img)
                data_set.append((img_array, label))
        return data_set

In [17]:
# Build one DataSet per split; the argument is the path to that split's folder
data_set_train      = DataSet(data_dir='train')
data_set_test       = DataSet(data_dir='test')
data_set_validation = DataSet(data_dir='validation')

# Report how many samples each split holds, then peek at one training sample
for split in (data_set_train, data_set_test, data_set_validation):
    print('Length of dataset:', len(split))
print('Data at index:', data_set_train[3])


Length of dataset: 1034
Length of dataset: 128
Length of dataset: 133
Data at index: (tensor([[[0.3373, 0.2980, 0.3020,  ..., 0.2353, 0.2235, 0.2353],
         [0.3020, 0.2824, 0.2706,  ..., 0.2314, 0.2353, 0.2353],
         [0.2863, 0.2627, 0.2510,  ..., 0.2392, 0.2510, 0.2588],
         ...,
         [0.3529, 0.3804, 0.3882,  ..., 0.2157, 0.1843, 0.2039],
         [0.3961, 0.4235, 0.4314,  ..., 0.1961, 0.1765, 0.1843],
         [0.4549, 0.4784, 0.4706,  ..., 0.1765, 0.1647, 0.1647]],

        [[0.3412, 0.3020, 0.3059,  ..., 0.6784, 0.6784, 0.6902],
         [0.3059, 0.2863, 0.2745,  ..., 0.6745, 0.6784, 0.6902],
         [0.2863, 0.2627, 0.2510,  ..., 0.6824, 0.6941, 0.7020],
         ...,
         [0.3255, 0.3529, 0.3608,  ..., 0.6078, 0.5725, 0.5922],
         [0.3725, 0.4039, 0.4118,  ..., 0.5882, 0.5647, 0.5725],
         [0.4314, 0.4588, 0.4510,  ..., 0.5686, 0.5529, 0.5529]],

        [[0.3176, 0.2784, 0.2824,  ..., 0.1529, 0.1569, 0.1686],
         [0.2863, 0.2667, 0.2549,  ..

In [66]:
class DataLoader():
    ''' A custom data loader that serves a dataset in batches.

        dataset:    any object supporting len() and integer indexing that
                    yields (sample, label) pairs.
        batch_size: number of samples per batch (positive integer); the last
                    batch of an epoch may be smaller.
        shuffle:    when True, samples are visited in a fresh random order on
                    every epoch.

        Iterating yields [samples, labels], a two-element list whose items
        are tuples of equal length.
    '''

    def __init__(self, dataset, batch_size, shuffle=True):
        # A non-positive batch size would never advance the cursor and the
        # loader would hand out empty batches forever.
        if not isinstance(batch_size, int) or batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        self.batch_size = batch_size
        self.dataset    = dataset
        self.shuffle    = shuffle
        # Prepare the first epoch right away so that next(loader) also works
        # without a preceding iter(loader).
        self._reset()

    def _reset(self):
        ''' Rewind to the start of an epoch and pick the visiting order. '''
        self.step  = 0
        self.order = list(range(len(self.dataset)))
        if self.shuffle:
            # Shuffle a list of indices, once per epoch. The dataset itself is
            # never reordered, so it only needs __len__ and __getitem__, and
            # every sample is served exactly once per epoch.
            random.shuffle(self.order)

    def __iter__(self):  # Magic function to iterate using a loop
        self._reset()
        return self

    def __next__(self):  # Magic function to fetch the next batch
        if self.step >= len(self.order):  # All samples served: end the loop
            raise StopIteration

        indices = self.order[self.step:self.step + self.batch_size]
        self.step += len(indices)

        sample_out = []
        label_out  = []
        for idx in indices:
            sample, label = self.dataset[idx]
            sample_out.append(sample)
            label_out.append(label)
        return [tuple(sample_out), tuple(label_out)]

In [67]:
# One loader per split; shuffle is off so batches follow the dataset order
train_data      = DataLoader(data_set_train, batch_size=3, shuffle=False)
test_data       = DataLoader(data_set_test, batch_size=3, shuffle=False)
validation_data = DataLoader(data_set_validation, batch_size=3, shuffle=False)

# Fetch a single batch by hand. iter() is called first because it sets the
# loader's read position before next() is used.
first_batch = next(iter(train_data))

# The for-loop calls iter() again, so it starts over from the first batch
for batch in train_data:
    print(batch)
 

[(tensor([[[0.2196, 0.2196, 0.2196,  ..., 0.8000, 0.2588, 0.2118],
         [0.2118, 0.2157, 0.2157,  ..., 0.6706, 0.3843, 0.2078],
         [0.1961, 0.2000, 0.2039,  ..., 0.6706, 0.4039, 0.3333],
         ...,
         [0.2196, 0.1882, 0.2275,  ..., 0.5804, 0.6353, 0.5529],
         [0.1961, 0.1922, 0.2706,  ..., 0.6196, 0.5843, 0.6157],
         [0.2275, 0.2118, 0.1882,  ..., 0.5569, 0.6196, 0.5961]],

        [[0.1490, 0.1490, 0.1490,  ..., 0.6118, 0.0706, 0.0235],
         [0.1412, 0.1451, 0.1451,  ..., 0.4941, 0.2039, 0.0275],
         [0.1333, 0.1373, 0.1412,  ..., 0.5176, 0.2431, 0.1843],
         ...,
         [0.1020, 0.0706, 0.1098,  ..., 0.7020, 0.7569, 0.6745],
         [0.0784, 0.0745, 0.1529,  ..., 0.7255, 0.6824, 0.7137],
         [0.1098, 0.0941, 0.0706,  ..., 0.6510, 0.7059, 0.6824]],

        [[0.0078, 0.0078, 0.0078,  ..., 0.5255, 0.0157, 0.0000],
         [0.0000, 0.0039, 0.0039,  ..., 0.4039, 0.1412, 0.0000],
         [0.0000, 0.0039, 0.0078,  ..., 0.4039, 0.1569, 