Si legge il dataset, si assegnano le classi e si guarda la distribuzione

In [91]:
import os 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import random
import json


In [92]:
def read_dataset(info_filename):
    """Load an annotation index from ``<info_filename>.json``.

    The JSON document is expected to be an array whose entries expose a
    ``'name'`` key (image name) and a ``'label'`` key (class label).

    Args:
        info_filename: Path of the annotation file WITHOUT the ``.json``
            extension; the extension is appended here.

    Returns:
        dict: Maps the positions ``0..n-1`` to ``(name, label)`` tuples,
        in the order the entries appear in the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If an entry lacks the ``'name'`` or ``'label'`` key.
    """
    # The context manager closes the handle even when parsing fails.
    with open(info_filename + '.json', encoding='utf-8') as f:
        structure = json.load(f)

    return {
        i: (entry['name'], entry['label'])
        for i, entry in enumerate(structure)
    }



In [93]:

# Locations of the annotation indexes; read_dataset appends the '.json' suffix.
path_train = './chaoyang-data/train'
path_test = './chaoyang-data/test'
verbose = False

# Load the (name, label) index of each split.
train_dataset = read_dataset(path_train)
test_dataset = read_dataset(path_test)

# Report how many entries each split contains.
print('Train dataset size: ', len(train_dataset))
print('Test dataset size: ', len(test_dataset))

# Optional preview of the first 10 training entries.
if verbose:
    for i in range(10):
        print(train_dataset[i])

Train dataset size:  4021
Test dataset size:  2139


In [94]:
# Count how many training samples belong to each class.
labels = np.array([train_dataset[i][1] for i in range(len(train_dataset))])
unique, counts = np.unique(labels, return_counts=True)
print('Class distribution: ', dict(zip(unique, counts)))

Class distribution:  {0: 1111, 1: 842, 2: 1404, 3: 664}


Data augmentation using albumentations

In [95]:
import albumentations as A
from albumentations.pytorch import ToTensorV2

# define the augmentation pipeline
def get_train_transforms():
    """Build the augmentation pipeline applied to training images.

    Returns:
        An ``A.Compose`` pipeline that resizes to 256x256, then applies
        (each with probability 0.5) a horizontal flip, a vertical flip,
        a ``RandomRotate90``, a ``RandomResizedCrop`` back to 256x256 and
        a ``GridDistortion``, and finally converts with ``ToTensorV2``.
    """
    steps = [
        A.Resize(256, 256),
        # Random mirroring along both axes.
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        # Rotation by a multiple of 90 degrees.
        A.RandomRotate90(p=0.5),
        # Crop between 50% and 100% of the area, resized back to 256x256.
        A.RandomResizedCrop(256, 256, scale=(0.5, 1.0), p=0.5),
        A.GridDistortion(num_steps=5, distort_limit=0.3, p=0.5),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.)

def get_valid_transforms():
    """Build the deterministic pipeline for validation/test images.

    Returns:
        An ``A.Compose`` pipeline that only resizes to 256x256 and converts
        with ``ToTensorV2``; no random augmentation is applied.
    """
    steps = [
        A.Resize(256, 256),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.)

def augment(dataset):
    """Balance a dataset by randomly oversampling the smaller classes.

    Every class is topped up, by drawing its own samples uniformly at random
    (with replacement, via ``random.choice``), until it has as many entries
    as the largest class. No image is modified here: the result only repeats
    ``(name, label)`` entries.

    Args:
        dataset: dict mapping the positions ``0..n-1`` to ``(name, label)``
            tuples. Labels may be any hashable, mutually sortable values;
            they do not have to be the integers ``0..k-1``.

    Returns:
        dict: A new dict keyed ``0..m-1``. The oversampled duplicates come
        first, grouped by class in ascending label order, followed by every
        original entry in its original order. An empty input yields ``{}``.
    """
    if not dataset:
        # Nothing to balance; also avoids max() on an empty sequence.
        return {}

    # Group the samples by their label value (not by label position), so
    # arbitrary label sets are supported.
    samples_by_class = {}
    for i in range(len(dataset)):
        sample = dataset[i]
        samples_by_class.setdefault(sample[1], []).append(sample)

    max_count = max(len(samples) for samples in samples_by_class.values())

    augmented_dataset = {}

    # Top up each class with random duplicates of its own samples.
    for label in sorted(samples_by_class):
        class_samples = samples_by_class[label]
        for _ in range(max_count - len(class_samples)):
            augmented_dataset[len(augmented_dataset)] = random.choice(class_samples)

    # Append the original dataset after the duplicates.
    for i in range(len(dataset)):
        augmented_dataset[len(augmented_dataset)] = dataset[i]

    return augmented_dataset

In [96]:
# Balance the training split by oversampling the smaller classes.
augmented_train_dataset = augment(train_dataset)

# Optional preview of the first 10 entries of the balanced split.
verbose = False
if verbose:
    for i in range(10):
        print(augmented_train_dataset[i])

# Report the new size and the per-class counts after balancing.
print('Augmented train dataset size: ', len(augmented_train_dataset))
labels = np.array(
    [augmented_train_dataset[i][1] for i in range(len(augmented_train_dataset))]
)
unique, counts = np.unique(labels, return_counts=True)
print('Class distribution: ', dict(zip(unique, counts)))

        

Augmented train dataset size:  5616
Class distribution:  {0: 1404, 1: 1404, 2: 1404, 3: 1404}


In [97]:
#create the train and test dataloaders
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# The base class is referenced by its full path because this class rebinds
# the imported name ``Dataset``.
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that loads images listed in a ``(name, label)`` index.

    Args:
        dataset: Mapping from integer position to ``(name, label)`` tuples,
            where ``name`` is the image path relative to ``root_dir``.
        transform: Optional callable invoked as ``transform(image=image)``
            that returns a mapping with the result under ``'image'``.
        root_dir: Directory the image names are joined onto. Defaults to
            ``'./chaoyang-data/'``.
    """

    def __init__(self, dataset, transform=None, root_dir='./chaoyang-data/'):
        self.dataset = dataset
        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        """Return the number of entries in the index."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Load, normalise and (optionally) transform the sample at ``idx``.

        Returns:
            tuple: ``(image, label)``. Before the transform, the image is an
            RGB float32 array scaled to ``[0, 1]``.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        name, label = self.dataset[idx][0], self.dataset[idx][1]
        img_name = os.path.join(self.root_dir, name)

        # cv2.imread does not raise on failure: it returns None.
        image = cv2.imread(img_name)
        if image is None:
            raise FileNotFoundError('Could not read image: ' + img_name)

        # Expand a single-channel image to 3 channels.
        if len(image.shape) == 2:
            image = np.repeat(image[:, :, np.newaxis], 3, axis=2)
        # OpenCV loads images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Scale pixel values from [0, 255] to float32 in [0, 1].
        image = image.astype(np.float32) / 255.0

        if self.transform:
            image = self.transform(image=image)['image']

        return image, label

In [98]:
#use the created functions to create the train and test dataloaders with the augmentation pipeline in a single function
def from_path_to_dataloader(path, batch_size, shuffle, need_augmentation):
    """Build a ``DataLoader`` from an annotation index path.

    Args:
        path: Annotation file path without the ``.json`` extension, as
            accepted by ``read_dataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader reshuffles the samples.
        need_augmentation: When true, the dataset is balanced with
            ``augment`` and the random training transforms are applied.
            When false, the dataset is used as read and only the
            deterministic ``get_valid_transforms`` pipeline is applied, so
            evaluation data is not randomly flipped, cropped or distorted.

    Returns:
        DataLoader: Loader over a ``Dataset`` wrapping the index.
    """
    dataset = read_dataset(path)

    if need_augmentation:
        dataset = augment(dataset)
        transform = get_train_transforms()
    else:
        # Evaluation data must go through a deterministic pipeline.
        transform = get_valid_transforms()

    return DataLoader(
        Dataset(dataset, transform=transform),
        batch_size=batch_size,
        shuffle=shuffle,
    )


In [99]:
# Build the loaders: balanced and shuffled for training, untouched for test.
train_dataloader = from_path_to_dataloader(
    path_train, batch_size=32, shuffle=True, need_augmentation=True
)
test_dataloader = from_path_to_dataloader(
    path_test, batch_size=32, shuffle=False, need_augmentation=False
)

# Optional sanity check: print the first four training batches.
verbose = False
if verbose:
    for i, data in enumerate(train_dataloader):
        print(i, data)
        if i == 3:
            break




0 [tensor([[[[0.6824, 0.6716, 0.5678,  ..., 0.9216, 0.9249, 0.9249],
          [0.6379, 0.6155, 0.5391,  ..., 0.9216, 0.9229, 0.9229],
          [0.5882, 0.5800, 0.5854,  ..., 0.9216, 0.9219, 0.9219],
          ...,
          [0.5825, 0.4805, 0.4070,  ..., 0.9106, 0.9115, 0.9115],
          [0.5667, 0.4336, 0.3389,  ..., 0.9113, 0.9113, 0.9113],
          [0.5667, 0.4336, 0.3389,  ..., 0.9113, 0.9113, 0.9113]],

         [[0.4059, 0.3770, 0.2809,  ..., 0.9020, 0.9053, 0.9053],
          [0.3431, 0.3017, 0.2077,  ..., 0.9020, 0.9033, 0.9033],
          [0.2481, 0.2051, 0.1765,  ..., 0.9020, 0.9023, 0.9023],
          ...,
          [0.3099, 0.2606, 0.2211,  ..., 0.9027, 0.9037, 0.9037],
          [0.2954, 0.2166, 0.1659,  ..., 0.9012, 0.9012, 0.9012],
          [0.2954, 0.2166, 0.1659,  ..., 0.9012, 0.9012, 0.9012]],

         [[0.6265, 0.6118, 0.5044,  ..., 0.9176, 0.9210, 0.9210],
          [0.5775, 0.5359, 0.4205,  ..., 0.9176, 0.9190, 0.9190],
          [0.5055, 0.4373, 0.3628,  ...