In [None]:
# useful libraries
import os
import glob
import cv2
import numpy as np
import torch
from tqdm import tqdm
import pandas as pd
import random

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU
gpu = torch.cuda.is_available()
if gpu:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Available", device)

In [None]:
# Mount Google Drive at /content/drive so the project files stored there
# can be accessed through ordinary file paths.
# NOTE(review): google.colab is presumably only importable inside Colab, which
# would explain why the import sits next to its single use — confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Locations of the raw training data and of the raw validation data
# (the latter is used as the test set in this notebook).
# NOTE(review): both paths end in ".zip" but are joined with file names like
# directories further down — confirm these are folders, not archives.
_raw_root = '/content/drive/MyDrive/SUPERVISED_PROJECT'
train_dir = _raw_root + '/train_set.zip'
val_dir = _raw_root + '/val_set.zip'  # test directory

In [None]:
# Output roots for the processed (class-sorted) train, test and validation splits
_processed_root = '/content/drive/MyDrive/SUPERVISED_PROJECT/processedData'
proc_train_dir = _processed_root + '/processed_train_set'
proc_test_dir = _processed_root + '/processed_test_set'
proc_val_dir = _processed_root + '/processed_val_set'

In [None]:
# Make sure the three processed split roots exist (no error when already present)
for _split_root in (proc_train_dir, proc_test_dir, proc_val_dir):
    os.makedirs(_split_root, exist_ok=True)

In [None]:
# Read the ground-truth label tables; val_labels.csv provides the labels of
# the set this notebook treats as the test set
_labels_root = '/content/drive/MyDrive/SUPERVISED_PROJECT'
trainLabels = pd.read_csv(_labels_root + '/train_labels.csv')
testLabels = pd.read_csv(_labels_root + '/val_labels.csv')

# Distinct values of the 'label' column in each table
trainClasses, testClasses = trainLabels['label'].unique(), testLabels['label'].unique()

In [None]:
# Report how many distinct classes the train and test label tables contain
for _class_array in (trainClasses, testClasses):
    print(len(_class_array))

In [None]:
# Put both class arrays in ascending order (np.sort returns sorted copies)
trainClasses, testClasses = np.sort(trainClasses), np.sort(testClasses)

In [None]:
# Create one sub-folder per class label under each processed split root.
# The validation split is extracted from the training data further down,
# so it is laid out with the training classes.
_split_layout = (
    (proc_train_dir, trainClasses),
    (proc_test_dir, testClasses),
    (proc_val_dir, trainClasses),
)
for _split_root, _split_classes in _split_layout:
    for _label in _split_classes:
        os.makedirs(os.path.join(_split_root, str(_label)), exist_ok=True)

In [None]:
# move every image to the associated class folder
def move_images(input_dir, output_dir, type):
    """Move each ``*.jpg`` of ``input_dir`` into its class sub-folder of ``output_dir``.

    The class of an image is looked up by file name in the module-level
    ``trainLabels`` table when ``type == "train"`` and in ``testLabels`` for
    any other value. The destination is ``output_dir/<label>/<file name>``;
    the class folder must already exist.

    Args:
        input_dir: folder containing the ``*.jpg`` files to move.
        output_dir: root folder holding one sub-folder per class label.
        type: ``"train"`` to use the training labels, anything else to use
            the test labels. (The name shadows the builtin but is kept so
            existing callers keep working.)

    Returns:
        None. Images without an entry in the label table are left in place
        and reported on stdout instead of aborting the run half-way.
    """
    labels = trainLabels if type == "train" else testLabels

    # Build the name -> label lookup once instead of filtering the whole table
    # for every image; keep='first' matches taking the first matching row.
    label_by_name = (
        labels.drop_duplicates(subset='img_name', keep='first')
        .set_index('img_name')['label']
        .to_dict()
    )

    unlabeled = []
    for source_path in glob.glob(os.path.join(input_dir, "*.jpg")):
        base_filename = os.path.basename(source_path)
        if base_filename not in label_by_name:
            unlabeled.append(base_filename)
            continue

        imageClass = label_by_name[base_filename]
        proc_image_path = os.path.join(output_dir, str(imageClass), base_filename)
        os.rename(source_path, proc_image_path)

    if unlabeled:
        print("skipped", len(unlabeled), "image(s) without a label, e.g.", unlabeled[:10])

In [None]:
# Validation set creation (extract from the training set)
# For every class, VAL_FRACTION of its images (rounded down) is drawn without
# replacement. A dedicated seeded generator makes the draw reproducible, so
# re-running the notebook on a fresh kernel selects the same files.
VAL_FRACTION = 0.20
VAL_SPLIT_SEED = 42
_val_rng = np.random.default_rng(VAL_SPLIT_SEED)

validation_set = []
for trainClass in trainClasses:
    # file names of all images labelled with this class, in table order
    class_names = trainLabels.loc[trainLabels['label'] == trainClass, 'img_name']
    class_set = [os.path.join(train_dir, image) for image in class_names]
    n_val = int(len(class_set) * VAL_FRACTION)
    rand_choice = _val_rng.choice(class_set, n_val, replace=False)
    # store plain Python strings rather than NumPy string scalars
    validation_set.extend(rand_choice.tolist())

In [None]:
# Summarise the selection; printing every path would flood the cell output
print(len(validation_set), "validation images selected, e.g.", validation_set[:5])

In [None]:
# Preview only the first few selected paths instead of echoing the whole list
validation_set[:10]

In [None]:
# Determine the class and the destination path of every validation image.
# The move itself is disabled by default (as it was in this cell before);
# set MOVE_VALIDATION_FILES to True to actually relocate the files.
MOVE_VALIDATION_FILES = False

# Build the name -> label lookup once instead of filtering the whole table
# for every image; keep='first' matches taking the first matching row.
_val_label_by_name = (
    trainLabels.drop_duplicates(subset='img_name', keep='first')
    .set_index('img_name')['label']
    .to_dict()
)

classes = []  # class label of each entry of validation_set, in the same order
for image_path in validation_set:
    base_filename = os.path.basename(image_path)
    imageClass = _val_label_by_name[base_filename]
    classes.append(imageClass)
    proc_image_path = os.path.join(proc_val_dir, str(imageClass), base_filename)
    if MOVE_VALIDATION_FILES:
        os.rename(image_path, proc_image_path)

In [None]:
# check if the validation set has been correctly constructed:
# for every class, print the number of selected validation images followed by
# the expected 20% share (rounded down) of that class in the training labels
_classes_arr = np.asarray(classes)  # explicit array so == compares element-wise
for i in trainClasses:
    print(np.sum(_classes_arr == i))
    print(int(len(trainLabels[trainLabels['label'] == i])*0.20))

In [None]:
# Record, for every class, how many images belong to the training and to the
# validation split, and flag the classes whose total number of images is
# below LOW_CARDINALITY_THRESHOLD.
LOW_CARDINALITY_THRESHOLD = 300

_class_totals = trainLabels['label'].value_counts()  # counted once for all classes

low_cardinality = []
train_cardinality = []
val_cardinality = []
for i in trainClasses:
    total = int(_class_totals.get(i, 0))
    n_val = int(total*0.20)  # same rounding as the validation split
    val_cardinality.append(n_val)
    # the training split keeps whatever the validation split did not take;
    # int(total*0.80) can be one short of that because both parts round down
    train_cardinality.append(total - n_val)
    if total < LOW_CARDINALITY_THRESHOLD:
        low_cardinality.append(i)
print("classes with low total cardinality: ")
print(low_cardinality)
print("cardinality of all classes in the training set: ")
print(train_cardinality)
print("cardinality of all classes in the validation set: ")
print(val_cardinality)

# positions in these arrays follow the order of trainClasses
train_cardinality = np.array(train_cardinality)
val_cardinality = np.array(val_cardinality)

# visualize the cardinality of the 4 smallest classes: label, train count, val count
# (positions are computed, so this also works when the number of classes changes)
_smallest_positions = np.argsort(train_cardinality + val_cardinality, kind="stable")[:4]
for _pos in _smallest_positions:
    print(trainClasses[_pos], train_cardinality[_pos], val_cardinality[_pos])

In [None]:
# DATA AUGMENTATION
# We will here augment classes which have less than 300 images but more than 100 images (3 classes)
def augmentation(input_dir, output_dir, imageClass, type, cardinality):
    """Add rotated, flipped and darkened copies of a class's images up to a target count.

    Source images are the ``*.jpg`` files of ``input_dir/<imageClass>``. Every
    processed source yields three new files in ``output_dir/<imageClass>``,
    prefixed ``rotated_``, ``flipped_`` and ``decreasedbrightness_``. Sources
    are processed until the running count reaches the target
    (``350*0.80`` for ``type == "train"``, ``350*0.20`` otherwise).

    Args:
        input_dir: root folder holding one sub-folder per class (sources).
        output_dir: root folder holding one sub-folder per class (results).
        imageClass: class label; used as folder name and as index into
            ``cardinality``.
        type: ``"train"`` selects the training target, any other value the
            validation target. (Shadows the builtin; name kept for callers.)
        cardinality: indexable of current image counts; ``cardinality[imageClass]``
            is the starting count. NOTE(review): this assumes the label can be
            used directly as an index into the counts — confirm.

    Returns:
        None. Files are written as a side effect.
    """
    # Files written by this notebook's augmentation steps start with one of these
    # prefixes. They are never used as sources, so re-running the cell (input and
    # output folders are the same in practice) does not augment augmented images.
    # NOTE(review): assumes no original image name starts with these prefixes.
    generated_prefixes = ('rotated_', 'rotated1_', 'flipped',
                          'decreasedbrightness_', 'increasedbrightness_')

    target_dir = os.path.join(output_dir, str(imageClass))
    # sorted so the set of augmented images does not depend on directory order
    image_paths = sorted(
        path for path in glob.glob(os.path.join(input_dir, str(imageClass), "*.jpg"))
        if not os.path.basename(path).startswith(generated_prefixes)
    )
    cardinality = cardinality[imageClass] # starting point for the cardinality

    if type == "train":
        minimum = 350*0.80
    else:
        minimum = 350*0.20

    for i in image_paths:
        if cardinality >= minimum:
            break  # target reached, nothing left to do

        # The images stay in OpenCV's native BGR order: rotation, flipping and
        # uniform darkening treat all channels alike, so no colour conversion
        # is needed before writing them back.
        image = cv2.imread(i)
        if image is None:
            # unreadable or corrupt file: skip it instead of crashing
            continue
        base_filename = os.path.basename(i)

        # rotation by a random angle around the image centre, same canvas size
        rows, columns, _ = image.shape
        alpha = random.randint(0,180)
        M = cv2.getRotationMatrix2D((columns / 2, rows / 2), alpha, 1)
        rotated_image = cv2.warpAffine(image, M, (columns, rows))
        cv2.imwrite(os.path.join(target_dir, 'rotated_' + base_filename), rotated_image)

        # flipping around a randomly chosen axis (0, 1 or -1 as cv2.flip code)
        f = random.choice([0, 1, -1])
        flipped_image = cv2.flip(image, f)
        cv2.imwrite(os.path.join(target_dir, 'flipped_' + base_filename), flipped_image)

        # decreasing brightness by a random offset; cv2.subtract saturates at 0
        b = random.randint(60, 70)
        brightness = np.full(image.shape, b, dtype="uint8")
        decreased_image = cv2.subtract(image, brightness)
        cv2.imwrite(os.path.join(target_dir, 'decreasedbrightness_' + base_filename), decreased_image)

        cardinality = cardinality + 3


# Augment, in place, every class whose total number of labelled images lies
# strictly between 100 and 300: first the training folders, then the
# validation folders.
# NOTE(review): the original note listed classes 164, 83 and 213 — TODO confirm
# against the label counts.
_mid_sized_classes = [
    label for label in trainClasses
    if 100 < (trainLabels['label'] == label).sum() < 300
]

for _split_dir, _split_name, _split_counts in (
    (proc_train_dir, "train", train_cardinality),
    (proc_val_dir, "val", val_cardinality),
):
    for _label in _mid_sized_classes:
        augmentation(_split_dir, _split_dir, _label, _split_name, _split_counts)



In [None]:
# PARTICULAR DATA AUGMENTATION FOR CLASS 162 (less than 100 images)
def augmentation162(input_dir, output_dir, imageClass, type, cardinality):
    """Heavier augmentation: seven new images per source until a target count is reached.

    Source images are the ``*.jpg`` files of ``input_dir/<imageClass>``. Every
    processed source yields two rotations, three flips, one darkened and one
    brightened copy in ``output_dir/<imageClass>``. Sources are processed until
    the running count reaches the target (``int(270*0.80)`` for
    ``type == "train"``, ``int(240*0.20)`` otherwise).
    NOTE(review): the two targets are derived from different totals (270 vs
    240) — confirm this is intended.

    Args:
        input_dir: root folder holding one sub-folder per class (sources).
        output_dir: root folder holding one sub-folder per class (results).
        imageClass: class label; used as folder name and as index into
            ``cardinality``.
        type: ``"train"`` selects the training target, any other value the
            validation target. (Shadows the builtin; name kept for callers.)
        cardinality: indexable of current image counts; ``cardinality[imageClass]``
            is the starting count. NOTE(review): this assumes the label can be
            used directly as an index into the counts — confirm.

    Returns:
        None. Files are written as a side effect.
    """
    # Files written by this notebook's augmentation steps start with one of these
    # prefixes. They are never used as sources, so re-running the cell (input and
    # output folders are the same in practice) does not augment augmented images.
    # NOTE(review): assumes no original image name starts with these prefixes.
    generated_prefixes = ('rotated_', 'rotated1_', 'flipped',
                          'decreasedbrightness_', 'increasedbrightness_')

    target_dir = os.path.join(output_dir, str(imageClass))

    def save_variant(prefix, base_filename, variant):
        # write one augmented image next to the others of its class
        cv2.imwrite(os.path.join(target_dir, prefix + base_filename), variant)

    # sorted so the set of augmented images does not depend on directory order
    image_paths = sorted(
        path for path in glob.glob(os.path.join(input_dir, str(imageClass), "*.jpg"))
        if not os.path.basename(path).startswith(generated_prefixes)
    )
    cardinality = cardinality[imageClass] # starting point for the cardinality

    if type == "train":
        minimum = int(270*0.80)
    else:
        minimum = int(240*0.20)

    for i in image_paths:
        if cardinality >= minimum:
            break  # target reached, nothing left to do

        # The images stay in OpenCV's native BGR order: every operation below
        # treats all channels alike, so no colour conversion is needed.
        image = cv2.imread(i)
        if image is None:
            # unreadable or corrupt file: skip it instead of crashing
            continue
        base_filename = os.path.basename(i)

        # 2 rotations around the image centre, same canvas size:
        # one angle drawn from [0, 90], one from [90, 180]
        rows, columns, _ = image.shape
        center = (columns / 2, rows / 2)

        alpha = random.randint(0,90)
        M = cv2.getRotationMatrix2D(center, alpha, 1)
        save_variant('rotated_', base_filename, cv2.warpAffine(image, M, (columns, rows)))

        alpha1 = random.randint(90,180)
        M1 = cv2.getRotationMatrix2D(center, alpha1, 1)
        save_variant('rotated1_', base_filename, cv2.warpAffine(image, M1, (columns, rows)))

        # 3 flippings, one per cv2.flip code: 1, -1 and 0
        save_variant('flippedV_', base_filename, cv2.flip(image, 1))
        save_variant('flippedO_', base_filename, cv2.flip(image, -1))
        save_variant('flippedOV_', base_filename, cv2.flip(image, 0))

        # decreasing brightness by a random offset; cv2.subtract saturates at 0
        b = random.randint(60, 70)
        brightness = np.full(image.shape, b, dtype="uint8")
        save_variant('decreasedbrightness_', base_filename, cv2.subtract(image, brightness))

        # increasing brightness by a random offset; cv2.add saturates at 255
        a = random.randint(60, 70)
        bright = np.full(image.shape, a, dtype="uint8")
        save_variant('increasedbrightness_', base_filename, cv2.add(image, bright))

        cardinality = cardinality + 7

# Run the class-162 augmentation in place: training folder first, then validation
for _split_dir, _split_name, _split_counts in (
    (proc_train_dir, "train", train_cardinality),
    (proc_val_dir, "val", val_cardinality),
):
    augmentation162(_split_dir, _split_dir, 162, _split_name, _split_counts)