## Preprocesamiento

In [1]:
import os
import shutil
from PIL import Image
from numpy import asarray, remainder

In [2]:
# Mount Google Drive inside the Colab VM at /content/drive; the project
# folder configured in `work_dir` lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [12]:
# Species handled by the notebook; each name is also the name of a
# sub-folder inside both the source and the augmented dataset folders.
list_species = [
    'Black Sea Sprat',
    'Gilt Head Bream',
    'Horse Mackerel',
    'Red Mullet',
    'Red Sea Bream',
    'Sea Bass',
    'Shrimp',
    'Striped Red Mullet',
    'Trout',
]

# Project root on the mounted Google Drive.
work_dir = '/content/drive/MyDrive/01BigData/BigDataProject'

# Folders relative to `work_dir`: raw pictures and Pillow-augmented output.
src_dataset_dir = 'test_dataset'
aug_dataset_dir = 'test_augmented_dataset/pillow'

In [4]:
def clean_create_workspace():
    """Reset the augmented-dataset workspace to one empty folder per species.

    Reads the module-level settings `work_dir`, `aug_dataset_dir` and
    `list_species`. For every species the folder
    `<work_dir>/<aug_dataset_dir>/<species>` is first removed (best effort:
    an OSError, e.g. the folder not existing yet, is reported and ignored)
    and then created again, together with any missing parent folders.

    Raises:
        OSError: if a species folder cannot be created, for instance
            FileExistsError when the delete phase failed and a stale
            folder is still present.
    """
    print("*** Clean_Create_Workspace ***")
    # Build every target path once so both phases work on the same list.
    species_dirs = [os.path.join(work_dir, aug_dataset_dir, directory)
                    for directory in list_species]

    print("-- Delete previous directories --")
    for dir_path in species_dirs:
        try:
            shutil.rmtree(dir_path)
            print("Directory successfully deleted:", dir_path)
        except OSError as e:
            # Best effort: on a first run there is simply nothing to delete.
            print("OS msg:", e.strerror)

    print("-- Create directories --")
    for dir_path in species_dirs:
        # makedirs (unlike mkdir) also creates the missing parent folders, so
        # a first run on a fresh project no longer fails. exist_ok is left at
        # its default on purpose: a folder that survived the delete phase
        # must not be silently reported as a clean workspace.
        os.makedirs(dir_path)
        print("Directory successfully created:", dir_path)

In [9]:
# Helper function: used to number the augmented files
# Input: an int
# Return: the number as a string, left-padded with zeros
def convert_num_string(num):
    """Return `num` as text, left-padded with '0' to at least 5 characters.

    Values whose text is already 5 characters or longer are returned
    unpadded, e.g. 12 -> '00012' and 123456 -> '123456'.
    """
    width = 5
    # rjust pads purely on the left and never truncates.
    return str(num).rjust(width, '0')

In [10]:
# Data-augmentation routine: shrink to fit 590x445 px, then flip and rotate.
# Input: the list of species 'list_species' and a factor 'n'
# Output: writes n augmented pictures per source picture (n + 1 when n is odd)
def augmentation_data(list_species, n):
  """Write flipped and rotated copies of every source picture.

  For each species in `list_species`, the files found in
  `<work_dir>/<src_dataset_dir>/<species>` are processed in sorted order.
  Each picture is shrunk in place with `thumbnail((590, 445))` and then,
  for each of `ceil(n / 2)` passes, two files are saved in
  `<work_dir>/<aug_dataset_dir>/<species>`: the horizontally flipped
  picture, and that flipped picture rotated by 25 * (pass index + 1)
  degrees. Output files are numbered consecutively per species with
  `convert_num_string` and keep the extension of their source file.

  Args:
    list_species: species names, i.e. sub-folder names to process.
    n: number of augmented files wanted per source picture; an odd value
      is rounded up to the next even number.
  """
  print("*** Augmentation_Data ***")
  # Every pass saves 2 files, so round n up to an even number and halve it.
  num_iter = n + 1 if (n % 2 > 0) else n
  num_iter = int(num_iter / 2)
  for specie in list_species:
    src_dir = os.path.join(work_dir, src_dataset_dir, specie)
    dest_dataset_dir = os.path.join(work_dir, aug_dataset_dir, specie)
    num_file = 1
    # os.listdir returns entries in arbitrary order; sorting keeps the output
    # numbering reproducible from run to run.
    for filename in sorted(os.listdir(src_dir)):
      pic_path = os.path.join(src_dir, filename)
      # Skip sub-folders (e.g. notebook checkpoint folders); they are not pictures.
      if not os.path.isfile(pic_path):
        continue
      print("\nProcessing picture:", pic_path)
      # splitext keeps the last extension only (with its leading dot), so
      # names containing several dots still get the right output format.
      format_file = os.path.splitext(filename)[1]
      # The context manager closes the source file once all copies are saved.
      with Image.open(pic_path) as image:
        # Resize image (thumbnail keeps the aspect ratio and only shrinks)
        print('> Original size: %s %s' % (image.size))
        image.thumbnail((590, 445))
        print('> Final size: %s %s' % (image.size))
        # Transformation #1: flip image. It does not depend on the pass
        # index, so it is computed once per picture.
        img_flipped = image.transpose(Image.FLIP_LEFT_RIGHT)
        for i in range(num_iter):
          # Save flipped image
          # NOTE(review): the very same flipped picture is saved on every
          # pass, so for n > 2 the output contains duplicates - confirm
          # whether that is intended.
          dest_path_file = os.path.join(
              dest_dataset_dir, convert_num_string(num_file) + format_file)
          print("save file: ", dest_path_file)
          img_flipped.save(dest_path_file)
          num_file += 1
          # Transformation #2: rotate the flipped image, 25 degrees more per pass
          img_rotated = img_flipped.rotate(25 * (i + 1))
          # Save rotated image
          dest_path_file = os.path.join(
              dest_dataset_dir, convert_num_string(num_file) + format_file)
          img_rotated.save(dest_path_file)
          print("save file: ", dest_path_file)
          num_file += 1

## Main Workflow Execution

In [15]:
# Clean destination directory: delete and recreate one output folder per
# species listed in `list_species`
clean_create_workspace()

*** Clean_Create_Workspace ***
-- Delete previous directories --
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Gilt Head Bream
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Horse Mackerel
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Red Mullet
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Red Sea Bream
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Sea Bass
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Shrimp
Directory successfully deleted: /content/drive/MyDrive/01BigData/BigDataProje

In [16]:
# Execute Augmentation (only on a subset of the species)
list_species_test = ['Black Sea Sprat', 'Gilt Head Bream', 'Horse Mackerel']
# With n = 4 every source picture yields 4 augmented files
# (2 passes, each saving a flipped and a rotated copy)
n = 4
augmentation_data(list_species_test, n)  

*** Augmentation_Data ***

Processing picture: /content/drive/MyDrive/01BigData/BigDataProject/test_dataset/Black Sea Sprat/00001.png
> Original size: 1024 768
> Final size: 590 443
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat/00001.png
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat/00002.png
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat/00003.png
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat/00004.png

Processing picture: /content/drive/MyDrive/01BigData/BigDataProject/test_dataset/Black Sea Sprat/00002.png
> Original size: 1024 768
> Final size: 590 443
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset/pillow/Black Sea Sprat/00005.png
save file:  /content/drive/MyDrive/01BigData/BigDataProject/test_augmented_dataset