# Data Augmentation

### Leemos el dataset

In [1]:
import numpy as np
import pandas as pd
import cv2
import glob
from PIL import Image
np.random.seed(1) #to have reproducible results 
pd.set_option('display.max_colwidth', None)

Lectura de imágenes apoyada en:  
(1) Sadangi, S. (2020). Introduction to Image Augmentation in Python (Part 1). Recuperado de:  
 https://betterprogramming.pub/introduction-to-image-augmentation-in-python-1691cbf8901f

In [2]:
glob.glob('.//Dataset/*/*.jpg', recursive=True)[0]

'.//Dataset\\AntMan\\am1.jpg'

In [3]:
paths = glob.glob('.//Dataset/*/*.jpg', recursive=True)
paths[0]

'.//Dataset\\AntMan\\am1.jpg'

In [4]:
orig = np.array([np.asarray(Image.open(img)) for img in paths], dtype='object')
orig[0].shape

(267, 189, 3)

#### CV2

In [5]:
import os
import numpy as np

In [6]:
import imgaug as ia
import imgaug.augmenters as iaa

### Resize

Tamaños nuevos

In [7]:
new_image_height = 250
new_image_width = 250

#### Resize al dataset con padding

Resize a través de padding apoyado de:  
(2) fmw42. (2021). Add padding to images to get them into the same shape. Recuperado de:  
https://stackoverflow.com/a/59698237

In [9]:
sized_dataset = []
for img_path in paths:
       img_name = img_path.split('\\')[-1].split('.')[0]
       dir_path = f"ResizedDataset\\"+img_path.split('\\')[-2]
       if not os.path.exists(dir_path):
              os.makedirs(dir_path)
    
       # read image
       img = cv2.imread(img_path)
       height, width, channels = img.shape
       
        #Si la imagen es relativamente pequeña
       if (width<new_image_width*.8 or height<new_image_height*.8):
              #percent by which the image is resized
              scale_percent = 101
              height = img.shape[0]
              width = img.shape[1]
              #calculate the 50 percent of original dimensions
              while scale_percent < 180:
                     width = int(img.shape[1] * scale_percent / 100)
                     height = int(img.shape[0] * scale_percent / 100) 
                     #print("Escala: ", end="")
                     #print(scale_percent, end=" Dims: ")
                     #print(height,width)
                     if height > new_image_height*.8 or width > new_image_width*.8:   
                            print(height,width)
                            scale_percent -= 1
                            width = int(img.shape[1] * scale_percent / 100)
                            height = int(img.shape[0] * scale_percent / 100) 
                            #print("Escala: ", end="")
                            #print(scale_percent, end=" Dims: ")
                            #print(height,width)
                            break
                     scale_percent += 1
       #Si la imagen es demasiado grande, se le hace resize
       if (width>new_image_width or height>new_image_height):
              #percent by which the image is resized
              scale_percent = 95
              height = img.shape[0]
              width = img.shape[1]
              #calculate the percent of original dimensions
              while scale_percent > 0:
                     width = int(img.shape[1] * scale_percent / 100)
                     height = int(img.shape[0] * scale_percent / 100)
                     if height <= new_image_height and width <= new_image_width:    
                            break
                     scale_percent -= 1
      
       # dsize
       dsize = (width, height)
       # resize image
       img = cv2.resize(img, dsize)

       color = (255,0,0)
       result = np.full((new_image_height,new_image_width, channels), color, dtype=np.uint8)

       # compute center offset
       x_center = (new_image_width - width) // 2
       y_center = (new_image_height - height) // 2

       # copy img image into center of result image
       
       result[y_center:y_center+height, 
              x_center:x_center+width] = img

       # save result
       sized_dataset.append(result)
       #print(sized_dataset)
       print(np.asarray(sized_dataset).shape)
       cv2.imwrite(f"{dir_path}/{img_name}_r.jpg", result)

269 190
(1, 250, 250, 3)
187 201
(2, 250, 250, 3)
202 141
(3, 250, 250, 3)
169 303
(4, 250, 250, 3)
178 201
(5, 250, 250, 3)
171 201
(6, 250, 250, 3)
169 303
(7, 250, 250, 3)
164 313
(8, 250, 250, 3)
184 277
(9, 250, 250, 3)
202 139
(10, 250, 250, 3)
201 187
(11, 250, 250, 3)
334 153
(12, 250, 250, 3)
168 304
(13, 250, 250, 3)
277 184
(14, 250, 250, 3)
202 200
(15, 250, 250, 3)
184 277
(16, 250, 250, 3)
171 299
(17, 250, 250, 3)
184 277
(18, 250, 250, 3)
207 151
(19, 250, 250, 3)
184 277
(20, 250, 250, 3)
169 301
(21, 250, 250, 3)
169 304
(22, 250, 250, 3)
169 303
(23, 250, 250, 3)
169 303
(24, 250, 250, 3)
201 201
(25, 250, 250, 3)
(26, 250, 250, 3)
(27, 250, 250, 3)
277 184
(28, 250, 250, 3)
(29, 250, 250, 3)
(30, 250, 250, 3)
(31, 250, 250, 3)
(32, 250, 250, 3)
(33, 250, 250, 3)
(34, 250, 250, 3)
(35, 250, 250, 3)
(36, 250, 250, 3)
(37, 250, 250, 3)
(38, 250, 250, 3)
(39, 250, 250, 3)
(40, 250, 250, 3)
184 277
(41, 250, 250, 3)
(42, 250, 250, 3)
(43, 250, 250, 3)
(44, 250, 250, 3)
(

In [10]:
print(np.asarray(sized_dataset).shape)

(1481, 250, 250, 3)


## Augmentation

In [12]:
marvel = {}
for i in range(len(paths)):
    character = paths[i].split('\\')[-2]
    # Cuenta imágenes para cada personaje
    if character not in marvel.keys():
        marvel[character] =1
    elif character in paths[i]:
        marvel[character] +=1
marvel

{'AntMan': 49,
 'BlackPanther': 50,
 'BlackWidow': 214,
 'CaptainAmerica': 157,
 'CaptainMarvel': 29,
 'Drax': 25,
 'DrStrange': 94,
 'Gamora': 48,
 'HawkEye': 32,
 'Hulk': 116,
 'Ironman': 165,
 'Loki': 49,
 'NickFury': 40,
 'Quake': 30,
 'ScarlettWitch': 46,
 'Spiderman': 122,
 'Thor': 100,
 'Valkyrie': 31,
 'Vision': 28,
 'WinterSoldier': 29,
 'Yondu': 27}

In [13]:
def saveImages(folderName,characterName,newImages):
    for i in range(len(newImages)):
        img_name = characterName + str(i+1) 
        dir_path = folderName+"\\"+characterName

        # Crea directorio y escribe
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        cv2.imwrite(f"{dir_path}/{img_name}.jpg", newImages[i])

Secuencias obtenidas y creadas desde:  
(3) IMGAUG. (2022). imgaug Docs. Recuperado de:  
https://imgaug.readthedocs.io/en/latest/index.html

In [14]:
#Se crea secuencia con cambios,después se le pasa sized dataset, que es de 4 dimensiones con dtype uint8
seq = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontal flips
    iaa.Crop(percent=(0, 0.1)), # random crops
    # Small gaussian blur with random sigma between 0 and 0.5.
    # But we only blur about 50% of all images.
    iaa.Sometimes(0.5,iaa.GaussianBlur(sigma=(0, 0.5))),
    # Strengthen or weaken the contrast in each image.
    iaa.LinearContrast((0.75, 1.5)),
    # Add gaussian noise.
    # For 50% of all images, we sample the noise once per pixel.
    # For the other 50% of all images, we sample the noise per pixel AND
    # channel. This can change the color (not only brightness) of the
    # pixels.
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    # Make some images brighter and some darker.
    # In 20% of all cases, we sample the multiplier once per channel,
    # which can end up changing the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    # Apply affine transformations to each image.
    # Scale/zoom them, translate/move them, rotate them and shear them.
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
        rotate=(-25, 25),
        shear=(-8, 8)
    )
], random_order=True) # apply augmenters in random order

seq2 = iaa.Sequential([
    iaa.Sometimes(0.5,iaa.Cutout(fill_mode="constant", 
                    cval=(0, 255), fill_per_channel=0.5, 
                    nb_iterations=(1, 5), size=0.10)),
    iaa.Sometimes(0.5,iaa.CoarseDropout(0.02, size_percent=0.15, per_channel=0.5)),
    iaa.ImpulseNoise((0,0.05)),
    iaa.MultiplySaturation((0, 1.2)),
    #iaa.ChangeColorTemperature((1100, 10000)),
    iaa.Sometimes(0.6, iaa.Rot90((1, 3), keep_size=False)),
    iaa.pillike.Affine(scale={"x": (0.8, 1.8), "y": (0.8, 1.2)}),
    iaa.Sometimes(0.4, iaa.pillike.Autocontrast((10, 10), per_channel=True)),
    iaa.pillike.EnhanceSharpness((0,0.3)),
    iaa.pillike.FilterSmooth()
], random_order=True) # apply augmenters in random order

seqLeve = iaa.Sequential([
    iaa.WithHueAndSaturation(iaa.WithChannels(0, iaa.Add((0, 50)))),
    iaa.Sometimes(0.7, iaa.Rot90((1, 3), keep_size=False)),
    iaa.Fliplr(0.7),
    iaa.Flipud(0.7)
], random_order=True) # apply augmenters in random order

In [24]:
# Variable para determinar nuevo tamaño máximo del dataset aumentado
augmented_dataset_size = max(marvel.values())
increase_rate = 0.5
# El tamaño será el de la categoría con máx cantidad de imágenes
augmented_dataset_size *= increase_rate

path_index = 0
marvel_aug = {}

for character in marvel:
    print(character)
    
    marvel_aug[character] = 0
    og_length = marvel[character]
    end_path_index = path_index + og_length
    source_dataset = sized_dataset
    i = 0
    while marvel_aug[character] < augmented_dataset_size:
        print("----Augmented size must be at least ", str(augmented_dataset_size) + ". Reached size: " + str(marvel_aug[character]))
        if (i == 1):
            print("Data did not reach size")
            
            print(len(images_seq1[0].shape))
            print((images_seq1[0]))
            print(len(images_seq1[0][0].shape))
            print((images_seq1[0][0]))

            print("Y la cosa es:")
            print(len(source_dataset[0].shape))
            print((images_seq1[0]))
            print(len(source_dataset[0][0].shape))
            print((images_seq1[0][0]))

            print(images_seq1)
            print(images_seq2)
            print(images_seq3)
            break

        print("--From " + str(path_index) + " to " + str(end_path_index))

        images_seq1 = seqLeve(images=source_dataset[path_index:end_path_index])
        images_seq2 = seq(images=source_dataset[path_index:end_path_index])
        images_seq3 = seq2(images=source_dataset[path_index:end_path_index])
        
        source_dataset = images_seq1
        source_dataset.extend(images_seq2)
        source_dataset.extend(images_seq3)
        
        saveImages("AugmentedDataset", character, source_dataset)
        marvel_aug[character] += len(source_dataset)
        i = 1

    path_index += og_length
    print("-From " + str(og_length) + " generated " + str(marvel_aug[character]) + " pictures")

AntMan
----Augmented size must be at least  107.0. Reached size: 0
--From 0 to 49
-From 49 generated 147 pictures
BlackPanther
----Augmented size must be at least  107.0. Reached size: 0
--From 49 to 99
-From 50 generated 150 pictures
BlackWidow
----Augmented size must be at least  107.0. Reached size: 0
--From 99 to 313
-From 214 generated 642 pictures
CaptainAmerica
----Augmented size must be at least  107.0. Reached size: 0
--From 313 to 470
-From 157 generated 471 pictures
CaptainMarvel
----Augmented size must be at least  107.0. Reached size: 0
--From 470 to 499
----Augmented size must be at least  107.0. Reached size: 87
Data did not reach size
3
[[[255  76   0]
  [255  76   0]
  [255  76   0]
  ...
  [255  76   0]
  [255  76   0]
  [255  76   0]]

 [[255  76   0]
  [255  76   0]
  [255  76   0]
  ...
  [255  76   0]
  [255  76   0]
  [255  76   0]]

 [[255  76   0]
  [255  76   0]
  [255  76   0]
  ...
  [255  76   0]
  [255  76   0]
  [255  76   0]]

 ...

 [[255  76   0]
  [25

## Otras Referencias

(4) Leung, K. (2021). Top Python libraries for Image Augmentation in Computer Vision.  
Recuperado de: https://towardsdatascience.com/top-python-libraries-for-image-augmentation-in-computer-vision-2566bed0533e