# Test-05
Ejecutado en **Kaggle**.

## Descripción
[Complete]

In [1]:
import os

In [2]:
import pandas as pd

In [3]:
import numpy as np

In [4]:
import matplotlib.pyplot as plt

## Cargando las bases de datos

In [5]:
# List every file found under the Kaggle input directory, one full path per line
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

/kaggle/input/rn2021q1-itba-cifar100/y_train.npy
/kaggle/input/rn2021q1-itba-cifar100/x_test.npy
/kaggle/input/rn2021q1-itba-cifar100/x_train.npy


In [6]:
# Load the three arrays of the competition dataset in a single pass:
# training inputs, training targets and test inputs (in that order)
x_train_valid, y_train_valid, x_test = (
    np.load(npy_path) for npy_path in (
        '/kaggle/input/rn2021q1-itba-cifar100/x_train.npy',
        '/kaggle/input/rn2021q1-itba-cifar100/y_train.npy',
        '/kaggle/input/rn2021q1-itba-cifar100/x_test.npy',
    )
)

# Separando conjuntos para entrenamiento y validación

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
# Hold out 20% of the labelled data for validation; the split is stratified on
# the labels and uses a fixed seed so it is the same on every run
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train_valid,
    y_train_valid,
    stratify=y_train_valid,
    test_size=0.2,
    random_state=15,
)

# Normalización de los datos

In [9]:
# Divide every split by 255 (assumes 8-bit pixel values, so the result lies in [0, 1] — TODO confirm)
x_valid_norm, x_test_norm, x_train_norm = (
    pixels / 255 for pixels in (x_valid, x_test, x_train)
)

# Data Augmentation con Albumentations
Se aplica **data augmentation** utilizando la biblioteca Albumentations para aumentar el tamaño del conjunto de datos para entrenamiento. El conjunto de validación permanece intacto para validar que la técnica tuvo buenos resultados sin contaminar los datos de dicho conjunto.

In [10]:
from tensorflow.keras.utils import Sequence

In [11]:
from albumentations import (Compose, ToFloat, HorizontalFlip, VerticalFlip, Rotate,
                            RandomBrightnessContrast, ShiftScaleRotate, RandomSizedCrop,
                            GridDistortion, ElasticTransform)

In [12]:
class AugmentedSequence(Sequence):
  """ Dataset generator that applies data augmentation 'on the fly'.

      Each batch is built by slicing the current sample order and passing
      every image of the slice through the augmentation callable, so reading
      the same batch index twice re-runs the augmentation on the same samples.
  """

  def __init__(self, x, y, batch_size, augmentation, shuffle=True):
    """ Create an instance of the augmented dataset generator.
        @param x            Input samples; must support len() and indexing
                            with an array of integer positions
        @param y            Targets aligned with x, indexed the same way
        @param batch_size   Number of samples per batch
        @param augmentation Callable invoked as augmentation(image=img) whose
                            result holds the transformed image under 'image'
        @param shuffle      When truthy, the sample order is shuffled every
                            time on_epoch_end() runs
    """
    # Initialize the Keras base class (newer Keras releases expect subclasses to do so)
    super().__init__()

    # Save internal parameters of the augmented sequence
    self.x = x
    self.y = y
    self.batch_size = batch_size
    self.augmentation = augmentation
    self.shuffle = shuffle

    # Build the initial sample order
    self.on_epoch_end()

  def __len__(self):
    """ Compute the length of an epoch measured in full batches; samples
        that do not fill a complete batch are left out.
    """
    return int(len(self.x) // self.batch_size)

  def __getitem__(self, index):
    """ Return the augmented batch at the given position
        @param index Batch number, counted from 0
        @return Tuple (inputs, targets) as numpy arrays
    """
    # Positions of the samples that belong to this batch
    start = index * self.batch_size
    indexes = self.indexes[start : start + self.batch_size]

    # Extract the input and output batch from the original dataset
    batch_x = self.x[indexes]
    batch_y = self.y[indexes]

    # Augment image by image; targets are returned untouched
    augmented = [self.augmentation(image=image)['image'] for image in batch_x]
    return np.array(augmented), np.array(batch_y)

  def on_epoch_end(self):
    """ Rebuild the sample order (also called once from __init__)
    """
    self.indexes = np.arange(len(self.x))
    # Truthiness test: `is True` would skip shuffling for flags such as 1 or np.True_
    if self.shuffle:
      np.random.shuffle(self.indexes)


In [22]:
# Augmented training generator: the raw (un-normalized) training images go through
# a random Albumentations pipeline that ends with ToFloat(); the batch size of
# 40000 is meant to hold the training split in one batch (assumption — TODO confirm size)
album_generator = AugmentedSequence(
    x=x_train,
    y=y_train,
    batch_size=40000,
    augmentation=Compose([
        ShiftScaleRotate(p=0.5, shift_limit=0.1, scale_limit=0.2, rotate_limit=30),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        GridDistortion(p=0.2),
        ElasticTransform(p=0.2),
        RandomBrightnessContrast(p=0.5),
        ToFloat(),
    ]),
)

# Data Augmentation con Keras ImageDataGenerator
Se aplica **data augmentation** utilizando la biblioteca Keras para aumentar el tamaño del conjunto de datos para entrenamiento. El conjunto de validación permanece intacto para validar que la técnica tuvo buenos resultados sin contaminar los datos de dicho conjunto.

In [14]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [15]:
# Keras-native augmentation: random shifts, rotations, zooms and flips
datagen = ImageDataGenerator(
    width_shift_range=0.15,
    height_shift_range=0.15,
    rotation_range=30,
    zoom_range=0.25,
    horizontal_flip=True,
    vertical_flip=True,
)

# Iterator over the normalized training images, yielding batches of up to 40000 samples
keras_generator = datagen.flow(x=x_train_norm, y=y_train, batch_size=40000)

# Modelos

In [16]:
from tensorflow.keras.layers import (Dense, Flatten, Activation, BatchNormalization, Dropout, Conv2D, 
                                     MaxPooling2D, InputLayer, AveragePooling2D)

In [17]:
from tensorflow.keras.models import Sequential

In [18]:
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping

In [19]:
from tensorflow.keras.optimizers import Adam

In [20]:
from tensorflow import keras

## Modelo #1

In [21]:
# Model 1: three Conv -> BatchNorm -> ReLU -> AveragePooling stages,
# then a dense head that ends in a 100-way softmax
model = Sequential([
    InputLayer(input_shape=(32, 32, 3)),
    # Convolutional stage, 128 filters
    Conv2D(128, 3),
    BatchNormalization(),
    Activation('relu'),
    AveragePooling2D(),
    # Convolutional stage, 256 filters
    Conv2D(256, 3),
    BatchNormalization(),
    Activation('relu'),
    AveragePooling2D(),
    # Convolutional stage, 512 filters
    Conv2D(512, 3),
    BatchNormalization(),
    Activation('relu'),
    AveragePooling2D(),
    # Classifier head
    Flatten(),
    Dense(256),
    Dropout(0.8),
    BatchNormalization(),
    Activation('relu'),
    Dense(100),
    BatchNormalization(),
    Activation('softmax'),
])

# Integer class labels -> sparse categorical cross-entropy
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 30, 30, 128)       3584      
_________________________________________________________________
batch_normalization (BatchNo (None, 30, 30, 128)       512       
_________________________________________________________________
activation (Activation)      (None, 30, 30, 128)       0         
_________________________________________________________________
average_pooling2d (AveragePo (None, 15, 15, 128)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 256)       295168    
_________________________________________________________________
batch_normalization_1 (Batch (None, 13, 13, 256)       1024      
_________________________________________________________________
activation_1 (Activation)    (None, 13, 13, 256)       0

In [22]:
# Checkpoint callback: 'model_1.hdf5' is only overwritten when the validation accuracy improves
mc_callback = ModelCheckpoint(
    filepath='model_1.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=0,
)

# Training schedule: each round draws a freshly augmented copy of batch 0
# and fits the model on it for `epochs` epochs
epochs, batch_size, augmented_factor = 10, 256, 10
for augmentation_round in range(augmented_factor):
    batch_x, batch_y = album_generator[0]
    model.fit(
        x=batch_x,
        y=batch_y,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid_norm, y_valid),
        callbacks=[mc_callback],
    )

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
E

In [23]:
# Restore the checkpoint written during training
model = keras.models.load_model(filepath='model_1.hdf5')

# Accuracy on the (non-augmented) training split and on the validation split;
# the loss returned alongside it is discarded
_, train_acc = model.evaluate(x=x_train_norm, y=y_train, verbose=0)
_, valid_acc = model.evaluate(x=x_valid_norm, y=y_valid, verbose=0)

# Report both figures rounded to three decimals
print(f'[Accuracy] Train: {round(train_acc, 3)} Valid: {round(valid_acc, 3)}')

[Accuracy] Train: 0.452 Valid: 0.396


## Modelo #2

In [24]:
# Model 2: same layout as the previous model but with max pooling
# instead of average pooling after every convolutional stage
model = Sequential([
    InputLayer(input_shape=(32, 32, 3)),
    # Convolutional stage, 128 filters
    Conv2D(128, 3),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(),
    # Convolutional stage, 256 filters
    Conv2D(256, 3),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(),
    # Convolutional stage, 512 filters
    Conv2D(512, 3),
    BatchNormalization(),
    Activation('relu'),
    MaxPooling2D(),
    # Classifier head
    Flatten(),
    Dense(256),
    Dropout(0.8),
    BatchNormalization(),
    Activation('relu'),
    Dense(100),
    BatchNormalization(),
    Activation('softmax'),
])

# Integer class labels -> sparse categorical cross-entropy
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_3 (Conv2D)            (None, 30, 30, 128)       3584      
_________________________________________________________________
batch_normalization_5 (Batch (None, 30, 30, 128)       512       
_________________________________________________________________
activation_5 (Activation)    (None, 30, 30, 128)       0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 128)       0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 13, 13, 256)       295168    
_________________________________________________________________
batch_normalization_6 (Batch (None, 13, 13, 256)       1024      
_________________________________________________________________
activation_6 (Activation)    (None, 13, 13, 256)      

In [25]:
# Checkpoint callback: 'model_2.hdf5' is only overwritten when the validation accuracy improves
mc_callback = ModelCheckpoint(
    filepath='model_2.hdf5',
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    verbose=0,
)

# Training schedule: each round draws a freshly augmented copy of batch 0
# and fits the model on it for `epochs` epochs
epochs, batch_size, augmented_factor = 5, 512, 20
for augmentation_round in range(augmented_factor):
    batch_x, batch_y = album_generator[0]
    model.fit(
        x=batch_x,
        y=batch_y,
        batch_size=batch_size,
        epochs=epochs,
        validation_data=(x_valid_norm, y_valid),
        callbacks=[mc_callback],
    )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [26]:
# Restore the checkpoint written during training
model = keras.models.load_model(filepath='model_2.hdf5')

# Accuracy on the (non-augmented) training split and on the validation split;
# the loss returned alongside it is discarded
_, train_acc = model.evaluate(x=x_train_norm, y=y_train, verbose=0)
_, valid_acc = model.evaluate(x=x_valid_norm, y=y_valid, verbose=0)

# Report both figures rounded to three decimals
print(f'[Accuracy] Train: {round(train_acc, 3)} Valid: {round(valid_acc, 3)}')

[Accuracy] Train: 0.47 Valid: 0.422


## Modelo #3

In [30]:
# Model 3: three stages of two 'same'-padded convolutions -> BatchNorm -> ELU -> MaxPooling,
# then two 1024-unit dense layers with dropout and a 100-way softmax
model = Sequential([
    InputLayer(input_shape=(32, 32, 3)),
    # Convolutional stage, 128 filters
    Conv2D(128, 3, padding='same'),
    Conv2D(128, 3, padding='same'),
    BatchNormalization(),
    Activation('elu'),
    MaxPooling2D(strides=(2, 2), padding='same'),
    # Convolutional stage, 256 filters
    Conv2D(256, 3, padding='same'),
    Conv2D(256, 3, padding='same'),
    BatchNormalization(),
    Activation('elu'),
    MaxPooling2D(strides=(2, 2), padding='same'),
    # Convolutional stage, 512 filters
    Conv2D(512, 3, padding='same'),
    Conv2D(512, 3, padding='same'),
    BatchNormalization(),
    Activation('elu'),
    MaxPooling2D(strides=(2, 2), padding='same'),
    # Classifier head
    Flatten(),
    Dense(1024),
    Dropout(0.5),
    BatchNormalization(),
    Activation('elu'),
    Dense(1024),
    Dropout(0.5),
    BatchNormalization(),
    Activation('elu'),
    Dense(100),
    BatchNormalization(),
    Activation('softmax'),
])

# Integer class labels -> sparse categorical cross-entropy
model.compile(
    optimizer=Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_9 (Conv2D)            (None, 32, 32, 128)       3584      
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 32, 32, 128)       147584    
_________________________________________________________________
batch_normalization_11 (Batc (None, 32, 32, 128)       512       
_________________________________________________________________
activation_11 (Activation)   (None, 32, 32, 128)       0         
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 16, 16, 128)       0         
_________________________________________________________________
conv2d_11 (Conv2D)           (None, 16, 16, 256)       295168    
_________________________________________________________________
conv2d_12 (Conv2D)           (None, 16, 16, 256)      

In [23]:
# Create the ModelCheckpoint callback to save the best model during training
mc_callback = ModelCheckpoint('model_3.hdf5',
                              monitor='val_accuracy',
                              save_best_only=True,
                              verbose=0,
                              mode='max'
                             )

# Create the EarlyStopping callback to stop when not improving during training.
# Accuracy is a "higher is better" metric, so the monitor has to use mode='max'
# (same as the checkpoint above); with mode='min' a drop in accuracy would be
# counted as an improvement.
# NOTE(review): Keras appears to reset EarlyStopping's internal state at the start of
# every fit() call, so with epochs=1 per call a patience of 20 can presumably never
# be reached — confirm, and consider tracking the best val_accuracy across rounds.
es_callback = EarlyStopping(monitor='val_accuracy', mode='max', verbose=1, patience=20)

# Train the model: every round fits on a freshly augmented copy of batch 0
epochs = 1 # First trained with 5
batch_size = 512 # First trained with 256
augmented_factor = 20 # First trained with 40
for i in range(augmented_factor):
  batch_x, batch_y = album_generator[0]
  model.fit(batch_x,
            batch_y, 
            validation_data=(x_valid_norm, y_valid), 
            callbacks=[mc_callback, es_callback],
            batch_size=batch_size,
            epochs=epochs
            )



In [24]:
# Restore the checkpoint written during training
model = keras.models.load_model(filepath='model_3.hdf5')

# Accuracy on the (non-augmented) training split and on the validation split;
# the loss returned alongside it is discarded
_, train_acc = model.evaluate(x=x_train_norm, y=y_train, verbose=0)
_, valid_acc = model.evaluate(x=x_valid_norm, y=y_valid, verbose=0)

# Report both figures rounded to three decimals
print(f'[Accuracy] Train: {round(train_acc, 3)} Valid: {round(valid_acc, 3)}')

[Accuracy] Train: 0.825 Valid: 0.637
