# CIFAR - Image Augmentation
En este problema resolveremos CIFAR utilizando una red neuronal convolucional (CNN) con Image Augmentation.

## Paso 1. Cargar información

In [None]:
import keras
from keras.datasets import cifar10

# Fetch CIFAR-10; load_data() hands back a (train, test) pair in which each
# element is itself an (images, labels) pair.
[x_train, y_train], [x_test, y_test] = cifar10.load_data()

## Pasos 2 y 3. Comprender la información y modificarla.

En la siguiente celda vamos a visualizar el dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Preview the first 36 training images on a 3-row by 12-column grid.
fig = plt.figure(figsize=(20, 5))
for slot in range(1, 37):
    # Subplot slots are 1-based while image indices are 0-based.
    ax = fig.add_subplot(3, 12, slot, xticks=[], yticks=[])
    ax.imshow(np.squeeze(x_train[slot - 1]))

In [None]:
# Cast both image arrays to float32 and divide by 255
# (assuming 8-bit pixel data, this maps values into [0, 1] - TODO confirm).
x_train, x_test = (
    pixels.astype('float32') / 255 for pixels in (x_train, x_test)
)

### Hacemos One-hot encoding del label

In [None]:
from keras.utils import np_utils

# The number of classes is the count of distinct label values in the
# training labels.
num_classes = np.unique(y_train).size

# Convert the integer labels of both splits to categorical (one-hot) form.
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

### Separamos el training set en training y validation.

In [None]:
# Hold out the first 5000 samples for validation; everything after them
# remains in the training set.
x_valid, y_valid = x_train[:5000], y_train[:5000]
x_train, y_train = x_train[5000:], y_train[5000:]

# Report the shape of the remaining training images.
print('x_train shape:', x_train.shape)

# Report how many images each split contains.
for images, caption in ((x_train, 'train samples'),
                        (x_test, 'test samples'),
                        (x_valid, 'validation samples')):
    print(images.shape[0], caption)

### Configuramos un generador de imágenes

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to the training images.
augmentation_options = dict(
    width_shift_range=0.1,   # random horizontal shift (10% of the width)
    height_shift_range=0.1,  # random vertical shift (10% of the height)
    horizontal_flip=True,    # random left-right mirroring
)

# Create the augmenting image generator with those options.
datagen_train = ImageDataGenerator(**augmentation_options)

# Fit the generator on the training data.
# NOTE(review): the Keras docs describe fit() as required only for
# feature-wise normalization or ZCA whitening, none of which is enabled
# here - confirm whether this call is still needed.
datagen_train.fit(x_train)

### Visualizamos data aumentada

In [None]:
import matplotlib.pyplot as plt

# Use the first 12 training images as the sample to display.
x_train_subset = x_train[:12]

# Show the sample unmodified, one image per column.
fig = plt.figure(figsize=(20, 2))
for column, image in enumerate(x_train_subset, start=1):
    ax = fig.add_subplot(1, 12, column)
    ax.imshow(image)
fig.suptitle('Subset of Original Training Images', fontsize=20)
plt.show()

# Pull exactly one augmented batch from the generator and show it in the
# same layout.
fig = plt.figure(figsize=(20, 2))
x_batch = next(iter(datagen_train.flow(x_train_subset, batch_size=12)))
for column in range(1, 13):
    ax = fig.add_subplot(1, 12, column)
    ax.imshow(x_batch[column - 1])
fig.suptitle('Augmented Images', fontsize=20)
plt.show()

## Paso 4. Definimos Arquitectura del modelo
Utiliza la misma arquitectura que la del ejercicio anterior

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

#TODO: Definir el modelo

In [None]:
#TODO: Compile the model with categorical_crossentropy and RMSProp (include metrics=['accuracy'], which the evaluation cell reads as score[1])

## Paso 5. Entrenamos el Modelo
Nota que la sintaxis es diferente para este caso porque utilizamos datagen_train.flow.

In [None]:
from keras.callbacks import ModelCheckpoint   

# Normally we would train on a GPU for around 100 epochs.
batch_size = 32
epochs = 10

# Checkpoint callback: with save_best_only=True a new file is written only
# when the monitored quantity improves, so the file on disk always holds the
# best epoch seen so far.
checkpointer = ModelCheckpoint(filepath='aug_model.weights.best.hdf5', verbose=1,
                               save_best_only=True)

# `model` must already be defined and compiled in the cells above.
# Model.fit consumes the augmenting iterator directly; Model.fit_generator is
# deprecated and has been removed from current Keras releases.
# steps_per_epoch bounds each epoch because the iterator yields augmented
# batches indefinitely. validation_steps is intentionally omitted: the
# validation data is a pair of in-memory arrays, so it is evaluated in full.
model.fit(datagen_train.flow(x_train, y_train, batch_size=batch_size),
          steps_per_epoch=x_train.shape[0] // batch_size,
          epochs=epochs, verbose=2, callbacks=[checkpointer],
          validation_data=(x_valid, y_valid))

## Paso 6. Cargamos el mejor modelo y lo evaluamos

In [None]:
# Restore the weights saved by the training checkpoint.
model.load_weights('aug_model.weights.best.hdf5')

# Evaluate on the held-out test set.
score = model.evaluate(x_test, y_test, verbose=0)

# NOTE(review): index 1 assumes the model was compiled with accuracy as its
# first metric (index 0 being the loss) - confirm against the compile cell.
test_accuracy = score[1]
print('\n', 'Test accuracy:', test_accuracy)