# Red neuronal convolucional avanzada para detectar objetos

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops
import os
import sys
import tarfile
from six.moves import urllib

In [2]:
session = tf.Session()

In [3]:
batch_size = 128 # Número de imágenes
output_every = 50
generations = 20000 # Número de iteraciones
eval_every = 500

image_height = 32
image_width = 32

crop_height = 24 # Tamaño al que cambiaremos la images para que la red neuronal aprenda todavía más
crop_width = 24

num_channels = 3 # Será imágenes rgb
num_targets = 10

data_folder = "cifar-10-batches-bin"

$$ Learning\ rate = 0.1 \cdot 0.9^{\frac{x}{250}} $$

Empieza en 0.1 y baja un 10% cada 250 iteraciones

In [4]:
learning_rate = 0.1
lr_decay = 0.9
num_generations_to_wait = 250

In [5]:
image_vect_length = image_width*image_height*num_channels
record_lenght = 1 + image_vect_length

### Descarga y procesamiento de CIFAR 10

In [6]:
# Directorio donde queremos guardar las imágenes
data_dir = "../../datasets/cifar-10-temp"

# Si no existe el directorio, lo crea
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

# Url desde la que descargamos los datos
cifar_10_url = "http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz"
# Nombre del fichero comprimido descargado
data_file = os.path.join(data_dir, "cifar-10-binary.tar.gz")

# Si el fichero no existe
if not os.path.isfile(data_file):
    # Guarda el fichero de la url 'cifar-10-url' en la ruta 'data_file'
    file_path, _ = urllib.request.urlretrieve(url=cifar_10_url, filename=data_file)
    # Extraer los ficheros del archivo comprimido
    tarfile.open(file_path, "r:gz").extractall(data_dir)

In [7]:
def read_cifar_files(filename_queue, distord_images=True):
    reader = tf.FixedLengthRecordReader(record_bytes=record_lenght)
    key, record_string = reader.read(filename_queue)
    # Creamos fichero binario
    record_bytes = tf.decode_raw(record_string, tf.uint8)
    # Extraemos la etiqueta
    image_label = tf.cast(tf.slice(record_bytes, begin=[0], size=[1]), tf.int32)
    # Extraemos la imagen
    image_extracted = tf.reshape(tf.slice(record_bytes, begin=[1], size=[image_vect_length]), 
                                 [num_channels, image_height, image_width])
    # Redimensión de imagen
    reshaped_image = tf.transpose(image_extracted, [1,2,0])
    reshaped_image = tf.cast(reshaped_image, tf.float32)
    
    # Crop (corte) aleatorio
    final_image = tf.image.resize_image_with_crop_or_pad(image=reshaped_image, 
                                                         target_height=crop_height, 
                                                         target_width=crop_width)
    
    if distord_images:
        # Flip horizontal aleatorio, cambios brillo y contraste
        final_image = tf.image.random_flip_left_right(final_image)
        final_image = tf.image.random_brightness(final_image, max_delta=63)
        final_image = tf.image.random_contrast(final_image, lower=0.2, upper=1.8)
    
    # Estandarización por color
    final_image = tf.image.per_image_standardization(final_image)
    
    return final_image, image_label

In [8]:
# Función para obtener los conjuntos de imágenes
def input_pipeline(batch_size, train_logical=True):
    # Si estamos en entrenamiento
    if train_logical:
        files = [os.path.join(data_dir, data_folder, "data_batch_{}.bin".format(i)) for i in range(1,6)]
    # Si estamos en testing
    else:
        files = [os.path.join(data_dir, data_folder, "test_batch.bin")]
        
    filename_queue = tf.train.string_input_producer(files)
    image, label = read_cifar_files(filename_queue)
    
    # Tamaño mínimo del buffer para poder cargar y muestrear la imagen
    min_after_dequeue = 1000
    capacity = min_after_dequeue + 3*batch_size # número de hilos + margen*batch_size
    
    example_batch, label_batch = tf.train.shuffle_batch(tensors=[image, label], 
                                                        batch_size=batch_size, 
                                                        capacity=capacity, 
                                                        min_after_dequeue=min_after_dequeue)
    
    return example_batch, label_batch

### Modelo de CNN

- 2 Redes de convolución 
    - 64 nodos cada una
- 3 capas totalmente conectadas
    - 384 nodos la primera
    - 192 nodos la segunda
    - 10 clases en la capa final (predicción)

In [9]:
def cifar_cnn_model(input_images, batch_size, train_logical=True):
    def truncated_normal_var(name, shape, dtype):
        return tf.get_variable(name=name, 
                               shape=shape, 
                               dtype=dtype, 
                               initializer=tf.truncated_normal_initializer(stddev=0.05))
    def zero_var(name, shape, dtype):
        return tf.get_variable(name=name, shape=shape, dtype=dtype, initializer=tf.constant_initializer(0.0))
    
    # Primera capa de convolución
    with tf.variable_scope("conv1") as scope:
        # filtro de convolución de 5x5 para 3 canales de color de entrada y 64 nodos de salida
        conv1_kernel = truncated_normal_var(name="conv_kernel1", shape=[5,5,num_channels,64], dtype=tf.float32)
        conv1 = tf.nn.conv2d(input=input_images, filter=conv1_kernel, strides=[1,1,1,1], padding="SAME")
        conv1_bias = zero_var(name="conv_bias1", shape=[64], dtype=tf.float32)
        conv1 = tf.nn.bias_add(value=conv1, bias=conv1_bias)
        # Capa ReLu
        relu1 = tf.nn.relu(conv1)
        
    # Max pooling de 3x3 y desplazamiento 2x2
    pool1 = tf.nn.max_pool(value=relu1, ksize=[1,3,3,1], strides=[1,2,2,1], padding="SAME", name="pool_layer1")
    
    # Normalización de las imágenes
    norm1 = tf.nn.lrn(pool1, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name="norm1")
    
    # Segunda capa de convolución
    with tf.variable_scope("conv2") as scope:
        # filtro de convolución de 5x5 para 64 nodos de entrada y 64 nodos de salida
        conv2_kernel = truncated_normal_var(name="conv_kernel2", shape=[5,5,64,64], dtype=tf.float32)
        conv2 = tf.nn.conv2d(input=norm1, filter=conv2_kernel, strides=[1,1,1,1], padding="SAME")
        conv2_bias = zero_var(name="conv_bias2", shape=[64], dtype=tf.float32)
        conv2 = tf.nn.bias_add(value=conv2, bias=conv2_bias)
        # Capa ReLu
        relu2 = tf.nn.relu(conv2)
        
    # Max pooling de 3x3 y desplazamiento 2x2
    pool2 = tf.nn.max_pool(value=relu2, ksize=[1,3,3,1], strides=[1,2,2,1], padding="SAME", name="pool_layer2")
    
    # Normalización de las imágenes
    norm2 = tf.nn.lrn(pool2, depth_radius=5, bias=2.0, alpha=1e-3, beta=0.75, name="norm2")
    
    # Redimensionar a una matriz para poder multiplicar en las capas totalmente conectadas
    reshaped_output = tf.reshape(norm2, [batch_size, -1]) # Vector columna
    reshaped_dim = reshaped_output.get_shape()[1].value
    
    # Primera capa totalmente conectada
    with tf.variable_scope("full1") as scope:
        weight_full_layer1 = truncated_normal_var(name="weight_full_layer1", 
                                                  shape=[reshaped_dim, 384], 
                                                  dtype=tf.float32)
        bias_full_layer1 = zero_var(name="bias_full_layer1", shape=[384], dtype=tf.float32)
        full_layer1 = tf.nn.relu(tf.add(tf.matmul(reshaped_output, weight_full_layer1), bias_full_layer1))
    
    # Segunda capa totalmente conectada
    with tf.variable_scope("full2") as scope:
        weight_full_layer2 = truncated_normal_var(name="weight_full_layer2", 
                                                  shape=[384, 192], 
                                                  dtype=tf.float32)
        bias_full_layer2 = zero_var(name="bias_full_layer2", shape=[192], dtype=tf.float32)
        full_layer2 = tf.nn.relu(tf.add(tf.matmul(full_layer1, weight_full_layer2), bias_full_layer2))
        
    # Última capa totalmente conectada
    with tf.variable_scope("full3") as scope:
        weight_full_layer3 = truncated_normal_var(name="weight_full_layer3", 
                                                  shape=[192, num_targets], 
                                                  dtype=tf.float32)
        bias_full_layer3 = zero_var(name="bias_full_layer3", shape=[num_targets], dtype=tf.float32)
        final_output = tf.add(tf.matmul(full_layer2, weight_full_layer3), bias_full_layer3)
        
    return final_output

In [10]:
def cifar_loss(logits, targets):
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets)
    return tf.reduce_mean(cross_entropy)

In [11]:
def train(loss_value, generation_number):
    model_learning_rate = tf.train.exponential_decay(learning_rate=learning_rate, 
                                                     global_step=generation_number,
                                                     decay_rate=lr_decay, 
                                                     decay_steps=num_generations_to_wait, 
                                                     staircase=True)
    
    return tf.train.GradientDescentOptimizer(model_learning_rate).minimize(loss_value)

In [12]:
def get_accuracy(logits, targets):
    targets = tf.squeeze(tf.cast(targets, tf.int32))
    predictions = tf.cast(tf.argmax(logits, axis=1), tf.int32)
    return tf.reduce_mean(tf.cast(tf.equal(predictions, targets), tf.float32))

### Entrenamiento

In [13]:
# No necesitamos placeholders, ya que queda todo perfectamente definido
images, targets = input_pipeline(batch_size, train_logical=True)
test_images, test_targets = input_pipeline(batch_size, train_logical=False)

Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.FixedLe

In [14]:
with tf.variable_scope("model_definition") as scope:
    model_output = cifar_cnn_model(images, batch_size)
    scope.reuse_variables()
    test_output = cifar_cnn_model(test_images, batch_size, train_logical=False)

In [15]:
loss = cifar_loss(model_output, targets)
accuracy = get_accuracy(test_output, test_targets)
generation_num = tf.Variable(initial_value=0, trainable=False) # El grafo no modificará este valor
train_operation = train(loss, generation_num)

In [16]:
init = tf.global_variables_initializer()
session.run(init)

In [17]:
# Iniciamos los hilos de ejecución en paralelo
tf.train.start_queue_runners(sess=session)

Instructions for updating:
To construct input pipelines, use the `tf.data` module.


[<Thread(QueueRunnerThread-input_producer-input_producer/input_producer_EnqueueMany, started daemon 123145579294720)>,
 <Thread(QueueRunnerThread-shuffle_batch/random_shuffle_queue-shuffle_batch/random_shuffle_queue_enqueue, started daemon 123145584549888)>,
 <Thread(QueueRunnerThread-input_producer_1-input_producer_1/input_producer_1_EnqueueMany, started daemon 123145589805056)>,
 <Thread(QueueRunnerThread-shuffle_batch_1/random_shuffle_queue-shuffle_batch_1/random_shuffle_queue_enqueue, started daemon 123145595060224)>]

In [None]:
loss_vector = []
test_acc = []

for i in range(generations):
    _, loss_value = session.run([train_operation, loss])
    
    if (i+1)%output_every == 0:
        loss_vector.append(loss_value)
        print("Paso #{} ==> Loss: {:.5f}".format(i+1, loss_value))

    if (i+1)%eval_every == 0:
        [temp_acc] = session.run([accuracy])
        test_acc.append(temp_acc)
        print("--- Precisión en test {:.2f} ---".format(temp_acc))

Paso #50 ==> Loss: 1.92823
Paso #100 ==> Loss: 1.95564
Paso #150 ==> Loss: 1.73939
Paso #200 ==> Loss: 1.54637
Paso #250 ==> Loss: 1.49652
Paso #300 ==> Loss: 1.68269
Paso #350 ==> Loss: 1.35595
Paso #400 ==> Loss: 1.53400
Paso #450 ==> Loss: 1.41719
Paso #500 ==> Loss: 1.32514
--- Precisión en test 0.52 ---
Paso #550 ==> Loss: 1.39472
Paso #600 ==> Loss: 1.27843
Paso #650 ==> Loss: 1.51472
Paso #700 ==> Loss: 1.39203
Paso #750 ==> Loss: 1.40739
Paso #800 ==> Loss: 1.16718
Paso #850 ==> Loss: 1.20609
Paso #900 ==> Loss: 1.14297
Paso #950 ==> Loss: 1.27602
Paso #1000 ==> Loss: 1.20972
--- Precisión en test 0.54 ---
Paso #1050 ==> Loss: 1.13170
Paso #1100 ==> Loss: 1.35724
Paso #1150 ==> Loss: 1.11064
Paso #1200 ==> Loss: 1.03331
Paso #1250 ==> Loss: 1.17287
Paso #1300 ==> Loss: 1.09078
Paso #1350 ==> Loss: 0.95662
Paso #1400 ==> Loss: 1.05994
Paso #1450 ==> Loss: 1.10032
Paso #1500 ==> Loss: 0.97102
--- Precisión en test 0.54 ---
Paso #1550 ==> Loss: 0.88629
Paso #1600 ==> Loss: 0.93358

In [None]:
eval_idx = range(0, generations, eval_every)
output_idx = range(0, generations, output_every)

plt.figure(figsize=(15,8))
plt.plot(output_idx, loss_vector)
plt.title("Softmax Loss", fontsize=15)
plt.xlabel("Iteración", fontsize=14)
plt.ylabel("Pérdidas", fontsize=14)
plt.legend(loc="upper right")
plt.show()

In [None]:
plt.figure(figsize=(15,8))
plt.plot(eval_idx, test_acc)
plt.title("Función de precisión", fontsize=15)
plt.xlabel("Iteración", fontsize=14)
plt.ylabel("Precisión", fontsize=14)
plt.legend(loc="lower right")
plt.show()