## Importando librerías a usar

In [6]:
from keras.models import Model
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler
import os, gc, zipfile
import numpy as np, pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

## Importando y procesando todas las imagenes

Descomprimir en la carpeta multilayer el zip que se encuentra en el siguiente enlace: https://drive.google.com/file/d/1xZM7b8Jqq9Wu0GdpGYI8wym8z4vG0789/view?usp=sharing

Una vez acabado eliminar el zip para evitar problemas de rutas.

In [25]:
# Ejemplo de imágenes
PATH = './datasets/multilayer/all-dogs/'
IMAGES = os.listdir(PATH)
print('There are', len(IMAGES), 'images. Here are 5 example filesnames:')
print(IMAGES[:5])

There are 20579 images. Here are 5 example filesnames:
['n02105056_26.jpg', 'n02088632_3993.jpg', 'n02096051_738.jpg', 'n02097047_6503.jpg', 'n02115641_4262.jpg']


Vamos a recortar aleatoriamente las 20.000 imágenes originales y hacer 500.000 nuevas imágenes de entrenamiento.

In [None]:
if os.path.isdir('./tmp'):
    os.rmdir('./tmp')

os.mkdir('./tmp')

tmp = './tmp/'

# Creación de imágenes aleatorias
for i in range(500000):
    img = Image.open(PATH + IMAGES[i % len(IMAGES)])
    img = img.resize((100, int(img.size[1] / (img.size[0] / 100))), Image.ANTIALIAS)
    w = img.size[0]; h = img.size[1]; a = 0; b = 0
    if w > 64: a = np.random.randint(0, w - 64)
    if h > 64: b = np.random.randint(0, h - 64)
    img = img.crop((a, b, 64 + a, 64 + b))
    img.save('./tmp/' + str(i) + '.png', 'PNG')
    if i % 100000 == 0: print('created', i, 'cropped images')
print('created 500000 cropped images')

created 0 cropped images


In [2]:
BATCH_SIZE = 256; EPOCHS = 10
train_datagen = ImageDataGenerator(rescale=1./255)
train_batches = train_datagen.flow_from_directory('./tmp/',
        target_size=(64,64), shuffle=True, class_mode='input', batch_size=BATCH_SIZE)

Found 20579 images belonging to 1 classes.


# 1. Construir el autoencoder

In [5]:
# Codificar
input_img = Input(shape = (64, 64, 3))  
x = Conv2D(48, (3, 3), activation = 'relu', padding = 'same')(input_img)
x = MaxPooling2D((2, 2), padding = 'same')(x)
x = Conv2D(96, (3, 3), activation = 'relu', padding = 'same')(x)
x = MaxPooling2D((2, 2), padding = 'same')(x)
x = Conv2D(192, (3, 3), activation = 'relu', padding = 'same')(x)
x = MaxPooling2D((2, 2), padding = 'same')(x)
encoded = Conv2D(32, (1, 1), activation = 'relu', padding = 'same')(x)

# Espacio latente
latentSize = (8,8,32)

# Decodificar
direct_input = Input(shape = latentSize)
x = Conv2D(192, (1, 1), activation = 'relu', padding = 'same')(direct_input)
x = UpSampling2D((2, 2))(x)
x = Conv2D(192, (3, 3), activation = 'relu', padding = 'same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(96, (3, 3), activation = 'relu', padding = 'same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(48, (3, 3), activation = 'relu', padding = 'same')(x)
decoded = Conv2D(3, (3, 3), activation = 'sigmoid', padding = 'same')(x)

# Compilar
encoder = Model(input_img, encoded)
decoder = Model(direct_input, decoded)
autoencoder = Model(input_img, decoder(encoded))

autoencoder.compile(optimizer = 'Adam', loss = 'binary_crossentropy')

# 2. Entrenar el autoencoder

In [None]:
history = autoencoder.fit(train_batches,
        steps_per_epoch = train_batches.samples // BATCH_SIZE,
        epochs = EPOCHS, verbose = 2)

Epoch 1/10


# 3. Ver la reconstrucción

Nuestro codificador trabaja mapeando imagenes desde un espacio dimensional de 12288(64 x64 x3) a un espacio dimensional de 2048 (imagen latente). Esto es un ratio de compresión x6. Y nuestro decodificador trabaja mapeando nuestra imagen latente hacía atras hasta el espacio dimensional de 12288.

In [1]:
images = next(iter(train_batches))[0]
for i in range(5):

    plt.figure(figsize = (15, 5))
    plt.subplot(1, 3, 1)
    
    # Imagen original
    orig = images[i, :, :, :].reshape((-1, 64, 64, 3))
    img = Image.fromarray( (255 * orig).astype('uint8').reshape((64, 64, 3)))
    plt.title('Original')
    plt.imshow(img)

    # Imagen latente
    latent_img = encoder.predict(orig)
    mx = np.max(latent_img[0])
    mn = np.min(latent_img[0])
    latent_flat = ((latent_img[0] - mn) * 255/(mx - mn)).flatten(order = 'F')
    img = Image.fromarray(latent_flat[:2025].astype('uint8').reshape((45, 45)), mode = 'L') 
    plt.subplot(1, 3, 2)
    plt.title('Latent')
    plt.xlim((-10, 55))
    plt.ylim((-10, 55))
    plt.axis('off')
    plt.imshow(img)

    # Imagen reconstruida
    decoded_imgs = decoder.predict(latent_img[0].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.subplot(1, 3, 3)
    plt.title('Reconstructed')
    plt.imshow(img)
    
    plt.show()

NameError: name 'train_batches' is not defined

# 4. Generación de perros

In [None]:
# Genera 1000 imágenes recortadas
x = np.random.choice(np.arange(20000), 10000)
images = np.zeros((10000, 64, 64, 3))
for i in range(len(x)):
    img = Image.open(PATH + IMAGES[x[i]])
    img = img.resize((100, int(img.size[1] / (img.size[0] / 100))), Image.ANTIALIAS)
    img = img.crop((18, 0, 82, 64))
    images[i, :, :, :] = np.asarray(img).astype('float32') / 255.
        
# Calcula el elipsoide a partir de 1000 imágenes     
encoded_imgs = encoder.predict(images)
sz = latentSize[0] * latentSize[1] * latentSize[2]
encoded_imgs = encoded_imgs.reshape((-1, sz))
mm = np.mean(encoded_imgs, axis = 0)
ss = np.cov(encoded_imgs, rowvar = False)

# Genera 9 imágenes random de perros
generated = np.random.multivariate_normal(mm,ss,9)
generated = generated.reshape((-1,latentSize[0],latentSize[1],latentSize[2]))

In [1]:
# Muestra esas 9 imágenes
for k in range(3):
    plt.figure(figsize = (15, 5))
    plt.subplot(1, 3, 1)
    decoded_imgs = decoder.predict(generated[k * 3].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.subplot(1, 3, 2)
    decoded_imgs = decoder.predict(generated[k * 3 + 1].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.subplot(1, 3, 3)
    decoded_imgs = decoder.predict(generated[k * 3 + 2].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.show()

NameError: name 'plt' is not defined

# 5. Generar mejores perros

Las imágenes de perros generadas no son muy reconocibles. En vez de escoger puntos randoms en el espacio latente, vamos a escoger puntos cercanos a las imágenes de entrenamiento.

In [None]:
# Distancia a moverse desde las imagenes de entrenamiento existentes
beta = 0.35

# Genera 9 imágenes random de perros
generated = np.random.multivariate_normal(mm, ss, 9)
generated = beta * generated + (1 - beta) * encoded_imgs[:9]

In [None]:
# Muestra esas 9 imágenes
for k in range(3):
    plt.figure(figsize = (15, 5))
    plt.subplot(1, 3, 1)
    decoded_imgs = decoder.predict(generated[k * 3].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.subplot(1, 3, 2)
    decoded_imgs = decoder.predict(generated[k * 3 + 1].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.subplot(1,3,3)
    decoded_imgs = decoder.predict(generated[k * 3 + 2].reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
    img = Image.fromarray( (255 * decoded_imgs[0]).astype('uint8').reshape((64, 64, 3)))
    plt.imshow(img)
    plt.show()

# 6. Resultados

Se puede visualizar la calidad del autoencoder obtenido pidiendole que genere una cantidad de imagenes totalmente nuevas. 

In [None]:
beta = 0.2

# Genera 100 imagenes de perros random
generated = np.random.multivariate_normal(mm, ss, 10000)
encoded_imgs = beta * generated + (1 - beta) * encoded_imgs
decoded_imgs = decoder.predict(encoded_imgs.reshape((-1, latentSize[0], latentSize[1], latentSize[2])))
decoded_imgs.shape

In [None]:
# Se guarda en un archivo zip llamado 'images.zip'
z = zipfile.PyZipFile('images.zip', mode = 'w')
for k in range(100):
    img = Image.fromarray( (255 * decoded_imgs[k]).astype('uint8').reshape((64, 64, 3)))   
    f = str(k) + '.png'
    img.save(f, 'PNG'); z.write(f); os.remove(f)
z.close()