In [305]:
from tensorflow.keras.layers import Input, Dense, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras import backend as K
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from math import ceil

In [300]:
import glob
import pickle
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt

def read_n_images(data, start, end, loc):
    images_list = list(zip(*data[mode][start:end]))[0]
    labels_list = list(zip(*data[mode][start:end]))[1]
    labels_list = np.array(labels_list).nonzero()[-1] # Convert dummy encoding to categorical (one number per category)
    images = [cv2.imread("{}/{}.jpg".format(loc, image)) for image in images_list]
    return np.array(images), labels_list

def generate_img_from_folder(data_dir, mode, batch_size, autoencoder=True):
    """must be jpg"""
    loc = "{}/{}".format(data_dir,mode)    
    while True:
        with open('{}/labels.pickle'.format(data_dir), 'rb') as f:
            data = pickle.load(f)

        modes = list(data.keys())
        del modes[-1]
        
        assert mode in modes, "'{}' not a valid mode (must be one of {})".format(mode, str(modes))
        assert glob.glob(loc), "Check directory."
        assert glob.glob("{}/*.jpg".format(loc)), "Check file extension (should be 'jpg')."
        
        
        for idx in range(0,len(data[mode]), batch_size):
            start = idx
            end = idx+batch_size
            
            images, labels = read_n_images(data, start, end, loc)
            if autoencoder:
                yield (images, images) # would be label if not autoencoder
            else:
                yield (images, labels) # For classification (can't classify two images at once)
                
                
def get_input_shape(data_dir, mode):
    """must be jpg"""
    loc = "{}/{}".format(data_dir,mode)
    with open('{}/labels.pickle'.format(data_dir), 'rb') as f:
        data = pickle.load(f)
    idx = 0 # Arbitrarily chosen
    img = cv2.imread("{}/{}.jpg".format(loc, data[mode][idx][0]))
    return img.shape

def get_num_examples(data_dir, mode):
    with open('{}/labels.pickle'.format(data_dir), 'rb') as f:
        data = pickle.load(f)
    return len(data[mode])

def plot_history(history):
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.xlabel('Epoch')
    plt.ylabel('MSE')
    plt.plot(hist['epoch'], hist['mean_squared_error'],
           label='Train Error')
    plt.plot(hist['epoch'], hist['val_mean_squared_error'],
           label = 'Val Error')
    #plt.ylim([20,40])
    plt.grid()
    plt.legend()
    plt.show()

In [301]:
DATA_DIR = '../data'
MODE = 'training'
in_shape = get_input_shape(DATA_DIR, MODE)



input_img = Input(shape=in_shape)  # adapt this if using `channels_first` image data format
#input_img = Input(shape=(28, 28, 1))  # adapt this if using `channels_first` image data format

x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = MaxPooling2D((2, 2), padding='same')(x)

# at this point the representation is (4, 4, 8) i.e. 128-dimensional

x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded)
x = UpSampling2D((2, 2))(x)
x = Conv2D(8, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
x = Conv2D(16, (3, 3), activation='relu', padding='same')(x)
x = UpSampling2D((2, 2))(x)
decoded = Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

In [302]:
autoencoder = Model(input_img, decoded)
autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])


In [303]:
autoencoder.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_25 (InputLayer)        (None, 200, 200, 3)       0         
_________________________________________________________________
conv2d_140 (Conv2D)          (None, 200, 200, 16)      448       
_________________________________________________________________
max_pooling2d_60 (MaxPooling (None, 100, 100, 16)      0         
_________________________________________________________________
conv2d_141 (Conv2D)          (None, 100, 100, 8)       1160      
_________________________________________________________________
max_pooling2d_61 (MaxPooling (None, 50, 50, 8)         0         
_________________________________________________________________
conv2d_142 (Conv2D)          (None, 50, 50, 8)         584       
_________________________________________________________________
max_pooling2d_62 (MaxPooling (None, 25, 25, 8)         0         
__________

In [306]:
NUM_SAMPLES = get_num_examples(DATA_DIR, MODE)
BATCH_SIZE = 10
STEPS_PER_EPOCH = ceil(NUM_SAMPLES/BATCH_SIZE)

early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
history = autoencoder.fit_generator(generate_img_from_folder(DATA_DIR, MODE, BATCH_SIZE), shuffle=True, steps_per_epoch=STEPS_PER_EPOCH, epochs=1)




In [309]:
pd.DataFrame(history.history)

Unnamed: 0,loss,acc
0,-17724.606836,0.002878
