# Spatial variational autoencoder for anomaly detection

In [None]:
from __future__ import absolute_import, division, print_function
import math
import numpy as np
import tensorflow as tf
import argparse
import os
import sys
import cv2
from random import seed
from random import randint
from sklearn.model_selection import train_test_split
import scipy.misc
from scipy.misc import imsave
from progressbar import ETA, Bar, Percentage, ProgressBar
from vae import VAE
import time
from imutils import paths
from keras.utils. generic_utils import Progbar
from ops import *
flags = tf.flags
logging = tf.logging

## Dataset

In [None]:
def normalize_data(img):
    """Cast `img` to float32 and rescale it as ``(value - 127.5) / 127.5``.

    For 8-bit pixel values (0..255) the result lies in [-1, 1].
    """
    half_range = 127.5
    as_float = img.astype(np.float32)
    return (as_float - half_range) / half_range

# load tomato images
def load_real_samples():
    """Build the augmented training-image generator.

    Images are read from the sub-directories of ``BASE_FOLDER``, resized to
    48x48 RGB, randomly augmented (rotation, zoom, shifts, shear, flips) and
    normalized with ``normalize_data``.

    Returns:
        The Keras directory iterator created by ``flow_from_directory``,
        yielding shuffled batches of 64 images. With ``class_mode="input"``
        Keras yields ``(x, x)`` pairs, i.e. the target is the image itself.
    """
    # Local import: ImageDataGenerator is not among this file's top-level
    # imports, so referencing it unqualified would raise NameError.
    from keras.preprocessing.image import ImageDataGenerator

    # folder where data is placed
    BASE_FOLDER = '/floyd/input/tomato_dataset/training'

    trainAug = ImageDataGenerator(
            rotation_range=20,
            zoom_range=0.05,
            width_shift_range=0.1,
            height_shift_range=0.1,
            shear_range=0.05,
            horizontal_flip=True,
            vertical_flip=True,
            fill_mode="nearest",
            preprocessing_function=normalize_data)

    trainGen = trainAug.flow_from_directory(
            BASE_FOLDER,
            class_mode="input",
            target_size=(48, 48),
            color_mode="rgb",
            shuffle=True,
            batch_size=64)

    return trainGen

In [None]:
def get_coordinates(filename):
    """Parse the trailing ``<a>x<b>`` coordinate token of an image file name.

    The name is expected to look like ``<prefix>_<a>x<b>.<ext>``. The
    extension is removed, the remainder is split on ``_`` and the last piece
    is split on ``x``; every part is converted with ``int``.

    Args:
        filename: File name or path of the image.

    Returns:
        list of int: the integers found in the last ``_``-separated token,
        in the order they appear.

    Raises:
        ValueError: if any ``x``-separated part is not an integer literal.
    """
    # splitext copes with extensions of any length (".png", ".jpeg", none),
    # unlike slicing off a fixed four characters.
    stem, _ext = os.path.splitext(filename)
    last_token = stem.split('_')[-1]
    return [int(part) for part in last_token.split('x')]

def load_test_data():
    """Load every PNG test image together with its file-name coordinates.

    Each image is read with OpenCV, converted to RGB, resized to 48x48 with
    nearest-neighbour interpolation and normalized with ``normalize_data``.
    Files that OpenCV cannot decode are skipped with a message.

    Returns:
        tuple: ``(images, coordinates)`` where ``images`` is a NumPy array
        stacking the pre-processed images and ``coordinates`` is a list of
        the integer lists returned by ``get_coordinates`` (same order).
    """
    # Local import: `glob` is not among this file's top-level imports.
    import glob

    # folder where data is placed
    BASE_FOLDER = '/floyd/input/tomato_dataset/testing/2019-09-19_06_47_32/'
    images = list()
    coordinates = list()

    # sorted() gives a deterministic order; glob's own order is arbitrary.
    for img_path in sorted(glob.glob(BASE_FOLDER + '*.png')):
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print('Skipping unreadable image: ' + img_path)
            continue
        # OpenCV decodes to BGR, while the training generator is configured
        # with color_mode="rgb"; convert so test data matches training data.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (48, 48), interpolation=cv2.INTER_NEAREST)
        images.append(normalize_data(img))
        coordinates.append(get_coordinates(img_path))

    print('Found ' + str(len(images)) + ' images for test.')
    return (np.asarray(images), coordinates)

In [None]:
# select real samples
def generate_real_samples(dataset, n_samples):
    """Draw `n_samples` random images from the next batch of `dataset`.

    Args:
        dataset: Object whose ``next()`` returns a ``(batch, target)`` pair;
            only the batch (an indexable NumPy array) is used.
        n_samples: Number of images to draw. Sampling is with replacement,
            so it may exceed the batch length.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds the selected images and ``y`` is
        an ``(n_samples, 1)`` array of ones.

    Raises:
        ValueError: if the fetched batch is empty.
    """
    # get batch
    X, _ = dataset.next()
    # choose random instances; the file-level `randint` is random.randint,
    # which accepts only (a, b), so the NumPy variant is called explicitly.
    ix = np.random.randint(0, X.shape[0], n_samples)
    # select images
    X = X[ix]
    # generate class labels
    y = np.ones((n_samples, 1))
    return X, y

In [None]:
# select random test samples
def generate_test_samples(X, n_samples):
    """Draw `n_samples` random rows from the array `X`.

    Args:
        X: NumPy array whose first axis indexes the images.
        n_samples: Number of images to draw. Sampling is with replacement,
            so it may exceed ``X.shape[0]``.

    Returns:
        tuple: ``(X_selected, y)`` where ``X_selected`` holds the selected
        images and ``y`` is an ``(n_samples, 1)`` array of ones.

    Raises:
        ValueError: if `X` is empty.
    """
    # choose random instances; the file-level `randint` is random.randint,
    # which accepts only (a, b), so the NumPy variant is called explicitly.
    ix = np.random.randint(0, X.shape[0], n_samples)
    # select images
    X = X[ix]
    # generate class labels
    y = np.ones((n_samples, 1))
    return X, y

In [None]:
# load image data
# `dataset` is the augmented training generator built by load_real_samples().
# `test_dataset` is the array of pre-processed test images and
# `test_coordinates` the matching per-image coordinate lists, both returned
# by load_test_data().
dataset = load_real_samples()
test_dataset, test_coordinates = load_test_data()

## Model

In [None]:
# Model parameters
# batch_size, learning_rate, hidden_size, channel and model_name are passed
# to the VAE constructor; their exact meaning is defined in vae.py.
batch_size = 32  # also the number of images drawn per training/evaluation step
updates_per_epoch = 1600  # parameter updates performed in each training epoch
max_epoch = 2000  # number of training epochs
max_test_epoch = 100  # NOTE(review): not referenced in the visible cells
learning_rate = 1e-4
working_directory = "/floyd/home/models/svae"  # passed to generate_and_save_images
hidden_size = 2  # presumably the latent dimensionality -- TODO confirm in vae.py
channel = 96  # NOTE(review): meaning not visible here -- confirm in vae.py
checkpoint = 1450  # checkpoint id passed to model.reload() for evaluation
model_name = "low_rank"

In [None]:
# Instantiate the VAE (imported from vae.py) with the parameters set above.
model = VAE(hidden_size, batch_size, learning_rate, channel, model_name)

## Train

In [None]:
# Training: `max_epoch` epochs of `updates_per_epoch` parameter updates each.
# After every epoch the model is checkpointed, the averaged loss is printed
# and a set of generated images is written to `working_directory`.
for epoch in range(max_epoch):
    training_loss = 0.0
    print('epoch', epoch)
    progress_bar = Progbar(target=updates_per_epoch)
    # time.clock() was removed in Python 3.8; time.time() is available on
    # every Python version and reports wall-clock seconds.
    t_start = time.time()
    for i in range(updates_per_epoch):
        images, _ = generate_real_samples(dataset, batch_size)
        # second argument is the global update index across all epochs
        loss_value, kl_loss, rec_loss = model.update_params(images, epoch*updates_per_epoch + i)
        training_loss += loss_value
        # Progbar expects the number of completed steps, so report i + 1;
        # otherwise the bar never reaches its target.
        progress_bar.update(i + 1, values=[('loss_value', loss_value), ('kl_loss', kl_loss), ('rec_loss', rec_loss)])
    t_end = time.time()
    print ("training per epoch time ====== %f" %(t_end-t_start))
    model.save(epoch)
    # NOTE(review): dividing by batch_size assumes update_params returns a
    # loss summed over the batch -- confirm in vae.py.
    training_loss = training_loss/ (updates_per_epoch * batch_size)
    print ("Loss %f" % training_loss)
    print('')
    model.generate_and_save_images(batch_size, working_directory)
    dataset.on_epoch_end()

## Evaluation

In [None]:
# Evaluation: score random test batches against samples generated by the
# reloaded model using parzen_cpu_batch (from ops). The kernel width `sigma`
# is first chosen on 10 batches per candidate (highest mean score wins),
# then the final mean and standard error are computed on 100 batches.
# NOTE(review): data_size=12288 equals 64*64*3, whereas load_test_data()
# produces 48x48x3 images (6912 values) -- confirm against parzen_cpu_batch.
model.reload(checkpoint)
samples= model.generate_samples()
sigmas = np.logspace(-1.0, 0.0, 10)
lls = []
for sigma in sigmas:
    print("sigma: ", sigma)
    nlls =[]
    for i in range(1, 10+1):
        # generate_test_samples returns (images, labels); only images are scored
        X, _ = generate_test_samples(test_dataset, batch_size)
        nll = parzen_cpu_batch(X, samples, sigma=sigma, batch_size=batch_size, num_of_samples=10000, data_size=12288)
        nlls.extend(nll)
    # flatten without hard-coding the count, which depends on batch_size
    nlls = np.array(nlls).reshape(-1)
    print("sigma: ", sigma)
    # %f instead of %d: the mean is a float and %d would truncate it
    print("ll: %f" % (np.mean(nlls)))
    lls.append(np.mean(nlls))
sigma = sigmas[np.argmax(lls)]

nlls = []
for i in range(1,100+1): # number of test batches = 100
    X, _ = generate_test_samples(test_dataset, batch_size)
    nll = parzen_cpu_batch(X, samples, sigma=sigma, batch_size=batch_size, num_of_samples=10000, data_size=12288)
    nlls.extend(nll)
nlls = np.array(nlls).reshape(-1)
print("sigma: ", sigma)
print("ll: %f" % (np.mean(nlls)))
# standard error uses the actual number of scored values
print("se: %f" % (nlls.std() / np.sqrt(nlls.size)))