In [None]:
import os
import sys
from collections import defaultdict
from glob import glob
from os import makedirs
from os.path import join, isdir
from time import time

import cv2
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import TensorBoard

sys.path.append('scripts')
from model_helpers import *
from data_helpers import *
from train_helpers import *

%load_ext autoreload
%autoreload 2

os.environ['CUDA_VISIBLE_DEVICES']='1'

In [None]:
# Images are read from IMGS_PATH; OUT_PATH is handed to the logging and
# checkpoint callbacks further down.
IMGS_PATH, OUT_PATH = '/data', 'saved/logs'

# Create the output directory (including parents) the first time the notebook runs.
if not isdir(OUT_PATH):
    makedirs(OUT_PATH)

# Hyperparameters

In [None]:
# Patch and center sizes, passed to the mask, generator and patch helpers.
patch_size = (128, 128)
center_size = (32, 32)

# Number of files held out for each of the two evaluation splits.
num_train_test = 10
num_test_test = 10

# Training and model settings.
batch_size = 128
num_epochs = 500
model_width = 2

# Load data

In [None]:
# Upper bound on how many images are sampled from IMGS_PATH for this run.
max_files = 10000

# Sort first: glob returns paths in arbitrary, filesystem-dependent order.
available_files = sorted(glob(join(IMGS_PATH, '*jpg')))
num_held_out = num_train_test + num_test_test
if len(available_files) <= num_held_out:
    raise ValueError(
        'Found {} images in {}, need more than {} to leave a training set'.format(
            len(available_files), IMGS_PATH, num_held_out))

# Sampling without replacement fails when asked for more items than exist,
# so cap the sample size at the number of files actually found.
files = np.random.choice(available_files, min(max_files, len(available_files)), replace=False)

# Split with non-negative indices: the last `num_held_out` files form the two
# evaluation sets, everything before them is training data. (Negative slicing
# such as files[-0:] would silently select the whole array for a zero count.)
split = len(files) - num_held_out
train_files = files[:split]
train_test_files = files[split:split + num_train_test]
test_test_files = files[split + num_train_test:]
fake_files = glob('data/fake_files/*jpg')

In [None]:
# Load every file of every dataset as a grayscale image into imgss[dataset].
imgss = defaultdict(list)
file_lists = [('train', train_files),
              ('train_test', train_test_files),
              ('test_test', test_test_files),
              ('fake', fake_files)]
# The loop variable is deliberately not called `files`, so the sampled file
# list defined earlier in the notebook is not overwritten.
for dataset, dataset_files in file_lists:
    # Restart the clock per dataset so elapsed time and ETA describe this dataset only.
    start = time()
    for i, f in enumerate(dataset_files):
        img = cv2.imread(f, cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread reports an unreadable file by returning None instead of
            # raising; storing it would only fail later when the shape is read.
            print('Skipping unreadable image: {}'.format(f))
        else:
            imgss[dataset].append(img)

        done = i + 1
        if done % 1000 == 0:
            elapsed = time() - start
            print('Processing dataset {}: {:.0f} sec - ETA: {:.0f} sec'.format(
                dataset, elapsed, (elapsed / done) * (len(dataset_files) - done)))

In [None]:
# One panel per dataset, each showing that dataset's first image in grayscale.
num_datasets = len(imgss)
fig, ax = plt.subplots(1, num_datasets, figsize=(20, 30))
for panel_idx, dataset_name in enumerate(imgss):
    panel = ax[panel_idx]
    panel.imshow(imgss[dataset_name][0], 'gray')
    panel.set_title('{} image'.format(dataset_name))

In [None]:
# Large standalone view of the second image of the 'fake' dataset.
plt.figure(figsize=(30, 20))
fake_imgs = imgss['fake']
plt.imshow(fake_imgs[1], cmap='gray')

## Crop images to squares and resize each dataset to a common size

In [None]:
for dataset, imgs in imgss.items():
    # Side length of the largest top-left square that fits each image. Only the
    # two spatial axes are considered: after this cell has run once the arrays
    # are (N, H, W, 1), and taking the minimum over the full shape would then
    # be 1 and collapse every image to 1x1 on a re-run.
    sides = [min(img.shape[:2]) for img in imgs]
    res_imgs = [img[:side, :side] for img, side in zip(imgs, sides)]

    # Every image of a dataset is resized to that dataset's smallest square.
    min_size = min(sides)
    res_imgs = [cv2.resize(img, (min_size, min_size), interpolation=cv2.INTER_CUBIC) for img in res_imgs]

    # Stack into one array and append a trailing channel axis.
    imgss[dataset] = np.expand_dims(res_imgs, axis=3)

## Create training generator and evaluation images

In [None]:
# Generator over the 'train' images, configured with the patch/center sizes
# and batch size from the hyperparameter cell.
train_generator = DataGenerator(
    imgss['train'],
    patch_size,
    center_size,
    batch_size=batch_size,
    shuffle=True,
)

In [None]:
x_test = []
for patches in patchess.values():
    x_test.extend(patches)
x_test = np.array(x_test)

In [None]:
mask = create_center_mask(patch_size, center_size)
patchess = defaultdict(list)
for i, (dataset, imgs) in enumerate(imgss.items()):
    if dataset != 'train':
        for j, img in enumerate(imgs):
            patchess[dataset].append(prepare_patch(img, patch_size, mask, i + j))

In [None]:
# Show the fourth prepared 'fake' patch with its singleton axes removed.
fake_patch = np.squeeze(patchess['fake'][3])
plt.imshow(fake_patch, cmap='gray')

# Create model

In [None]:
# Build the model with the project helper, using the width chosen in the
# hyperparameter cell.
model = create_anomaly_cnn(model_width=model_width)

In [None]:
# Attach this run's hyperparameters to the model object as plain attributes.
# NOTE(review): presumably read back by the custom callbacks/checkpointing — confirm.
run_settings = [
    ('patch_size', patch_size),
    ('center_size', center_size),
    ('batch_size', batch_size),
    ('num_epochs', num_epochs),
    ('model_width', model_width),
]
for setting_name, setting_value in run_settings:
    setattr(model, setting_name, setting_value)

In [None]:
# Print the model's summary as a sanity check before compiling.
model.summary()

In [None]:
# Adam optimizer with the project's reconstruction loss, which is built from
# the patch size and the center mask.
loss_fn = reconstruction_loss(patch_size, mask=mask)
model.compile(
    optimizer='adam',
    loss=loss_fn,
    metrics=['accuracy'],
)

In [None]:
# Callbacks used during training; all three are pointed at OUT_PATH.
images_callback = TensorBoardImages(
    OUT_PATH,
    patchess,
    vis_every=1,
)
checkpoint_callback = CustomModelCheckpoint(
    OUT_PATH,
    save_weights_only=False,
)
losses_callback = TensorBoard(
    log_dir=OUT_PATH,
    batch_size=batch_size,
)

In [None]:
# Train on the generator and validate on the fixed evaluation patches, which
# serve as both input and target.
# The epoch count comes from the `num_epochs` hyperparameter (also recorded on
# the model) instead of a hard-coded value, so the run matches its configuration.
history_dict = model.fit_generator(train_generator,
                                   validation_data=(x_test, x_test),
                                   epochs=num_epochs,
                                   verbose=1,
                                   callbacks=[images_callback, checkpoint_callback, losses_callback],
                                   workers=10,
                                   use_multiprocessing=True,
                                   # The generator is created with shuffle=True, so Keras-side
                                   # shuffling is left off here.
                                   shuffle=False,
                                   initial_epoch=0)