In [None]:
%load_ext autoreload
%autoreload 2
import sys
# Make the sibling project folders importable so the local modules imported
# further down can be resolved. The entries are relative, so they are
# interpreted against the kernel's current working directory.
paths = ['../models', '../dataset', '../util']
for path in paths:
    # Skip folders that are already registered so that re-running this
    # cell does not pile up duplicate sys.path entries.
    if path in sys.path:
        continue
    sys.path.append(path)

import json
from keras.models import Sequential, load_model
from keras.layers import Convolution2D, Dense, Flatten, Dropout, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import np_utils
from collections import defaultdict, Counter
import cv2

import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
import os
from tqdm import tqdm_notebook as tqdm
from sklearn.externals import joblib

from scipy.misc import imread

import dataset
from data_generators import get_data
from bounding_boxes import get_bounding_boxes, largest_bbox_per_image

%matplotlib inline

In [None]:
# Root folder of the competition data. Set the MLIP_DATA_FOLDER environment
# variable to point at another location instead of editing the notebook; the
# previous hard-coded location remains the default.
DATA_FOLDER = os.environ.get('MLIP_DATA_FOLDER',
                             'C:/Users/Thomas/Documents/MLIP-BigAITuna/Data')


def load_largest_bboxes(bbox_folder):
    """Load bounding-box annotations and keep the largest box per image.

    Every ``*.json`` file in ``bbox_folder`` is parsed as a list of image
    records, each with a ``'filename'`` and a list of ``'annotations'``
    carrying ``'x'``, ``'y'``, ``'width'`` and ``'height'``.

    Parameters
    ----------
    bbox_folder : str
        Folder containing the annotation JSON files.

    Returns
    -------
    dict
        Maps the image base name to ``(x, y, width, height)`` as ints, with
        negative ``x``/``y`` clamped to 0. Images without any annotation get
        no entry. When an image has several annotations the one with the
        largest ``width * height`` is kept (the first one wins on ties).
    """
    largest = {}
    # Sorted so the outcome does not depend on filesystem listing order.
    for json_path in sorted(glob.glob(os.path.join(bbox_folder, '*.json'))):
        with open(json_path) as f:
            images = json.load(f)
        for image in images:
            name = os.path.basename(image['filename'])
            for annot in image['annotations']:
                box = (int(max(0, annot['x'])), int(max(0, annot['y'])),
                       int(annot['width']), int(annot['height']))
                kept = largest.get(name)
                # Previously every annotation overwrote the entry, so the
                # last box in the file won regardless of its size.
                if kept is None or box[2] * box[3] > kept[2] * kept[3]:
                    largest[name] = box
    return largest


# Earlier versions of this cell used get_bounding_boxes(...) followed by
# largest_bbox_per_image(...) from the bounding_boxes module.
bboxes = load_largest_bboxes(DATA_FOLDER + '/bounding_boxes')
# Take the train/validation splits and the class labels as exposed by the
# project's dataset module.
train_data, val_data = dataset.train, dataset.validation
labels = dataset.labels

In [None]:
# Keep only the training/validation rows whose image has a bounding box.
train_data = train_data[train_data.filename.isin(bboxes.keys())]
val_data = val_data[val_data.filename.isin(bboxes.keys())]

# List the stage-1 test images by base name, sorted so the order is stable,
# and wrap them in a frame with the same columns as the labelled data.
# Every test row gets the placeholder label 'ALB'
# (NOTE(review): presumably only there because get_data expects a label
# column - confirm).
test_files = sorted([os.path.basename(p) for p in glob.glob(os.path.join(DATA_FOLDER, 'test_stg1/*.jpg'))])
test_data = pd.DataFrame({'filename':test_files, 'label': ['ALB']*len(test_files)})

# Parenthesised single-argument print behaves identically on Python 2 and
# also parses on Python 3.
print(len(train_data))
print(len(val_data))

In [None]:
# Classifier network: input batch normalisation, four 3x3 'same' convolution
# stages (16, 32, 64, 128 filters) each followed by 2x2 max pooling, then a
# 256-unit dense layer and a softmax over the labels.
# NOTE(review): [3, 256, 256] together with axis=1 looks like channels-first
# input - confirm against the backend's image ordering.
input_shape = [3, 256, 256]

model = Sequential([
    BatchNormalization(input_shape=input_shape, axis=1),
    Convolution2D(16, 3, 3, border_mode='same', input_shape=input_shape,
                  activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(32, 3, 3, border_mode='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Convolution2D(64, 3, 3, border_mode='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Convolution2D(128, 3, 3, border_mode='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(256, activation='relu'),
    # One output unit per class label.
    Dense(len(labels), activation='softmax'),
])

# Cross-entropy is both the training loss and a reported metric, next to
# accuracy.
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy', 'categorical_crossentropy'])

model.summary()

In [None]:
# Training and validation rows both point at images in the 'train' folder.
train_dir = os.path.join(DATA_FOLDER, 'train')

# Augmentation settings, passed to get_data for the training stream only.
augment_opts = dict(flip=True, shift_x=5, shift_y=5, rot_range=10,
                    elastic_trans=False)

train_gen = get_data(train_data, train_dir, augmentation=True, bboxes=bboxes,
                     **augment_opts)

# Validation stream: no augmentation and no shuffling.
val_gen = get_data(val_data, train_dir, bboxes=bboxes, augmentation=False,
                   shuffle=False)


In [None]:
# Sanity check: draw one batch from the training generator and show its
# second element. Note that this consumes a batch from train_gen.
# NOTE(review): batch[1] is presumably the label array - confirm against
# get_data.
batch = next(train_gen)
# Parenthesised single-argument print behaves identically on Python 2 and
# also parses on Python 3.
print(batch[1])
# Optional visual check of the images in the batch:
# for img in batch[0]:
#     print img.shape
#     plt.imshow(img.transpose(1, 2, 0) + 0.5)
#     plt.show()

In [None]:
# Callbacks: log per-epoch metrics to CSV, halve the learning rate after the
# validation loss has stalled for 2 epochs, and checkpoint the model when the
# monitored value improves (save_best_only=True).
csv_logger = CSVLogger('run4_adam.csv')
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=2, verbose=1, factor=0.5)

# Create the checkpoint folder up front. Otherwise a missing folder would
# only surface as an error at the first checkpoint save, i.e. after a full
# epoch of training has already been spent.
checkpoint_dir = '../model'
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)
checkpoint = ModelCheckpoint(filepath=checkpoint_dir + '/model.{epoch:02d}-{val_loss}.hdf5',
                             verbose=1, save_best_only=True)

# One epoch is one pass over the filtered training rows; validation uses
# every filtered validation row.
# NOTE(review): with pickle_safe=False the 3 workers are threads sharing
# train_gen - confirm that get_data returns a thread-safe generator.
model.fit_generator(train_gen, samples_per_epoch=len(train_data),
                    nb_epoch=500,
                    callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_gen, nb_val_samples=len(val_data),
                    pickle_safe=False, nb_worker=3)

In [None]:
# Replace the in-memory model with a previously saved checkpoint.
# NOTE(review): this is an absolute path on a specific machine and is not
# derived from DATA_FOLDER or the checkpoint folder used during training.
saved_model_path = '/media/joris/Scrub/Models/fcn_crop/model.19-0.585225485265.hdf5'
model = load_model(saved_model_path)

In [None]:
def remove_labels(gen):
    """Yield only the first element of each two-item batch from `gen`.

    `gen` must produce items that unpack into exactly two values; the
    second value is discarded. Items are consumed lazily, one per yielded
    value.
    """
    for batch in gen:
        inputs, _ = batch
        yield inputs
# Read the test images from the folder their names were listed from:
# test_data holds base names globbed from DATA_FOLDER/test_stg1, so the
# generator must look in that same folder (it previously pointed at
# DATA_FOLDER/test).
# No augmentation and no shuffling, so predictions stay aligned with the
# order of test_files.
test_gen = get_data(test_data, os.path.join(DATA_FOLDER, 'test_stg1'), bboxes=bboxes,
                   augmentation=False, shuffle=False)
# predict_generator only needs the inputs, so drop the placeholder labels.
test_gen = remove_labels(test_gen)

In [None]:
# Predict on the test stream. 1024 samples are requested and the surplus is
# cut off afterwards.
# NOTE(review): 1024 is presumably the test-set size rounded up to whole
# generator batches - confirm against get_data's batch size.
# Slice by the real number of test files instead of a hard-coded 1000 so a
# different test set does not silently lose or keep rows.
preds = model.predict_generator(test_gen, val_samples=1024)[:len(test_files)]
if len(preds) < len(test_files):
    # Without this check, zip() in the submission cell would quietly drop
    # the images that have no prediction.
    raise ValueError('expected at least %d predictions, got %d'
                     % (len(test_files), len(preds)))

In [None]:
# Bound every predicted probability to the range [0.02, 0.98].
# NOTE(review): presumably done to cap the log-loss penalty for confident
# mistakes - confirm. Rows are not re-normalised after clipping.
prob_floor, prob_ceiling = 0.02, 0.98
preds = np.clip(preds, a_min=prob_floor, a_max=prob_ceiling)

In [None]:
# Write the submission file: a header row, then one row per test image with
# its file name followed by the predicted probabilities.
# NOTE(review): the class columns are hard-coded here; confirm they match
# the order of the model's outputs / `labels`.
header = 'image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT\n'
with open('sub.csv', 'w') as sub_file:
    sub_file.write(header)
    for img, pred in zip(test_files, preds):
        probabilities = ','.join(map(str, pred))
        sub_file.write('%s,%s\n' % (img, probabilities))