In [None]:
%load_ext autoreload
%autoreload 2
import sys
# Project-local packages live in sibling directories; register each of them on
# the import path once so that re-running this cell does not add duplicates.
paths = ['../models', '../dataset', '../util']
for path in paths:
    if path in sys.path:
        continue
    sys.path.append(path)

import json
from keras.models import Sequential, load_model
from keras.layers import Convolution2D, Dense, Flatten, Dropout, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import CSVLogger, ModelCheckpoint, ReduceLROnPlateau
from keras.utils import np_utils
from collections import defaultdict, Counter
import cv2

import pandas as pd
import matplotlib.pyplot as plt
import glob
import numpy as np
import os
from tqdm import tqdm_notebook as tqdm
from sklearn.externals import joblib

from scipy.misc import imread

import dataset
from data_generators import get_data
from bounding_boxes import get_bounding_boxes, largest_bbox_per_image

%matplotlib inline

In [None]:
# Root directory holding the images and the learned bounding boxes.
DATA_FOLDER = 'E:/Data'

# Train / validation splits and the label list come from the project's
# `dataset` module.
train_data, val_data, labels = dataset.train, dataset.validation, dataset.labels

# Learned bounding boxes, looked up by image filename further down.
# (joblib.load unpickles the file - only use it on trusted, locally produced data.)
bboxes = joblib.load(os.path.join(DATA_FOLDER, 'learned_boxes/all_boxes.pkl'))

In [None]:
# Keep only the train / validation rows whose image has an entry in `bboxes`.
train_data = train_data[train_data.filename.isin(bboxes.keys())]
val_data = val_data[val_data.filename.isin(bboxes.keys())]

# Test images have no ground truth: 'ALB' is only a placeholder label
# (presumably so the frame has the columns get_data expects - TODO confirm).
test_files = sorted([os.path.basename(p) for p in glob.glob(os.path.join(DATA_FOLDER, 'test_stg1/*.jpg'))])
test_data = pd.DataFrame({'filename':test_files, 'label': ['ALB']*len(test_files)})

# Parenthesised single-argument print: same output on Python 2, valid on Python 3.
print(len(train_data))
print(len(val_data))

In [None]:
# VGG-style CNN: three blocks of two 3x3 convolutions followed by 2x2
# max-pooling (32 -> 64 -> 128 filters), then a 512-unit dense layer and a
# softmax with one output per entry in `labels`.
# input_shape puts the 3 channels first (assumes Theano-style dim ordering is
# configured - TODO confirm).
# BatchNormalization(axis=1) on the input and after each convolution is not
# used in this version of the network.
model = Sequential([
    # block 1
    Convolution2D(32, 3, 3, input_shape=[3, 256, 256], activation='relu'),
    Convolution2D(32, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # block 2
    Convolution2D(64, 3, 3, activation='relu'),
    Convolution2D(64, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # block 3
    Convolution2D(128, 3, 3, activation='relu'),
    Convolution2D(128, 3, 3, activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # classifier head
    Flatten(),
    Dense(512, activation='relu'),
    Dense(len(labels), activation='softmax'),
])

# Cross-entropy is both the training loss and a reported metric, next to accuracy.
model.compile(optimizer=Adam(),
              loss='categorical_crossentropy',
              metrics=['accuracy', 'categorical_crossentropy'])

model.summary()

In [None]:
# Batch generators for training and validation. Both read images from
# DATA_FOLDER/train and receive the learned boxes (presumably used by get_data
# to crop the images - TODO confirm against data_generators).
train_gen = get_data(
    train_data, os.path.join(DATA_FOLDER, 'train'), labels,
    bboxes=bboxes, batch_size=16, balance_batches=True,
    # augmentation settings (training only)
    augmentation=True, flip=True, shift_x=5, shift_y=5, rot_range=2,
    elastic_trans=False)

# NOTE(review): validation batches are also balanced here, so val_loss may not
# reflect the natural class distribution - confirm this is intended.
val_gen = get_data(
    val_data, os.path.join(DATA_FOLDER, 'train'), labels,
    bboxes=bboxes, batch_size=16, balance_batches=True,
    augmentation=False, shuffle=False)



In [None]:
# Sanity check: pull one training batch and display every image in it.
batch = next(train_gen)
for img in batch[0]:
    # Parenthesised print works on both Python 2 and Python 3.
    print(img.shape)
    # Move the channel axis last for matplotlib; the +0.5 presumably undoes the
    # zero-centring applied by the generator - TODO confirm.
    plt.imshow(img.transpose(1, 2, 0) + 0.5)
    plt.show()

In [None]:
# Sum the second element of the sampled batch over axis 0, giving one total per
# column (presumably per-class counts if the labels are one-hot, i.e. a quick
# look at how balanced the batch is - TODO confirm).
batch[1].sum(axis=0)

In [None]:
# Smallest value in the inputs of a fresh training batch (presumably a check of
# the pre-processed pixel range). Note that this consumes one batch from train_gen.
# Parenthesised print works on both Python 2 and Python 3.
print(next(train_gen)[0].min())

In [None]:
# Training run.
# Checkpoints are written into CHECKPOINT_DIR. Create it up front: a missing
# directory would otherwise only surface as an error when the first checkpoint
# is saved, i.e. after a full epoch of training has already been spent.
CHECKPOINT_DIR = 'E:/Models/serious1_balanced'
if not os.path.isdir(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

# Per-epoch metrics go to a CSV file.
csv_logger = CSVLogger('run4_adam.csv')
# Halve the learning rate as soon as val_loss fails to improve for one epoch.
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=1, verbose=1, factor=0.5)
# Only keep checkpoints that improve on the best one so far (save_best_only);
# epoch number and val_loss are encoded in the filename.
checkpoint = ModelCheckpoint(filepath=CHECKPOINT_DIR + '/model.{epoch:02d}-{val_loss:.2f}.hdf5',
                             verbose=1, save_best_only=True)

model.fit_generator(train_gen, samples_per_epoch=len(train_data),
                    nb_epoch=500, verbose=2,
                    callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_gen, nb_val_samples=len(val_data))

In [None]:
# Replace the in-memory model with a checkpoint saved by the training run
# (the filename follows the ModelCheckpoint pattern: epoch 12, val_loss 0.87).
model = load_model('E:/Models/serious1_balanced/model.12-0.87.hdf5')

In [None]:
# Re-score the validation set using an alternative set of learned boxes.
# The path is built from DATA_FOLDER like the other data paths in this notebook.
other_bboxes = joblib.load(os.path.join(DATA_FOLDER, 'learned_boxes/small_fcn_no_cn.pkl'))
# NOTE: this rebinds val_gen (and, unlike the training-time generator, does not
# pass balance_batches).
# NOTE(review): val_data was filtered against `bboxes`, not `other_bboxes` -
# verify that every validation file also has an entry in other_bboxes.
val_gen = get_data(val_data, os.path.join(DATA_FOLDER, 'train'), labels, bboxes=other_bboxes,
                   augmentation=False, shuffle=False, batch_size=16)
model.evaluate_generator(val_gen, val_samples=len(val_data))

In [None]:
from preprocess import preprocess

# Per-image inference on the test set: every learned box of an image is
# cropped, pre-processed and classified, and the per-box predictions are
# averaged into one row of `preds`.
test_files = sorted(glob.glob(os.path.join(DATA_FOLDER, 'test_stg1/*.jpg')))

# Row used when an image has no box at all ("no fish"). Index 4 lines up with
# the NoF column of the submission header written further down.
NO_FISH = np.array([0, 0, 0, 0, 1, 0, 0, 0])

# Size everything from the actual number of test files instead of assuming 1000.
preds = np.zeros((len(test_files), len(NO_FISH)))
for i, img_path in tqdm(enumerate(test_files), total=len(test_files)):
    img_name = os.path.basename(img_path)
    # No entry, or an empty list of boxes -> treat as "no fish". Without the
    # emptiness check the mean over zero predictions would fill the row with NaN.
    if img_name not in bboxes or len(bboxes[img_name]) == 0:
        preds[i] = NO_FISH
        continue

    # Only read the image once we know it is needed.
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError('could not read image %s' % img_path)
    img = img[:, :, ::-1]  # reverse the channel axis (OpenCV loads BGR)

    all_preds = []
    for x, y, width, height in bboxes[img_name]:
        crop = img[y:y+height, x:x+width]
        crop = preprocess(crop, scale=1./255., zero_center=True, target_size=(256, 256))
        # predict expects a batch axis; take the single row of the result
        all_preds.append(model.predict(np.expand_dims(crop, 0))[0])
    preds[i] = np.array(all_preds).mean(axis=0)

In [None]:
def remove_labels(gen):
    """Lazily yield only the first item of every (inputs, labels) pair from `gen`."""
    for pair in gen:
        inputs, _ = pair
        yield inputs
# Generator over the test images for model.predict_generator.
# test_data was built from the files in DATA_FOLDER/test_stg1, so the images
# are read from that same folder, and `labels` is passed exactly as in the
# train / validation get_data calls.
# NOTE(review): test_data is not filtered against `bboxes`; verify how get_data
# handles files without an entry.
test_gen = get_data(test_data, os.path.join(DATA_FOLDER, 'test_stg1'), labels, bboxes=bboxes,
                    augmentation=False, shuffle=False)
# The model only needs the inputs, so drop the placeholder labels.
test_gen = remove_labels(test_gen)

In [None]:
# Predict on the generator-fed test set. Both sizes are hard-coded: 1024 samples
# are requested and the first 1000 rows kept (presumably 1000 test images,
# rounded up to a multiple of the generator's batch size - TODO confirm).
# NOTE: this overwrites any `preds` computed by the per-image loop.
preds = model.predict_generator(test_gen, val_samples=1024)[:1000]

In [None]:
# Clamp every predicted probability into [0.05, 0.95] (presumably to bound the
# log-loss penalty of a confident wrong prediction - TODO confirm).
preds = preds.clip(min=0.05, max=0.95)

In [None]:
# Write the submission file: a header row, then one row per test image made of
# the image's basename followed by its comma-separated class probabilities.
# Files are visited in sorted order, which is the order `preds` was filled in.
with open('sub.csv', 'w') as sub_file:
    sub_file.write('image,ALB,BET,DOL,LAG,NoF,OTHER,SHARK,YFT\n')
    for img_path, probs in zip(sorted(test_files), preds):
        fields = ','.join(str(p) for p in probs)
        sub_file.write('%s,%s\n' % (os.path.basename(img_path), fields))

In [None]:
# Index of the highest-scoring class for every row (test image) of `preds`.
# np.argmax needs the array as an argument; called without one it raises
# TypeError and stops a full "Run All".
np.argmax(preds, axis=1)