In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

import glob
import os

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import fbeta_score

from keras.models import Sequential
from keras.layers import *
from keras.callbacks import CSVLogger, ReduceLROnPlateau, ModelCheckpoint
from keras import backend as K
import paths
from rainforest.data import get_data

In [None]:
# Load both label frames in one go: the train=True frame is used for fitting,
# the train=False frame for validation and the final F2 evaluation.
train_data, val_data = get_data(train=True), get_data(train=False)

In [None]:
# Shared hyper-parameters: samples per batch, and the (height, width) every
# image is resized to before it is fed to the network.
batch_size, input_size = 32, (64, 64)

In [None]:
def data_generator(data_df, batch_size=32, target_size=(256, 256), shuffle=True):
    """Endlessly yield ``(X_batch, y_batch)`` pairs built from the rows of ``data_df``.

    The first column of every row is expected to be ``image_name`` and all
    remaining columns are taken as labels. The matching ``<image_name>.jpg`` is
    read from the ``train-jpg`` folder under ``paths.DATA_FOLDER`` with
    ``cv2.imread`` (so channels are in OpenCV's BGR order), scaled by 1/255,
    resized to ``target_size`` and transposed to channels-first layout.

    Every yielded batch is full: when a pass over the data runs out of rows, the
    last batch is completed with rows from the start of that same pass.

    Args:
        data_df: pandas DataFrame with one row per image.
        batch_size: number of samples in every yielded batch.
        target_size: ``(height, width)`` the images are resized to.
        shuffle: if true, reshuffle the rows at the start of every pass.

    Yields:
        Tuple ``(X_batch, y_batch)`` where ``X_batch`` is a float32 array of
        shape ``(batch_size, 3, height, width)`` and ``y_batch`` is a uint8
        array of shape ``(batch_size, n_labels)``.

    Raises:
        ValueError: if ``data_df`` has no rows (raised on the first ``next``).
        IOError: if an image file is missing or cannot be decoded.
    """
    n = len(data_df)
    if n == 0:
        # Without this guard the outer loop would spin forever without yielding.
        raise ValueError('data_df is empty; cannot generate batches')

    target_size = tuple(target_size)
    n_labels = data_df.shape[1] - 1       # every column except image_name
    dsize = tuple(reversed(target_size))  # cv2.resize expects (width, height)

    while True:
        # Maybe shuffle
        data = data_df.sample(frac=1) if shuffle else data_df
        i = 0
        while i < n:
            X_batch = np.zeros((batch_size, 3) + target_size, dtype=np.float32)
            y_batch = np.zeros((batch_size, n_labels), dtype=np.uint8)

            for j in range(batch_size):
                # Modulo indexing wraps the final batch around to the start of
                # the current pass, so no doubled copy of the frame is needed
                # and batch sizes larger than the frame are supported.
                row = data.iloc[i % n]
                img_path = os.path.join(paths.DATA_FOLDER, 'train-jpg', row.image_name + '.jpg')
                image = cv2.imread(img_path, cv2.IMREAD_COLOR)
                if image is None:
                    # cv2.imread signals failure by returning None, not by raising.
                    raise IOError('could not read image: ' + img_path)
                norm_image = image.astype(np.float32) / 255
                # interpolation must be passed by keyword: the third positional
                # parameter of cv2.resize is the output array (dst).
                norm_image = cv2.resize(norm_image, dsize, interpolation=cv2.INTER_LINEAR)
                X_batch[j] = norm_image.transpose(2, 0, 1)
                y_batch[j] = row.iloc[1:].values
                i += 1

            yield X_batch, y_batch

In [None]:
def fb_score(beta=1, smooth=1.):
    """Build a Keras metric computing a smoothed, per-sample-averaged F-beta score.

    Predictions are thresholded at 0.5, precision and recall are computed per
    sample over the label axis (axis 1), and the smoothed F-beta value
    ``((1 + beta^2) * P * R + smooth) / (beta^2 * P + R + smooth)`` is averaged
    over the batch.

    Args:
        beta: weight of recall relative to precision.
        smooth: additive smoothing term applied to numerator and denominator.
            With ``smooth > 0`` the value is biased upward compared to the plain
            F-beta (a sample with zero precision and recall scores
            ``smooth / smooth = 1``); pass ``smooth=0`` for the unsmoothed score.

    Returns:
        A function ``score(y_true, y_pred)`` suitable for the ``metrics`` list of
        ``model.compile``. The inner function keeps the name ``score``.
    """
    beta_sq = beta ** 2

    # The default binds the factory's `smooth`, so the outer argument is
    # honoured while the inner signature stays the same.
    def score(y_true, y_pred, smooth=smooth):
        y_true = K.cast(y_true, K.floatx())
        # Cast the thresholded mask back to float: multiplying a float tensor by
        # a boolean one is rejected by strictly typed backends.
        y_pred = K.cast(K.greater(y_pred, 0.5), K.floatx())
        true_pos = K.sum(y_true * y_pred, axis=1)
        # K.epsilon() keeps samples with no true / no predicted labels from
        # producing 0/0 = NaN, which would poison the batch mean.
        recall = true_pos / (K.sum(y_true, axis=1) + K.epsilon())
        precision = true_pos / (K.sum(y_pred, axis=1) + K.epsilon())
        numerator = (1 + beta_sq) * (precision * recall) + smooth
        denominator = beta_sq * precision + recall + smooth + K.epsilon()
        return K.mean(numerator / denominator)

    return score

In [None]:
def _conv_stage(filters, **first_conv_kwargs):
    """Return one stage: two 3x3 ReLU conv + batch-norm pairs, then max pooling.

    ``data_format`` is pinned to channels-first on the spatial layers because the
    batches fed to the network are laid out as (batch, 3, height, width); without
    it the layers fall back to the ``image_data_format`` of the local Keras
    configuration. Extra keyword arguments (e.g. ``input_shape``) are forwarded
    to the first convolution only.
    """
    return [
        Conv2D(filters, 3, activation='relu', kernel_initializer='he_normal',
               data_format='channels_first', **first_conv_kwargs),
        BatchNormalization(axis=1),  # axis 1 is the channel axis in channels-first layout
        Conv2D(filters, 3, activation='relu', kernel_initializer='he_normal',
               data_format='channels_first'),
        BatchNormalization(axis=1),
        MaxPool2D(data_format='channels_first'),
    ]


# Three convolutional stages (32, 64, 128 filters) followed by a dense head with
# 17 independent sigmoid outputs, one per label column.
model = Sequential(
    _conv_stage(32, input_shape=(3,) + input_size)
    + _conv_stage(64)
    + _conv_stage(128)
    + [
        Flatten(),
        Dense(1024, activation='relu', kernel_initializer='he_normal'),
        BatchNormalization(axis=1),
        Dense(17, activation='sigmoid'),
    ]
)

model.summary()
# Binary cross-entropy treats every label as its own yes/no problem; the F2
# metric (beta=2) is tracked alongside accuracy.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', fb_score(beta=2)])

In [None]:
# Endless batch streams for fitting and validation. Only the training stream is
# reshuffled on each pass; validation keeps the frame's row order.
train_gen = data_generator(
    data_df=train_data,
    batch_size=batch_size,
    target_size=input_size,
    shuffle=True,
)
val_gen = data_generator(
    data_df=val_data,
    batch_size=batch_size,
    target_size=input_size,
    shuffle=False,
)

In [None]:
# Make sure the checkpoint folder exists up front: otherwise the first save
# fails only after a full epoch of training has already been spent.
checkpoint_path = 'E:/Models/brainforest/multilabel.hdf5'
checkpoint_dir = os.path.dirname(checkpoint_path)
if not os.path.isdir(checkpoint_dir):
    os.makedirs(checkpoint_dir)

# Per-epoch metrics are appended to log.csv in the working directory.
csv_logger = CSVLogger('log.csv')
# Halve the learning rate when val_loss has not improved for 3 epochs.
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.5)
# Keep only the best weights seen so far on disk.
checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=True)

In [None]:
# Number of whole batches that fit into each frame; this is how many batches
# Keras draws from the corresponding generator per epoch.
train_steps, val_steps = (len(frame) // batch_size for frame in (train_data, val_data))

model.fit_generator(
    train_gen,
    steps_per_epoch=train_steps,
    epochs=50,
    validation_data=val_gen,
    validation_steps=val_steps,
    callbacks=[csv_logger, lr_plateau, checkpoint],
)

In [None]:
# Restore the weights stored at the checkpoint path used during training.
model.load_weights(filepath='E:/Models/brainforest/multilabel.hdf5')

def strip_labels(gen):
    """Yield only the first element of every ``(images, labels)`` pair from ``gen``.

    Args:
        gen: iterable of two-element tuples, e.g. a batch generator.

    Yields:
        The ``images`` part of each pair, in order.

    Iterating with ``for`` (instead of calling ``next`` in a ``while True`` loop)
    lets this generator finish cleanly when ``gen`` is exhausted; on Python 3.7+
    a ``StopIteration`` escaping from ``next`` inside a generator is converted
    into ``RuntimeError``. Endless sources behave exactly as before.
    """
    for imgs, _ in gen:
        yield imgs

# Batches needed to see every validation row exactly once. A true ceiling is
# required here: applying np.ceil to an already floored `//` result is a no-op
# and the old `+ 1` ran a wasted extra batch whenever the size divided evenly.
val_steps = int(np.ceil(len(val_data) / float(batch_size)))
# Fresh, unshuffled generator so prediction order matches the row order of val_data.
val_gen = strip_labels(data_generator(val_data, batch_size=batch_size, target_size=input_size, shuffle=False))
preds = model.predict_generator(val_gen, val_steps)
# The final batch is topped up with wrapped-around rows; drop those extras.
preds = preds[:len(val_data)]

In [None]:
# Ground truth: every column after the first one of the validation frame.
y_true = val_data.iloc[:, 1:].values
# Same 0.5 decision threshold as the in-training metric.
y_pred = preds > 0.5
# print() with a single pre-formatted string produces the same output on
# Python 2 and 3; `beta` is passed by keyword because newer scikit-learn
# releases no longer accept it positionally.
print('f2 score: {}'.format(fbeta_score(y_true, y_pred, beta=2, average='samples')))

In [None]:
# Sanity check: expected to be (len(val_data), 17) -- predictions were truncated
# to the validation length and the output layer has 17 units.
preds.shape