In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

from spectral import get_rgb, ndvi
import glob
import os

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.metrics import fbeta_score
from sklearn.preprocessing import MinMaxScaler
from skimage.io import imread

from keras.models import Sequential
from keras.layers import *
from keras.callbacks import CSVLogger, ReduceLROnPlateau, ModelCheckpoint
from keras import backend as K
import paths

from rainforest.data import get_data, labels
from rainforest.preprocess import preprocess
from rainforest.models.resnet import ResNet50
from rainforest.models.densenet import create_dense_net

In [None]:
# Load the two tables through the project helper; the `train` flag selects
# which one is returned. Later cells use them as pandas DataFrames whose first
# column is `image_name` and whose remaining columns are the label values.
train_data = get_data(train=True)
val_data = get_data(train=False)

In [None]:
# Number of samples per batch; also used to derive the step counts below.
batch_size=32
# Spatial size handed to the generators as `target_size`.
input_size=(64, 64)

In [None]:
def preprocess_image1(img):
    """Turn a multi-band image into a 5-channel, channels-last feature array.

    Channels 0-2 are the bands selected by ``get_rgb(img, [2, 1, 0])``,
    min-max scaled jointly (one minimum/maximum for all three bands) into
    [0, 1]. Channel 3 is ``(NIR - RED) / (NIR + RED)`` and channel 4 is
    ``(GREEN - NIR) / (GREEN + NIR)``, both clipped to [-1, 1]; they are
    computed from ``get_rgb(img, [3, 2, 1])``, so ``img`` must provide a band
    at index 3.

    Returns an array shaped ``(rows, cols, 5)``.
    """
    eps = np.finfo(float).eps  # keeps the ratio denominators away from zero

    # Visible bands: flatten to one column so a single min/max is fitted.
    visible = get_rgb(img, [2, 1, 0])  # R-G-B
    column = MinMaxScaler(feature_range=(0, 255)).fit_transform(np.reshape(visible, (-1, 1)))
    visible_unit = np.reshape(column, visible.shape) / 255.

    # Band ratios built from the NIR / red / green planes.
    false_colour = get_rgb(img, [3, 2, 1])  # NIR-R-G
    nir = false_colour[:, :, 0]
    red = false_colour[:, :, 1]
    green = false_colour[:, :, 2]
    nir_red_ratio = np.clip((nir - red) / (nir + red + eps), -1, 1)  # (NIR - RED) / (NIR + RED)
    green_nir_ratio = np.clip((green - nir) / (green + nir + eps), -1, 1)  # (GREEN - NIR) / (GREEN + NIR)

    # Append the two ratio planes behind the three colour channels.
    return np.concatenate(
        (visible_unit, nir_red_ratio[:, :, np.newaxis], green_nir_ratio[:, :, np.newaxis]),
        axis=2)

In [None]:
def data_generator(data_df, batch_size=32, target_size=(256, 256), shuffle=True, augmentation=True, subfolder='train-jpg',
                  extension='jpg'):
    """Endlessly yield ``(X_batch, y_batch)`` batches built from ``data_df``.

    Every row is turned into an image by reading
    ``<paths.DATA_FOLDER>/<subfolder>/<image_name>.<extension>`` and passing it
    through ``preprocess_image1`` and then ``preprocess``.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Must have an ``image_name`` column; every value of a row after the
        first position is written into the label vector.
    batch_size : int
        Number of samples per yielded batch.
    target_size : tuple
        Spatial size of the batch array, also forwarded to ``preprocess``.
    shuffle : bool
        Reshuffle the rows at the start of every pass over the table.
    augmentation : bool
        Forwarded to ``preprocess``.
    subfolder, extension : str
        Folder under ``paths.DATA_FOLDER`` and file extension of the images.

    Yields
    ------
    (X_batch, y_batch)
        ``X_batch`` is float32 with shape
        ``(batch_size, target_size[0], target_size[1], 5)`` and ``y_batch`` is
        uint8 with shape ``(batch_size, 17)``.

    Raises
    ------
    ValueError
        If ``data_df`` has no rows (raised on the first ``next()``).
    """
    n = len(data_df)
    if n == 0:
        # With no rows the batching loop would never run, so the generator
        # would spin forever without yielding anything.
        raise ValueError('data_df is empty; nothing to generate batches from')
    while True:
        # Maybe shuffle
        data = data_df.sample(frac=1) if shuffle else data_df
        i = 0
        while i < n:
            X_batch = np.zeros((batch_size, target_size[0], target_size[1], 5) , dtype=np.float32)
            y_batch = np.zeros((batch_size, 17), dtype=np.uint8)

            for j in range(batch_size):
                # The final batch of a pass may run past the last row; wrap
                # around to the start so every batch is completely filled.
                img = data.iloc[i % n]
                img_path = os.path.join(paths.DATA_FOLDER, subfolder, img.image_name+'.'+extension)
                image = imread(img_path)
                image = preprocess_image1(image)
                image = preprocess(image, target_size=target_size, augmentation=augmentation,
                           hflip=True, vflip=True, shift_x=3, shift_y=3, rot_range=5)
                # presumably `preprocess` returns channels-first data, hence the
                # move back to channels-last — TODO confirm against rainforest.preprocess
                image = np.transpose(image, (1, 2, 0))
                X_batch[j] = image
                # Everything after the first position of the row is the label vector.
                y_batch[j] = img[1:].values
                i += 1

            yield X_batch, y_batch

In [None]:
def fb_score(beta=1, smooth=1e-6, threshold=0.2):
    """Build a Keras metric that computes the per-sample averaged F-beta score.

    Parameters
    ----------
    beta : number
        Weight of recall relative to precision.
    smooth : float
        Small constant added to numerators and denominators so that samples
        without positives do not divide by zero.
    threshold : float
        Predictions strictly greater than this value count as positive.

    Returns
    -------
    function
        ``fscore(y_true, y_pred)`` whose ``__name__`` is ``'F<beta>_score'``
        (for example ``'F2_score'`` or ``'F0.5_score'``).
    """

    def fscore(y_true, y_pred):
        # Work in the backend float type throughout: the comparison yields a
        # boolean tensor on some backends, which cannot be multiplied or
        # summed together with float tensors.
        y_true = K.cast(y_true, K.floatx())
        y_pred = K.cast(K.greater(y_pred, threshold), K.floatx())
        true_positives = K.sum(y_true * y_pred, axis=1)
        recall = (true_positives + smooth) / (K.sum(y_true, axis=1) + smooth)
        precision = (true_positives + smooth) / (K.sum(y_pred, axis=1) + smooth)
        return K.mean( ((1+beta**2) * (precision*recall)+smooth) / (beta**2*precision+recall+smooth) )

    # %g keeps fractional betas readable ('F0.5_score'); %d would truncate
    # them to 'F0_score'. Integer betas render exactly as before.
    fscore.__name__ = 'F%g_score' % beta

    return fscore

In [None]:
def resnet_like():
    """Return the project's ResNet50 for 64x64x3 inputs with 17 sigmoid outputs."""
    return ResNet50(
        input_shape=(64, 64, 3),
        classes=17,
        classification='sigmoid',
        layer1_filters=32,
    )

In [None]:
def vgg_like():
    """Return a small VGG-style ``Sequential`` classifier with 17 sigmoid outputs.

    Three stages of (conv, batch-norm, conv, batch-norm, max-pool) with 32, 64
    and 128 filters are followed by a 1024-unit dense layer. The input shape is
    channels-first, ``(3,) + input_size``, with ``input_size`` read from the
    notebook globals.
    """
    def conv3x3(filters, **extra):
        # Every convolution shares the kernel size, activation and initialiser.
        return Conv2D(filters, 3, activation='relu', kernel_initializer='he_normal', **extra)

    stack = []
    for stage, filters in enumerate((32, 64, 128)):
        # Only the very first layer declares the input shape.
        first_layer_args = {'input_shape': (3,)+input_size} if stage == 0 else {}
        stack.append(conv3x3(filters, **first_layer_args))
        stack.append(BatchNormalization(axis=1))
        stack.append(conv3x3(filters))
        stack.append(BatchNormalization(axis=1))
        stack.append(MaxPool2D())

    # Classifier head.
    stack.append(Flatten())
    stack.append(Dense(1024, activation='relu', kernel_initializer='he_normal'))
    stack.append(BatchNormalization())
    stack.append(Dense(17, activation='sigmoid'))

    return Sequential(stack)


In [None]:
# Build the network with the project's DenseNet helper; the positional
# arguments are presumably the number of classes (17) and the input shape
# (64x64 with 5 channels) — confirm against rainforest.models.densenet.
model = create_dense_net(17, (64, 64, 5), weight_decay=0)

model.summary()
# Binary cross-entropy loss; accuracy and the thresholded F2 metric built by
# fb_score are tracked alongside it.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy', fb_score(beta=2)])

In [None]:
# Training batches: shuffled and augmented, read from the TIFF folder.
train_gen = data_generator(train_data, batch_size=batch_size, target_size=input_size, shuffle=True, augmentation=True,
                           extension='tif', subfolder='train-tif-v2')
# Validation batches: fixed order, no augmentation, same TIFF folder.
val_gen = data_generator(val_data, batch_size=batch_size, target_size=input_size, shuffle=False, augmentation=False,
                        extension='tif', subfolder='train-tif-v2')

In [None]:
# Append the per-epoch metrics to log.csv in the working directory.
csv_logger = CSVLogger('log.csv')
# Multiply the learning rate by 0.25 once val_loss has not improved for 3 epochs.
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=3, verbose=1, factor=0.25)
# Keep only the best model seen so far.
# NOTE(review): the checkpoint location is an absolute, machine-specific path.
checkpoint = ModelCheckpoint(filepath='E:/Models/brainforest/densenet_64x64_tif.hdf5', verbose=1, save_best_only=True)

In [None]:
# Floor division: a trailing partial batch is not counted in the per-epoch
# step totals.
train_steps = len(train_data) // batch_size
val_steps = len(val_data) // batch_size
# Train for 50 epochs, validating on val_gen after each one.
model.fit_generator(train_gen, train_steps, epochs=50, callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_gen, validation_steps=val_steps)

In [None]:
# Restore previously saved weights into `model`.
# NOTE(review): the file name says "resnet16" while `model` is built with
# create_dense_net and checkpointed as densenet_64x64_tif.hdf5 in this
# notebook — confirm that this is the intended weights file.
model.load_weights('E:/Models/brainforest/resnet16_64x64.hdf5')

def strip_labels(gen):
    """Yield only the first element of every ``(images, labels)`` pair in ``gen``.

    Wraps a batch generator so it can be fed to prediction routines that
    expect inputs without targets. Iteration stops cleanly when ``gen`` is
    exhausted; calling ``next()`` by hand inside ``while True`` would let
    ``StopIteration`` escape, which Python 3.7+ turns into ``RuntimeError``.
    """
    for imgs, _ in gen:
        yield imgs

# True ceiling of len / batch_size: just enough batches to cover every
# validation row (float() keeps the division exact under Python 2 as well).
val_steps = int(np.ceil(len(val_data) / float(batch_size)))
# Augmentation is switched off so the predictions are deterministic and can be
# scored against the labels, matching the validation generator used for training.
# NOTE(review): subfolder/extension are left at the generator defaults
# ('train-jpg', 'jpg') although training reads 'train-tif-v2' TIFFs and
# preprocess_image1 indexes band 3 — confirm which source is intended.
val_gen = strip_labels(data_generator(val_data, batch_size=batch_size, target_size=input_size, shuffle=False,
                                      augmentation=False))
preds = model.predict_generator(val_gen, val_steps)
# The last batch is padded by wrap-around; drop the surplus rows.
preds = preds[:len(val_data)]

In [None]:
# Sweep the decision threshold and report the sample-averaged F2 score.
# The ground truth does not depend on the threshold, so extract it once.
y_true = val_data.iloc[:, 1:].values
for threshold in np.arange(0.1, 0.3, 0.02):
    y_pred = preds > threshold
    # beta is passed by keyword, which every scikit-learn release accepts.
    score = fbeta_score(y_true, y_pred, beta=2, average='samples')
    # A single pre-formatted argument prints identically on Python 2 and 3.
    print('%s f2 score: %s' % (threshold, score))

In [None]:
# Names (without folder or extension) of every JPEG in the test folder.
test_files = [
    os.path.basename(os.path.splitext(f)[0])
    for f in glob.glob(os.path.join(paths.DATA_FOLDER, 'test-jpg', '*.jpg'))
]
# data_generator reads labels from the second column onwards, so the test
# table gets a placeholder label column of zeros.
test_data = pd.DataFrame(test_files, columns=['image_name'])
test_data['bogus_label'] = np.zeros(len(test_files))

In [None]:
# Quick look at the first rows of the test table.
test_data.head()

In [None]:
# True ceiling of len / batch_size: just enough batches to cover every test
# image (float() keeps the division exact under Python 2 as well).
test_steps = int(np.ceil(len(test_data) / float(batch_size)))
# Augmentation is switched off so the submitted predictions are deterministic.
# NOTE(review): this reads JPEGs from 'test-jpg' although training reads
# 'train-tif-v2' TIFFs and preprocess_image1 indexes band 3 — confirm which
# test source is intended.
test_gen = strip_labels(data_generator(test_data, batch_size=batch_size, target_size=input_size, shuffle=False,
                                       augmentation=False, subfolder='test-jpg'))
preds = model.predict_generator(test_gen, test_steps)
# The last batch is padded by wrap-around; drop the surplus rows.
preds = preds[:len(test_data)]

In [None]:
# Binarise the predicted probabilities with a fixed cut-off of 0.16.
tpreds = preds > 0.16

In [None]:
# Write the submission file: a header plus one "image_name,tags" row per test
# image, where tags are the space-separated labels flagged in `tpreds`.
with open('submission2.csv', 'w') as out:
    out.write('image_name,tags\n')
    for name, row in zip(test_files, tpreds):
        # Positions of the positive predictions map back to names via `labels`.
        tag_names = [labels[k] for k in np.flatnonzero(row)]
        out.write('%s,%s\n' % (name, ' '.join(tag_names)))

In [None]:
# NOTE(review): sklearn.externals.joblib is presumably unavailable in newer
# scikit-learn releases — confirm against the pinned version.
from sklearn.externals import joblib
# Persist the raw (un-thresholded) predictions so they can be ensembled later.
joblib.dump(preds, 'multilabel2405_128x128.pkl')

In [None]:
# Average two saved prediction sets element-wise; this assumes both files
# hold predictions for the same images in the same order — TODO confirm.
preds = (joblib.load('multilabel2405_128x128.pkl') + joblib.load('multilabel2405.pkl')) / 2

In [None]:
# Binarise the averaged predictions with the same 0.16 cut-off.
tpreds = preds > 0.16

In [None]:
# Write the submission file: a header plus one "image_name,tags" row per test
# image, where tags are the space-separated labels flagged in `tpreds`.
with open('submission3.csv', 'w') as out:
    out.write('image_name,tags\n')
    for name, row in zip(test_files, tpreds):
        # Positions of the positive predictions map back to names via `labels`.
        tag_names = [labels[k] for k in np.flatnonzero(row)]
        out.write('%s,%s\n' % (name, ' '.join(tag_names)))