In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')
import glob
import os
from shutil import copy2

import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
from sklearn.externals import joblib
from sklearn.metrics import fbeta_score

from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.callbacks import CSVLogger, ReduceLROnPlateau, ModelCheckpoint

from keras.layers.convolutional import Conv2D
from keras.layers import Dense, Flatten
from keras.layers.normalization import BatchNormalization
from keras.layers.pooling import MaxPool2D

import paths
from rainforest.data import get_class_data, get_data
from tqdm import tqdm_notebook

%matplotlib inline

In [None]:
# Number of images per batch; shared by every generator and by the
# steps-per-epoch computations further down.
batch_size = 32

# Spatial size the images are resized to (passed as `target_size` to the
# directory generators and used to build the first layer's input shape).
input_shape = (64, 64)

In [None]:
# Build a flat 'val-jpg' folder holding every validation image. The copy only
# happens the first time, i.e. when the folder does not exist yet.
val_dir = os.path.join(paths.DATA_FOLDER, 'val-jpg')
if not os.path.isdir(val_dir):
    os.mkdir(val_dir)
    # presumably get_data(train=False) returns the validation split -- verify
    # against rainforest.data
    val_data = get_data(train=False)
    # Validation images are stored next to the training ones in 'train-jpg'.
    source_dir = os.path.join(paths.DATA_FOLDER, 'train-jpg')
    for image_name in tqdm_notebook(val_data.image_name, total=len(val_data)):
        copy2(os.path.join(source_dir, image_name + '.jpg'), val_dir)

In [None]:
# Label for which a one-vs-rest (positive / negative) classifier is trained.
classname = 'cultivation'

# One folder per (split, class) pair; the folder names double as the class
# names handed to `flow_from_directory` later on.
tra_pos_folder = os.path.join(paths.DATA_FOLDER, 'tra_'+classname)
tra_neg_folder = os.path.join(paths.DATA_FOLDER, 'tra_negative_'+classname)
val_pos_folder = os.path.join(paths.DATA_FOLDER, 'val_'+classname)
val_neg_folder = os.path.join(paths.DATA_FOLDER, 'val_negative_'+classname)

# Create only the folders that are missing, so the cell can be re-run without
# raising "file exists" errors.
for folder in (tra_pos_folder, tra_neg_folder, val_pos_folder, val_neg_folder):
    if not os.path.isdir(folder):
        os.mkdir(folder)

In [None]:
# Per-image tables for the selected label, one for each split. Later cells
# read the `image_name` column and the column named after `classname`.
train_data = get_class_data(label=classname, train=True)
val_data = get_class_data(label=classname, train=False)

In [None]:
# Fixed seed so the negative subsets are the same on every run. Without it a
# re-run would copy a *different* random subset on top of the previous one and
# the negative folders would no longer be balanced against the positives.
SAMPLE_SEED = 0

# Balanced subsets: every positive image plus an equally sized random sample
# of negatives, for both the training and the validation split.
train_pos = train_data[train_data[classname] == 1]
train_neg = train_data[train_data[classname] == 0].sample(len(train_pos), random_state=SAMPLE_SEED)
val_pos = val_data[val_data[classname] == 1]
val_neg = val_data[val_data[classname] == 0].sample(len(val_pos), random_state=SAMPLE_SEED)

# All images, validation ones included, are read from 'train-jpg'.
source_dir = os.path.join(paths.DATA_FOLDER, 'train-jpg')
copy_plan = (
    (train_pos, tra_pos_folder),
    (train_neg, tra_neg_folder),
    (val_pos, val_pos_folder),
    (val_neg, val_neg_folder),
)
for subset, target_folder in copy_plan:
    for image_name in subset.image_name:
        copy2(os.path.join(source_dir, image_name + '.jpg'), target_folder)

In [None]:
def conv3x3(filters, **layer_kwargs):
    """Return a 3x3 ReLU convolution with He-normal initialised kernels.

    Extra keyword arguments (e.g. `input_shape`) are forwarded to `Conv2D`.
    """
    return Conv2D(filters, 3, activation='relu', kernel_initializer='he_normal',
                  **layer_kwargs)


# Three stages of two convolutions followed by max-pooling (16, 32 and 64
# filters), then a two-way softmax classifier.
# NOTE(review): the input shape is (3, 64, 64), i.e. channels first -- this
# presumably relies on Keras being configured with
# image_data_format='channels_first'; confirm.
model = Sequential([
    conv3x3(16, input_shape=(3,)+input_shape),
    conv3x3(16),
    MaxPool2D(),
    conv3x3(32),
    conv3x3(32),
    MaxPool2D(),
    conv3x3(64),
    conv3x3(64),
    MaxPool2D(),
    Flatten(),
    Dense(2, activation='softmax'),
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Training images: rescaled to [0, 1] and randomly augmented (small rotations,
# zoom, shear, shifts and flips; borders filled by reflection).
train_augmenter = ImageDataGenerator(
    rescale=1./255.,
    rotation_range=10,
    zoom_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.05,
    height_shift_range=0.05,
    fill_mode='reflect',
)
# NOTE(review): Keras assigns label indices in the order of `classes`, so the
# positive folder should map to index 0 and the negative folder to index 1 --
# confirm with `train_gen.class_indices`.
train_gen = train_augmenter.flow_from_directory(
    paths.DATA_FOLDER,
    target_size=input_shape,
    classes=['tra_'+classname, 'tra_negative_'+classname],
    shuffle=True,
    batch_size=batch_size,
)

# Validation images: rescaled only, no augmentation.
val_rescaler = ImageDataGenerator(rescale=1./255.)
val_gen = val_rescaler.flow_from_directory(
    paths.DATA_FOLDER,
    target_size=input_shape,
    classes=['val_'+classname, 'val_negative_'+classname],
    batch_size=batch_size,
)

In [None]:
# Batches needed to see every image once per epoch. Rounded *up* so the last,
# partially filled batch is included, and cast to int so the value is a valid
# step count regardless of how `/` behaves (floor on Python 2, float on
# Python 3). Same formula as the prediction cell uses for `val_steps`.
train_steps = int(np.ceil(float(train_gen.n) / float(batch_size)))
val_steps = int(np.ceil(float(val_gen.n) / float(batch_size)))

In [None]:
# Where the best weights for this class are written during training.
weights_path = 'E:/Models/brainforest/'+classname+'.hdf5'

# Per-epoch metrics go to log.csv.
csv_logger = CSVLogger('log.csv')
# Halve the learning rate after 3 epochs without improvement in val_loss.
lr_plateau = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1)
# Only overwrite the checkpoint when the monitored quantity improves.
checkpoint = ModelCheckpoint(filepath=weights_path, save_best_only=True, verbose=1)
training_callbacks = [csv_logger, lr_plateau, checkpoint]

model.fit_generator(
    train_gen,
    train_steps,
    epochs=50,
    callbacks=training_callbacks,
    validation_data=val_gen,
    validation_steps=val_steps,
)

In [None]:
# Restore the checkpoint written during training.
weights_path = 'E:/Models/brainforest/'+classname+'.hdf5'
model.load_weights(weights_path)

# Unlabelled, unshuffled pass over the complete 'val-jpg' folder
# (class_mode=None yields images only).
# NOTE(review): with shuffle=False the predictions follow the generator's own
# file order; later cells compare them row-by-row with get_data(train=False)
# -- confirm both orders agree (e.g. via `val_gen.filenames`).
full_val_rescaler = ImageDataGenerator(rescale=1./255.)
val_gen = full_val_rescaler.flow_from_directory(
    paths.DATA_FOLDER,
    target_size=input_shape,
    classes=['val-jpg'],
    class_mode=None,
    shuffle=False,
    batch_size=batch_size,
)

# Round up so the final partial batch is predicted as well.
n_batches = np.ceil(float(val_gen.n) / float(batch_size))
val_steps = int(n_batches)
preds = model.predict_generator(val_gen, val_steps)

In [None]:
# Reload the full validation table and keep at most one prediction row per
# table row (any surplus rows at the end of `preds` are dropped).
val_data = get_data(train=False)
n_val_rows = len(val_data)
preds = preds[:n_val_rows]

In [None]:
# Ground truth: 1 where the image carries the label, 0 otherwise.
y_true = val_data[classname].values

# The training generator lists the positive folder first
# (classes=['tra_<class>', 'tra_negative_<class>']), so softmax column 0 is the
# probability of the *positive* class and column 1 that of the negative class.
# Thresholding column 1 would score the inverted prediction.
# (Double-check with `train_gen.class_indices` if the class order is changed.)
y_pred = preds[:, 0] > 0.5

# `beta` is passed by keyword (required by recent scikit-learn) and the result
# is printed with a form that is valid on both Python 2 and Python 3.
print('f2 score: {}'.format(fbeta_score(y_true, y_pred, beta=2)))

In [None]:
# Number of prediction rows whose first-column score exceeds 0.5.
first_column_hits = preds[:, 0] > 0.5
first_column_hits.sum()