## Imports

In [None]:
from keras.models import Model, Sequential, load_model
from keras.layers import Dense, Flatten, BatchNormalization
from keras.optimizers import Adam, SGD
from keras.callbacks import *
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input

import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import glob
import os
import sys
sys.path.insert(0, '..')
import paths
sys.path.insert(0, '..\scripts')
import data
import extract_class

## Labels

In [None]:
# Every tag that can be attached to an image. The strings are used verbatim
# further down (as a column name and inside directory names), so they must
# stay exactly as written here.
all_labels = [
    'agriculture',
    'artisinal_mine',
    'bare_ground',
    'blooming',
    'blow_down',
    'clear',
    'cloudy',
    'conventional_mine',
    'cultivation',
    'habitation',
    'haze',
    'partly_cloudy',
    'primary',
    'road',
    'selective_logging',
    'slash_burn',
    'water',
]

# Subset of tags handled by this notebook.
personal_labels = ['agriculture', 'bare_ground', 'habitation']

# The binary classifier below is trained for one tag at a time; change the
# index to switch tags.
current_label = personal_labels[0]
# Name given to the class holding images that do NOT carry `current_label`.
negative_current_label = 'negative_{}'.format(current_label)

## Paths and directories

In [None]:
# Root directory containing one sub-directory per class.
data_folder = paths.DATA_FOLDER

# Positive images for the current label, and the held-out positives used for
# validation. Paths are joined with '/' regardless of platform.
train_folder = '/'.join((data_folder, current_label))
validation_folder = '/'.join((data_folder, 'validation_' + current_label))

# Create separate data folders (one-off step, run manually when needed):
# extract_class.extract_class(current_label, 'jpg')
# extract_class.extract_negative(current_label, n_train)

## Data

In [None]:
# Label tables for the current class, loaded through the project's `data`
# module. `train_data` is not used in this cell but is kept for later cells.
train_data = data.get_class_data(label=current_label)
validation_data = data.get_class_data(train=False, label=current_label)
# Keep only the validation rows that are positive for the current label.
validation_data = validation_data[validation_data[current_label] == 1]

# Build the lookup once: set membership is O(1) per file, whereas `in` on the
# array returned by `.values` rescans every image name for every file.
validation_names = set(validation_data['image_name'].values)

# os.rename does not create missing directories, so make sure the
# destination exists before any file is moved into it.
if not os.path.isdir(validation_folder):
    os.makedirs(validation_folder)

all_train_files = glob.glob(data_folder + '/' + current_label + '/*')

# Move every positive image that belongs to the validation split out of the
# training folder and into the validation folder.
for file_path in all_train_files:
    file_name = os.path.basename(file_path)
    if os.path.splitext(file_name)[0] in validation_names:
        # Build the destination from the target folder and the file name.
        # The previous str.replace(current_label, ...) rewrote *every*
        # occurrence of the label in the path, which produces a wrong
        # destination whenever the label also appears in the data root or in
        # the file name itself.
        os.rename(file_path, os.path.join(validation_folder, file_name))


# Count the images left in each folder after the move. The '*' directly after
# the folder name also matches any sibling directory sharing that prefix.
nb_train = len(glob.glob(train_folder + '*/*.*'))
nb_val = len(glob.glob(validation_folder + '*/*.*'))

# Side length (in pixels) images are resized to before entering the network.
image_size = 256

## Preprocessing

In [None]:
def preprocess_generator(gen):
    """Wrap a batch generator so every image batch is preprocessed on the fly.

    Args:
        gen: iterable yielding ``(X, y)`` pairs.

    Yields:
        ``(preprocess_input(X) / 255., y)`` for each pair, with ``y`` passed
        through untouched.
    """
    # NOTE(review): the result of VGG16's `preprocess_input` is additionally
    # divided by 255 -- confirm this extra rescaling is intended for the
    # pretrained weights.
    for batch_images, batch_targets in gen:
        scaled_images = preprocess_input(batch_images) / 255.
        yield scaled_images, batch_targets

# Random augmentations applied to the training images: small rotations,
# horizontal/vertical shifts, zoom, and flips along both axes.
augmentation_settings = dict(
    rotation_range=25,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
data_generator = ImageDataGenerator(**augmentation_settings)

## Network structure

In [None]:
# Convolutional base: VGG16 with ImageNet weights and without its classifier.
# NOTE(review): the input shape is channels-first (3, H, W) -- confirm the
# Keras backend is configured for that dimension ordering.
model = VGG16(weights='imagenet',
              include_top=False,
              input_shape=(3, image_size, image_size))

# Freeze the pretrained base so only the new head below is trained.
for pretrained_layer in model.layers:
    pretrained_layer.trainable = False

# New classification head: flatten -> batch norm -> 512-unit ReLU -> batch norm.
layer = BatchNormalization()(Flatten()(model.output))
layer = BatchNormalization()(Dense(512, activation='relu')(layer))

# Two-way softmax: current label vs. its negative class.
output_layer = Dense(2, activation='softmax')(layer)

## Data generation and training

In [None]:
# Assemble the final network: the frozen VGG16 base plus the new head.
model = Model(input=model.input, output=output_layer)
model.compile(loss='categorical_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

# Callbacks: per-epoch CSV log, learning-rate decay by a factor of 10 when
# val_loss stops improving (patience=0), and a checkpoint whenever the
# monitored validation loss improves.
csv_logger = CSVLogger('../log.csv')
lr_plateau = ReduceLROnPlateau(monitor='val_loss', patience=0, verbose=1, factor=0.1)
checkpoint = ModelCheckpoint(filepath='../models/model.' + current_label + '.{epoch:02d}-{val_loss}.hdf5', verbose=1, save_best_only=True)

# Training batches: augmented images from the positive and negative folders.
train_generator = data_generator.flow_from_directory(data_folder, target_size=(image_size, image_size),
                                               batch_size=8, shuffle=True, classes=[current_label, negative_current_label])

# Validation batches: no augmentation, fixed order.
# `classes` expects sub-directory names. The previous
# `negative_current_label[:nb_val]` sliced the *characters* of the directory
# name: a no-op when nb_val is at least the name's length, and a truncated,
# non-existent directory name otherwise. Pass the directory name unchanged.
# NOTE(review): with shuffle=False and nb_val_samples=nb_val, the samples
# drawn for validation are presumably the first nb_val files, i.e. the
# positive validation folder only -- confirm this is the intended evaluation.
# NOTE(review): the negative folder is shared between the training and the
# validation generator -- confirm that overlap is acceptable.
val_generator = ImageDataGenerator().flow_from_directory(data_folder, target_size=(image_size, image_size),
                                                   batch_size=8, shuffle=False, classes=['validation_' + current_label, negative_current_label])

# Apply the same input preprocessing to both streams.
train_generator = preprocess_generator(train_generator)
val_generator = preprocess_generator(val_generator)

# NOTE(review): nb_train counts only files under the positive folder, while
# the training generator also draws from the negative folder -- confirm the
# intended epoch size.
model.fit_generator(train_generator, samples_per_epoch=nb_train, nb_epoch=20, verbose=1,
                    callbacks=[csv_logger, lr_plateau, checkpoint],
                    validation_data=val_generator, nb_val_samples=nb_val)