# To check: Keras binary vs. categorical cross-entropy performance (https://stackoverflow.com/questions/42081257/keras-binary-crossentropy-vs-categorical-crossentropy-performance)

In [1]:
import json
import numpy as np

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

from keras.optimizers import SGD, RMSprop, Adagrad

from keras.applications.vgg16 import VGG16

import balloonml
import cv2

import importlib
importlib.reload(balloonml)

Using TensorFlow backend.


<module 'balloonml' from '/home/wouter/notebooks/disdat-ml/keras/balloonml.py'>

## Defining training parameters

In [7]:
# Central run configuration. The whole dict is serialised to JSON by the
# training cell, so every value must stay JSON-serialisable.
config = {
    # Image folders for training and validation.
    'train_data_dir': '../data8020',
    'validation_data_dir': '../validation8020',
    # .npy files holding the bottleneck features.
    'bottleneck_train': 'bottleneck_features_train-120-8020.npy',
    'bottleneck_validation': 'bottleneck_features_validation-120-8020.npy',
    # Epoch counts for top-model training and for the refinement stage.
    'epochs': 60,
    'epochs-refine': 20,
    'batch_size': 32,
}

# True if bottleneck features are not available yet,
# False if they are and can be loaded
calculate_bottleneck = True

# Learning rates and decay factors to try; the training cell zips these
# two lists, so entries are paired by position.
lrs = [0.001]
decays = [0]

## Saving or loading bottleneck features

In [None]:
# Obtain the bottleneck features for the training and validation sets,
# either by loading the .npy files or by running the images through VGG16.
if not calculate_bottleneck:
    # Reuse the arrays stored under the configured file names.
    train_data = np.load(config['bottleneck_train'])
    validation_data = np.load(config['bottleneck_validation'])
else:
    # VGG16 with ImageNet weights and without its top layers.
    model = VGG16(weights='imagenet', include_top=False)
    # NOTE(review): balloonml.bottleneck presumably also saves the features to
    # the given .npy path -- confirm in balloonml.py.
    train_data = balloonml.bottleneck(
        model,
        config['train_data_dir'],
        config['bottleneck_train'],
        config['batch_size'],
    )
    validation_data = balloonml.bottleneck(
        model,
        config['validation_data_dir'],
        config['bottleneck_validation'],
        config['batch_size'],
    )

Found 132819 images belonging to 120 classes.
Found 33145 images belonging to 120 classes.


In [None]:
# Train the top classifier once per (learning rate, decay) pair.
# `lrs` and `decays` are zipped, so entries are paired by position; this is
# not a grid search. `history` and `model` are overwritten on every
# iteration, so only the last run's results remain after the loop.
for lr, decay in zip(lrs, decays):
    config['optimizer'] = 'sgd'
    config['lr'] = lr
    # The raw decay factor is scaled by lr and divided by 100000/batch_size
    # (presumably an approximate number of updates per epoch -- TODO confirm).
    config['decay'] = decay * lr / (100000 / config['batch_size'])
    config['output'] = 'disdat-120-v2-sigmoid-8020' + '-lr' + str(lr) + '-decay' + str(decay)

    # Store the exact settings of this run under its output name.
    # Assumes the 'configs/' directory already exists.
    with open('configs/' + config['output'] + '.config', 'w') as fp:
        json.dump(config, fp)

    if config['optimizer'] == 'sgd':
        optimizer = SGD(lr=config['lr'], decay=config['decay'])
    elif config['optimizer'] == 'rmsprop':
        optimizer = RMSprop(lr=config['lr'], decay=config['decay'])
    elif config['optimizer'] == 'adagrad':
        # NOTE(review): decay is fixed at 0.0 here, so config['decay'] written
        # to the .config file is not what Adagrad uses -- confirm intent.
        optimizer = Adagrad(lr=config['lr'], epsilon=1e-08, decay=0.0)
    else:
        # Without this branch an unknown name would leave `optimizer` unbound
        # or silently reuse the optimizer from a previous iteration.
        raise ValueError('Unsupported optimizer: %r' % (config['optimizer'],))

    history, model = balloonml.train_top(
        train_data,
        validation_data,
        train_data_dir=config['train_data_dir'],
        validation_data_dir=config['validation_data_dir'],
        optimizer=optimizer,
        batch_size=config['batch_size'],
        epochs=config['epochs'],
        output=config['output'])

Found 132819 images belonging to 120 classes.
Found 33145 images belonging to 120 classes.
Train on 132819 samples, validate on 33145 samples
Epoch 1/60
Epoch 2/60

## Running model

In [None]:
# Name of the trained top model to continue with; the refining cell loads
# config['output'] + '.h5' and writes its result under config['output'] + '-refined'.
# NOTE(review): this is a "v1" name without the "8020" suffix, whereas the
# training loop writes 'disdat-120-v2-sigmoid-8020-...' -- confirm that this
# older run is really the one intended here.
config['output'] = 'disdat-120-v1-sigmoid-lr0.001-decay0'

## Refining model

In [None]:
# SGD with momentum and a fixed learning rate of 2e-4 for the refinement stage.
refine_optimizer = SGD(momentum=0.9, lr=2e-4)

# Start from the top-layer weights stored as '<output>.h5' and store the
# result under '<output>-refined'.
refined_history, refined_model = balloonml.finetune(
    # data
    train_data_dir=config['train_data_dir'],
    validation_data_dir=config['validation_data_dir'],
    # starting weights and result name
    weights_top_layer=config['output'] + '.h5',
    output=config['output'] + '-refined',
    # optimisation settings
    optimizer=refine_optimizer,
    batch_size=config['batch_size'],
    epochs=config['epochs-refine'],
)

# Visualise the history returned by the refinement run.
balloonml.plotResult(refined_history)

In [None]:
# NOTE(review): `image` and `classes` are not defined in any cell visible in
# this notebook, so this cell depends on hidden kernel state -- define them
# (or restore the cell that did) before this point.
# NOTE(review): `model` here is whatever the training loop last assigned, not
# `refined_model` -- confirm which one is meant.
probabilities = model.predict(image)
print(classes, probabilities, sep='\n')