In [2]:
%matplotlib inline

# automatically reload modules when they have changed
%load_ext autoreload
%autoreload 2

import json 
import os
import io
import collections
import math
import random

import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime

from PIL import Image

from pprint import pprint
from shutil import copyfile, move

import keras_resnet.models

import keras
from keras.optimizers import Adam
import keras.backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Flatten, Dense, Dropout, Input
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD

def get_session():
    """Return a new TensorFlow session with on-demand GPU memory growth enabled."""
    # allow_growth makes TF claim GPU memory incrementally instead of all at once;
    # a fixed cap (per_process_gpu_memory_fraction = .7) is the alternative that was tried.
    gpu_options = tf.GPUOptions(allow_growth=True)
    session_config = tf.ConfigProto(gpu_options=gpu_options)
    return tf.Session(config=session_config)

# Restrict CUDA to the device with index 1. This is set before get_session() is
# called below, so the session is created with the restriction already in place.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

# Register the session built by get_session() as the one the keras TensorFlow backend uses.
keras.backend.tensorflow_backend.set_session(get_session())

In [3]:
# Input image size fed to the network (square images).
WIDTH = HEIGHT = 224
# Flag for greyscale loading; kept at False.
as_grey = False
# Mini-batch sizes for training and for test-time prediction.
batch_size = test_batch_size = 32
# Upper bound on the number of training epochs.
epochs = 500

In [4]:
# annotation_path_train = 'U:\PycharmProjects\AnimalBiometrics\data\\annotations\\train_annotations.json'
# annotation_path_val = 'U:\PycharmProjects\AnimalBiometrics\data\\annotations\\val_annotations.json'

# load labels
# train_dict = {}
# train_labels=open(annotation_path_train)
# data_train = json.load(train_labels)
# for elem in data_train['annotations']:
#     train_dict[elem['image_id']] = elem['category_id']

# train_labels.close()
# training_size = len(train_dict.values())

# val_dict = {}
# val_labels=open(annotation_path_val)
# data_val = json.load(val_labels)
# for elem in data_val['annotations']:
#     val_dict[elem['image_id']] = elem['category_id']    

# val_labels.close()
# val_size = len(val_dict.values())

# for key, value in train_dict.items():
#     copyfile(os.getcwd() + '/data/train_val/{}.jpg'.format(key), os.getcwd() + '/data/train/{}/{}.jpg'.format(value, key))
    
# for key, value in val_dict.items():
#     copyfile(os.getcwd() + '/data/train_val/{}.jpg'.format(key), os.getcwd() + '/data/val/{}/{}.jpg'.format(value, key))

In [5]:
# Build the keras image generators and the per-class loss weights.

# Previous dataset locations, kept for reference:
# train_path = os.getcwd() + '/data/Clouded_leopard_ID/train_cleaned_cropped'
# test_path = os.getcwd() + '/data/Clouded_leopard_ID/test_cleaned_cropped'

train_path, test_path = 'D://Species//train_cropped', 'D://Species//test_cropped'

images = []

# Training images: pixel values are multiplied by 1/255 and randomly augmented
# (rotation, shifts, shear, horizontal and vertical flips). Zoom is left disabled.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)

# Validation images: only the 1/255 rescaling, no augmentation.
validation_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators.
flow_options = dict(
    target_size=(HEIGHT, WIDTH),
    batch_size=batch_size,
    class_mode='categorical',
)

# Batches of augmented training images, read from one sub-directory per class.
train_generator = train_datagen.flow_from_directory(train_path, **flow_options)

# Batches of validation images, with shuffling switched off.
validation_generator = validation_datagen.flow_from_directory(
    test_path, shuffle=False, **flow_options)

# Per-class weights: n_samples / (n_classes * samples_in_class), so classes with
# fewer training images receive a larger weight.
unique, counts = np.unique(train_generator.classes, return_counts=True)

n_samples = len(train_generator.filenames)
n_classes = len(set(train_generator.class_indices))
count_norm = n_samples / (n_classes * counts)
class_weights = {label: weight for label, weight in zip(unique, count_norm)}

Found 408979 images belonging to 87 classes.
Found 102288 images belonging to 87 classes.


In [7]:
# def save_pred(pred):
#     ids = [file[4:-4] for file in test_generator.filenames]
#     df = pd.DataFrame(np.append(ids, pred).reshape(2, len(pred)).T)
#     df.columns = ['id','animal_present']
#     df.to_csv('./predictions2.csv', index=False)

In [9]:
from keras.utils import get_file
import keras_resnet.models

def download_imagenet(depth):
    """Fetch the ImageNet-pretrained keras-resnet weights for a given ResNet depth.

    Args:
        depth: ResNet depth; must be one of 50, 101 or 152.

    Returns:
        The value returned by ``keras.utils.get_file`` for the weights file.

    Raises:
        ValueError: if ``depth`` has no known weights file. Raised before any
            download is attempted.
    """
    # md5 checksums of the released weight files, keyed by network depth
    checksums = {
        50: '3e9f4e4f77bbe2c9bec13b53ee1c2319',
        101: '05dc86924389e5b401a9ea0348a3213c',
        152: '6ee11ef2b135592f8031058820bb9e71',
    }
    if depth not in checksums:
        raise ValueError(
            'Unsupported ResNet depth {!r}; expected one of {}'.format(depth, sorted(checksums)))

    filename = 'ResNet-{}-model.keras.h5'.format(depth)
    resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format(filename)

    return get_file(
        filename,
        resource,
        cache_subdir='models',
        md5_hash=checksums[depth]
    )

In [None]:
# Identifiers used to name the TensorBoard log directory and the checkpoint directory.
run_name = 'inital_class_weights_balanced'
date = 'Saturday_9_29_2018'

# ImageNet-pretrained ResNet-50 weights
weights = download_imagenet(depth=50)
shape = (HEIGHT, WIDTH, 3)

training_size = len(train_generator.filenames)
validation_size = len(validation_generator.filenames)
n_classes = len(train_generator.class_indices)

# keras_resnet model
x = keras.layers.Input(shape)
model = keras_resnet.models.ResNet50(x, include_top=True, freeze_bn=False, classes=n_classes)
# The classifier is built for n_classes outputs, so its weights cannot match the
# ImageNet checkpoint; skip_mismatch=True loads every layer that does match by name
# and leaves mismatching layers at their initial values.
model.load_weights(weights, by_name=True, skip_mismatch=True)


# ResNet-50 from keras applications without FC layers (this was used for the currently best model)
# resnet_model = keras.applications.resnet50.ResNet50(include_top=False, weights='imagenet',
#                                                     input_shape=(WIDTH, HEIGHT, 3), classes=n_classes)

# add classification block
# x = Flatten(name='flatten')(resnet_model.output)
# # x = Dense(1024, activation='relu', name='fc1')(x)
# # x = Dense(512, activation='relu', name='fc2')(x)
# x = Dense(n_classes, activation='softmax', name='predictions')(x)
# model = Model(input=resnet_model.input, output=x)


# load existing model to continue training
# model = keras.models.load_model(os.getcwd() + '/models/cloud_June 18, 2018.hdf5')


# set up tensorboard for progress and analysis
# write_grads might not work with some tf versions
tensorboard = TensorBoard(log_dir='D:\\Species_Graphs/{}_{}'.format(date, run_name), histogram_freq=0,
                          batch_size=batch_size, write_graph=False, write_grads=False,
                          write_images=True)

# save model after each epoch (save_best_only=False keeps one file per epoch)
root = 'D:\\Species_Snapshots/{}_{}'.format(date, run_name)
# makedirs also creates missing parent directories and is a no-op when root already exists
os.makedirs(root, exist_ok=True)
file_path = root + '/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5'
checkpoint = ModelCheckpoint(filepath=file_path, monitor='val_acc', verbose=1, save_best_only=False)

# reduce lr on plateau of val loss
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0)

adam = Adam(lr=1e-5, beta_1=0.9, beta_2=0.999, epsilon=1e-10, decay=0.0, amsgrad=False)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
# model.summary()

# number of batches needed to see every image once; ceil so the last partial batch is included
train_steps = math.ceil(training_size / batch_size)
val_steps = math.ceil(validation_size / batch_size)

# train_steps = 1
# val_steps = 1

# class_weights comes from the data-loading cell and re-weights the loss per class
history = model.fit_generator(train_generator, steps_per_epoch=train_steps, epochs=epochs,
                              validation_data=validation_generator, validation_steps=val_steps, verbose=1,
                              callbacks=[tensorboard, checkpoint, reduce_lr],
                              class_weight=class_weights,
                              )

Epoch 1/500

In [None]:
# # compute with an existing model which did not have 29 classes

# # input_tensor = Input(shape=(WIDTH, HEIGHT, 3))

# # # VGG without FC layers
# # # model = keras.applications.vgg16.VGG16(include_top=False, weights="imagenet")
# # model = keras.applications.resnet50.ResNet50(include_top=False, weights='imagenet',
# #                                      input_tensor=input_tensor, classes=2)
# # x = model(input_tensor)

# training_size = len(train_generator.filenames) * 10
# validation_size = len(validation_generator.filenames) * 10

# model = keras.models.load_model(os.getcwd() + '/models/pretrained.h5')
# model.layers.pop()
# input_layer = model.input
# resnet = model.layers[1]
# x = resnet(input_layer)
# # model.add(Dense(29, activation='softmax', name='predictions'))
# # input_tensor = model.input
# # x = model.layers[1:-2]

# for layer in model.layers[2:-1]:
#     x = layer(x)

# # Classification block
# # x = Flatten(name='flatten')(x)
# x = Dense(1024, activation='relu', name='fc1')(x)
# x = Dense(1024, activation='relu', name='fc2')(x)
# x = Dense(29, activation='softmax', name='predictions')(x)
# model = Model(input=input_layer, output=x)
# print(model.layers)
    
# # set up tensorboard for progress and analysis 
# # tensorboard = TensorBoard(log_dir=os.getcwd()+"/logs/cloud/{}".format(datetime.now().strftime("%B %d, %Y")), histogram_freq=0,
# #                                           batch_size=batch_size, write_graph=True, write_grads=True,
# #                                           write_images=True)

# # checkpoint = ModelCheckpoint(filepath='/model_checkpoints/large_dense_fine/weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5', 
# #                              monitor='val_acc', verbose=1, save_best_only=True, mode='max')

# model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# model.summary()

# history = model.fit_generator(train_generator, steps_per_epoch=training_size / batch_size, epochs=epochs,
#                     validation_data=validation_generator, validation_steps=validation_size / batch_size, verbose=1,
# #                               callbacks=[tensorboard, checkpoint]
#                              )

# model.save(os.getcwd() + '/models/cloud_{}.hdf5'.format(datetime.now().strftime("%B %d, %Y")))
# visualize(history)

In [None]:
# pred = model.predict_generator(test_generator, len([name for name in os.listdir(os.getcwd() + '/data/test/all')]) // test_batch_size, verbose=1)
# save_pred(pred.argmax(axis=-1))