In [2]:
%matplotlib inline
from os.path import basename, join
from skimage.io import imread
from skimage.transform import resize
from sklearn.preprocessing import StandardScaler
#from keras.callbacks import ReduceLROnPlateau
import numpy as np
from numpy import array
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
#from keras.layers.convolutional import Conv2D
#from keras.layers.pooling import MaxPooling2D
#from keras.optimizers import SGD
from keras.callbacks import LearningRateScheduler, ModelCheckpoint, History
from keras.models import load_model
from keras.layers.normalization import BatchNormalization
from keras.layers import Dropout
from os.path import basename, join
from glob import glob
from keras.callbacks import ModelCheckpoint
from keras.layers import GlobalAveragePooling2D

from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.utils import print_summary
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import RMSprop


def read_csv(filename):
    """Read a comma-separated ground-truth file into a dictionary.

    The first line is treated as a header and skipped.  For every other
    non-blank line, the first field is used as the key and all remaining
    fields are parsed as floats.

    Parameters
    ----------
    filename : str
        Path of the CSV file to read.

    Returns
    -------
    dict
        Maps the first field of each row to a 1-D ``float64`` numpy array
        holding the remaining fields of that row.  An empty file (or a file
        containing only a header) yields an empty dict.

    Raises
    ------
    ValueError
        If a value field cannot be converted to ``float``.
    """
    res = {}
    with open(filename) as fhandle:
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(fhandle, None)
        for line in fhandle:
            line = line.rstrip('\r\n')
            # Blank lines (e.g. a trailing newline at the end of the file)
            # would otherwise create a bogus '' entry.
            if not line.strip():
                continue
            parts = line.split(',')
            res[parts[0]] = array([float(x) for x in parts[1:]], dtype='float64')
    return res

def train_classifier(train_gt, train_img_dir, fast_train=True):
    """Fine-tune an ImageNet-pretrained ResNet50 on the images of a directory.

    Every ``*jpg`` file in ``train_img_dir`` is read, resized to 200x200 and
    (if grayscale) expanded to three channels.  A new softmax head with 50
    outputs is put on top of ResNet50; all but the last eight base layers
    are frozen.  The best model seen so far is checkpointed to
    ``birds_model.hdf5``.

    Parameters
    ----------
    train_gt : dict
        Maps an image basename to its class index.  The value may be a
        scalar or a one-element array (as produced by ``read_csv``).
    train_img_dir : str
        Directory containing the training images.
    fast_train : bool, optional
        If true, train for a single epoch on the whole data set without a
        validation split.  Otherwise hold out a stratified 20% of the data
        for validation and train for up to 4000 epochs.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``train_img_dir`` contains no ``*jpg`` files, or a ground-truth
        entry does not hold exactly one value.
    KeyError
        If an image has no entry in ``train_gt``.
    """
    new_height = 200
    new_width = 200
    num_classes = 50
    batch_size = 50

    jpeg_list = sorted(glob(join(train_img_dir, '*jpg')))
    if not jpeg_list:
        raise ValueError('no *jpg images found in %r' % (train_img_dir,))
    num_samples = len(jpeg_list)

    X = np.zeros([num_samples, new_height, new_width, 3])
    labels = np.zeros([num_samples], dtype=int)

    for count, path in enumerate(jpeg_list):
        image_resized = resize(imread(path), (new_height, new_width))

        # handle grayscale images: replicate the single channel three times
        if image_resized.ndim == 2:
            image_resized = np.stack([image_resized] * 3, axis=-1)

        X[count] = image_resized
        # .item() accepts both scalars and one-element arrays and avoids the
        # deprecated int() conversion of a 1-D array.
        labels[count] = int(np.asarray(train_gt[basename(path)]).item())

    # Rows of the identity matrix are the one-hot vectors of each class.
    one_hot = np.eye(num_classes)

    if fast_train:
        # Smoke-test mode: one epoch on everything, no validation split.
        X_train, labels_train = X, labels
        validation_data = None
        # 'val_loss' does not exist without validation data, so the
        # checkpoint has to follow the training loss instead.
        monitor = 'loss'
        epochs = 1
    else:
        X_train, X_test, labels_train, labels_test = train_test_split(
            X, labels, test_size=0.2, random_state=2017, shuffle=True,
            stratify=labels)
        validation_data = (X_test, one_hot[labels_test])
        monitor = 'val_loss'
        epochs = 4000

    y_train = one_hot[labels_train]

    base_model = ResNet50(include_top=False, weights='imagenet',
                          input_shape=[new_height, new_width, 3])

    x = base_model.output
    x = Flatten()(x)
    x = Dropout(0.3)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze everything except the last eight layers of the base network.
    for layer in base_model.layers[:-8]:
        layer.trainable = False

    # compile the model (should be done *after* setting layers to non-trainable)
    rms_prop = RMSprop(lr=0.00001)
    model.compile(optimizer=rms_prop, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    checkpointer = ModelCheckpoint(filepath='birds_model.hdf5', monitor=monitor,
                                   verbose=1, save_best_only=True)

    # train the model on the new data
    model.fit(X_train, y_train,
              batch_size=batch_size,
              validation_data=validation_data,
              epochs=epochs,
              callbacks=[checkpointer])


def classify(model, test_img_dir):
    """Predict a class index for every ``*jpg`` image in a directory.

    Images are read, resized to 200x200 and (if grayscale) expanded to three
    channels, i.e. prepared the same way as in ``train_classifier``.

    Parameters
    ----------
    model : object
        Anything with a ``predict(X)`` method that returns one row of class
        scores per input image (e.g. a trained Keras model).
    test_img_dir : str
        Directory containing the images to classify.

    Returns
    -------
    dict
        Maps each image basename to the index (``int``) of the class with
        the highest score.  Empty if the directory holds no ``*jpg`` files.
    """
    new_height = 200
    new_width = 200

    jpeg_list = sorted(glob(join(test_img_dir, '*jpg')))
    if not jpeg_list:
        # Nothing to classify; avoid calling the model on an empty batch.
        return {}

    X = np.zeros([len(jpeg_list), new_height, new_width, 3])
    for count, path in enumerate(jpeg_list):
        image_resized = resize(imread(path), (new_height, new_width))

        # handle grayscale images: replicate the single channel three times
        if image_resized.ndim == 2:
            image_resized = np.stack([image_resized] * 3, axis=-1)

        X[count] = image_resized

    scores = model.predict(X)
    # Softmax outputs are almost never exactly 1.0, so pick the most probable
    # class per row instead of searching for entries equal to 1.
    predicted = np.argmax(scores, axis=1)

    return {basename(path): int(class_ind)
            for path, class_ind in zip(jpeg_list, predicted)}


# Train the classifier on the labelled training set (full run with a
# validation split, not the single-epoch fast mode).
train_dir = 'data/00_input/train'
train_img_dir = 'data/00_input/train/images'
train_gt = read_csv(join(train_dir, 'gt.csv'))
train_classifier(train_gt, train_img_dir, fast_train=False)

# To evaluate the checkpoint written during training, uncomment:
#test_img_dir = 'data/00_input/test/images'
#model = load_model('birds_model.hdf5')
#predicted_classes = classify(model, test_img_dir)



  warn("The default mode, 'constant', will be changed to 'reflect' in "


Train on 2000 samples, validate on 500 samples
Epoch 1/4000
Epoch 2/4000
Epoch 3/4000
Epoch 4/4000
Epoch 5/4000
Epoch 6/4000
Epoch 7/4000
Epoch 8/4000
Epoch 9/4000
Epoch 10/4000
Epoch 11/4000
Epoch 12/4000
Epoch 13/4000
Epoch 14/4000
Epoch 15/4000
Epoch 16/4000
Epoch 17/4000
Epoch 18/4000
Epoch 19/4000
Epoch 20/4000
Epoch 21/4000
Epoch 22/4000
Epoch 23/4000
Epoch 24/4000
Epoch 25/4000
Epoch 26/4000
Epoch 27/4000
Epoch 28/4000
Epoch 29/4000
Epoch 30/4000


Epoch 31/4000
Epoch 32/4000
Epoch 33/4000
Epoch 34/4000
Epoch 35/4000
Epoch 36/4000
Epoch 37/4000
Epoch 38/4000
Epoch 39/4000
Epoch 40/4000
Epoch 41/4000
Epoch 42/4000
Epoch 43/4000
Epoch 44/4000
Epoch 45/4000
Epoch 46/4000
Epoch 47/4000
Epoch 48/4000
Epoch 49/4000
Epoch 50/4000
Epoch 51/4000
Epoch 52/4000
Epoch 53/4000
Epoch 54/4000
Epoch 55/4000
Epoch 56/4000
Epoch 57/4000


Epoch 58/4000
Epoch 59/4000
Epoch 60/4000
Epoch 61/4000
Epoch 62/4000
Epoch 63/4000
Epoch 64/4000
Epoch 65/4000
Epoch 66/4000
Epoch 67/4000
Epoch 68/4000
Epoch 69/4000
Epoch 70/4000
Epoch 71/4000
Epoch 72/4000
Epoch 73/4000
Epoch 74/4000
Epoch 75/4000
Epoch 76/4000
Epoch 77/4000
Epoch 78/4000
Epoch 79/4000
Epoch 80/4000
Epoch 81/4000
Epoch 82/4000
Epoch 83/4000


Epoch 84/4000
Epoch 85/4000
Epoch 86/4000
Epoch 87/4000
Epoch 88/4000
Epoch 89/4000
Epoch 90/4000
Epoch 91/4000
Epoch 92/4000
Epoch 93/4000
Epoch 94/4000
Epoch 95/4000
Epoch 96/4000
Epoch 97/4000
Epoch 98/4000
Epoch 99/4000
Epoch 100/4000
Epoch 101/4000
Epoch 102/4000
Epoch 103/4000
Epoch 104/4000
Epoch 105/4000
Epoch 106/4000
Epoch 107/4000
Epoch 108/4000
Epoch 109/4000
Epoch 110/4000


Epoch 111/4000
Epoch 112/4000
Epoch 113/4000
Epoch 114/4000
Epoch 115/4000
Epoch 116/4000
Epoch 117/4000
Epoch 118/4000
Epoch 119/4000
Epoch 120/4000
Epoch 121/4000
Epoch 122/4000
Epoch 123/4000
Epoch 124/4000
Epoch 125/4000
Epoch 126/4000
Epoch 127/4000
Epoch 128/4000
Epoch 129/4000
Epoch 130/4000
Epoch 131/4000
Epoch 132/4000
Epoch 133/4000
Epoch 134/4000
Epoch 135/4000
Epoch 136/4000
Epoch 137/4000


Epoch 138/4000
Epoch 139/4000
Epoch 140/4000
Epoch 141/4000
Epoch 142/4000
Epoch 143/4000
Epoch 144/4000

KeyboardInterrupt: 