In [None]:
import os
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

def get_session(gpu_fraction=0.3):
    '''Assume that you have 6GB of GPU memory and want to allocate ~2GB'''

    num_threads = os.environ.get('OMP_NUM_THREADS')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)

    if num_threads:
        return tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, intra_op_parallelism_threads=num_threads))
    else:
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

get_session()

In [67]:
import argparse
import collections
import datetime
import enum
import glob
import logging
import numpy as np
import os
import random
import sys
import tempfile
import tensorflow as tf

from keras.applications.imagenet_utils import preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential                              
from keras.layers import Dense, Dropout, Flatten, Input, Reshape            
from keras.preprocessing import image

import pelops.const as const
from pelops.datasets.dgcars import DGCarsDataset
from pelops.utils import SetType, setup_custom_logger

In [68]:
train_dir_path = "./datasets/train/" 
val_dir_path = "./datasets/test/"
train_features_path = None
val_features_path = None
dataset_type = "DGCarsDataset"
conv_model_type = "ResNet50"
conv_model_name = "ResNet50"

nb_epoch = 10
dropout_rate = 0.5
batch_size = 32
seed = 11
img_height = 224
img_width = 224
img_dimension = 3
index_accuracy = 1

np.random.seed(seed)
random.seed(seed)

# 1. Extract features

In [69]:
train_datagen = image.ImageDataGenerator()
val_datagen = image.ImageDataGenerator()

train_generator = train_datagen.flow_from_directory(
    directory=train_dir_path,
    target_size=(img_height, img_width),
    seed=seed, 
    follow_links=True
)
val_generator = val_datagen.flow_from_directory(
    directory=val_dir_path,
    target_size=(img_height, img_width),
    seed=seed,
    follow_links=True
)

print("number of classes: {}".format(train_generator.nb_class))

print("number of images for training: {}".format(train_generator.nb_sample))
for i in train_generator:
    x, y = i
    print("shape of train x: {}, y: {}".format(x.shape, y.shape))
    break

# assumption: 151 images, therefore generator will output 32 * 151 images? 
print("number of images for validation: {}".format(val_generator.nb_sample))

Found 194 images belonging to 3 classes.
Found 151 images belonging to 3 classes.
number of classes: 3
number of images for training: 194
shape of train x: (32, 224, 224, 3), y: (32, 3)
number of images for validation: 151


In [70]:
model = ResNet50(
            include_top=False,
            weights="imagenet",
            input_tensor=Input(
                shape=(
                    img_height, 
                    img_width, 
                    img_dimension
                )
            )
        )

In [71]:
for layer in model.layers:
    layer.trainable = False

In [72]:
def __extract_features(generator, model, batch_size, set_type):
    feature_dirpath = "./features/"
    print("create a feature directory to store saved features: {}".format(feature_dirpath))
    if not os.path.exists(feature_dirpath):
        os.makedirs(feature_dirpath)

    print("extract features from convolutional model based on data")
    print("generator: {}_generator".format(set_type))
    print("batch_size: {}".format(batch_size))
    features = model.predict_generator(
        generator,
        batch_size
    )

    time_now = datetime.datetime.now().strftime("%Y%m%d_%H_%M_%S")
    features_filepath = feature_dirpath + "REALDEAL_{}_{}_{}_features_{}.npy".format(
        dataset_type,
        conv_model_type,
        set_type,
        time_now
    )
    print("save features to {}".format(features_filepath))
    np.save(open(features_filepath, "wb"), features)

    return features, features_filepath

In [73]:
train_features, train_features_path = __extract_features(train_generator, model, batch_size, "train")

create a feature directory to store saved features: ./features/
extract features from convolutional model based on data
generator: train_generator
batch_size: 32
save features to ./features/REALDEAL_DGCarsDataset_ResNet50_train_features_20170216_19_59_28.npy


In [11]:
#save features to ./features/REALDEAL_DGCarsDataset_ResNet50_train_features_20170214_23_34_31.npy

In [74]:
train_feature_path = "./features/REALDEAL_DGCarsDataset_ResNet50_train_features_20170214_23_34_31.npy"
train_features = np.load(open(train_feature_path, "rb"))

# 2. Train classifier based on features

In [115]:
checkpoint_dirpath = "./checkpoints/"
print("create a checkpoint directory to store saved checkpoints: {}".format(checkpoint_dirpath))
if not os.path.exists(checkpoint_dirpath):
    os.makedirs(checkpoint_dirpath)

checkpoint_filepath = \
    checkpoint_dirpath + \
    "{}_{}_features_".format(dataset_type, "classifier") + \
    "{epoch:02d}_{val_acc:.8f}.npy"

checkpoint = ModelCheckpoint(
    checkpoint_filepath, 
    monitor="val_acc", 
    save_best_only=True, 
    mode="max"
)
callbacks_list = [checkpoint]

create a checkpoint directory to store saved checkpoints: ./checkpoints/


In [76]:
nb_classes = train_generator.nb_class
nb_features = model.output_shape[-1] # same as train_features.shape[-1]
nb_hidden_layers = int(round(np.mean([nb_features, nb_classes])))
print("{} -> [hidden layer {}] -> {}\n".format(nb_features, nb_hidden_layers, nb_classes))

top_model = Sequential()
top_model.add(Dense(nb_hidden_layers, activation="relu", input_shape=train_features.shape[1:]))
top_model.add(Flatten())
top_model.add(Dense(nb_classes, activation="softmax")) 

2048 -> [hidden layer 1026] -> 3



In [77]:
top_model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [78]:
count = 0
for i in train_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    print("y: {}".format(y))
    break

count: 1, x.shape: (32, 224, 224, 3), y.shape: (32, 3)
y: [[ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 1.  0.  0.]
 [ 0.  0.  1.]]


In [79]:
count = 0
for x in train_features:
    count = count + 1
    print("count: {}, x.shape: {}".format(count, x.shape))

count: 1, x.shape: (1, 1, 2048)
count: 2, x.shape: (1, 1, 2048)
count: 3, x.shape: (1, 1, 2048)
count: 4, x.shape: (1, 1, 2048)
count: 5, x.shape: (1, 1, 2048)
count: 6, x.shape: (1, 1, 2048)
count: 7, x.shape: (1, 1, 2048)
count: 8, x.shape: (1, 1, 2048)
count: 9, x.shape: (1, 1, 2048)
count: 10, x.shape: (1, 1, 2048)
count: 11, x.shape: (1, 1, 2048)
count: 12, x.shape: (1, 1, 2048)
count: 13, x.shape: (1, 1, 2048)
count: 14, x.shape: (1, 1, 2048)
count: 15, x.shape: (1, 1, 2048)
count: 16, x.shape: (1, 1, 2048)
count: 17, x.shape: (1, 1, 2048)
count: 18, x.shape: (1, 1, 2048)
count: 19, x.shape: (1, 1, 2048)
count: 20, x.shape: (1, 1, 2048)
count: 21, x.shape: (1, 1, 2048)
count: 22, x.shape: (1, 1, 2048)
count: 23, x.shape: (1, 1, 2048)
count: 24, x.shape: (1, 1, 2048)
count: 25, x.shape: (1, 1, 2048)
count: 26, x.shape: (1, 1, 2048)
count: 27, x.shape: (1, 1, 2048)
count: 28, x.shape: (1, 1, 2048)
count: 29, x.shape: (1, 1, 2048)
count: 30, x.shape: (1, 1, 2048)
count: 31, x.shape:

In [None]:
# x.shape: (1, 224, 224, 3), y.shape: (1, 3)
# batch_size = 1

In [59]:
class_dictionary = train_generator.class_indices

for key, value in class_dictionary.items():
    print(key, value)

3 2
2 1
1 0


In [57]:
count = 0
for i in train_generator.classes:
    count = count + 1
    print("count: {}, i: {}".format(count, i))

count: 1, i: 0
count: 2, i: 0
count: 3, i: 0
count: 4, i: 0
count: 5, i: 0
count: 6, i: 0
count: 7, i: 0
count: 8, i: 0
count: 9, i: 0
count: 10, i: 0
count: 11, i: 0
count: 12, i: 0
count: 13, i: 0
count: 14, i: 0
count: 15, i: 0
count: 16, i: 0
count: 17, i: 0
count: 18, i: 0
count: 19, i: 0
count: 20, i: 0
count: 21, i: 0
count: 22, i: 0
count: 23, i: 0
count: 24, i: 0
count: 25, i: 0
count: 26, i: 0
count: 27, i: 0
count: 28, i: 0
count: 29, i: 0
count: 30, i: 0
count: 31, i: 0
count: 32, i: 0
count: 33, i: 0
count: 34, i: 0
count: 35, i: 0
count: 36, i: 0
count: 37, i: 0
count: 38, i: 0
count: 39, i: 0
count: 40, i: 0
count: 41, i: 0
count: 42, i: 0
count: 43, i: 0
count: 44, i: 0
count: 45, i: 0
count: 46, i: 0
count: 47, i: 0
count: 48, i: 0
count: 49, i: 0
count: 50, i: 0
count: 51, i: 0
count: 52, i: 0
count: 53, i: 0
count: 54, i: 0
count: 55, i: 0
count: 56, i: 0
count: 57, i: 0
count: 58, i: 0
count: 59, i: 0
count: 60, i: 0
count: 61, i: 0
count: 62, i: 0
count: 63, i: 0
c

In [66]:
import threading
class threadsafe_iter:
    """Takes an iterator/generator and makes it thread-safe by
    serializing call to the `next` method of given iterator/generator.
    """
    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def next(self):
        with self.lock:
            return self.it.next()


def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.
    """
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g

@threadsafe_generator
def __create_generator_from_features(feature, generator):  # write the definition of your data generator
    for feature, class_index in zip(features, generator.classes):
        label = np.zeros(generator.nb_class)
        label[class_index] = 1
        yield (feature, label)
    
    """
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    y_train = np_utils.to_categorical(y_train,10)
    X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
    X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
    X_train = X_train.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_test /= 255
    while 1:
        for i in range(1875):
            yield X_train[i*32:(i+1)*32], y_train[i*32:(i+1)*32]
        # print("Came here")
    """

#@threadsafe_generator
from keras.datasets import mnist
from keras.utils import np_utils
def mygenerator():  # write the definition of your data generator
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    y_train = np_utils.to_categorical(y_train,10)
    print("y_train.shape: {}".format(y_train.shape))
    X_train = X_train.reshape(X_train.shape[0], 1, 28, 28)
    print("X_train.shape: {}".format(X_train.shape))
    X_test = X_test.reshape(X_test.shape[0], 1, 28, 28)
    X_train = X_train.astype('float32')
    print("X_train.shape: {}".format(X_train.shape))
    X_test = X_test.astype('float32')
    X_train /= 255
    print("X_train.shape: {}".format(X_train.shape))
    X_test /= 255
    while 1:
        for i in range(1875):
            print(i)
            yield X_train[i*32:(i+1)*32], y_train[i*32:(i+1)*32]

count = 0
for i in mygenerator():
    x, y = i
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    #print(">> x: {}".format(x))
    #print(">> y: {}".format(y)'break)
    break

        
#train_feature_generator = __create_generator_from_features(train_features, train_generator)

"""
count = 0
for i in train_feature_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    print("type of y: {}".format(type(y)))
"""


y_train.shape: (60000, 10)
X_train.shape: (60000, 1, 28, 28)
X_train.shape: (60000, 1, 28, 28)
X_train.shape: (60000, 1, 28, 28)
0
count: 0, x.shape: (32, 1, 28, 28), y.shape: (32, 10)


'\ncount = 0\nfor i in train_feature_generator:\n    x, y = i\n    count = count + 1\n    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))\n    print("type of y: {}".format(type(y)))\n'

In [87]:
labels = np.zeros((194, 3))
count = 0
for i, class_index in zip(range(0, 194), train_generator.classes):
    labels[i][class_index] = 1
    count = count + 1
    #print("count: {}, i: {}, class_index: {}, labels[{}]: {}".format(count, i, class_index, i, labels[i]))

count: 1, i: 0, class_index: 0, labels[0]: [ 1.  0.  0.]
count: 2, i: 1, class_index: 0, labels[1]: [ 1.  0.  0.]
count: 3, i: 2, class_index: 0, labels[2]: [ 1.  0.  0.]
count: 4, i: 3, class_index: 0, labels[3]: [ 1.  0.  0.]
count: 5, i: 4, class_index: 0, labels[4]: [ 1.  0.  0.]
count: 6, i: 5, class_index: 0, labels[5]: [ 1.  0.  0.]
count: 7, i: 6, class_index: 0, labels[6]: [ 1.  0.  0.]
count: 8, i: 7, class_index: 0, labels[7]: [ 1.  0.  0.]
count: 9, i: 8, class_index: 0, labels[8]: [ 1.  0.  0.]
count: 10, i: 9, class_index: 0, labels[9]: [ 1.  0.  0.]
count: 11, i: 10, class_index: 0, labels[10]: [ 1.  0.  0.]
count: 12, i: 11, class_index: 0, labels[11]: [ 1.  0.  0.]
count: 13, i: 12, class_index: 0, labels[12]: [ 1.  0.  0.]
count: 14, i: 13, class_index: 0, labels[13]: [ 1.  0.  0.]
count: 15, i: 14, class_index: 0, labels[14]: [ 1.  0.  0.]
count: 16, i: 15, class_index: 0, labels[15]: [ 1.  0.  0.]
count: 17, i: 16, class_index: 0, labels[16]: [ 1.  0.  0.]
count: 18

'        \n\ndef get_train_feature_generator():\n    # must return x in the form of (32, 1, 28, 28)\n    # must return y in the form of (32, 3)\n    while True:\n        for i in range()\n        \n        \ny = np.zeros() \ntrain_generator.classes:\n'

In [97]:
print(train_features[0])

[[[ 0.05949762  0.11966778  0.22653553 ...,  0.10000877  0.01944498
    0.15126897]]]


In [96]:
# 194 = train_generator.nb_sample
# 32 = batch_size


def get_train_feature_generator():
    # must return x in the form of (32, 1, 28, 28)
    # must return y in the form of (32, 3)
    while True:
        for i in range(int(194/32)):
            yield train_features[i*32:(i+1)*32], labels[i*32:(i+1)*32]

In [117]:
train_features_generator = get_train_feature_generator()

top_model.fit_generator(
    generator=train_features_generator,
    samples_per_epoch=32,
    nb_epoch=10, 
    #callbacks=callbacks_list,
    verbose=2
)

Epoch 1/10
0s - loss: 1.1921e-07 - acc: 1.0000
Epoch 2/10
0s - loss: 0.5037 - acc: 0.9688
Epoch 3/10
0s - loss: 16.1181 - acc: 0.0000e+00
Epoch 4/10
0s - loss: 16.1181 - acc: 0.0000e+00
Epoch 5/10
0s - loss: 16.1181 - acc: 0.0000e+00
Epoch 6/10
0s - loss: 16.1181 - acc: 0.0000e+00
Epoch 7/10
0s - loss: 1.1921e-07 - acc: 1.0000
Epoch 8/10
0s - loss: 0.5037 - acc: 0.9688
Epoch 9/10
0s - loss: 16.1181 - acc: 0.0000e+00
Epoch 10/10
0s - loss: 16.1181 - acc: 0.0000e+00


<keras.callbacks.History at 0x7f3bc82376d8>

In [116]:
top_model.fit_generator(
    generator=train_features_generator,
    samples_per_epoch=32,
    nb_epoch=10, 
    callbacks=callbacks_list,
    verbose=2
)

Epoch 1/10


KeyError: 'val_acc'

In [118]:
val_features, val_features_path = __extract_features(val_generator, model, batch_size, "validation")

val_labels = np.zeros((32, 3))

for i, class_index in zip(range(0, 32), val_generator.classes):
    val_labels[i][class_index] = 1

print("val_features: {}, val_labels: {}".format(val_features.shape, val_labels.shape))

score = top_model.evaluate(
    x=val_features,
    y=val_labels,
    batch_size=batch_size,
)

print("{}: {}".format(
    top_model.metrics_names[1],
    score[1]
))

create a feature directory to store saved features: ./features/
extract features from convolutional model based on data
generator: validation_generator
batch_size: 32
save features to ./features/REALDEAL_DGCarsDataset_ResNet50_validation_features_20170216_23_09_44.npy
val_features: (32, 1, 1, 2048), val_labels: (32, 3)
acc: 1.0


In [103]:
print(val_features.shape)

(32, 1, 1, 2048)


In [107]:
val_labels

NameError: name 'val_labels' is not defined

# 3. Train entire model with data

In [119]:
from keras.models import Model                              

model = ResNet50(
            include_top=False,
            weights="imagenet",
            input_tensor=Input(
                shape=(
                    img_height, 
                    img_width, 
                    img_dimension
                )
            )
        )

for layer in model.layers:
    layer.trainable = True

In [120]:
combined_model = Model(input=model.input, output=top_model(model.output))

In [121]:
combined_model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [123]:
combined_model.fit_generator(
    generator=train_generator,
    samples_per_epoch=32,
    nb_epoch=10, 
    #callbacks=callbacks_list,
    verbose=2
)

Epoch 1/10




10s - loss: 9.4812 - acc: 0.4118
Epoch 2/10
1s - loss: 9.0664 - acc: 0.4375
Epoch 3/10
0s - loss: 12.5923 - acc: 0.2188
Epoch 4/10
0s - loss: 12.0886 - acc: 0.2500
Epoch 5/10
0s - loss: 9.5701 - acc: 0.4062
Epoch 6/10
0s - loss: 13.5996 - acc: 0.1562
Epoch 7/10
0s - loss: 11.3775 - acc: 0.2941
Epoch 8/10
0s - loss: 10.5775 - acc: 0.3438
Epoch 9/10
0s - loss: 11.5849 - acc: 0.2812
Epoch 10/10
0s - loss: 9.5701 - acc: 0.4062


<keras.callbacks.History at 0x7f39cb10aeb8>

In [125]:
score = combined_model.evaluate_generator(
    generator=val_generator,
    val_samples=val_generator.nb_sample
)

print("{}: {}".format(
    combined_model.metrics_names[1],
    score[1]
))

acc: 0.3112582793298936
