In [1]:
import os
import tensorflow as tf
import keras.backend.tensorflow_backend as KTF

def get_session(gpu_fraction=0.3):
    '''Assume that you have 6GB of GPU memory and want to allocate ~2GB'''

    num_threads = os.environ.get('OMP_NUM_THREADS')
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)

    if num_threads:
        return tf.Session(config=tf.ConfigProto(
            gpu_options=gpu_options, intra_op_parallelism_threads=num_threads))
    else:
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

get_session()

Using TensorFlow backend.


<tensorflow.python.client.session.Session at 0x7f314d9f4cc0>

In [1]:
import argparse
import collections
import datetime
import enum
import glob
import logging
import numpy as np
import os
import random
import sys
import tempfile
import tensorflow as tf

import keras.backend.tensorflow_backend as KTF
from keras.applications.imagenet_utils import preprocess_input
from keras.applications.resnet50 import ResNet50
from keras.callbacks import ModelCheckpoint
from keras.models import Sequential                              
from keras.layers import Dense, Dropout, Flatten, Input, Reshape            
from keras.preprocessing import image

import pelops.const as const
from pelops.datasets.dgcars import DGCarsDataset
from pelops.utils import SetType, setup_custom_logger


Using TensorFlow backend.


In [2]:
batch_size = 194
img_height = 224
img_width = 224
img_dimension = 3

train_dir_path = "./datasets/train/" 
val_dir_path = "./datasets/test/"
train_features_path = None
val_features_path = None
dataset_type = "DGCarsDataset"
conv_model_type = "ResNet50"
conv_model_name = "ResNet50"

# 1. Extract Features

In [4]:
np.zeros((1,3 ))

array([[ 0.,  0.,  0.]])

In [5]:
"""
    while True:
        xs = []
        ys = []
        for filename1, filename2 in zip(image_list[0::2], image_list[1::2]):
            print("filename: {}".format(filename))
            x1 = load_image(filename1)
            x2 = load_image(filename2)
            x = np.concatenate((x1, x2), axis=0)
            xs.append(x)
            # if batch_size is greater than one, append more x into xs
            print("x.shape: {}".format(x.shape))
            y1 = np.zeros((1, num_classes))
            y1[0][train_image_class_mapping[filename1]] = 1
            y2 = np.zeros((1, num_classes))
            y2[0][train_image_class_mapping[filename2]] = 1
            y = np.concatenate((y1, y2), axis=0)
            ys.append(y)
            # if batch_size is greater than one, append more y into ys
            print("y.shape: {}".format(y.shape))
        yield (np.array(xs).squeeze(), np.array(ys))
"""

'\n    while True:\n        xs = []\n        ys = []\n        for filename1, filename2 in zip(image_list[0::2], image_list[1::2]):\n            print("filename: {}".format(filename))\n            x1 = load_image(filename1)\n            x2 = load_image(filename2)\n            x = np.concatenate((x1, x2), axis=0)\n            xs.append(x)\n            # if batch_size is greater than one, append more x into xs\n            print("x.shape: {}".format(x.shape))\n            y1 = np.zeros((1, num_classes))\n            y1[0][train_image_class_mapping[filename1]] = 1\n            y2 = np.zeros((1, num_classes))\n            y2[0][train_image_class_mapping[filename2]] = 1\n            y = np.concatenate((y1, y2), axis=0)\n            ys.append(y)\n            # if batch_size is greater than one, append more y into ys\n            print("y.shape: {}".format(y.shape))\n        yield (np.array(xs).squeeze(), np.array(ys))\n'

In [65]:
# load data 

def load_image(img_path):
    img = image.load_img(img_path, target_size=(img_height, img_width))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x
    
def image_class_generator(train_image_classes_mapping, num_classes, batch_size):
    image_list = list(train_image_class_mapping.keys())
    print("len(image_list): {}".format(len(image_list)))
    
    while True:
        xs = []
        ys = []
        for filename in image_list:
            #print("filename: {}".format(filename))
            x = load_image(filename)
            xs.append(x)
            # if batch_size is greater than one, append more x into xs
            #print("x.shape: {}".format(x.shape))
            y = np.zeros(num_classes)
            y[train_image_class_mapping[filename]] = 1
            ys.append(y)
            # if batch_size is greater than one, append more y into ys
            #print("y.shape: {}".format(y.shape))
        yield (np.array(xs).squeeze(), np.array(ys))
    
train_image_classes = set()
train_image_class_mapping = {}

for image_class_filepath in glob.glob(os.path.join(train_dir_path, '*')):
    if os.path.isdir(image_class_filepath):
        #print("image_class_filepath: {}".format(image_class_filepath))
        image_class_num = int(os.path.basename(image_class_filepath)) - 1
        #print("image_class_num: {}".format(image_class_num))
        train_image_classes.add(image_class_num)
        for filename in glob.glob(os.path.join(image_class_filepath, '*')):
            #print("train_image_class_mapping[{}] = {}".format(filename, image_class_num))
            train_image_class_mapping[filename] = image_class_num

nb_classes = len(train_image_classes)
print("len(train_image_classes): {}".format(nb_classes))

train_generator = image_class_generator(train_image_class_mapping, len(train_image_classes), batch_size)

count = 0
for i in train_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    break

    
# x.shape == (194, 224, 224, 3)
# y.shape == (194, 3)

len(train_image_classes): 3
len(image_list): 194
count: 1, x.shape: (194, 224, 224, 3), y.shape: (194, 3)


In [91]:
val_image_classes = set()
val_image_class_mapping = {}

for image_class_filepath in glob.glob(os.path.join(val_dir_path, '*')):
    if os.path.isdir(image_class_filepath):
        #print("image_class_filepath: {}".format(image_class_filepath))
        image_class_num = int(os.path.basename(image_class_filepath)) - 1
        #print("image_class_num: {}".format(image_class_num))
        val_image_classes.add(image_class_num)
        for filename in glob.glob(os.path.join(image_class_filepath, '*')):
            #print("train_image_class_mapping[{}] = {}".format(filename, image_class_num))
            val_image_class_mapping[filename] = image_class_num

print("len(train_image_classes): {}".format(len(val_image_classes)))

val_generator = image_class_generator(val_image_class_mapping, len(val_image_classes), batch_size)

len(train_image_classes): 3


In [93]:
model = ResNet50(
            include_top=False,
            weights="imagenet",
            input_tensor=Input(
                shape=(
                    img_height, 
                    img_width, 
                    img_dimension
                )
            )
        )

In [94]:
for layer in model.layers:
    layer.trainable = False

In [96]:
def __extract_features(generator, model, batch_size, set_type):
    feature_dirpath = "./features/"
    print("create a feature directory to store saved features: {}".format(feature_dirpath))
    if not os.path.exists(feature_dirpath):
        os.makedirs(feature_dirpath)

    print("extract features from convolutional model based on data")
    print("generator: {}_generator".format(set_type))
    print("batch_size: {}".format(batch_size))
    features = model.predict_generator(
        generator,
        batch_size
    )

    time_now = datetime.datetime.now().strftime("%Y%m%d_%H_%M_%S")
    features_filepath = feature_dirpath + "TESTING_{}_{}_{}_features_{}.npy".format(
        dataset_type,
        conv_model_type,
        set_type,
        time_now
    )
    print("save features to {}".format(features_filepath))
    np.save(open(features_filepath, "wb"), features)

    return features, features_filepath

In [13]:
train_features, train_features_path = __extract_features(train_generator, model, batch_size, "train")

create a feature directory to store saved features: ./features/
extract features from convolutional model based on data
generator: train_generator
batch_size: 194
save features to ./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_20_43_52.npy


In [79]:
#save features to ./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_20_12_10.npy
#save features to ./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_20_39_50.npy
#save features to ./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_20_43_52.npy

In [3]:
train_feature_path = "./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_20_39_50.npy"
train_features = np.load(open(train_feature_path, "rb"))

# 2. Train classifier based on features

In [4]:
len(train_features)

194

In [5]:
train_features.shape

(194, 1, 1, 2048)

In [100]:
count = 0
for i in train_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    break

print(y.shape)
labels = y
print(labels.shape)

count: 1, x.shape: (194, 224, 224, 3), y.shape: (194, 3)
(194, 3)
(194, 3)


In [43]:
"""
def __create_generator_from_features(features, labels):
    for feature, label in zip(features, labels):
        yield (feature, label)

train_features_generator = __create_generator_from_features(train_features, labels)
"""

In [44]:
"""
count = 0
for i in train_features_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
"""

count: 1, x.shape: (1, 1, 2048), y.shape: (3,)
count: 2, x.shape: (1, 1, 2048), y.shape: (3,)
count: 3, x.shape: (1, 1, 2048), y.shape: (3,)
count: 4, x.shape: (1, 1, 2048), y.shape: (3,)
count: 5, x.shape: (1, 1, 2048), y.shape: (3,)
count: 6, x.shape: (1, 1, 2048), y.shape: (3,)
count: 7, x.shape: (1, 1, 2048), y.shape: (3,)
count: 8, x.shape: (1, 1, 2048), y.shape: (3,)
count: 9, x.shape: (1, 1, 2048), y.shape: (3,)
count: 10, x.shape: (1, 1, 2048), y.shape: (3,)
count: 11, x.shape: (1, 1, 2048), y.shape: (3,)
count: 12, x.shape: (1, 1, 2048), y.shape: (3,)
count: 13, x.shape: (1, 1, 2048), y.shape: (3,)
count: 14, x.shape: (1, 1, 2048), y.shape: (3,)
count: 15, x.shape: (1, 1, 2048), y.shape: (3,)
count: 16, x.shape: (1, 1, 2048), y.shape: (3,)
count: 17, x.shape: (1, 1, 2048), y.shape: (3,)
count: 18, x.shape: (1, 1, 2048), y.shape: (3,)
count: 19, x.shape: (1, 1, 2048), y.shape: (3,)
count: 20, x.shape: (1, 1, 2048), y.shape: (3,)
count: 21, x.shape: (1, 1, 2048), y.shape: (3,)
c

In [45]:
checkpoint_dirpath = "./checkpoints/"
print("create a checkpoint directory to store saved checkpoints: {}".format(checkpoint_dirpath))
if not os.path.exists(checkpoint_dirpath):
    os.makedirs(checkpoint_dirpath)

checkpoint_filepath = \
    checkpoint_dirpath + \
    "{}_{}_features_".format(dataset_type, "classifier") + \
    "{epoch:02d}_{val_acc:.2f}.npy"

checkpoint = ModelCheckpoint(
    checkpoint_filepath, 
    monitor="val_acc", 
    save_best_only=True, 
    mode="max"
)
callbacks_list = [checkpoint]

create a checkpoint directory to store saved checkpoints: ./checkpoints/


In [46]:
nb_features = model.output_shape[-1] # same as train_features.shape[-1]
nb_hidden_layers = int(round(np.mean([nb_features, nb_classes])))
print("{} -> [hidden layer {}] -> {}\n".format(nb_features, nb_hidden_layers, nb_classes))


top_model = Sequential()
top_model.add(Dense(nb_hidden_layers, activation="relu", input_shape=train_features.shape[1:]))
top_model.add(Flatten())
top_model.add(Dense(nb_classes, activation="softmax")) 

"""
top_model = Sequential()
top_model = Dense(nb_hidden_layers, activation="relu", input_shape=train_features.shape[1:])(top_model)
top_model = Flatten()(top_model)
top_model = Dense(nb_classes, activation="softmax")(top_model)
"""

2048 -> [hidden layer 1026] -> 3



In [47]:
top_model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)


In [68]:
print("train_features.shape: {}".format(train_features.shape))
print("labels.shape: {}".format(y.shape))

top_model.fit(
    x=train_features,
    y=labels
)

train_features.shape: (194, 1, 1, 2048)
labels.shape: (194, 3)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f7124dac080>

In [102]:
val_features, val_features_path = __extract_features(val_generator, model, batch_size, "train")

print(val_features.shape)
print(labels.shape)

score = top_model.evaluate(
    x=val_features,
    y=labels,
    batch_size=batch_size,
)

print("{}: {}".format(
    top_model.metrics_names[1],
    score[1]
))

create a feature directory to store saved features: ./features/
extract features from convolutional model based on data
generator: train_generator
batch_size: 194
save features to ./features/TESTING_DGCarsDataset_ResNet50_train_features_20170214_23_14_12.npy
(194, 1, 1, 2048)
(194, 3)
acc: 0.3865979313850403


# 3. Train entire model with data

In [77]:
from keras.models import Model                              

model = ResNet50(
            include_top=False,
            weights="imagenet",
            input_tensor=Input(
                shape=(
                    img_height, 
                    img_width, 
                    img_dimension
                )
            )
        )

for layer in model.layers:
    layer.trainable = True


In [80]:
combined_model = Model(input=model.input, output=top_model(model.output))

In [82]:
combined_model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"]
)

In [104]:
count = 0
for i in train_generator:
    x, y = i
    count = count + 1
    print("count: {}, x.shape: {}, y.shape: {}".format(count, x.shape, y.shape))
    break

print("x.shape: {}, y.shape: {}".format(x.shape, y.shape))


count: 1, x.shape: (194, 224, 224, 3), y.shape: (194, 3)
x.shape: (194, 224, 224, 3), y.shape: (194, 3)


In [90]:
combined_model.fit(
    x=x,
    y=y
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f6ee4a03400>

In [105]:
score = combined_model.evaluate(
    x=x,
    y=y,
    batch_size=batch_size,
)

print("{}: {}".format(
    combined_model.metrics_names[1],
    score[1]
))

acc: 0.3865979313850403
