## 2. Feature_extraction_from_VGG16_to_InceptionResNetV2

### References:
1. https://github.com/ypwhs/dogs_vs_cats
2. https://www.kaggle.com/yangpeiwen/keras-inception-xception-0-47

### Import packages

In [1]:
import h5py
import os
import time

from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.preprocessing.image import *

Using TensorFlow backend.


In [2]:
def get_features(MODEL, image_size, batch_size=1, lambda_func=None):
    """Extract pooled ImageNet features for each image set and save them to HDF5.

    Builds ``MODEL`` with ImageNet weights and without its classification top,
    appends a global-average-pooling layer, and runs the resulting network over
    every image found under ``input/data_train``, ``input/data_validation`` and
    ``input/data_test_b`` (all relative to the current working directory).
    The outputs are written to ``model/feature_<MODEL.__name__>_171202.h5`` as
    the datasets ``train``, ``train_label``, ``val``, ``val_label`` and
    ``test_b``; any existing file with that name is overwritten.

    Args:
        MODEL: Model constructor that accepts the ``input_tensor``, ``weights``
            and ``include_top`` keyword arguments (e.g. ``VGG16``).
        image_size: ``(width, height)`` of the network input in pixels.
        batch_size: Number of images per prediction batch.
        lambda_func: Optional function wrapped in a ``Lambda`` layer and applied
            to the input tensor before the base model (e.g. a
            ``preprocess_input`` function).

    Returns:
        None. The features are only written to disk.

    Raises:
        ValueError: If one of the image directories yields no images.
    """
    print('{0} start.'.format(MODEL.__name__))
    start_time = time.time()

    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        print(lambda_func.__name__)
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    cwd = os.getcwd()
    data_train_path = os.path.join(cwd, 'input', 'data_train')
    data_val_path = os.path.join(cwd, 'input', 'data_validation')
    data_test_b_path = os.path.join(cwd, 'input', 'data_test_b')

    # No augmentation: the extracted features must be deterministic per image.
    gen = ImageDataGenerator()

    def _flow(path):
        # target_size is (height, width), matching the Input shape built above.
        # shuffle=False keeps the batches in the same order as generator.classes.
        return gen.flow_from_directory(path, target_size=(height, width),
                                       shuffle=False, batch_size=batch_size)

    def _predict_all(generator, path):
        # Cover every image exactly once so that the feature rows line up with
        # generator.classes (ceil division keeps the final partial batch).
        if generator.samples == 0:
            raise ValueError('No images found under {0}'.format(path))
        steps = (generator.samples + batch_size - 1) // batch_size
        # NOTE(review): rows are paired with generator.classes by position,
        # which relies on predict_generator returning batches in generator
        # order -- confirm this holds for the installed Keras when workers > 1.
        return model.predict_generator(generator, verbose=1, steps=steps,
                                       max_queue_size=128, workers=16)

    train_generator = _flow(data_train_path)
    val_generator = _flow(data_val_path)
    test_b_generator = _flow(data_test_b_path)

    train = _predict_all(train_generator, data_train_path)
    val = _predict_all(val_generator, data_val_path)
    test_b = _predict_all(test_b_generator, data_test_b_path)

    model_dir = os.path.join(cwd, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    # 171202 is a fixed tag in the file name (presumably a YYMMDD date stamp;
    # verify against the notebooks that read this file before changing it).
    file_name = os.path.join(model_dir, 'feature_{0}_{1}.h5'.format(MODEL.__name__, 171202))
    print(file_name)
    # Explicit 'w' creates or truncates the file; the default mode differs
    # between h5py versions, so it must not be relied on.
    with h5py.File(file_name, 'w') as h:
        h.create_dataset("train", data=train)
        h.create_dataset("train_label", data=train_generator.classes)
        h.create_dataset("val", data=val)
        h.create_dataset("val_label", data=val_generator.classes)
        h.create_dataset("test_b", data=test_b)

    end_time = time.time()
    print('Spend time: {0} s'.format(end_time-start_time))

In [3]:
# Turn off IPython's automatic pdb calling so an exception in a long-running
# extraction cell does not drop the kernel into an interactive debugger.
%pdb off

Automatic pdb calling has been turned OFF


In [4]:
# Extract VGG16 features at 224x224, one image per batch.
# NOTE(review): no preprocessing function is passed here, unlike the Inception
# calls below -- confirm that feeding raw pixel values to VGG16 is intended.
get_features(VGG16, image_size=(224, 224), batch_size=1)

VGG16 start.
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7078 images belonging to 1 classes.
E:\AIChallenger\SceneClassification2017\model\feature_VGG16_171202.h5
Spend time: 517.0663092136383 s


In [5]:
# get_features(VGG19, (224, 224), 1)

In [6]:
# get_features(ResNet50, (224, 224), 1)

In [7]:
# get_features(Xception, (299, 299), 1, xception.preprocess_input)

In [8]:
# Extract InceptionV3 features at 299x299, applying the model's own
# preprocess_input inside the network.
get_features(
    InceptionV3,
    image_size=(299, 299),
    batch_size=1,
    lambda_func=inception_v3.preprocess_input,
)

InceptionV3 start.
preprocess_input
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7078 images belonging to 1 classes.
E:\AIChallenger\SceneClassification2017\model\feature_InceptionV3_171202.h5
Spend time: 2724.4563817977905 s


In [9]:
# Extract InceptionResNetV2 features at 299x299, applying the model's own
# preprocess_input inside the network.
get_features(
    InceptionResNetV2,
    image_size=(299, 299),
    batch_size=1,
    lambda_func=inception_resnet_v2.preprocess_input,
)

InceptionResNetV2 start.
preprocess_input
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7078 images belonging to 1 classes.
E:\AIChallenger\SceneClassification2017\model\feature_InceptionResNetV2_171202.h5
Spend time: 5746.545301437378 s


In [10]:
# Completion marker printed once this final cell is executed.
print("Done !")

Done !
