# 2. FeatureExtraction_from_VGG16_InceptionResNetV2

References:
- https://github.com/ypwhs/dogs_vs_cats
- https://www.kaggle.com/yangpeiwen/keras-inception-xception-0-47

## Import PKGs

In [1]:
import h5py
import os
import time
import math
import numpy as np

from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.preprocessing.image import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Run name

In [2]:
# Build a timestamped identifier for this run: <project>_<step>_<YYYYmmdd_HHMMSS>.
project_name = 'ic_furniture2018'
step_name = 'FeatureExtraction'
time_str = time.strftime("%Y%m%d_%H%M%S", time.localtime())
run_name = '_'.join((project_name, step_name, time_str))
print('run_name: %s' % run_name)

run_name: ic_furniture2018_FeatureExtraction_20180406_100859


## 使用预训练权重的VGG16、VGG19、ResNet50、Xception、InceptionV3和InceptionResNetV2模型提取特征 (Extract features with ImageNet-pretrained models)

In [3]:
def _predict_split(model, generator, split_name, batch_size):
    """Run `model` over every file of one directory generator and return the predictions.

    Prints the split name, the number of files found, and the number of
    batches needed to cover them all (the last batch may be partial).
    """
    print('%s_generator' % split_name)
    file_count = len(generator.filenames)
    print(file_count)
    # Round up so the final, possibly smaller, batch is not dropped.
    steps = math.ceil(file_count / batch_size)
    print('%s_generator_steps=%d' % (split_name, steps))
    return model.predict_generator(generator, verbose=1, steps=steps, max_queue_size=2, workers=4)


def get_features(MODEL, image_size, date_str, lambda_func=None, batch_size=1, is_aug=False):
    """Extract globally average-pooled features with a pretrained backbone and save them to HDF5.

    Images are read from ``<cwd>/input/data_train``, ``<cwd>/input/data_val`` and
    ``<cwd>/input/data_test``. The result is written to
    ``<cwd>/feature/feature_<MODEL.__name__>_<date_str>.h5`` with the datasets
    ``train``, ``train_labels``, ``val``, ``val_labels`` and ``test``.
    An existing file with the same name is replaced.

    Args:
        MODEL: Model constructor (e.g. one from ``keras.applications``) accepting
            ``input_tensor``, ``weights``, ``input_shape`` and ``include_top``.
        image_size: ``(width, height)`` of the images fed to the network.
        date_str: Suffix used in the output file name.
        lambda_func: Optional preprocessing function; when given it is wrapped in
            a ``Lambda`` layer applied to the input tensor.
        batch_size: Number of images per prediction batch.
        is_aug: When true, random augmentation is applied to all three splits
            (train, val and test share one ``ImageDataGenerator``).

    Returns:
        None. Progress, shapes and labels are printed; features are saved to disk.
    """
    print('{0} start.'.format(MODEL.__name__))
    start_time = time.time()

    cwd = os.getcwd()
    folder_path = os.path.join(cwd, 'feature')
    if not os.path.exists(folder_path):
        os.mkdir(folder_path)
        print('Created folder: %s' % folder_path)
    else:
        print('Existed folder: %s' % folder_path)
    file_name = os.path.join(folder_path, 'feature_{0}_{1}.h5'.format(MODEL.__name__, date_str))
    print(file_name)
    if os.path.exists(file_name):
        os.remove(file_name)

    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        print(lambda_func.__name__)
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', input_shape=(height, width, 3), include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    data_train_path = os.path.join(cwd, 'input', 'data_train')
    data_val_path = os.path.join(cwd, 'input', 'data_val')
    data_test_path = os.path.join(cwd, 'input', 'data_test')

    if is_aug:
        print('have augumentation')
        gen = ImageDataGenerator(zoom_range=0.2,
                                 height_shift_range=0.2,
                                 width_shift_range=0.2,
                                 horizontal_flip=True,
                                 vertical_flip=True,
                                 fill_mode='wrap',
                                 rotation_range=20)
    else:
        print('do not have augumentation')
        gen = ImageDataGenerator()

    # Keras expects target_size as (height, width), whereas image_size is
    # (width, height); pass it in Keras order so non-square sizes match the
    # model's input shape.
    target_size = (height, width)
    # shuffle=False keeps predictions aligned with generator.classes/filenames.
    train_generator = gen.flow_from_directory(data_train_path, target_size, shuffle=False,
                                              batch_size=batch_size)
    val_generator = gen.flow_from_directory(data_val_path, target_size, shuffle=False,
                                            batch_size=batch_size)
    test_generator = gen.flow_from_directory(data_test_path, target_size, shuffle=False,
                                             batch_size=batch_size)

    train = _predict_split(model, train_generator, 'train', batch_size)
    val = _predict_split(model, val_generator, 'val', batch_size)
    test = _predict_split(model, test_generator, 'test', batch_size)

    # Explicit 'w': h5py >= 3.0 defaults to read-only mode, which fails here
    # because the file does not exist yet.
    with h5py.File(file_name, 'w') as h:
        h.create_dataset("train", data=train)
        h.create_dataset("train_labels", data=train_generator.classes)
        h.create_dataset("val", data=val)
        h.create_dataset("val_labels", data=val_generator.classes)
        h.create_dataset("test", data=test)

    print(train.shape)
    print(train_generator.classes)
    print(val.shape)
    print(val_generator.classes)
    print(test.shape)

    end_time = time.time()
    print('Spend time: {0} s'.format(end_time-start_time))

In [4]:
def get_all_features(batch_size=1, is_aug=False):
    """Run feature extraction for every enabled backbone under one shared timestamp.

    The timestamp is printed and passed to each `get_features` call as its
    `date_str`, so all files from one invocation share the same suffix.

    Args:
        batch_size: Forwarded to `get_features`.
        is_aug: Forwarded to `get_features`.
    """
    time_str = time.strftime("%Y%m%d-%H%M%S", time.localtime())
    print(time_str)

    # Each row: (model constructor, image size, preprocessing function).
    # Commented rows are backbones that are currently disabled.
    extractors = (
        # (MobileNet, (224, 224), mobilenet.preprocess_input),
        # (VGG16, (224, 224), vgg16.preprocess_input),
        # (VGG19, (224, 224), vgg19.preprocess_input),
        # (ResNet50, (224, 224), resnet50.preprocess_input),
        # (DenseNet121, (224, 224), densenet.preprocess_input),
        # (DenseNet169, (224, 224), densenet.preprocess_input),
        # (DenseNet201, (224, 224), densenet.preprocess_input),
        (Xception, (150, 150), xception.preprocess_input),
        (InceptionV3, (150, 150), inception_v3.preprocess_input),
        (InceptionResNetV2, (150, 150), inception_resnet_v2.preprocess_input),
    )
    for backbone, image_size, preprocess in extractors:
        get_features(backbone, image_size, time_str, preprocess, batch_size, is_aug)


In [5]:
# Disabled: a single extraction pass with batch_size=128 and is_aug=False (no augmentation).
# get_all_features(128, False)

In [None]:
# Nine augmented extraction passes with batch_size=128; each pass prints a
# banner with its index before running.
for pass_idx in range(9):
    print('*' * 80, '%s' % pass_idx, sep='  ')
    get_all_features(128, True)

********************************************************************************  0
20180406-100859
Xception start.
Existed folder: /data1/kaggle/imaterialist-challenge-furniture-2018/feature
/data1/kaggle/imaterialist-challenge-furniture-2018/feature/feature_Xception_20180406-100859.h5
preprocess_input
have augumentation
Found 191261 images belonging to 128 classes.
Found 6301 images belonging to 128 classes.
Found 12652 images belonging to 1 classes.
train_generator
191261
train_generator_steps=1495
 240/1495 [===>..........................] - ETA: 35:41

In [None]:
# Completion marker: when cells are run in order, this prints after the extraction passes above.
print('Done!')