## 2. Feature_extraction_from_VGG16_to_InceptionV3

### References:
1. https://github.com/ypwhs/dogs_vs_cats
2. https://www.kaggle.com/yangpeiwen/keras-inception-xception-0-47

### Import pkgs

In [1]:
import h5py
import os
import time

from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.preprocessing.image import *
from keras.applications.inception_v3 import preprocess_input

Using TensorFlow backend.


In [2]:
def get_features(MODEL, image_size, batch_size=1, lambda_func=None):
    print('{0} start.'.format(MODEL.__name__))
    start_time = time.time()
    
    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        print(lambda_func.__name__)
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    cwd = os.getcwd()
    data_train_path = os.path.join(cwd, 'input', 'data_train')
    data_test_path  = os.path.join(cwd, 'input', 'data_test')
    
    gen = ImageDataGenerator(zoom_range = 0.1,
                            height_shift_range = 0.1,
                            width_shift_range = 0.1,
                            rotation_range = 10)
    train_generator = gen.flow_from_directory(data_train_path, image_size, shuffle=False, 
                                              batch_size=batch_size)
    test_generator  = gen.flow_from_directory(data_test_path,  image_size, shuffle=False, 
                                              batch_size=batch_size)
    
    train = model.predict_generator(train_generator, verbose=1, steps=25000)
    test = model.predict_generator(test_generator, verbose=1, steps=25000) # Need to fix ot 12500
#     train = model.predict_generator(train_generator, verbose=1, steps=10)
#     val = model.predict_generator(val_generator, verbose=1, steps=10)
#     test = model.predict_generator(test_generator, verbose=1, steps=10)
    
    file_name = os.path.join(cwd, 'model', 'feature_{0}_{1}.h5'.format(MODEL.__name__, 171023))
    print(file_name)
    if os.path.exists(file_name):
        os.remove(file_name)
    with h5py.File(file_name) as h:
        h.create_dataset("train", data=train)
        h.create_dataset("train_label", data=train_generator.classes)
        h.create_dataset("test", data=test)
        
    print(train.shape)
    print(len(train_generator.classes))
    print(test.shape)
    
    print(dir(train_generator))
    print(train_generator.num_class)
    print(train_generator.samples)
    print(train_generator.image_shape)
    print(train_generator.classes)
    
    end_time = time.time()
    print('Spend time: {0} s'.format(end_time-start_time))

In [3]:
%pdb off

Automatic pdb calling has been turned OFF


In [4]:
get_features(VGG16, (224, 224), 1)

VGG16 start.
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
D:\Dogs_vs_Cats_Redux_Kernels_Edition\model\feature_VGG16_171023.h5
(25000, 512)
25000
(25000, 512)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
2
25000
(224, 224, 3)
[0 0 0 ..., 1 1 1]
Spend time: 7227.651050567627 s


In [5]:
get_features(VGG19, (224, 224), 1)

VGG19 start.
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
D:\Dogs_vs_Cats_Redux_Kernels_Edition\model\feature_VGG19_171023.h5
(25000, 512)
25000
(25000, 512)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
2
25000
(224, 224, 3)
[0 0 0 ..., 1 1 1]
Spend time: 9073.476684808731 s


In [6]:
get_features(ResNet50, (224, 224), 1)

ResNet50 start.
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
D:\Dogs_vs_Cats_Redux_Kernels_Edition\model\feature_ResNet50_171023.h5
(25000, 2048)
25000
(25000, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
2
25000
(224, 224, 3)
[0 0 0 ..., 1 1 1]
Spend time: 6894.449019193649 s


In [7]:
get_features(Xception, (299, 299), 1, xception.preprocess_input)

Xception start.
preprocess_input
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
D:\Dogs_vs_Cats_Redux_Kernels_Edition\model\feature_Xception_171023.h5
(25000, 2048)
25000
(25000, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
2
25000
(299, 299, 3)
[0 0 0 ..., 1 1 1]
Spend time: 10703.527572870255 s


In [8]:
get_features(InceptionV3, (299, 299), 1, inception_v3.preprocess_input)

InceptionV3 start.
preprocess_input
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
D:\Dogs_vs_Cats_Redux_Kernels_Edition\model\feature_InceptionV3_171023.h5
(25000, 2048)
25000
(25000, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
2
25000
(299, 299, 3)
[0 0 0 ..., 1 1 1]
Spend time: 8675.532561302185 s


In [9]:
print('Done !')

Done !
