## 2. Feature_extraction_from_VGG16_to_InceptionV3

### References:
1. https://github.com/ypwhs/dogs_vs_cats
2. https://www.kaggle.com/yangpeiwen/keras-inception-xception-0-47

### Import pkgs

In [11]:
import h5py
import os
import time

from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.preprocessing.image import *
from keras.applications.inception_v3 import preprocess_input

In [15]:
def get_features(MODEL, image_size, batch_size=1, lambda_func=None):
    print('{0} start.'.format(MODEL.__name__))
    start_time = time.time()
    
    width = image_size[0]
    height = image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        print(lambda_func.__name__)
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    cwd = os.getcwd()
    data_train_path = os.path.join(cwd, 'input', 'data_train')
    data_val_path = os.path.join(cwd, 'input', 'data_validation')
    data_test_path  = os.path.join(cwd, 'input', 'data_test_a')
    
    gen = ImageDataGenerator()
#     gen = ImageDataGenerator(zoom_range = 0.1,
#                             height_shift_range = 0.1,
#                             width_shift_range = 0.1,
#                             rotation_range = 10)
    train_generator = gen.flow_from_directory(data_train_path, image_size, shuffle=False, 
                                              batch_size=batch_size)
    val_generator  = gen.flow_from_directory(data_val_path,  image_size, shuffle=False, 
                                              batch_size=batch_size)
    test_generator  = gen.flow_from_directory(data_test_path,  image_size, shuffle=False, 
                                              batch_size=batch_size)
    
#     train = model.predict_generator(train_generator, verbose=1, steps=53879)
#     val = model.predict_generator(val_generator, verbose=1, steps=7120)
#     test = model.predict_generator(test_generator, verbose=1, steps=7040)
    train = model.predict_generator(train_generator, verbose=1, steps=10)
    val = model.predict_generator(val_generator, verbose=1, steps=10)
    test = model.predict_generator(test_generator, verbose=1, steps=10)
    
    print('filenames:' + str(len(val_generator.filenames)))
    print(val_generator.filenames[0:10])
    print('filenames:' + str(len(test_generator.filenames)))
    print(test_generator.filenames[0:10])
    
    file_name = os.path.join(cwd, 'model', 'feature_{0}_{1}.h5'.format(MODEL.__name__, 171028))
    print(file_name)
    if os.path.exists(file_name):
        os.remove(file_name)
    with h5py.File(file_name) as h:
        h.create_dataset("train", data=train)
        h.create_dataset("train_label", data=train_generator.classes)
        h.create_dataset("val", data=val)
        h.create_dataset("val_label", data=val_generator.classes)
        h.create_dataset("test", data=test)
        
    print(train.shape)
    print(len(train_generator.classes))
    print(val.shape)
    print(len(val_generator.classes))
    print(test.shape)
    
    print(dir(train_generator))
    print(train_generator.num_class)
    print(train_generator.samples)
    print(train_generator.image_shape)
    print(train_generator.classes)
    
    end_time = time.time()
    print('Spend time: {0} s'.format(end_time-start_time))

In [16]:
%pdb off

Automatic pdb calling has been turned OFF


In [17]:
get_features(VGG16, (224, 224), 1)

VGG16 start.
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
['class-00\\003ee16dff735e8dc261846d033dc45905a795ea.jpg', 'class-00\\01de277ad1ce84c069f153a790a10613a33aa3a8.jpg', 'class-00\\029376109220f6ce852db458f9523122491bfe2d.jpg', 'class-00\\029fa920f9c33949784b94dfd73fb160b052124d.jpg', 'class-00\\060cbff33fb752f1e66ab4154e36b18e98261454.jpg', 'class-00\\0775702a8eb2124557f26c7dd2616e1f4dbe5fd1.jpg', 'class-00\\0aaa421855022b32398e4362cdfa1f0a63dcaa8a.jpg', 'class-00\\0b541228e84314d2fa5452797a9e1d1598a12768.jpg', 'class-00\\0cc687a5bccabb8a6541f5abfd72a0fabe61863c.jpg', 'class-00\\0ff0c7ccafae369a9c1d8e3cec8c519e01a2d4f1.jpg']
filenames:7040
['test\\00002ff812f48a3df27c321d517a6300ed8da0c3.jpg', 'test\\00049a860dca2af378faeb0ee6f435c6063818ef.jpg', 'test\\0011a9c9216c3763ffc33641a8ffc975127dc404.jpg', 'test\\0045a44cacc7bc9826db9b54d2dcd70b810250f9.jpg', 'test\\004b6823145471c6a4ce292e864909fde2d049

In [5]:
get_features(VGG19, (224, 224), 1)

VGG19 start.
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
(53879, 512)
53879
(7120, 512)
7120
(7040, 512)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
80
53879
(224, 224, 3)
[ 0  0  0 ..., 79 79 79]
Spend time: 1151.726350069046 s


In [6]:
get_features(ResNet50, (224, 224), 1)

ResNet50 start.
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
E:\SceneClassification\model\feature_ResNet50_171023.h5
(53879, 2048)
53879
(7120, 2048)
7120
(7040, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
80
53879
(224, 224, 3)
[ 0  0  0 ..., 79 79 79]
Spend 

In [7]:
get_features(Xception, (299, 299), 1, xception.preprocess_input)

Xception start.
preprocess_input
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
E:\SceneClassification\model\feature_Xception_171023.h5
(53879, 2048)
53879
(7120, 2048)
7120
(7040, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
80
53879
(299, 299, 3)
[ 0  0  0 ...,

In [8]:
get_features(InceptionV3, (299, 299), 1, inception_v3.preprocess_input)

InceptionV3 start.
preprocess_input
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
E:\SceneClassification\model\feature_InceptionV3_171023.h5
(53879, 2048)
53879
(7120, 2048)
7120
(7040, 2048)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
80
53879
(299, 299, 3)
[ 0  0  

In [9]:
print('Done !')

Done !
