## 2. Feature_extraction_MobileNet

### References:
1. https://github.com/ypwhs/dogs_vs_cats
2. https://www.kaggle.com/yangpeiwen/keras-inception-xception-0-47

### Import packages

In [1]:
import h5py
import os
import time

from keras.layers import *
from keras.models import *
from keras.applications import *
from keras.optimizers import *
from keras.regularizers import *
from keras.preprocessing.image import *
from keras.applications.inception_v3 import preprocess_input

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, TensorBoard

Using TensorFlow backend.


In [2]:
def get_features_mobile_net(MODEL, image_size, batch_size=1, lambda_func=None):
    """Extract globally average-pooled features with a pretrained Keras application.

    Builds ``MODEL`` with ImageNet weights and without its classification head,
    appends a ``GlobalAveragePooling2D`` layer, runs the network over the
    train / validation / test image folders under ``<cwd>/input`` and writes the
    feature matrices (plus the train / validation class indices) to
    ``<cwd>/model/feature_<MODEL name>_171023.h5``. Timing and shape
    diagnostics are printed along the way; nothing is returned.

    Args:
        MODEL: Keras application constructor (e.g. ``MobileNet``) that accepts
            ``input_tensor``, ``weights``, ``include_top`` and ``input_shape``.
        image_size: ``(width, height)`` of the images fed to the network.
        batch_size: Number of images per prediction batch.
        lambda_func: Optional preprocessing function wrapped in a ``Lambda``
            layer in front of the network. ``None`` feeds raw pixel values.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got {0}'.format(batch_size))

    print('{0} start.'.format(MODEL.__name__))
    start_time = time.time()

    width, height = image_size[0], image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        print(lambda_func.__name__)
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False, input_shape=(height, width, 3))
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    cwd = os.getcwd()
    data_train_path = os.path.join(cwd, 'input', 'data_train')
    data_val_path = os.path.join(cwd, 'input', 'data_validation')
    data_test_path = os.path.join(cwd, 'input', 'data_test_a')

    gen = ImageDataGenerator()

    def flow(directory):
        # Keras documents target_size as (height, width), matching the Input
        # layer above. shuffle=False keeps the iteration order fixed so the
        # predicted rows are expected to line up with generator.classes.
        return gen.flow_from_directory(directory, (height, width), shuffle=False,
                                       batch_size=batch_size)

    def predict(generator):
        # Ceil-divide so every image is visited exactly once for any batch
        # size, instead of relying on hard-coded dataset sizes.
        steps = (generator.samples + batch_size - 1) // batch_size
        return model.predict_generator(generator, verbose=1, steps=steps)

    train_generator = flow(data_train_path)
    val_generator = flow(data_val_path)
    test_generator = flow(data_test_path)

    train = predict(train_generator)
    val = predict(val_generator)
    test = predict(test_generator)

    model_dir = os.path.join(cwd, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    file_name = os.path.join(model_dir, 'feature_{0}_{1}.h5'.format(MODEL.__name__, 171023))
    print(file_name)
    # Mode 'w' creates the file and truncates any previous copy. The mode must
    # be explicit because h5py 3 opens files read-only by default.
    with h5py.File(file_name, 'w') as h:
        h.create_dataset("train", data=train)
        h.create_dataset("train_label", data=train_generator.classes)
        h.create_dataset("val", data=val)
        h.create_dataset("val_label", data=val_generator.classes)
        h.create_dataset("test", data=test)

    print(train.shape)
    print(len(train_generator.classes))
    print(val.shape)
    print(len(val_generator.classes))
    print(test.shape)

    print(dir(train_generator))
    print(train_generator.num_class)
    print(train_generator.samples)
    print(train_generator.image_shape)
    print(train_generator.classes)

    end_time = time.time()
    print('Spend time: {0} s'.format(end_time-start_time))

In [3]:
# The ImageNet-pretrained MobileNet expects pixels scaled to [-1, 1]. The
# preprocess_input imported from keras.applications.inception_v3 applies that
# scaling; without it the network is fed raw 0-255 values.
# NOTE(review): this changes the extracted feature values, so regenerate the
# .h5 file before reusing it in downstream notebooks.
get_features_mobile_net(MobileNet, (224, 224), 1, preprocess_input)

MobileNet start.
Found 53879 images belonging to 80 classes.
Found 7120 images belonging to 80 classes.
Found 7040 images belonging to 1 classes.
E:\SceneClassification\model\feature_MobileNet_171023.h5
(53879, 1024)
53879
(7120, 1024)
7120
(7040, 1024)
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__next__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_flow_index', 'batch_index', 'batch_size', 'class_indices', 'class_mode', 'classes', 'color_mode', 'data_format', 'directory', 'filenames', 'image_data_generator', 'image_shape', 'index_generator', 'lock', 'n', 'next', 'num_class', 'reset', 'samples', 'save_format', 'save_prefix', 'save_to_dir', 'shuffle', 'target_size', 'total_batches_seen']
80
53879
(224, 224, 3)
[ 0  0  0 ..., 79 79 79]
Spen

In [4]:
# Completion marker: reaching this cell means every cell above ran through.
print("Done !")

Done !
