### 导出深度特征
- 首先我们导出VGG16,VGG19,ResNet50,Xception以及InceptionV3的深度特征
- VGG16、VGG19、ResNet50 要求的输入图片大小为 (224, 224)
- Xception、InceptionV3 要求的输入图片大小为 (299, 299)
- 导出的特征保存为 bottleneck_features/ 目录下的 h5 文件，每个模型对应一个文件

In [24]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

import time
import h5py

# Root directories handed to ImageDataGenerator.flow_from_directory() below.
# The training directory is read with the default class mode (labelled images),
# the test directory with class_mode=None (unlabelled images).
train_data_path = 'data/train/'
test_data_path = 'data/test/'

def save_bottleneck_features(MODEL, image_size, module_name, preprocess):
    """Export globally-average-pooled features of a pretrained network to HDF5.

    The network is built without its classification head, every image under
    ``train_data_path`` and ``test_data_path`` is pushed through it, and the
    pooled activations are written to
    ``bottleneck_features/<module_name>_bottleneck_features.h5`` as the
    datasets ``train``, ``test`` and ``label``.

    Args:
        MODEL: Keras application constructor (e.g. ``VGG16``) accepting
            ``input_tensor``, ``weights`` and ``include_top``.
        image_size: ``(height, width)`` the images are resized to; passed
            unchanged to ``flow_from_directory`` as its target size.
        module_name: Name used in the output file name and the timing message.
        preprocess: Preprocessing function matching ``MODEL``; it is wrapped in
            a ``Lambda`` layer so it runs as part of the model.
    """
    # Local import keeps this cell self-contained without touching the import block.
    import os

    start_time = time.time()

    # The generators below yield batches shaped (height, width, 3) following
    # image_size, so the input tensor must use the same axis order. The
    # original swapped the two, which only worked because every size was square.
    height = image_size[0]
    width = image_size[1]
    input_tensor = Input((height, width, 3))
    x = Lambda(preprocess)(input_tensor)

    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    # shuffle=False keeps the feature rows aligned with generator.classes and
    # generator.filenames.
    gen = ImageDataGenerator()
    train_generator = gen.flow_from_directory(train_data_path, image_size, shuffle=False, batch_size=32)
    test_generator = gen.flow_from_directory(test_data_path, image_size, shuffle=False, batch_size=32, class_mode=None)

    train = model.predict_generator(train_generator)
    test = model.predict_generator(test_generator)

    # Make sure the target directory exists, and open the file in "w" mode:
    # without an explicit mode h5py either opens read-only (h5py >= 3) or
    # appends (older versions), and both make create_dataset fail on a re-run.
    os.makedirs("bottleneck_features", exist_ok=True)
    with h5py.File("bottleneck_features/{}_bottleneck_features.h5".format(module_name), "w") as h:
        h.create_dataset("train", data=train)
        h.create_dataset("test", data=test)
        h.create_dataset("label", data=train_generator.classes)

    end_time = time.time()

    print("{} extrac features total consumed: {} seconds".format(module_name, end_time - start_time))
    

# One entry per pretrained backbone, processed in order:
# (constructor, input size, output name, preprocessing function).
_EXTRACTION_JOBS = [
    (VGG16, (224, 224), 'VGG16', vgg16.preprocess_input),
    (VGG19, (224, 224), 'VGG19', vgg19.preprocess_input),
    (ResNet50, (224, 224), 'ResNet50', resnet50.preprocess_input),
    (InceptionV3, (299, 299), 'InceptionV3', inception_v3.preprocess_input),
    (Xception, (299, 299), 'Xception', xception.preprocess_input),
]

for _job in _EXTRACTION_JOBS:
    save_bottleneck_features(*_job)

Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
VGG16 extrac features total consumed: 196.73024725914001 seconds
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
VGG19 extrac features total consumed: 228.9459409713745 seconds
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
ResNet50 extrac features total consumed: 211.36610960960388 seconds
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
InceptionV3 extrac features total consumed: 273.7162392139435 seconds
Found 25000 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.
Xception extrac features total consumed: 416.25546979904175 seconds


In [12]:
import h5py
import numpy as np
from sklearn.utils import shuffle
from keras.models import *
from keras.layers import *

# Fixed seed so the shuffle below, and therefore the train/validation split,
# is the same on every run.
SEED = 2017

X_train = []
X_test = []

# Feature files written by save_bottleneck_features(), one per backbone, named
# "bottleneck_features/<name>_bottleneck_features.h5". List several files here
# to concatenate the features of several networks along the feature axis.
for filename in ["bottleneck_features/VGG16_bottleneck_features.h5"]:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_test.append(np.array(h['test']))
        # Every file stores the same labels, so keeping the last copy is enough.
        y_train = np.array(h['label'])

X_train = np.concatenate(X_train, axis=1)
X_test = np.concatenate(X_test, axis=1)

# The features were extracted with shuffle=False, i.e. in directory order, and
# Keras takes validation_split from the *last* rows before any shuffling.
# Without this shuffle the validation set would not be a representative sample.
X_train, y_train = shuffle(X_train, y_train, random_state=SEED)

print(X_train.shape[1:])

# Small classification head on top of the frozen features:
# dropout followed by a single sigmoid unit.
input_tensor = Input(X_train.shape[1:])
x = Dropout(0.5)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, x)

model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['accuracy'])


model.fit(X_train, y_train, batch_size=128, epochs=8, validation_split=0.2)

(512,)
Train on 20000 samples, validate on 5000 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x2267a6e82e8>

In [13]:
import os

y_pred = model.predict(X_test, verbose=1)
# Keep predictions away from exactly 0 and 1
# (presumably to bound the penalty of a confident wrong answer - confirm against the scoring metric).
y_pred = y_pred.clip(min=0.005, max=0.995)

import pandas as pd
from keras.preprocessing.image import *

df = pd.read_csv("data/sample_submission.csv")

# The generator is created only for its file list: with shuffle=False the
# order of .filenames matches the order the test features were extracted in.
gen = ImageDataGenerator()
test_generator = gen.flow_from_directory(test_data_path, (224, 224), shuffle=False, batch_size=16, class_mode=None)

# One probability per test image, flattened so each entry is a plain scalar.
probabilities = y_pred.ravel()

for i, fname in enumerate(test_generator.filenames):
    # File names look like "<subdir>/<id>.<ext>". Normalise the separator first
    # so the id is parsed correctly on both Windows and POSIX paths.
    stem = os.path.splitext(os.path.basename(fname.replace('\\', '/')))[0]
    index = int(stem)
    # Row (id - 1) of the sample submission holds image <id>. DataFrame.at
    # replaces set_value, which was removed from pandas.
    df.at[index - 1, 'label'] = float(probabilities[i])

df.to_csv('data/pred.csv', index=False)
df.head(10)

Found 12500 images belonging to 1 classes.


  


Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005


In [22]:
# Scratch check of the timing and message format used by save_bottleneck_features().
start_time = time.time()

end_time = time.time()

# Elapsed time is end minus start; the operands were reversed before, which
# produced a zero or negative duration.
duration = end_time - start_time

print(float(duration))

print("{} extrac features total consumed: {} seconds".format('123', duration))

0.0
123 extrac features total consumed: 0.0 seconds
