### 使用模型集合

* ResNet50
* Inception V3
* Xception
* InceptionResNetV2
* DenseNet201
* MobileNet（下文代码中未实际提取该模型的特征）

### 使用数据增强

```
       gen = ImageDataGenerator(rotation_range=5, height_shift_range=0.05, horizontal_flip=True,
                           shear_range=0.1, channel_shift_range=10, width_shift_range=0.1)
```

### 使用 BatchNormalization

#### 参考：[猫狗大战](https://github.com/ypwhs/dogs_vs_cats)

#### 预处理数据
由于之前已经把数据分成了 dogs 文件夹和 cats 文件夹，这里就可以使用 Keras 中的 ImageDataGenerator 来自动处理训练数据：

In [1]:
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

from tqdm import *
import h5py
import cv2
import random
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

Using TensorFlow backend.


In [2]:
# Number of images per batch used by the feature-extraction generators.
batch_size = 10

In [3]:
def write_preTrain(MODEL, image_size, lambda_func=None):
    """Extract pooled features with a pretrained network and cache them in HDF5.

    Builds ``MODEL`` (ImageNet weights, no classification top) followed by
    global average pooling, runs it over the images found in the ``train3``,
    ``valid3`` and ``test3`` directories, and writes the results to
    ``pre_<MODEL.__name__>s.h5`` with the datasets ``train``, ``valid``,
    ``test``, ``train_label`` and ``valid_label``.

    Args:
        MODEL: Callable that builds the base network; it is called with
            ``input_tensor``, ``weights='imagenet'`` and ``include_top=False``.
        image_size: ``(width, height)`` of the network input.
        lambda_func: Optional preprocessing function applied to the input
            tensor through a ``Lambda`` layer before the base network.

    Notes:
        Reads the module-level ``batch_size``.
        NOTE(review): the same augmenting generator is used for the
        validation and test images, and no seed is set, so the cached
        features differ from run to run -- confirm this is intended.
    """
    width, height = image_size[0], image_size[1]
    input_tensor = Input((height, width, 3))
    x = input_tensor
    if lambda_func:
        x = Lambda(lambda_func)(x)
    base_model = MODEL(input_tensor=x, weights='imagenet', include_top=False)
    model = Model(base_model.input, GlobalAveragePooling2D()(base_model.output))

    gen = ImageDataGenerator(rotation_range=5, height_shift_range=0.05, horizontal_flip=True,
                             shear_range=0.1, channel_shift_range=10, width_shift_range=0.1)

    # flow_from_directory expects (height, width); passing image_size unchanged
    # would disagree with the Input layer above for non-square sizes.
    target_size = (height, width)
    train_generator = gen.flow_from_directory("train3", target_size, shuffle=False,
                                              batch_size=batch_size)
    valid_generator = gen.flow_from_directory("valid3", target_size, shuffle=False,
                                              batch_size=batch_size)
    test_generator = gen.flow_from_directory("test3", target_size, shuffle=False,
                                             batch_size=batch_size, class_mode=None)

    def _extract_features(generator):
        # Round the step count up so a final partial batch is not dropped;
        # otherwise the feature rows would no longer line up with
        # generator.classes when samples is not a multiple of batch_size.
        steps = (generator.samples + batch_size - 1) // batch_size
        return model.predict_generator(generator, steps=steps, verbose=1)

    train = _extract_features(train_generator)
    valid = _extract_features(valid_generator)
    test = _extract_features(test_generator)

    # Open explicitly for writing: recent h5py versions default to read-only,
    # and "w" also lets the cell be re-run without "dataset already exists".
    with h5py.File("pre_{}s.h5".format(MODEL.__name__), "w") as h:
        h.create_dataset("train", data=train)
        h.create_dataset("valid", data=valid)
        h.create_dataset("test", data=test)
        h.create_dataset("train_label", data=train_generator.classes)
        h.create_dataset("valid_label", data=valid_generator.classes)

In [10]:
# Cache ResNet50 features at 224x224. Like every other extraction call in this
# notebook, pass the network's own preprocessing; the ImageNet weights were
# not trained on raw 0-255 RGB pixels.
write_preTrain(ResNet50, (224, 224), resnet50.preprocess_input)

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [12]:
# Cache Xception features at 299x299 using Xception's own preprocessing.
write_preTrain(
    MODEL=Xception,
    image_size=(299, 299),
    lambda_func=xception.preprocess_input,
)

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [13]:
# Cache InceptionV3 features at 299x299 using InceptionV3's own preprocessing.
write_preTrain(
    MODEL=InceptionV3,
    image_size=(299, 299),
    lambda_func=inception_v3.preprocess_input,
)

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [4]:
# Cache DenseNet201 features at 299x299 using DenseNet's own preprocessing.
write_preTrain(
    MODEL=DenseNet201,
    image_size=(299, 299),
    lambda_func=densenet.preprocess_input,
)

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [5]:
# Cache InceptionResNetV2 features at 299x299 using its own preprocessing.
write_preTrain(
    MODEL=InceptionResNetV2,
    image_size=(299, 299),
    lambda_func=inception_resnet_v2.preprocess_input,
)

Found 22500 images belonging to 2 classes.
Found 2500 images belonging to 2 classes.
Found 12500 images belonging to 1 classes.


In [15]:
from sklearn.utils import shuffle

# Fixed seed so the shuffled training order is the same on every run.
SHUFFLE_SEED = 2017

# Cached feature files, one per base network.
feature_files = ["pre_ResNet50s.h5", "pre_Xceptions.h5", "pre_InceptionV3s.h5",
                 "pre_InceptionResNetV2s.h5", "pre_DenseNet201s.h5"]

X_train = []
X_valid = []
X_test = []
y_train = None
y_valid = None

for filename in feature_files:
    with h5py.File(filename, 'r') as h:
        X_train.append(np.array(h['train']))
        X_valid.append(np.array(h['valid']))
        X_test.append(np.array(h['test']))
        file_train_labels = np.array(h['train_label'])
        file_valid_labels = np.array(h['valid_label'])

    # Features are concatenated column-wise below, so every file must describe
    # the same samples in the same order. Verify instead of silently keeping
    # whichever labels were read last.
    if y_train is None:
        y_train, y_valid = file_train_labels, file_valid_labels
    elif not (np.array_equal(y_train, file_train_labels)
              and np.array_equal(y_valid, file_valid_labels)):
        raise ValueError("labels in {} do not match the previously loaded files".format(filename))

# Stack the per-network feature vectors side by side: one row per image.
X_train = np.concatenate(X_train, axis=1)
X_valid = np.concatenate(X_valid, axis=1)
X_test = np.concatenate(X_test, axis=1)

# Shuffle features and labels together (training set only).
X_train, y_train = shuffle(X_train, y_train, random_state=SHUFFLE_SEED)

In [16]:
# Sanity check: show the shape of the concatenated training feature matrix.
X_train.shape

(22500, 9600)

In [17]:
from keras.callbacks import TensorBoard

# Classifier head on top of the cached features:
# batch normalization followed by a single sigmoid unit.
input_tensor = Input(shape=X_train.shape[1:])
# Optional regularization, currently disabled: x = Dropout(0.5)(x)
x = BatchNormalization(axis=1)(input_tensor)
x = Dense(1, activation='sigmoid')(x)
model = Model(input_tensor, x)

In [18]:
# Binary classification setup: cross-entropy loss, Adadelta optimizer,
# accuracy reported during training.
model.compile(
    loss='binary_crossentropy',
    optimizer='adadelta',
    metrics=['accuracy'],
)

In [20]:
# Log training curves to ./logs for TensorBoard.
tensorBoard = TensorBoard(log_dir='./logs')

# Train the head for 8 epochs, validating on the held-out split each epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    epochs=8,
    batch_size=128,
    callbacks=[tensorBoard],
)

Train on 22500 samples, validate on 2500 samples
Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f9acbb98358>

In [21]:
# Persist the trained ensemble head to disk.
model.save(filepath='Ensemble_model.h5')

In [22]:
# Predict on the test features, then keep every probability
# inside [0.005, 0.995].
y_pred = np.clip(model.predict(X_test, verbose=1), 0.005, 0.995)



In [29]:
import os

import pandas as pd
from keras.preprocessing.image import *

df = pd.read_csv("sample_submission.csv")

# The generator is used here only to list the test files in the same order
# in which their features were extracted; no images are read from it.
image_size = (224, 224)
gen = ImageDataGenerator(rotation_range=5, height_shift_range=0.05, horizontal_flip=True,
                         shear_range=0.1, channel_shift_range=10, width_shift_range=0.1)
test_generator = gen.flow_from_directory("test3", image_size, shuffle=False,
                                         batch_size=10, class_mode=None)

# Flatten the predictions so each file gets a plain scalar instead of a
# length-1 array.
probabilities = np.asarray(y_pred).reshape(-1)
if len(probabilities) != len(test_generator.filenames):
    raise ValueError("got {} predictions for {} test files".format(
        len(probabilities), len(test_generator.filenames)))

# Make sure the column can hold fractional values before filling it.
df['label'] = df['label'].astype(float)

for i, fname in enumerate(test_generator.filenames):
    # File names look like "<subdir>/<id>.<ext>"; use os.path so the parsing
    # does not depend on the path separator.
    index = int(os.path.splitext(os.path.basename(fname))[0])
    # DataFrame.set_value was removed from pandas; .at is the replacement
    # for setting a single cell by row label and column name.
    df.at[index - 1, 'label'] = probabilities[i]

df.to_csv('pred.csv', index=False)
df.head(10)

Found 12500 images belonging to 1 classes.


Unnamed: 0,id,label
0,1,0.995
1,2,0.995
2,3,0.995
3,4,0.995
4,5,0.005
5,6,0.005
6,7,0.005
7,8,0.005
8,9,0.005
9,10,0.005
