## 获取数据

下载数据集，项目数据集来自Kaggle，[Dogs vs. Cats Redux](https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data)

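使用kaggle-api下载（例如 `kaggle competitions download -c dogs-vs-cats-redux-kernels-edition`），并把 `train.zip`、`test.zip` 放在当前目录；下面的单元只负责解压：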

In [None]:
import os
import shutil

# Extract each Kaggle archive into the working directory unless its folder is
# already present.  shutil.unpack_archive replaces the former `unzip` shell
# call so the cell does not depend on an external unzip binary and a failed
# extraction raises instead of being silently ignored.
for dataset in ('train', 'test'):
    archive = dataset + '.zip'
    if os.path.isdir(dataset):
        continue
    if os.path.isfile(archive):
        shutil.unpack_archive(archive)
    else:
        print('FILES ({}) NOT FOUND, DOWNLOAD FIRST'.format(archive))

In [None]:
import numpy as np

# Split the training file names by the class encoded in the part of the name
# before the first dot ("cat" / "dog").  Names are gathered in plain lists and
# converted to arrays once; np.append inside the loop copied the whole array
# for every file.
cats = []
dogs = []

for file in os.listdir('train'):
    prefix = file.split(sep='.')[0]
    if prefix == 'cat':
        cats.append(file)
    elif prefix == 'dog':
        dogs.append(file)
    # Any other entry (e.g. a hidden/system file) is skipped instead of being
    # counted as a dog.

cats = np.array(cats)
dogs = np.array(dogs)

# Labels: 0 for cats, 1 for dogs.
cat_lable = np.zeros(len(cats))
dog_lable = np.ones(len(dogs))

### 划分验证集

In [None]:
# train_test_split is only imported in a later cell of this notebook; import it
# here as well so the cells can be run top to bottom.
from sklearn.model_selection import train_test_split

# Hold out 20% of each class for validation (fixed seed for reproducibility).
dogs_train, dogs_valid, dog_lable_train, dog_lable_valid = train_test_split(
    dogs, dog_lable, test_size=0.2, random_state=10, shuffle=True)

# The cat split uses the cat labels and its own result names; it previously
# reused dog_lable and overwrote dog_lable_train / dog_lable_valid.
cats_train, cats_valid, cat_lable_train, cat_lable_valid = train_test_split(
    cats, cat_lable, test_size=0.2, random_state=10, shuffle=True)

In [None]:
from sklearn.model_selection import train_test_split
import numpy as np

def _reset_dir(root, subdirs=()):
    """Delete ``root`` if it exists, then recreate it empty with ``subdirs``.

    Args:
        root: Directory to (re)create.
        subdirs: Names of the sub-directories to create directly under root.
    """
    if os.path.exists(root):
        shutil.rmtree(root)
    os.mkdir(root)
    for sub in subdirs:
        os.mkdir(os.path.join(root, sub))


# Start from empty output folders on every run.  'test2' is now recreated after
# being removed; previously an existing 'test2' was deleted and left missing.
_reset_dir('train2', ('cat', 'dog'))
_reset_dir('valid', ('cat', 'dog'))
_reset_dir('test2')

def link_image(image_name, train_valid, dog_or_cats):
    """Create symlinks to images in ``train/`` inside a per-class folder.

    Args:
        image_name: Iterable of file names located in the ``train`` directory.
        train_valid: ``'T'`` links into ``train2/``; any other value links
            into ``valid/``.
        dog_or_cats: ``'CAT'`` links into the ``cat`` sub-folder; any other
            value links into the ``dog`` sub-folder.

    Raises:
        OSError: Propagated from ``os.symlink`` (e.g. the link already exists
            or the destination folder is missing).
    """
    split_dir = 'train2' if train_valid == 'T' else 'valid'
    class_dir = 'cat' if dog_or_cats == 'CAT' else 'dog'

    for file in image_name:
        # A relative symlink target is resolved against the directory that
        # contains the link, so 'train/<file>' placed under train2/cat/ would
        # dangle.  Point the link at the absolute path of the image instead.
        source = os.path.abspath(os.path.join('train', file))
        os.symlink(source, os.path.join(split_dir, class_dir, file))

# Link every split of every class: flag 'T' selects the training folder,
# 'V' the validation folder.
for names, split_flag, class_flag in (
        (dogs_train, 'T', 'DOG'),
        (dogs_valid, 'V', 'DOG'),
        (cats_train, 'T', 'CAT'),
        (cats_valid, 'V', 'CAT'),
):
    link_image(names, split_flag, class_flag)

In [None]:
# for windows
!tree
#for linux
#!tree -d

print('\n')
print('statistics:')
print('totol train pictures  :{}'.format(len(os.listdir('train'))))
print('totol test  pictures  :{}'.format(len(os.listdir('test'))))
print('train      set:  cats :{}'.format(len(os.listdir(r'train2/cat'))))
print('train      set:  dogs :{}'.format(len(os.listdir(r'train2/dog'))))
print('validation set:  cats :{}'.format(len(os.listdir(r'valid/cat'))))
print('validation set:  dogs :{}'.format(len(os.listdir(r'valid/dog'))))

## 探索数据

In [None]:
import os
import cv2
import random
import matplotlib.pyplot as plt

%matplotlib inline

# Display one randomly chosen dog and one randomly chosen cat side by side.
dog_path = r'train/' + random.choice(dogs)
cat_path = r'train/' + random.choice(cats)

# cv2.imread returns pixels in BGR order while matplotlib's imshow expects
# RGB; convert after resizing so the pictures are shown in their true colours.
dog_pict = cv2.cvtColor(cv2.resize(cv2.imread(dog_path), (200, 200)), cv2.COLOR_BGR2RGB)
cat_pict = cv2.cvtColor(cv2.resize(cv2.imread(cat_path), (200, 200)), cv2.COLOR_BGR2RGB)

plt.figure(figsize=(10, 5), dpi=90)
p1 = plt.subplot(1, 2, 1)
p2 = plt.subplot(1, 2, 2)
p1.set_title("random dog")
p2.set_title("random cat")
p1.imshow(dog_pict)
p2.imshow(cat_pict)
p1.axis('off')
p2.axis('off')
plt.show()

## 模型

In [None]:
import tensorflow as tf
from keras.models import *
from keras.layers import *
from keras.applications import *
from keras.preprocessing.image import *

# Former inline sketch of the two network inputs (kept for reference):
#   input1 = Input(shape=(299, 299, 3))
#   input2 = Input(shape=(224, 224, 3))
#   input_set=[input1,input2]

# One spec per backbone: 'name' is the model constructor, 'shape' the input
# height/width, 'preprocess' an optional function applied to the input tensor.
# NOTE(review): the three 224x224 backbones use no preprocessing function even
# though they load ImageNet weights — confirm this is intended.
model_ResNet50 = dict(name=ResNet50, shape=(224, 224), preprocess=None)
model_VGG16 = dict(name=VGG16, shape=(224, 224), preprocess=None)
model_VGG19 = dict(name=VGG19, shape=(224, 224), preprocess=None)
model_InceptionV3 = dict(name=InceptionV3, shape=(299, 299),
                         preprocess=inception_v3.preprocess_input)
model_Xception = dict(name=Xception, shape=(299, 299),
                      preprocess=xception.preprocess_input)

def import_model(model_input, model_dict):
    """Instantiate a frozen, headless ImageNet backbone on ``model_input``.

    Args:
        model_input: Tensor fed to the backbone.
        model_dict: Spec with a ``'name'`` entry (the model constructor) and a
            ``'preprocess'`` entry (a function wrapped in a ``Lambda`` layer
            in front of the backbone, or a falsy value for none).

    Returns:
        The constructed model with every layer marked non-trainable.
    """
    preprocess = model_dict['preprocess']
    tensor = Lambda(preprocess)(model_input) if preprocess else model_input

    backbone = model_dict['name'](input_tensor=tensor,
                                  weights='imagenet',
                                  include_top=False)
    # Freeze the pretrained weights.
    for layer in backbone.layers:
        layer.trainable = False
    return backbone

def model_concatenate(input_set, model_set):
    """Build the classification head on top of one or more frozen backbones.

    Each backbone's output is globally average-pooled; the pooled vectors are
    concatenated (when there is more than one), passed through 50% dropout and
    a single sigmoid unit.

    Args:
        input_set: List of input tensors; the first element is used for the
            Xception/InceptionV3 specs and the last element for every other
            spec.
        model_set: Non-empty list of model spec dicts accepted by
            ``import_model``.

    Returns:
        The output tensor of the final ``Dense(1, activation='sigmoid')`` layer.

    Raises:
        ValueError: If ``model_set`` is empty.
    """
    if not model_set:
        raise ValueError('model_set must contain at least one model')

    # With two inputs this is the same pair as before ([0] and [1]).  With a
    # single input (only one input size in use) both names refer to it;
    # indexing [1] used to raise IndexError in that case.
    input1 = input_set[0]
    input2 = input_set[-1]

    mid_out = []
    for model_dict in model_set:
        if model_dict in [model_Xception, model_InceptionV3]:
            base_model = import_model(input1, model_dict)
        else:
            base_model = import_model(input2, model_dict)
        mid_out.append(GlobalAveragePooling2D()(base_model.output))

    # Concatenate needs at least two tensors, so a single backbone is used as is.
    if len(mid_out) > 1:
        x = Concatenate(axis=-1)(mid_out)
    else:
        x = mid_out[0]

    x = Dropout(0.5)(x)
    return Dense(1, activation='sigmoid')(x)

def input_define(mask):
    """Create the network input tensors selected by ``mask``.

    Args:
        mask: Two-element sequence; ``mask[0] == 1`` requests the 299x299x3
            input and ``mask[1] == 1`` requests the 224x224x3 input.

    Returns:
        List of the requested ``Input`` tensors, 299x299 first.
    """
    shapes = ((299, 299, 3), (224, 224, 3))
    return [Input(shape=shape)
            for position, shape in enumerate(shapes)
            if mask[position] == 1]


def img_load_mask_transfer(input_type, mask, enhance=False,
                           base_dir=r'E:\project-py\dog_cat'):
    """Create directory image iterators for the input sizes selected by ``mask``.

    Args:
        input_type: ``'train'``, ``'valid'`` or ``'test'``; selects the
            ``train2``, ``valid`` or ``test`` folder under ``base_dir``.
        mask: Two-element sequence; ``mask[0] == 1`` requests a 299x299
            iterator and ``mask[1] == 1`` a 224x224 iterator.
        enhance: When true and ``input_type`` is ``'train'``, the generators
            are created with augmentation options; otherwise with defaults.
        base_dir: Folder that contains the dataset folders.  Defaults to the
            location that was previously hard-coded.

    Returns:
        A ``zip`` over both iterators when both sizes are requested, otherwise
        a list holding the single iterator (or an empty list).

    Raises:
        ValueError: If ``input_type`` is not one of the supported names.
    """
    sub_dirs = {'train': 'train2', 'valid': 'valid', 'test': 'test'}
    if input_type not in sub_dirs:
        raise ValueError(
            "input_type must be 'train', 'valid' or 'test', got {!r}".format(input_type))
    load_direct = os.path.join(base_dir, sub_dirs[input_type])

    # Augmentation applies to training data only.  Starting from an empty dict
    # keeps data_gen_args defined when enhance is set for 'valid'/'test',
    # which previously raised UnboundLocalError.
    data_gen_args = {}
    if enhance and input_type == 'train':
        # NOTE(review): no ImageDataGenerator.fit call is visible here for the
        # featurewise_* options — confirm whether one is needed.
        data_gen_args = dict(featurewise_center=True,
                             featurewise_std_normalization=True,
                             rotation_range=90.,
                             width_shift_range=0.1,
                             height_shift_range=0.1,
                             horizontal_flip=True,
                             vertical_flip=True,
                             zoom_range=0.2)

    # One iterator per requested size; the identical seed and batch size are
    # the same for both iterators.
    masked_load = []
    for flag, target_size in zip(mask, ((299, 299), (224, 224))):
        if flag != 1:
            continue
        generator = ImageDataGenerator(**data_gen_args)
        masked_load.append(generator.flow_from_directory(directory=load_direct,
                                                         class_mode='binary',
                                                         target_size=target_size,
                                                         batch_size=20,
                                                         seed=1))

    # Two iterators are paired batch-by-batch; otherwise the list is returned.
    if len(masked_load) > 1:
        return zip(*masked_load)
    return masked_load

## 构建基础模型

In [None]:
# Backbones that make up the ensemble.
model_set = [model_ResNet50, model_Xception]

##   model_mask   used for mask input(train,valid,test)
# Slot 0 is set when an Xception/InceptionV3 spec is present, slot 1 when any
# other spec is present.
large_input_models = [model_Xception, model_InceptionV3]
model_mask = [0, 0]
for chosen_model in model_set:
    slot = 0 if chosen_model in large_input_models else 1
    model_mask[slot] = 1

print(model_mask)

In [None]:
# Create the inputs selected by the mask, wire the backbones and head onto
# them, then wrap everything in a Model and compile it.
input_set = input_define(model_mask)
print(input_set)

out = model_concatenate(input_set, model_set)
model = Model(input_set, out)

compile_options = dict(optimizer='adadelta',
                       loss='binary_crossentropy',
                       metrics=['accuracy'])
model.compile(**compile_options)

In [None]:
# One loader per dataset split; only the training loader asks for augmentation.
train_load = img_load_mask_transfer('train', model_mask, enhance=True)
print(train_load)
#print(train_load[0].__dict__)

valid_load = img_load_mask_transfer('valid', model_mask, enhance=False)
#print(valid_load)

test_load = img_load_mask_transfer('test', model_mask, enhance=False)
#print(test_load)

In [None]:
# NOTE(review): X_train / y_train are not defined in any cell visible here; the
# prepared data is exposed through train_load / valid_load — confirm which
# inputs this call is meant to use.
# `epochs` is the Keras 2 name of the argument formerly spelled `nb_epoch`.
model.fit(X_train, y_train, batch_size=128, epochs=8, validation_split=0.2)

In [None]:
# Print the header once, then one "index name" row per layer (the header used
# to be repeated before every row).
print('Model name: {}'.format(model_set))
print('  seq        layer_name')
for i, layer in enumerate(model.layers):
    print(i, layer.name)

In [None]:
from IPython.display import SVG

try:
    # Keras 2 module path; plot_model is bound to the old name `plot` so both
    # branches provide the same two names.
    from keras.utils.vis_utils import model_to_dot, plot_model as plot
except ImportError:
    # Keras 1 module path.
    from keras.utils.visualize_util import model_to_dot, plot

# Render the model graph (with tensor shapes) as an inline SVG.
SVG(model_to_dot(model, show_shapes=True).create(prog='dot', format='svg'))