In [2]:
from keras.applications import InceptionV3, Xception, ResNet50, VGG16
from keras.applications import inception_v3, xception,resnet,vgg16
from keras.models import Model,Input
from keras.layers import Lambda,GlobalAveragePooling2D

def getmodel(model,preprocess_input=None,image_size=(299, 299, 3),output_layer = None,include_top=False):
    """Build a Keras model around an ImageNet-weighted application network.

    Args:
        model: callable that builds the network; it is invoked with the
            keyword arguments ``input_tensor``, ``weights='imagenet'`` and
            ``include_top``.
        preprocess_input: optional callable wrapped in a ``Lambda`` layer and
            applied to the input tensor before it reaches the network.
        image_size: indexable whose first three items give the shape of the
            input tensor.
        output_layer: optional index into the network's ``layers`` list; when
            given, that layer's output becomes the model output instead of
            the network's final output.
        include_top: forwarded unchanged to ``model``.

    Returns:
        A ``Model`` mapping the network's input to the selected output.
    """
    dim0, dim1, dim2 = image_size[0], image_size[1], image_size[2]
    net_input = Input((dim0, dim1, dim2))
    if preprocess_input:
        # Bake the preprocessing into the graph so callers can feed raw images.
        net_input = Lambda(preprocess_input)(net_input)

    backbone = model(input_tensor=net_input, weights='imagenet', include_top=include_top)

    if output_layer is None:
        selected_output = backbone.output
    else:
        selected_output = backbone.layers[output_layer].output
    return Model(backbone.input, selected_output)
def _with_global_average_pooling(feature_model):
    """Return a model that applies GlobalAveragePooling2D to `feature_model`'s output."""
    pooled_output = GlobalAveragePooling2D()(feature_model.output)
    return Model(feature_model.input, pooled_output)


# For each architecture: a variant built with include_top=True, a variant built
# with the default include_top=False, and the latter followed by global average
# pooling. Call .summary() on any of them to inspect the layers.
InceptionV3_top_model = getmodel(InceptionV3, preprocess_input=inception_v3.preprocess_input, include_top=True)
InceptionV3_model = getmodel(InceptionV3, preprocess_input=inception_v3.preprocess_input)
InceptionV3_ave_model = _with_global_average_pooling(InceptionV3_model)

Xception_top_model = getmodel(Xception, preprocess_input=xception.preprocess_input, include_top=True)
Xception_model = getmodel(Xception, preprocess_input=xception.preprocess_input)
Xception_ave_model = _with_global_average_pooling(Xception_model)

ResNet50_top_model = getmodel(ResNet50, preprocess_input=resnet.preprocess_input, include_top=True)
ResNet50_model = getmodel(ResNet50, preprocess_input=resnet.preprocess_input)
ResNet50_ave_model = _with_global_average_pooling(ResNet50_model)

# No include_top=True variant is built for VGG16 (a 224x224 version was tried
# here earlier and is disabled). The feature model takes the output of the
# layer at index 18 instead of the network's final output.
VGG16_model = getmodel(VGG16, preprocess_input=vgg16.preprocess_input, output_layer=18)
VGG16_ave_model = _with_global_average_pooling(VGG16_model)

print('载入模型完成')

Using TensorFlow backend.
W1025 09:26:42.342046  9052 deprecation_wrapper.py:119] From c:\program files\python37\lib\site-packages\keras\backend\tensorflow_backend.py:4070: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W1025 09:26:42.701431  9052 deprecation_wrapper.py:119] From c:\program files\python37\lib\site-packages\keras\backend\tensorflow_backend.py:4074: The name tf.nn.avg_pool is deprecated. Please use tf.nn.avg_pool2d instead.



载入模型完成


In [3]:
import cv2
from keras.preprocessing import image
def load_image(img,image_size=(299, 299)):
    """Load one image, or a collection of images, as a batch array.

    Args:
        img: one of
            * ``str`` - a file path, loaded and resized with Keras
              ``image.load_img`` / ``image.img_to_array``;
            * ``np.ndarray`` - an already decoded image, resized with
              ``cv2.resize``;
            * ``tuple`` / ``list`` - a collection of any of the above
              (nesting allowed); the per-item batches are stacked along
              axis 0.
        image_size: ``(height, width)`` of the returned images.

    Returns:
        ``np.ndarray`` with a leading batch axis.

    Raises:
        TypeError: if ``img`` is none of the supported types (previously the
            function silently returned ``None``).
    """
    if isinstance(img, str):
        loaded = image.load_img(img, target_size=image_size)
        return np.expand_dims(image.img_to_array(loaded), axis=0)
    if isinstance(img, np.ndarray):
        # cv2.resize takes dsize as (width, height), whereas image_size follows
        # the Keras target_size convention of (height, width).
        # NOTE(review): arrays are only resized here - channel order and dtype
        # are left exactly as the caller supplied them.
        resized = cv2.resize(img, (image_size[1], image_size[0]))
        return np.expand_dims(resized, axis=0)
    if isinstance(img, (tuple, list)):
        # Forward image_size so every item is resized to the requested size
        # rather than the default one.
        return np.vstack([load_image(item, image_size) for item in img])
    raise TypeError('load_image expects a path, an ndarray, or a list/tuple of those, got %r'
                    % type(img).__name__)

In [3]:
import os
from glob import glob
import numpy as np
from sklearn.model_selection import train_test_split

def get_train_filenames(_type, train_dir=None):
    """Return the training image paths whose file name starts with `_type`.

    Args:
        _type: file-name prefix to match, e.g. ``'cat'`` or ``'dog'``; files
            are matched against the pattern ``<_type>.*.jpg``.
        train_dir: directory to search. Defaults to ``../data/train``
            relative to the current working directory.

    Returns:
        List of matching paths, in the order produced by ``glob``.
    """
    if train_dir is None:
        # Built with os.path.join so the default works on POSIX as well as on
        # Windows (where it still evaluates to '..\\data\\train').
        train_dir = os.path.join('..', 'data', 'train')
    pattern = os.path.join(train_dir, '%s.*.jpg' % _type)
    return glob(pattern)
def get_test_filenames(test_dir=None):
    """Return the test image paths sorted by their numeric file name.

    Args:
        test_dir: directory to search for ``*.jpg`` files. Defaults to
            ``../data/test`` relative to the current working directory.

    Returns:
        List of paths ordered by the integer value of the file stem
        (``1.jpg``, ``2.jpg``, ..., ``10.jpg``), not lexicographically.

    Raises:
        ValueError: if a matched file has a stem that is not an integer.
    """
    if test_dir is None:
        # Built with os.path.join so the default works on POSIX as well as on
        # Windows (where it still evaluates to '..\\data\\test').
        test_dir = os.path.join('..', 'data', 'test')

    def numeric_stem(path):
        stem = os.path.splitext(os.path.basename(path))[0]
        return int(stem)

    return sorted(glob(os.path.join(test_dir, '*.jpg')), key=numeric_stem)

# Collect the file lists once, then report the size and first five entries of each.
train_cat_filenames = get_train_filenames('cat')
train_dog_filenames = get_train_filenames('dog')
test_filenames = get_test_filenames()

for summary_template, listed_files in (
        ('训练集一共%d张猫图', train_cat_filenames),
        ('训练集一共%d张狗图', train_dog_filenames),
        ('测试集一共%d张图', test_filenames)):
    print(summary_template % len(listed_files), listed_files[:5])

训练集一共12445张猫图 ['..\\data\\train\\cat.0.jpg', '..\\data\\train\\cat.1.jpg', '..\\data\\train\\cat.10.jpg', '..\\data\\train\\cat.100.jpg', '..\\data\\train\\cat.1000.jpg']
训练集一共12469张狗图 ['..\\data\\train\\dog.0.jpg', '..\\data\\train\\dog.1.jpg', '..\\data\\train\\dog.10.jpg', '..\\data\\train\\dog.100.jpg', '..\\data\\train\\dog.1000.jpg']
测试集一共12500张图 ['..\\data\\test\\1.jpg', '..\\data\\test\\2.jpg', '..\\data\\test\\3.jpg', '..\\data\\test\\4.jpg', '..\\data\\test\\5.jpg']


In [4]:
import shutil
from time import time
# Class identifiers accepted as "dog". remove_file_excep compares these against
# the first field of every decode_predictions entry.
# NOTE(review): presumably the ImageNet WordNet synset IDs of the dog breeds -
# verify against the ImageNet class index before relying on completeness.
target_class_dogs = ['n02085620','n02085782','n02085936','n02086079','n02086240','n02086646','n02086910','n02087046','n02087394','n02088094','n02088238',
        'n02088364','n02088466','n02088632','n02089078','n02089867','n02089973','n02090379','n02090622','n02090721','n02091032','n02091134',
        'n02091244','n02091467','n02091635','n02091831','n02092002','n02092339','n02093256','n02093428','n02093647','n02093754','n02093859',
        'n02093991','n02094114','n02094258','n02094433','n02095314','n02095570','n02095889','n02096051','n02096177','n02096294','n02096437',
        'n02096585','n02097047','n02097130','n02097209','n02097298','n02097474','n02097658','n02098105','n02098286','n02098413','n02099267',
        'n02099429','n02099601','n02099712','n02099849','n02100236','n02100583','n02100735','n02100877','n02101006','n02101388','n02101556',
        'n02102040','n02102177','n02102318','n02102480','n02102973','n02104029','n02104365','n02105056','n02105162','n02105251','n02105412',
        'n02105505','n02105641','n02105855','n02106030','n02106166','n02106382','n02106550','n02106662','n02107142','n02107312','n02107574',
        'n02107683','n02107908','n02108000','n02108089','n02108422','n02108551','n02108915','n02109047','n02109525','n02109961','n02110063',
        'n02110185','n02110341','n02110627','n02110806','n02110958','n02111129','n02111277','n02111500','n02111889','n02112018','n02112137',
        'n02112350','n02112706','n02113023','n02113186','n02113624','n02113712','n02113799','n02113978']
# Class identifiers accepted as "cat"; used the same way as target_class_dogs.
# NOTE(review): presumably ImageNet synset IDs of cat classes - confirm.
target_class_cats=['n02123045','n02123159','n02123394','n02123597','n02124075','n02125311','n02127052']

def remove_file_excep(filename,target_classes):
    """Move `filename` out of the training set unless a classifier recognises it.

    The image is classified by InceptionV3_top_model, Xception_top_model and
    ResNet50_top_model in turn. As soon as any decoded prediction of any of
    them carries a class id contained in `target_classes`, the file is kept
    and the function returns. Otherwise the file is moved into the exception
    directory (``../data/exception``), which is created on demand.

    Args:
        filename: path of the image file to check.
        target_classes: container of accepted class ids; compared against the
            first field of each ``decode_predictions`` entry.

    Returns:
        None.
    """
    # os.path.join avoids the invalid '\d' / '\e' escapes of the former string
    # literal and yields the same path on Windows.
    excep_dir = os.path.join('..', 'data', 'exception')
    img = load_image(filename)
    classifiers = ((InceptionV3_top_model, inception_v3),
                   (Xception_top_model, xception),
                   (ResNet50_top_model, resnet))
    for classifier, app_module in classifiers:
        preds = classifier.predict(img)
        # decode_predictions returns one list per input image; there is only one.
        decoded = app_module.decode_predictions(preds)[0]
        if any(entry[0] in target_classes for entry in decoded):
            return
    print('\n移除文件', filename)
    # Without this, os.rename fails when the exception directory is missing.
    os.makedirs(excep_dir, exist_ok=True)
    dst_filename = os.path.join(excep_dir, os.path.split(filename)[1])
    os.rename(filename, dst_filename)

def remove_excep(train_filenames,target_classes):
    """Run remove_file_excep over every file, printing a one-line progress counter.

    Args:
        train_filenames: sequence of image paths to check.
        target_classes: accepted class ids, forwarded to remove_file_excep.
    """
    total = len(train_filenames)
    for position, filename in enumerate(train_filenames, start=1):
        # '\r' with end='' rewrites the same console line on every iteration.
        print('\r训练集异常值处理%d/%d'%(position,total), end='')
        remove_file_excep(filename,target_classes)

# Outlier removal moves files on disk, so it is opt-in: set this flag to True to
# actually run it. With the flag off the cell only re-scans the directories.
RUN_EXCEPTION_REMOVAL = False

t0 = time()
print('开始处理猫异常输出')
train_cat_filenames = get_train_filenames('cat')
if RUN_EXCEPTION_REMOVAL:
    remove_excep(train_cat_filenames,target_class_cats)
print('开始处理狗异常输出')
train_dog_filenames = get_train_filenames('dog')
if RUN_EXCEPTION_REMOVAL:
    remove_excep(train_dog_filenames,target_class_dogs)
print('异常值处理完毕用时%ds' % (time()-t0))

# Re-scan after the clean-up so the lists reflect what is still on disk.
train_cat_filenames = get_train_filenames('cat')
train_dog_filenames = get_train_filenames('dog')
X_test_filenames = get_test_filenames()
print('剔除异常数据后训练集一共%d张猫图'%len(train_cat_filenames), train_cat_filenames[:5])
print('剔除异常数据后训练集一共%d张狗图'%len(train_dog_filenames), train_dog_filenames[:5])
# Report the list computed in this cell rather than `test_filenames` left over
# from an earlier cell.
print('测试集一共%d张图'%len(X_test_filenames), X_test_filenames[:5])

开始处理猫异常输出
开始处理狗异常输出
异常值处理完毕用时0s
剔除异常数据后训练集一共12445张猫图 ['..\\data\\train\\cat.0.jpg', '..\\data\\train\\cat.1.jpg', '..\\data\\train\\cat.10.jpg', '..\\data\\train\\cat.100.jpg', '..\\data\\train\\cat.1000.jpg']
剔除异常数据后训练集一共12469张狗图 ['..\\data\\train\\dog.0.jpg', '..\\data\\train\\dog.1.jpg', '..\\data\\train\\dog.10.jpg', '..\\data\\train\\dog.100.jpg', '..\\data\\train\\dog.1000.jpg']
测试集一共12500张图 ['..\\data\\test\\1.jpg', '..\\data\\test\\2.jpg', '..\\data\\test\\3.jpg', '..\\data\\test\\4.jpg', '..\\data\\test\\5.jpg']


In [7]:
# Extract features and save them to an HDF5 file
import math
import numpy as np
import h5py
from time import time

def preprocess_save(x_data,name, model,image_size=(299, 299),
                    h5_path="preprocess.hdf5", chunk_size=1000, batch_size=10):
    """Run `model` over the images in `x_data` and cache the result in an HDF5 file.

    The work is done in chunks of `chunk_size` images. Each finished chunk is
    stored as the dataset ``<name>_<chunk index>``, so an interrupted run
    resumes with the first missing chunk. Once all chunks exist they are
    stacked into the dataset `name`, and only then are the chunk datasets
    deleted. If `name` already exists in the file, nothing is computed.

    Args:
        x_data: sequence of inputs accepted by ``load_image`` (the callers in
            this notebook pass lists of file paths).
        name: name of the final dataset inside the HDF5 file.
        model: object whose ``predict`` method maps an image batch to features.
        image_size: size forwarded to ``load_image``.
        h5_path: path of the HDF5 cache file (opened in append mode).
        chunk_size: number of images per cached chunk. Chunks cached by an
            earlier, interrupted run are reused as-is, so keep this value and
            `x_data` unchanged when resuming.
        batch_size: number of images per ``model.predict`` call.

    Returns:
        None.

    Raises:
        ValueError: if `x_data` is empty and `name` is not cached yet.
    """
    def preprocess(images, model,image_size=(299, 299),  show_log=False):
        """Return the stacked `model.predict` outputs for `images`."""
        n_batches = math.ceil(len(images)/batch_size)
        outputs = []
        for i in range(n_batches):
            batch_image = images[i*batch_size:(i+1)*batch_size]
            batch_data = np.vstack([load_image(img,image_size) for img in batch_image])
            outputs.append(model.predict(batch_data))
            if show_log:
                print('\r%d/%d'%(i + 1, n_batches),end='')
        if show_log:
            print('\r', end='')
        return np.vstack(outputs)

    with h5py.File(h5_path, "a") as f:
        if name in f.keys():
            print('%s已存在' % name)
            return

    total = len(x_data)
    if total == 0:
        # Fail early with a clear message instead of an obscure np.vstack error.
        raise ValueError('preprocess_save: x_data is empty, nothing to extract for %r' % name)

    n_chunks = math.ceil(total/chunk_size)
    chunk_names = ['%s_%d' % (name,i) for i in range(n_chunks)]

    for i, name_temp in enumerate(chunk_names):
        with h5py.File(h5_path, "a") as f:
            if name_temp in f.keys():
                # Chunk already cached by a previous run.
                continue
            t0 = time()
            X_temp = preprocess(x_data[i*chunk_size:(i+1)*chunk_size],
                                model=model,
                                image_size=image_size,
                                show_log=True)
            f.create_dataset(name_temp,data=X_temp)
            # min() keeps the counter from overshooting on the final, shorter chunk.
            print('%s_%d处理完成%d/%d用时%ds'%(name, i, min((i+1)*chunk_size, total), total, time()-t0),
                  X_temp.shape)

    with h5py.File(h5_path, "a") as f:
        X_merge = np.vstack([np.array(f[name_temp]) for name_temp in chunk_names])
        f.create_dataset(name,data=X_merge)
        # Delete the chunks only after the merged dataset exists, so a failure
        # during merging cannot destroy the cached work.
        for name_temp in chunk_names:
            del f[name_temp]
    print('%s处理完成'%name)



print('开始预处理')

# The four pooled feature extractors, in the order their datasets are written.
ave_models = (InceptionV3_ave_model, Xception_ave_model, ResNet50_ave_model, VGG16_ave_model)

# One entry per image set: (file list, dataset name per extractor, completion message).
extraction_jobs = (
    (train_cat_filenames,
     ('X_train_cat_InceptionV3', 'X_train_cat_Xception', 'X_train_cat_ResNet50', 'X_train_cat_VGG16'),
     'cat处理完成用时%ds'),
    (train_dog_filenames,
     ('X_train_dog_InceptionV3', 'X_train_dog_Xception', 'X_train_dog_ResNet50', 'X_train_dog_VGG16'),
     'dog处理完成用时%ds'),
    (X_test_filenames,
     ('X_test_InceptionV3', 'X_test_Xception', 'X_test_ResNet50', 'X_test_VGG16'),
     '测试集处理完成用时%ds'),
)

for job_filenames, dataset_names, done_message in extraction_jobs:
    t0 = time()
    for dataset_name, ave_model in zip(dataset_names, ave_models):
        preprocess_save(job_filenames, dataset_name, model=ave_model)
    print(done_message % (time() - t0))




开始预处理
X_train_cat_InceptionV3已存在
X_train_cat_Xception已存在
X_train_cat_ResNet50已存在
X_train_cat_VGG16已存在
cat处理完成用时0s
X_train_dog_InceptionV3已存在
X_train_dog_Xception已存在
X_train_dog_ResNet50已存在
X_train_dog_VGG16已存在
dog处理完成用时0s
X_test_InceptionV3已存在
X_test_Xception已存在
X_test_ResNet50已存在
X_test_VGG16已存在
测试集处理完成用时0s
