In [1]:
import os
import h5py
import random
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import iglob
import matplotlib.pyplot as plt
from keras.preprocessing import image                

# Seed Python's global `random` generator with a fixed value for reproducibility.
random.seed(9527)

Using TensorFlow backend.


1、加载数据

In [2]:
def load_dataset(path):
    """Collect labelled image records for every file matching a glob pattern.

    File names are expected to look like ``<label>.<id>.<ext>``
    (for example ``dog.446.jpg``).

    Args:
        path: Glob pattern handed to ``iglob`` (e.g. ``'./all/train/*'``).

    Returns:
        A list of records ``[label, id, ext, ..., file_path]`` where ``label``
        is 1 when the first name field is ``'dog'`` and 0 for anything else,
        ``id`` is the second name field converted to ``int``, and
        ``file_path`` is the path yielded by ``iglob``. Records are ordered by
        id, then label, then path.

    Raises:
        IndexError: If a matched file name contains no ``'.'``.
        ValueError: If the second name field is not an integer.
    """
    image_list = []
    # The loop variable is deliberately not called `image`, which would shadow
    # the `keras.preprocessing.image` module imported at the top of the file.
    for image_path in iglob(path):
        fields = os.path.basename(image_path).split('.')
        fields[0] = 1 if fields[0] == 'dog' else 0
        fields[1] = int(fields[1])
        fields.append(image_path)
        image_list.append(fields)
    # Sorting on the id alone leaves cat.N / dog.N tied, so their relative
    # order would depend on the filesystem-specific order of iglob. A total
    # order keeps a later seeded shuffle reproducible across machines.
    image_list.sort(key=lambda record: (record[1], record[0], record[-1]))
    return image_list

In [3]:
train_data = load_dataset('./all/train/*')
# Shuffle with a dedicated generator seeded with the same value as the global
# random.seed(9527) call at the top of the notebook. On a fresh run this gives
# the same permutation as random.shuffle, but re-running this cell no longer
# depends on how much of the global RNG state has already been consumed.
random.Random(9527).shuffle(train_data)
# np.array coerces the mixed int/str records to a single string dtype; the
# labels are converted back to integers when they are extracted.
train_data = np.array(train_data)
train_data[:5]

array([['1', '446', 'jpg', './all/train\\dog.446.jpg'],
       ['0', '4840', 'jpg', './all/train\\cat.4840.jpg'],
       ['0', '8467', 'jpg', './all/train\\cat.8467.jpg'],
       ['1', '4930', 'jpg', './all/train\\dog.4930.jpg'],
       ['1', '7202', 'jpg', './all/train\\dog.7202.jpg']], dtype='<U25')

In [4]:
# Column 3 of each record is the file path, column 0 the label (held as a
# string inside the array, hence the cast back to int).
train_files = train_data[:, 3]
train_targets = train_data[:, 0].astype(int)
train_files[:5], train_targets[:5]

(array(['./all/train\\dog.446.jpg', './all/train\\cat.4840.jpg',
        './all/train\\cat.8467.jpg', './all/train\\dog.4930.jpg',
        './all/train\\dog.7202.jpg'], dtype='<U25'), array([1, 0, 0, 1, 1]))

2、数据预处理

In [5]:
def path_to_tensor(img_path):
    """Load a single image file as a batch of size one.

    The file is opened with ``image.load_img`` at a target size of 299x299,
    converted to an array with ``image.img_to_array`` and given a new leading
    axis of length 1.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The image array with one extra leading dimension, i.e.
        ``(1, 299, 299, 3)`` for an RGB image.
    """
    # Load the image through PIL, resized to 299x299.
    pil_img = image.load_img(img_path, target_size=(299, 299))
    # PIL image -> 3-D array (299, 299, 3).
    pixels = image.img_to_array(pil_img)
    # Prepend the batch axis: (299, 299, 3) -> (1, 299, 299, 3).
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Load many images into one stacked array, one row per path.

    Each path is loaded with ``path_to_tensor`` and written straight into a
    single preallocated output array. Collecting every tensor in a list and
    stacking afterwards would keep two full copies of the data alive at the
    same time, which is prohibitive for tens of thousands of 299x299 images.

    Args:
        img_paths: Iterable of image file paths.

    Returns:
        Array of shape ``(len(img_paths), ...)`` whose trailing dimensions and
        dtype are those of the first loaded tensor.

    Raises:
        ValueError: If ``img_paths`` is empty, or if a later image does not
            match the shape of the first one.
    """
    img_paths = list(img_paths)
    if not img_paths:
        raise ValueError("paths_to_tensor() needs at least one image path")

    batch = None
    for idx, img_path in enumerate(tqdm(img_paths)):
        # path_to_tensor returns a batch of one: shape (1, H, W, C).
        tensor = path_to_tensor(img_path)
        if batch is None:
            # Allocate the full output once the per-image shape is known.
            batch = np.empty((len(img_paths),) + tensor.shape[1:],
                             dtype=tensor.dtype)
        batch[idx] = tensor[0]
    return batch

In [6]:
from PIL import ImageFile
from keras.applications.xception import Xception, preprocess_input
# Allow PIL to load image files that are truncated instead of raising an error.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Load every training image into one array and pass it through Xception's
# preprocess_input, which takes care of the normalization.
train_tensors = preprocess_input(paths_to_tensor(train_files))

100%|████████████████████████████████████████████████████████████████████████████| 25000/25000 [05:21<00:00, 77.69it/s]


In [7]:
# Sanity check: one 299x299 three-channel tensor per training image is expected.
train_tensors.shape

(25000, 299, 299, 3)

In [8]:
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Input, Dropout
from keras import backend as K
import h5py

# Pretrained Xception backbone (ImageNet weights) without its classification head.
base_model = Xception(include_top=False, weights='imagenet')

# Collapse the backbone's output feature maps with global average pooling.
x = GlobalAveragePooling2D()(base_model.output)

# Feature extractor: image batch in, pooled feature vectors out.
model = Model(inputs=base_model.input, outputs=x)

In [9]:
# Run all preprocessed images through the feature-extraction model
# (Xception backbone + global average pooling) to get their feature vectors.
train = model.predict(train_tensors)

In [10]:
# Persist the extracted features together with their labels in one HDF5 file;
# mode 'w' replaces any existing file of the same name.
with h5py.File("features_Xception.h5", 'w') as features_file:
    features_file.create_dataset("train", data=train)
    features_file.create_dataset("label", data=train_targets)