In [20]:
import os
import cv2
import random

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import ticker
%matplotlib inline

from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Convolution2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils

In [21]:
TRAIN_DIR = '/Users/hehongwei/datas/Cat_Dog/train/'

train_images = [TRAIN_DIR + i for i in os.listdir(TRAIN_DIR)]
train_images = train_images[1:]
dogs = [i for i in train_images if 'dog' in i]
cats = [i for i in train_images if 'cat' in i]
# 以上已经提取了dog的路径和cat的路径

In [22]:
print len(dogs)
print len(cats)

12500
12500


提取1000张dog，1000张cat作为训练集，提取1000张dog，1000张cat作为验证集

In [23]:
train_dogs = dogs[0:1000]
train_cats = cats[0:1000]
# 做好验证机数据和标签

V_cross_dogs = dogs[1000:2000]
V_cross_cats = cats[1000:2000]
V_cross_images = V_cross_dogs + V_cross_cats
random.shuffle(V_cross_images)
V_labels = []
for i in V_cross_images:
    if 'cat' in i:
        V_labels.append(0)
    if 'dog' in i:
        V_labels.append(1)
# 上面做好了验证集的数据和标签V_cross_images和V_labels

将2000个训练集数据进行数据提升,提升后的数据保存在/Users/hehongwei/datas/Cat_Dog/preview/

In [24]:
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

for dog in train_dogs:
    img = cv2.imread(dog)
    x = img_to_array(img)
    x = x.reshape((1,) + x.shape)
    i = 0
    for batch in datagen.flow(x, batch_size=1, save_to_dir='/Users/hehongwei/datas/Cat_Dog/preview/', save_prefix='dog', save_format='jpeg'):
        i += 1
        if i > 20:
            break
for cat in train_cats:
    img = cv2.imread(cat)
    x = img_to_array(img)
    x = x.reshape((1,) + x.shape)
    i = 0
    for batch in datagen.flow(x, batch_size=1, save_to_dir='/Users/hehongwei/datas/Cat_Dog/preview/', save_prefix='cat', save_format='jpeg'):
        i += 1
        if i > 20:
            break

对提升后的数据进行训练数据的预处理，做好数据和标签

In [41]:
TRAIN_IMAGES_DIR = '/Users/hehongwei/datas/Cat_Dog/preview/'
train_images = [TRAIN_IMAGES_DIR + i for i in os.listdir(TRAIN_IMAGES_DIR)]
train_images = train_images[1:]
random.shuffle(train_images)
y_labels = []
for i in train_images:
    if 'cat' in i:
        y_labels.append(0)
    if 'dog' in i:
        y_labels.append(1)

建立神经网络模型

In [42]:
model = Sequential()
# 输入数据要处理为3*150*150
model.add(Convolution2D(32, 3, 3, input_shape=(3, 150, 150)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(32, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Convolution2D(64, 3, 3))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

In [43]:
# 处理训练数据
ROWS = 150
COLS = 150
CHANNELS = 3

def read_image(file_path):
    img = cv2.imread(file_path)
    return cv2.resize(img, (ROWS, COLS), interpolation=cv2.INTER_CUBIC)
def prep_data(images):
    count = len(images)
    data = np.ndarray([count, CHANNELS, ROWS, COLS])
    for i, image_file in enumerate(images):
        image = read_image(image_file)
        data[i] = image.T
        if i % 250 == 0:
            print('Processed {} of {}'.format(i, count))
    return data


In [44]:
train_data = prep_data(train_images)

Processed 0 of 17557
Processed 250 of 17557
Processed 500 of 17557
Processed 750 of 17557
Processed 1000 of 17557
Processed 1250 of 17557
Processed 1500 of 17557
Processed 1750 of 17557
Processed 2000 of 17557
Processed 2250 of 17557
Processed 2500 of 17557
Processed 2750 of 17557
Processed 3000 of 17557
Processed 3250 of 17557
Processed 3500 of 17557
Processed 3750 of 17557
Processed 4000 of 17557
Processed 4250 of 17557
Processed 4500 of 17557
Processed 4750 of 17557
Processed 5000 of 17557
Processed 5250 of 17557
Processed 5500 of 17557
Processed 5750 of 17557
Processed 6000 of 17557
Processed 6250 of 17557
Processed 6500 of 17557
Processed 6750 of 17557
Processed 7000 of 17557
Processed 7250 of 17557
Processed 7500 of 17557
Processed 7750 of 17557
Processed 8000 of 17557
Processed 8250 of 17557
Processed 8500 of 17557
Processed 8750 of 17557
Processed 9000 of 17557
Processed 9250 of 17557
Processed 9500 of 17557
Processed 9750 of 17557
Processed 10000 of 17557
Processed 10250 of 17

In [46]:
nb_epoch = 15
batch_size = 16

hist = model.fit(train_data, y_labels, validation_split=0.25,
                 nb_epoch=nb_epoch, batch_size=batch_size, verbose=1)

Train on 13167 samples, validate on 4390 samples
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [47]:
train_scores = model.evaluate(train_data, y_labels, verbose=1)



In [51]:
train_scores

[8.0530805511179118, 0.50037022270489273]

In [48]:
# 处理验证数据
V_cross_data = prep_data(V_cross_images)

Processed 0 of 2000
Processed 250 of 2000
Processed 500 of 2000
Processed 750 of 2000
Processed 1000 of 2000
Processed 1250 of 2000
Processed 1500 of 2000
Processed 1750 of 2000


In [49]:
V_cross_data.shape

(2000, 3, 150, 150)

In [50]:
V_cross_scores = model.evaluate(V_cross_data, V_labels, verbose=1)



In [52]:
V_cross_scores

[8.0590477905273445, 0.5]