### 文件操作

In [17]:
import os
import shutil 
import random
 
# Locations of the raw data, relative to the notebook's working directory.
TRAIN_PATH = "./train/"
TEST_PATH = "./test/"
# SAMPLE_PATH = "sample/"
# Per-class destination folders populated from TRAIN_PATH by the copy loop below.
CAT_PATH = "cat/"
DOG_PATH = "dog/"
# Destination folder that receives a copy of every file found in TRAIN_PATH.
CLEAN_TRAIN = "clean_train/"

def checkDir(dir_path):
    """Ensure that the directory ``dir_path`` exists.

    Missing parent directories are created as well. Calling this for a
    directory that already exists is a no-op.

    Parameters
    ----------
    dir_path : str
        Path of the directory to create.

    Raises
    ------
    FileExistsError
        If ``dir_path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate exists()/makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(dir_path, exist_ok=True)
        
# Make sure every destination folder exists before anything is copied.
for target_dir in (CAT_PATH, DOG_PATH, CLEAN_TRAIN):
    checkDir(target_dir)

# Fan the raw training files out into the per-class folders and, in addition,
# copy every file into CLEAN_TRAIN.
train_files = os.listdir(TRAIN_PATH)
for file in train_files:
    src = os.path.join(TRAIN_PATH, file)
    # os.listdir() also returns sub-directories; shutil.copy() raises on those,
    # so anything that is not a regular file is skipped.
    if not os.path.isfile(src):
        continue
    # The class is taken from the file name ('dog' / 'cat' substring).
    if 'dog' in file:
        shutil.copy(src, os.path.join(DOG_PATH, file))
    if 'cat' in file:
        shutil.copy(src, os.path.join(CAT_PATH, file))
    shutil.copy(src, os.path.join(CLEAN_TRAIN, file))

In [12]:
import cv2
import glob
import time
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Model
from keras.layers import Input, Flatten, Dropout, Dense
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
from sklearn.utils import shuffle
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions

## 数据预处理

In [13]:
def get_train_data1(imgDir, size=224, random_state=None):
    """Load, resize and preprocess training images and derive binary labels.

    The file list is shuffled first, so the returned arrays are in shuffled
    order (images and labels stay aligned).

    Parameters
    ----------
    imgDir : sequence of str
        Paths of the image files to load.
    size : int, optional
        Every image is resized to ``size`` x ``size`` pixels.
    random_state : int, RandomState instance or None, optional
        Forwarded to ``sklearn.utils.shuffle``; pass a fixed value to get a
        reproducible order. ``None`` keeps the previous (unseeded) behaviour.

    Returns
    -------
    train_x : numpy.ndarray
        float32 array of shape ``(len(imgDir), size, size, 3)`` holding the
        output of ``preprocess_input`` for each image.
    train_y : numpy.ndarray
        uint8 array of shape ``(len(imgDir),)``; 1 where the file name
        contains ``'dog'``, otherwise 0.
    """
    # Local import so the function does not depend on which cell imported os.
    import os

    imgDir = shuffle(imgDir, random_state=random_state)
    n_images = len(imgDir)
    train_x = np.zeros((n_images, size, size, 3), dtype=np.float32)
    train_y = np.zeros(n_images, dtype=np.uint8)
    for index, file in enumerate(imgDir):
        img = image.load_img(file, target_size=(size, size))
        # preprocess_input is fed a batch of one image; [0] drops the batch axis.
        x = np.expand_dims(image.img_to_array(img), axis=0)
        train_x[index] = preprocess_input(x)[0]
        # Look at the file name only: matching against the full path would
        # label every image as a dog when a parent folder is called e.g. "dog/".
        if 'dog' in os.path.basename(file):
            train_y[index] = 1
    return train_x, train_y

In [14]:
# Label convention: cat -> 0, dog -> 1.
# Preprocess every JPEG under train_all/ and report the wall-clock time taken.
print('Proprecessing begin !')
t = time.time()
train_dir = glob.glob('train_all/*.jpg')
train_x, train_y = get_train_data1(imgDir=train_dir)
t2 = time.time()
elapsed_seconds = round(t2 - t, 2)
print(elapsed_seconds, 'seconds to preprocessing !')

Proprecessing begin !
202.73 seconds to preprocessing !


### ----------------------------------------------------------------------------------------------------------------------------------###

### 使用预处理后的数据训练模型(Dropout 0.5)

In [15]:
from keras.optimizers import SGD
from keras.applications.resnet50 import ResNet50
# Fine-tune an ImageNet-initialised ResNet50 (without its top layers) with a
# small binary classification head that uses 0.5 dropout.
opt = SGD(momentum=0.9, lr=0.0001)
myinput = Input(shape=(224, 224, 3))
base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=myinput)

# Head: flatten -> 1024-unit ReLU -> dropout, followed by one sigmoid unit.
head_layers = [Flatten(), Dense(1024, activation='relu'), Dropout(0.5)]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = Dense(1, activation='sigmoid')(x)

# This is the model that gets trained.
model = Model(myinput, predictions)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Checkpoint to model_best.h5 during training (save_best_only=True); the state
# after the final epoch is written to model_10.h5.
best_model = ModelCheckpoint('model_best.h5', save_best_only=True, verbose=1)
model.fit(
    train_x,
    train_y,
    batch_size=16,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
    callbacks=[best_model],
)
model.save('model_10.h5')

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
  319488/94653016 [..............................] - ETA: 6:28:36

KeyboardInterrupt: 

### 使用预处理后的数据训练模型(Dropout 0.75)

In [None]:
from keras.optimizers import SGD
from keras.applications.resnet50 import ResNet50
# Same architecture as the 0.5-dropout experiment, but the head uses 0.75 dropout.
opt = SGD(momentum=0.9, lr=0.0001)
myinput = Input(shape=(224, 224, 3))
base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=myinput)

# Head: flatten -> 1024-unit ReLU -> dropout, followed by one sigmoid unit.
head_layers = [Flatten(), Dense(1024, activation='relu'), Dropout(0.75)]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = Dense(1, activation='sigmoid')(x)

# This is the model that gets trained.
model = Model(myinput, predictions)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Checkpoint to model_best.h5 during training (save_best_only=True); the state
# after the final epoch is written to model_10.h5.
best_model = ModelCheckpoint('model_best.h5', save_best_only=True, verbose=1)
model.fit(
    train_x,
    train_y,
    batch_size=16,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
    callbacks=[best_model],
)
model.save('model_10.h5')

### ------------------------------------------------------------------------------------------------------###
### 清理数据

In [15]:
import os
# Delete every image in cat/ for which the saved model_best.h5 outputs a score
# above 0.5 (i.e. it is scored as the dog class, label 1).
size = 224
model_test = load_model('model_best.h5')
cat_dir = glob.glob('cat/*.jpg')
for file_name in cat_dir:
    img = image.load_img(file_name, target_size=(size, size))
    # Build a batch of one preprocessed image for predict().
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    if not (model_test.predict(x) > 0.5):
        continue
    # Log the file before removing it so the deletions can be audited.
    print(file_name)
    os.remove(file_name)

100%|██████████| 12500/12500 [01:31<00:00, 135.90it/s]


In [11]:
import os
# Delete every image in dog/ for which the saved model_best.h5 outputs a score
# below 0.5 (i.e. it is scored as the cat class, label 0).
size = 224
model_test = load_model('model_best.h5')
dog_dir = glob.glob('dog/*.jpg')
for file_name in dog_dir:
    img = image.load_img(file_name, target_size=(size, size))
    # Build a batch of one preprocessed image for predict().
    x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))
    if not (model_test.predict(x) < 0.5):
        continue
    # Log the file before removing it so the deletions can be audited.
    print(file_name)
    os.remove(file_name)

### ------------------------------------------------------------------------------------------------------------------####
### 75% 的Dropout 重新训练

In [None]:
# Label convention: cat -> 0, dog -> 1.
print('Proprecessing begin !')
t=time.time()
# The cleaning cells delete suspicious images from cat/ and dog/ only, while
# clean_train/ is an untouched copy of the whole training set. Reading
# clean_train/ here would retrain on exactly the same, uncleaned data, so the
# cleaned per-class folders are used instead.
# NOTE(review): if clean_train/ is pruned by some step outside this notebook,
# switch this back to glob.glob('clean_train/*.jpg').
train_dir = glob.glob('cat/*.jpg') + glob.glob('dog/*.jpg')
train_x, train_y = get_train_data1(train_dir)
t2 = time.time()
print(round(t2 - t, 2), 'seconds to preprocessing !')

In [None]:
from keras.optimizers import SGD
from keras.applications.resnet50 import ResNet50
# Retrain from ImageNet weights with the 0.75-dropout head, using whatever
# train_x / train_y the preceding preprocessing cell produced.
opt = SGD(momentum=0.9, lr=0.0001)
myinput = Input(shape=(224, 224, 3))
base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=myinput)

# Head: flatten -> 1024-unit ReLU -> dropout, followed by one sigmoid unit.
head_layers = [Flatten(), Dense(1024, activation='relu'), Dropout(0.75)]
x = base_model.output
for head_layer in head_layers:
    x = head_layer(x)
predictions = Dense(1, activation='sigmoid')(x)

# This is the model that gets trained.
model = Model(myinput, predictions)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Checkpoint to model_best.h5 during training (save_best_only=True); the state
# after the final epoch is written to model_10.h5.
best_model = ModelCheckpoint('model_best.h5', save_best_only=True, verbose=1)
model.fit(
    train_x,
    train_y,
    batch_size=16,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
    callbacks=[best_model],
)
model.save('model_10.h5')

### ------------------------------------------------------------------------------------------------------ ###
### Drawing model


In [None]:
# Render the architecture of the saved model_best.h5 to model_drawing.png,
# with tensor shapes shown on each layer.
from keras.models import load_model
from keras.utils.vis_utils import plot_model

model = load_model('model_best.h5')
plot_model(model, show_shapes=True, to_file='model_drawing.png')

### ------------------------------------------------------------------------------------------------------ ###
### Testing ###

In [None]:
import os
# Load every test image into one array, placed at row (id - 1) so that the
# row order matches the numeric ids used in the submission file.
size = 224
test_dir = glob.glob('test/*.jpg')
# 12500 rows are allocated up front.
# NOTE(review): presumably the number of test images, with ids 1..12500 - confirm.
mytest = np.zeros((12500, size, size, 3), dtype=np.float32)
for file_name in test_dir:
    # The id is parsed from the file name itself ("<id>.jpg"). The previous
    # fixed slice file_name[6:-4] was off by one for the 5-character "test/"
    # prefix: it dropped the first digit of the id and raised ValueError for
    # single-digit ids.
    image_id = int(os.path.splitext(os.path.basename(file_name))[0])
    index = image_id - 1
    img = image.load_img(file_name, target_size=(size, size))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    # preprocess_input is fed a batch of one image; [0] drops the batch axis.
    mytest[index] = preprocess_input(x)[0]

### ------------------------------------------------------------------------------------------------------ ###
### 写入文件

In [None]:
## predict
# Load the model before touching the output file, so a failed load does not
# leave a header-only file3.csv behind.
model_test = load_model('model_best.h5')

# Score all test images in one batched predict() call instead of one call per
# image; ravel() turns the (N, 1) sigmoid output into a flat sequence.
test_scores = model_test.predict(mytest[:len(test_dir)], batch_size=32).ravel()

# Write header and rows with a single open(); ids are 1-based and follow the
# row order of mytest.
with open('file3.csv', 'w') as f:
    f.write('id,label\n')
    for i, predict in enumerate(test_scores):
        f.write('{},{}\n'.format(i + 1, predict))