In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
from tqdm import tqdm
from random import shuffle
import numpy as np
import os
import cv2 
from keras.callbacks import ModelCheckpoint

IMG_SIZE = 150
TRAIN_DIR = '/home/hai/Desktop/Cat&Dog/all/train'
TEST_DIR = '/home/hai/Desktop/Cat&Dog/all/test'


train_data_dir = '/home/hai/Desktop/Cat&Dog/all/train/'
nb_train_samples = 2000
epochs = 50
batch_size = 16

input_shape = (IMG_SIZE, IMG_SIZE, 1)

os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]= '0'


Using TensorFlow backend.


In [2]:
def label_img(img):
    word_label = img.split('.')[-3]
    if word_label == 'cat' : return [1,0]
    elif word_label == 'dog' : return [0,1]

In [3]:
def create_train_data():
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        label = label_img(img)
        path = os.path.join(TRAIN_DIR,img)
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        training_data.append([np.array(img), np.array(label)])
    shuffle(training_data)
    #np.save('train_data.npy', training_data)
    return training_data

In [4]:
def process_test_data():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR,img)
        img_num = img.split('.')[0]
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE,IMG_SIZE))
        testing_data.append([np.array(img), img_num])
    shuffle(testing_data)
    #np.save('test_data.npy', testing_data)
    return testing_data

In [5]:
train_data = create_train_data()

100%|██████████| 25000/25000 [00:20<00:00, 1195.21it/s]


In [6]:
model = Sequential()
model.add(Conv2D(32, (3,3),input_shape = input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

model.add(Conv2D(64, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(2))
model.add(Activation('sigmoid'))

model.compile(loss = 'binary_crossentropy', optimizer ='rmsprop', metrics = ['accuracy'] )

In [7]:
train = train_data[:-1000]
test = train_data[-1000:]

X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]

In [None]:
filepath="weights-improvement-{epoch:02d}-{val_acc:.2f}.hdf5"
callbacks = [ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')]

model.fit(x = np.array(X), y = np.array(Y), batch_size = batch_size, epochs = epochs,callbacks = callbacks, validation_data = (np.array(test_x), np.array(test_y)))


Train on 24000 samples, validate on 1000 samples
Epoch 1/50

Epoch 00001: val_acc improved from -inf to 0.46700, saving model to weights-improvement-01-0.47.hdf5
Epoch 2/50

Epoch 00002: val_acc did not improve from 0.46700
Epoch 3/50

Epoch 00003: val_acc did not improve from 0.46700
Epoch 4/50

Epoch 00004: val_acc did not improve from 0.46700
Epoch 5/50

Epoch 00005: val_acc did not improve from 0.46700
Epoch 6/50

Epoch 00006: val_acc improved from 0.46700 to 0.63050, saving model to weights-improvement-06-0.63.hdf5
Epoch 7/50

Epoch 00007: val_acc improved from 0.63050 to 0.76200, saving model to weights-improvement-07-0.76.hdf5
Epoch 8/50

Epoch 00008: val_acc did not improve from 0.76200
Epoch 9/50

Epoch 00009: val_acc did not improve from 0.76200
Epoch 10/50

Epoch 00010: val_acc improved from 0.76200 to 0.79250, saving model to weights-improvement-10-0.79.hdf5
Epoch 11/50

Epoch 00011: val_acc improved from 0.79250 to 0.82300, saving model to weights-improvement-11-0.82.hdf5


In [None]:
test_data = process_test_data()
with open('submission_file.csv','w') as f:
    f.write('id,label\n')
            
with open('submission_file.csv','a') as f:
    for data in tqdm(test_data):
        img_num = data[1]
        img_data = data[0]
        orig = img_data
        data = img_data.reshape(IMG_SIZE,IMG_SIZE,1)
        model_out = model.predict([data])[0]
        f.write('{},{}\n'.format(img_num,model_out[1]))