In [1]:
import cv2
import numpy as np
import os
from random import shuffle
from tqdm import tqdm

# Dataset locations. The defaults are the original author's local paths; set the
# DOGS_VS_CATS_TRAIN_DIR / DOGS_VS_CATS_TEST_DIR environment variables to point
# the notebook at another copy of the data without editing this cell.
TRAIN_DIR = os.environ.get(
    'DOGS_VS_CATS_TRAIN_DIR',
    'C:\\Users\\Matias\\Kaggle_Data\\dogs_vs_cats\\train',
)
TEST_DIR = os.environ.get(
    'DOGS_VS_CATS_TEST_DIR',
    'C:\\Users\\Matias\\Kaggle_Data\\dogs_vs_cats\\test',
)
IMG_SIZE = 50  # every image is resized to IMG_SIZE x IMG_SIZE pixels
LR = 1e-3      # learning rate handed to the optimizer

# Used both as the checkpoint name for model.save/model.load and as the run id.
MODEL_NAME = 'dogsvscats-{}-{}-model'.format(LR, '2conv-basic')


In [2]:
def label_img(img):
    """Return the one-hot label encoded in a training image's file name.

    The class is read from the third dot-separated field counted from the end
    of the file name, i.e. names are assumed to look like ``cat.0.jpg``.

    Args:
        img: File name of a training image, or a path to one. Any directory
            part is ignored so that a full path is labelled the same way as
            the bare file name.

    Returns:
        ``[1, 0]`` if the class field is ``'cat'``, otherwise ``[0, 1]``.

    Raises:
        IndexError: If the file name has fewer than three dot-separated fields.
    """
    fields = os.path.basename(img).split('.')
    return [1, 0] if fields[-3] == 'cat' else [0, 1]

In [3]:
def create_train_data():
    """Build, shuffle and cache the labelled training set.

    Every file in ``TRAIN_DIR`` is read as a grayscale image, resized to
    ``IMG_SIZE`` x ``IMG_SIZE`` and paired with the one-hot label produced by
    ``label_img``. Files that OpenCV cannot decode are skipped. The shuffled
    result is also written to ``train_data.npy`` in the working directory.

    Returns:
        list: ``[image, label]`` pairs, both elements being NumPy arrays.
    """
    training_data = []
    for filename in tqdm(os.listdir(TRAIN_DIR)):
        pixels = cv2.imread(os.path.join(TRAIN_DIR, filename), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread signals an unreadable/non-image file by returning None
            # rather than raising; skip it instead of crashing in cv2.resize.
            continue
        label = label_img(filename)
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])
    shuffle(training_data)
    # Each pair mixes an image with a 2-element label, so the data is ragged.
    # NumPy only stores that as an explicit object array; letting np.save infer
    # the dtype from the raw list fails on current NumPy versions.
    np.save('train_data.npy', np.array(training_data, dtype=object))

    return training_data
        

In [4]:
def process_test_data():
    """Build and cache the unlabelled test set.

    Every file in ``TEST_DIR`` is read as a grayscale image and resized to
    ``IMG_SIZE`` x ``IMG_SIZE``. Files that OpenCV cannot decode are skipped.
    The result is also written to ``test_data.npy`` in the working directory.

    Returns:
        list: ``[image, img_num]`` pairs, where ``image`` is a NumPy array and
        ``img_num`` is the part of the file name before the first dot.
    """
    testing_data = []
    for filename in tqdm(os.listdir(TEST_DIR)):
        pixels = cv2.imread(os.path.join(TEST_DIR, filename), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread signals an unreadable/non-image file by returning None
            # rather than raising; skip it instead of crashing in cv2.resize.
            continue
        img_num = filename.split('.')[0]
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    # Each pair mixes an image with a string id, so the data is ragged and must
    # be stored as an explicit object array.
    np.save('test_data.npy', np.array(testing_data, dtype=object))

    return testing_data


# Backward-compatible alias: this function used to be defined under this name
# even though it produces the test set and is called as process_test_data().
process_train_data = process_test_data
        

In [7]:
# Building the training set takes several minutes, so reuse the cached copy
# written by create_train_data() when it exists. Delete train_data.npy to force
# a rebuild, e.g. after changing IMG_SIZE.
if os.path.exists('train_data.npy'):
    # The cache is an object array, which np.load refuses to read unless
    # allow_pickle=True. Only load a cache file you created yourself.
    train_data = np.load('train_data.npy', allow_pickle=True)
else:
    train_data = create_train_data()



100%|██████████| 25000/25000 [05:27<00:00, 76.39it/s]


In [10]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression
import tensorflow as tf

# Start from an empty graph so that re-running this cell does not stack a second
# copy of the network on top of the previous one.
tf.reset_default_graph()

# Input: batches of IMG_SIZE x IMG_SIZE single-channel (grayscale) images.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

convnet = conv_2d(convnet, 32, 2, activation='relu')
convnet = max_pool_2d(convnet, 2)

convnet = conv_2d(convnet, 64, 2, activation='relu')
convnet = max_pool_2d(convnet, 2)

convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# Two output units, one per class, to match the two-element one-hot labels
# ([1, 0] = cat, [0, 1] = not a cat).
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir='log')


AttributeError: module 'pandas' has no attribute '_libs'

In [None]:
# Resume from a previously saved model when its .meta file is present on disk.
checkpoint_meta = '{}.meta'.format(MODEL_NAME)
if os.path.exists(checkpoint_meta):
    model.load(MODEL_NAME)
    print('model loaded')

In [None]:
# Hold out the last 500 samples (used as the validation set during fitting);
# everything before them is used for training.
train, test = train_data[:-500], train_data[-500:]

In [None]:
# Stack the images into a (N, IMG_SIZE, IMG_SIZE, 1) array matching the shape
# declared on the 'input' layer; the labels stay a list of one-hot vectors.
X = np.array([i[0] for i in train]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
Y = [i[1] for i in train]

test_x = np.array([i[0] for i in test]).reshape(-1, IMG_SIZE, IMG_SIZE, 1)
test_y = [i[1] for i in test]

model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=3,  # tflearn's keyword is `n_epoch`; `n_epochs` raises TypeError
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=500,
    show_metric=True,
    run_id=MODEL_NAME
)



In [None]:
# To inspect the training curves, run the following in a terminal (not in the notebook):
# tensorboard --logdir=foo:C:\Users\Matias\Kaggle\KaggleDogsvsCats\log

In [None]:
# Write the trained model to disk under MODEL_NAME so that a later session can
# restore it with model.load(MODEL_NAME) instead of retraining.
model.save(MODEL_NAME)

In [None]:
import matplotlib.pyplot as plt

# Load the preprocessed test images, building the cache on first use.
try:
    # The cache is an object array, which np.load refuses to read unless
    # allow_pickle=True. Only load a cache file you created yourself.
    test_data = np.load('test_data.npy', allow_pickle=True)
except FileNotFoundError:
    # doesn't exist
    test_data = process_test_data()

fig = plt.figure()

# Preview the predictions for the first 12 test images on a 3x4 grid.
for num, sample in enumerate(test_data[:12]):
    img_data = sample[0]

    ax = fig.add_subplot(3, 4, num + 1)

    model_out = model.predict([img_data.reshape(IMG_SIZE, IMG_SIZE, 1)])[0]

    # Training labels are [1, 0] for a cat and [0, 1] otherwise, so output
    # index 1 is the dog class and index 0 the cat class.
    str_label = 'Dog' if np.argmax(model_out) == 1 else 'Cat'

    ax.imshow(img_data, cmap='gray')
    ax.set_title(str_label)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)

plt.show()
        
        

In [None]:
# Start a fresh submission file that contains only the CSV header row; any
# existing file of the same name is overwritten.
with open('submission-file.csv', 'w') as submission:
    submission.write('id,label\n')

In [None]:
# Append one "id,score" row per test image, where the score is the model's
# output for class index 1 (the "[0, 1] = not a cat" class of the training labels).
# NOTE: the file is opened in append mode, so running this cell twice duplicates
# every row; re-run the header cell first to start from a clean file.
with open('submission-file.csv', 'a') as f:
    for img_data, img_num in tqdm(test_data):
        model_out = model.predict([img_data.reshape(IMG_SIZE, IMG_SIZE, 1)])[0]

        f.write('{},{}\n'.format(img_num, model_out[1]))
        
        