In [3]:
import cv2  # for resizing image
import numpy as np  # for arrays
import os  # to play with directories
from random import shuffle
from tqdm import tqdm  # for professional looping with progress bar

# Directories whose files are listed to build the training and test sets.
# NOTE: these are hard-coded absolute local paths; adjust them when running elsewhere.
TRAIN_DIR = '/Users/jacquesthibodeau/Downloads/train/'
TEST_DIR = '/Users/jacquesthibodeau/Downloads/test/'
# Side length in pixels: each training image is resized to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 50
# Presumably the learning rate (TODO confirm); in the code visible here it is
# only used to build MODEL_NAME.
LR = 1e-3

# Name string built from LR and the tag '2conv-basic'; presumably used when
# saving/loading the model -- that usage is outside this view.
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')

In [4]:
def label_img(img):
    """Return the one-hot class label encoded in an image filename.

    Filenames are expected to look like ``cat.9.png``, i.e.
    ``<label>.<id>.<extension>``; the label is the third dot-separated
    part counted from the end.

    Args:
        img: The image's filename (a string).

    Returns:
        ``[1, 0]`` for a cat, ``[0, 1]`` for a dog, and ``None`` when the
        name does not follow the expected pattern or carries another label.
    """
    parts = img.split('.')
    # Names with fewer than three parts (e.g. '.DS_Store') carry no label;
    # indexing [-3] on them would raise IndexError.
    if len(parts) < 3:
        return None
    word_label = parts[-3]
    if word_label == 'cat':
        return [1, 0]
    if word_label == 'dog':
        return [0, 1]
    # Unknown label: make the "no label" result explicit.
    return None

In [9]:
def create_train_data():
    """Build, shuffle and save the labelled training set from TRAIN_DIR.

    Each file in TRAIN_DIR is labelled from its filename via ``label_img``,
    read as a grayscale image (a 2-D array of intensity values) and resized
    to IMG_SIZE x IMG_SIZE pixels. Files that cannot be labelled or that
    OpenCV cannot decode (for example a stray '.DS_Store') are skipped
    instead of aborting the whole run.

    Returns:
        A list of ``[image, label]`` pairs in shuffled order, where both
        elements are NumPy arrays.

    Side effects:
        Writes 'training_data.npy' to the current working directory as an
        object array of shape (N, 2). Because it holds Python objects, read
        it back with ``np.load('training_data.npy', allow_pickle=True)``.
    """
    training_data = []
    for filename in tqdm(os.listdir(TRAIN_DIR)):
        try:
            label = label_img(filename)
        except IndexError:
            # label_img may raise on names that do not follow the
            # <label>.<id>.<ext> pattern; treat those as unlabelled.
            label = None
        if label is None:
            continue
        path = os.path.join(TRAIN_DIR, filename)
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals an unreadable/non-image file by returning None
        # rather than raising; cv2.resize would then fail on it.
        if image is None:
            continue
        image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(image), np.array(label)])
    shuffle(training_data)
    # The pairs are ragged (image vs. 2-element label), so NumPy cannot turn
    # the list into a regular array; recent NumPy versions raise ValueError
    # if asked to do so implicitly. Pack them into an explicit object array.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (image, label) in enumerate(training_data):
        packed[row, 0] = image
        packed[row, 1] = label
    np.save('training_data.npy', packed)
    return training_data

In [10]:
# Build and save the training set. The call is the cell's last expression, so
# the returned list of [image, label] pairs is echoed in full as cell output.
create_train_data()

100%|██████████| 25000/25000 [00:35<00:00, 695.03it/s]


[[array([[69, 76, 80, ..., 65, 60, 57],
         [72, 78, 84, ..., 71, 62, 61],
         [77, 83, 87, ..., 82, 69, 65],
         ..., 
         [21, 24, 26, ..., 20, 22, 17],
         [21, 22, 26, ..., 19, 17, 15],
         [20, 21, 21, ..., 17, 14, 12]], dtype=uint8), array([0, 1])],
 [array([[107, 116, 130, ..., 144,  93, 118],
         [112,  96, 119, ..., 132,  55, 111],
         [ 76,  57,  36, ..., 125,  88,  30],
         ..., 
         [ 53,  67,  53, ...,  43,  56,  43],
         [ 63,  49,  56, ...,  71,  46,  62],
         [ 42,  62,  62, ...,  54,  66,  55]], dtype=uint8), array([1, 0])],
 [array([[ 48,  13,  14, ...,  33,  36,  32],
         [ 20,  14,  13, ...,  34,  34,  34],
         [ 52,  16,  15, ...,  31,  35,  36],
         ..., 
         [212, 209, 219, ..., 224, 218, 229],
         [150, 204, 211, ..., 232, 223, 225],
         [143, 134, 121, ..., 216, 222, 215]], dtype=uint8), array([1, 0])],
 [array([[198, 198, 195, ..., 212, 194, 201],
         [192, 192, 189,

In [None]:
def process_test():
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)