In [1]:
import tensorflow as tf
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm
import os
import random

In [2]:
# Dataset roots. The defaults are the original machine-specific locations;
# set DOGSCATS_TRAIN_DIR / DOGSCATS_TEST_DIR in the environment to run the
# notebook on another machine without editing this cell.
TRAIN_DIR = os.environ.get('DOGSCATS_TRAIN_DIR', '/Users/apple/Downloads/dogscats/dogscats')
TEST_DIR = os.environ.get('DOGSCATS_TEST_DIR', '/Users/apple/Desktop/test')
# Sub-folder names looked up under each dataset root, one per class.
CATEGORY = ['cats', 'dogs']
# Every image is resized to IMG_SIZE x IMG_SIZE pixels.
IMG_SIZE = 50
# Only used below to build MODEL_NAME in this part of the notebook
# (presumably the learning rate for a later training step -- confirm).
LR = 1e-3

MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic')

In [3]:
def label_img(img):
    """Translate an image file name into a one-hot class label.

    The class is taken from the text before the first '.' in ``img``:
    'cat' maps to [1, 0] and 'dog' maps to [0, 1]. Any other prefix has no
    entry, so the function returns None.
    """
    prefix, _, _ = img.partition('.')
    # Built on every call so each caller receives its own fresh list.
    one_hot_by_prefix = {'cat': [1, 0], 'dog': [0, 1]}
    return one_hot_by_prefix.get(prefix)

In [4]:
def create_train_data():
    """Build, shuffle, and cache the training set.

    Walks every CATEGORY sub-folder of TRAIN_DIR, reads each file with
    OpenCV, resizes it to IMG_SIZE x IMG_SIZE, and pairs it with the label
    that label_img derives from the file name. Files OpenCV cannot decode
    are skipped.

    Returns:
        list: shuffled ``[image_array, label_array]`` pairs. The same pairs
        are also written to 'train_data.npy' as an (N, 2) object array.
    """
    training_data = []
    for category in CATEGORY:
        folder = os.path.join(TRAIN_DIR, category)
        for file_name in tqdm(os.listdir(folder)):
            image = cv2.imread(os.path.join(folder, file_name))
            if image is None:
                # cv2.imread signals an undecodable file (hidden files such
                # as .DS_Store, corrupt images) by returning None; passing
                # that on to cv2.resize would raise.
                continue
            image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
            label = label_img(file_name)
            training_data.append([np.array(image), np.array(label)])
    random.shuffle(training_data)

    # Each pair holds arrays of different shapes. Recent NumPy releases
    # refuse to build an array implicitly from such ragged nesting, so the
    # object array handed to np.save is filled in explicitly.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (image, label) in enumerate(training_data):
        packed[row, 0] = image
        packed[row, 1] = label
    np.save('train_data.npy', packed)
    return training_data


In [23]:
def create_test_data():
    """Build, shuffle, and cache the test set.

    Walks every CATEGORY sub-folder of TEST_DIR, reads each file with
    OpenCV, resizes it to IMG_SIZE x IMG_SIZE, and pairs it with the label
    that label_img derives from the file name. Files OpenCV cannot decode
    are skipped.

    Returns:
        list: shuffled ``[image_array, label]`` pairs, where ``label`` is
        exactly what label_img returned. The same pairs are also written to
        'test_data.npy' as an (N, 2) object array.
    """
    testing_data = []
    for category in CATEGORY:
        folder = os.path.join(TEST_DIR, category)
        for file_name in tqdm(os.listdir(folder)):
            image = cv2.imread(os.path.join(folder, file_name))
            if image is None:
                # cv2.imread signals an undecodable file (hidden files such
                # as .DS_Store, corrupt images) by returning None; passing
                # that on to cv2.resize would raise.
                continue
            image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
            label = label_img(file_name)
            testing_data.append([np.array(image), label])

    random.shuffle(testing_data)

    # Each pair mixes an image array with a short label. Recent NumPy
    # releases refuse to build an array implicitly from such ragged nesting,
    # so the object array handed to np.save is filled in explicitly.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (image, label) in enumerate(testing_data):
        packed[row, 0] = image
        packed[row, 1] = label
    np.save('test_data.npy', packed)
    return testing_data


In [6]:
# Build the shuffled training set; the call also writes 'train_data.npy'.
train_data = create_train_data()


100%|██████████| 11500/11500 [00:46<00:00, 246.68it/s]
100%|██████████| 11500/11500 [00:56<00:00, 202.47it/s]


In [7]:
# Inspect the first shuffled sample: an [image array, label array] pair.
train_data[0]

[array([[[ 41,  40,  82],
         [ 49,  48,  90],
         [ 52,  52,  96],
         ...,
         [ 14,  23,  57],
         [  4,  14,  44],
         [ 13,  23,  53]],
 
        [[ 36,  40,  67],
         [ 42,  42,  81],
         [ 49,  48,  96],
         ...,
         [ 12,  21,  53],
         [  1,  10,  40],
         [  8,  19,  48]],
 
        [[ 33,  37,  63],
         [ 33,  36,  67],
         [ 42,  43,  84],
         ...,
         [ 12,  23,  51],
         [  2,  12,  42],
         [  6,  16,  46]],
 
        ...,
 
        [[101, 107, 102],
         [107, 113, 108],
         [115, 122, 114],
         ...,
         [125, 131, 124],
         [121, 125, 120],
         [120, 124, 119]],
 
        [[ 96, 102,  97],
         [107, 113, 108],
         [109, 117, 108],
         ...,
         [129, 135, 128],
         [129, 132, 128],
         [118, 122, 117]],
 
        [[103, 109, 104],
         [ 99, 105,  99],
         [107, 115, 106],
         ...,
         [126, 132, 125],
  

In [8]:
# Split the [image, label] pairs into two parallel lists.
X_train = [image for image, _ in train_data]
y_train = [target for _, target in train_data]
    

In [9]:
# Inspect the first training image (the resized pixel array).
X_train[0]


array([[[ 41,  40,  82],
        [ 49,  48,  90],
        [ 52,  52,  96],
        ...,
        [ 14,  23,  57],
        [  4,  14,  44],
        [ 13,  23,  53]],

       [[ 36,  40,  67],
        [ 42,  42,  81],
        [ 49,  48,  96],
        ...,
        [ 12,  21,  53],
        [  1,  10,  40],
        [  8,  19,  48]],

       [[ 33,  37,  63],
        [ 33,  36,  67],
        [ 42,  43,  84],
        ...,
        [ 12,  23,  51],
        [  2,  12,  42],
        [  6,  16,  46]],

       ...,

       [[101, 107, 102],
        [107, 113, 108],
        [115, 122, 114],
        ...,
        [125, 131, 124],
        [121, 125, 120],
        [120, 124, 119]],

       [[ 96, 102,  97],
        [107, 113, 108],
        [109, 117, 108],
        ...,
        [129, 135, 128],
        [129, 132, 128],
        [118, 122, 117]],

       [[103, 109, 104],
        [ 99, 105,  99],
        [107, 115, 106],
        ...,
        [126, 132, 125],
        [132, 136, 131],
        [121, 125, 120]]

In [10]:
# Inspect the label that belongs to the first training image.
y_train[0]

array([1, 0])

In [24]:
# Build the shuffled test set; the call also writes 'test_data.npy'.
test_data = create_test_data()


100%|██████████| 1000/1000 [00:04<00:00, 204.71it/s]
100%|██████████| 1000/1000 [00:04<00:00, 222.01it/s]


In [26]:
# Split the [image, label] pairs into two parallel lists.
X_test = [image for image, _ in test_data]
y_test = [target for _, target in test_data]
    

In [28]:
# Convert the per-sample lists into NumPy arrays: features first, then labels.
X_train, X_test = np.array(X_train), np.array(X_test)
y_train, y_test = np.array(y_train), np.array(y_test)


In [29]:
# Check the dimensions of the training feature array.
X_train.shape


(23000, 50, 50, 3)

In [30]:
import pickle


In [31]:
# Persist each array to its own pickle file.
# pickle.dump returns None, so its result must not be assigned back to the
# array names -- doing so would discard the in-memory data. The `with` block
# closes (and flushes) every file as soon as it has been written.
for file_name, array in (
    ('X_train.pkl', X_train),
    ('y_train.pkl', y_train),
    ('X_test.pkl', X_test),
    ('y_test.pkl', y_test),
):
    with open(file_name, 'wb') as handle:
        pickle.dump(array, handle)
