In [4]:
from __future__ import print_function
from __future__ import absolute_import

import os
import shutil
import random
import warnings

import cv2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
np.random.seed(0)
from   tqdm import *
from   sklearn.model_selection import train_test_split

import tensorflow as tf
from   keras           import backend as K
from   keras.models    import Model
from   keras.layers    import Dense, Input, BatchNormalization, Activation, merge, Dropout
from   keras.layers    import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D
from   keras.callbacks import ModelCheckpoint
from   keras.preprocessing       import image
from   keras.preprocessing.image import ImageDataGenerator
from   keras.engine.topology     import get_source_inputs
from   keras.utils.data_utils    import get_file
from   keras_applications.imagenet_utils import decode_predictions, _obtain_input_shape


In [5]:
from random import shuffle # mixing up our currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel Bühler for this suggestion

%matplotlib inline

In [6]:
# Directory scanned by create_train_data(); labels are read from its file names.
TRAIN_DATASET = 'train'
# Directory scanned by process_test_data().
TEST_DATASET = 'test1'
# Side length in pixels: every image is resized to IMG_SIZE x IMG_SIZE.
IMG_SIZE = 50

In [7]:
def label_img(img):
    """Return the one-hot ``[cat, dog]`` label encoded in an image file name.

    File names are expected to look like ``<label>.<number>.<ext>`` (for
    example ``cat.0.jpg``); the label is the third dot-separated field
    counted from the end of the base name.

    Args:
        img: File name or path of the image. Leading directory components
            are ignored.

    Returns:
        ``[1, 0]`` for a cat, ``[0, 1]`` for a dog, or ``None`` when the name
        has fewer than three dot-separated fields or carries any other label.
    """
    # Work on the base name only, so a path such as './train/cat.0.jpg' is not
    # split on the dots of its directory part.
    parts = os.path.basename(img).split('.')
    if len(parts) < 3:
        # e.g. '.DS_Store' or 'Thumbs.db': there is no label field to read.
        return None
    word_label = parts[-3]
    # One-hot order is [cat, dog]; a fresh list is returned on every call.
    if word_label == 'cat':
        return [1, 0]
    if word_label == 'dog':
        return [0, 1]
    return None

In [8]:
def create_train_data():
    """Load the labelled images found in ``TRAIN_DATASET``.

    Each file is labelled with ``label_img``, read in grayscale and resized
    to ``IMG_SIZE`` x ``IMG_SIZE``. Files for which ``label_img`` returns
    ``None`` or which OpenCV cannot decode are skipped with a warning instead
    of aborting the whole scan.

    Returns:
        A list of ``[image, label]`` pairs in shuffled order, where ``image``
        is the resized grayscale array and ``label`` is the one-hot
        ``[cat, dog]`` array.
    """
    training_data = []
    for filename in tqdm(os.listdir(TRAIN_DATASET)):
        label = label_img(filename)
        if label is None:
            warnings.warn('Skipping file without a cat/dog label: %s' % filename)
            continue
        path = os.path.join(TRAIN_DATASET, filename)
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread reports an unreadable or non-image file by returning
            # None; passing that on to cv2.resize would raise.
            warnings.warn('Skipping unreadable image: %s' % path)
            continue
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])
    # Mix the samples so they are not returned in directory-listing order.
    shuffle(training_data)
    return training_data

In [9]:
def process_test_data():
    """Load the images found in ``TEST_DATASET``.

    Each file is read in grayscale and resized to ``IMG_SIZE`` x ``IMG_SIZE``.
    Files that OpenCV cannot decode are skipped with a warning instead of
    aborting the whole scan.

    Returns:
        A list of ``[image, img_num]`` pairs in shuffled order, where
        ``image`` is the resized grayscale array and ``img_num`` is the part
        of the file name before its first dot (a string).
    """
    testing_data = []
    for filename in tqdm(os.listdir(TEST_DATASET)):
        path = os.path.join(TEST_DATASET, filename)
        img_num = filename.split('.')[0]
        pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread reports an unreadable or non-image file by returning
            # None; passing that on to cv2.resize would raise.
            warnings.warn('Skipping unreadable image: %s' % path)
            continue
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    shuffle(testing_data)
    return testing_data

In [None]:
#2. Preprocessing for the input images

In [10]:
# NOTE: despite the "_filenames" suffix these hold loaded samples, not file
# names: train_filenames is a list of [image, one-hot label] pairs and
# test_filenames is a list of [image, image id] pairs.
train_filenames = create_train_data()
test_filenames  = process_test_data()


100%|██████████| 25000/25000 [03:52<00:00, 107.68it/s]
100%|██████████| 12500/12500 [01:48<00:00, 115.55it/s]


In [11]:
# Peek at the first ten entries of the training list, then of the test list.
for samples in (train_filenames, test_filenames):
    print(samples[:10])

[[array([[104, 125, 151, ..., 230, 255, 255],
       [106, 125, 136, ..., 255, 255, 255],
       [123, 164, 148, ..., 248, 255, 254],
       ...,
       [ 24,  67, 123, ..., 167, 192, 190],
       [ 84,  68,  56, ..., 126, 194, 188],
       [ 80, 116,  43, ..., 191, 189, 190]], dtype=uint8), array([0, 1])], [array([[ 36,  35,  36, ..., 118,  97, 106],
       [ 33,  53,  43, ...,  54,  62,  71],
       [ 59,  39,  46, ...,  60,  55,  62],
       ...,
       [145, 177, 188, ..., 172, 178, 183],
       [166, 172, 171, ..., 118, 138, 152],
       [178, 179, 186, ..., 147, 119, 157]], dtype=uint8), array([0, 1])], [array([[142, 150, 157, ..., 212, 206, 199],
       [148, 154, 161, ..., 217, 210, 204],
       [149, 158, 164, ..., 216, 210, 210],
       ...,
       [136, 147, 148, ..., 139, 133, 122],
       [134, 138, 146, ..., 132, 126, 121],
       [129, 143, 146, ..., 125, 121, 119]], dtype=uint8), array([0, 1])], [array([[ 65, 110, 143, ..., 206, 204, 203],
       [ 98, 132, 249, ..., 20

In [12]:
# Compare the number of cat and dog training images with the number of test images.
# Each entry of train_filenames is an [image, one-hot label] pair (not a file
# name), so the classes are separated by label: [1, 0] is cat, [0, 1] is dog.
# List comprehensions are used because Python 3's filter() returns a lazy
# iterator that has no len().
train_cat = [sample for sample in train_filenames if np.array_equal(sample[1], [1, 0])]
train_dog = [sample for sample in train_filenames if np.array_equal(sample[1], [0, 1])]
x = ['train_cat', 'train_dog', 'test']
y = [len(train_cat), len(train_dog), len(test_filenames)]
ax = sns.barplot(x=x, y=y)
ax.set(title='Images per split', ylabel='number of images');

In [None]:
# Hold out 10% of the training samples for validation and keep the remaining
# 90% for fitting the model; random_state=0 makes the split repeatable.
my_train, my_cv = train_test_split(
    train_filenames,
    test_size=0.1,
    random_state=0,
)
print(len(my_train), len(my_cv))