# Keras Cats and Dogs


In [1]:
# import the necessary packages
import os
import sys

import cv2
import numpy as np
import tensorflow as tf
from imutils import paths
from keras.layers import Dense, Dropout, BatchNormalization, Activation, Conv2D, MaxPooling2D, Flatten, Conv1D
from keras.models import Sequential
from keras.models import model_from_json
from keras.optimizers import Adadelta, SGD
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder

try:
    # current home of train_test_split
    from sklearn.model_selection import train_test_split
except ImportError:
    # older scikit-learn releases that still ship the legacy module
    from sklearn.cross_validation import train_test_split

# fix the seed of NumPy's global random generator
np.random.seed(seed=0)

# make only CUDA device 0 visible to this process
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

Using TensorFlow backend.


In [2]:
def image_to_feature_vector(image, size=(64, 64)):
    """Resize an image to a fixed size and flatten it into raw pixel intensities.

    Parameters
    ----------
    image : array-like
        Image as returned by ``cv2.imread``.
    size : tuple of int
        Target size handed to ``cv2.resize`` (OpenCV reads it as
        ``(width, height)``).

    Returns
    -------
    The resized image flattened into a one-dimensional array.

    Raises
    ------
    ValueError
        If ``image`` is ``None``, which is what ``cv2.imread`` returns when a
        file is missing or cannot be decoded.
    """
    if image is None:
        # fail with a readable message instead of an OpenCV assertion error
        raise ValueError("image is None; the file could not be read by cv2.imread")
    return cv2.resize(image, size).flatten()

In [3]:
# True  -> the images are already separated into train/ and test/ folders
# False -> every image lives in train/ and is described as a single dataset
train_test_split_data_done = False

if not train_test_split_data_done:
    # grab the list of images that we'll be describing; sorted so the order
    # does not depend on the arbitrary order of the directory listing
    print("[INFO] describing images...")
    image_dataset = "train/"
    imagePaths = sorted(paths.list_images(image_dataset))
else:
    train_dataset = "train/"
    test_dataset = "test/"
    # grab the list of images that we'll be describing
    print("[INFO] describing training images...")
    trainimagePaths = sorted(paths.list_images(train_dataset))
    print("[INFO] describing testing images...")
    testimagePaths = sorted(paths.list_images(test_dataset))

[INFO] describing images...


In [4]:
# start every data matrix / label list out empty:
# one pair for training, one for testing, one for the combined dataset
trainingdata, traininglabels = [], []
testingdata, testinglabels = [], []
data, labels = [], []

In [5]:
def _load_images(image_paths, report_every=1000):
    """Read the images in ``image_paths`` and return ``(features, labels)``.

    The class label is the part of the file name before the first dot
    (expected file name format: ``{class}.{image_num}.jpg``).

    Files that ``cv2.imread`` cannot read (it returns ``None``) are reported
    and skipped instead of crashing the whole run during resizing.

    Parameters
    ----------
    image_paths : sequence of str
        Paths of the images to load.
    report_every : int
        Print a progress line every ``report_every`` images.

    Returns
    -------
    (list, list)
        Feature vectors and the matching string labels, in input order.
    """
    features, names = [], []
    for i, image_path in enumerate(image_paths):
        image = cv2.imread(image_path)
        if image is None:
            print("[WARN] skipping unreadable image {}".format(image_path))
            continue

        # construct a feature vector of raw pixel intensities, then update
        # the data matrix and labels list
        features.append(image_to_feature_vector(image))
        names.append(os.path.basename(image_path).split(".")[0])

        # show a progress update
        if i > 0 and i % report_every == 0:
            print("[INFO] processed {}/{}".format(i, len(image_paths)))
    return features, names


# encode the labels, converting them from strings to integers
le = LabelEncoder()

if not train_test_split_data_done:
    # assign fresh lists so re-running the cell does not append duplicates
    data, labels = _load_images(imagePaths)
    labels = le.fit_transform(labels)
else:
    trainingdata, traininglabels = _load_images(trainimagePaths)
    traininglabels = le.fit_transform(traininglabels)

    testingdata, testinglabels = _load_images(testimagePaths, report_every=100)
    # reuse the encoder fitted on the training labels so both sets share
    # the same class -> integer mapping
    testinglabels = le.transform(testinglabels)

[INFO] processed 1000/25001
[INFO] processed 2000/25001
[INFO] processed 3000/25001
[INFO] processed 4000/25001
[INFO] processed 5000/25001
[INFO] processed 6000/25001
[INFO] processed 7000/25001
[INFO] processed 8000/25001
[INFO] processed 9000/25001
[INFO] processed 10000/25001
[INFO] processed 11000/25001
[INFO] processed 12000/25001
[INFO] processed 13000/25001
[INFO] processed 14000/25001
[INFO] processed 15000/25001
[INFO] processed 16000/25001
[INFO] processed 17000/25001
[INFO] processed 18000/25001
[INFO] processed 19000/25001
[INFO] processed 20000/25001
[INFO] processed 21000/25001
[INFO] processed 22000/25001
[INFO] processed 23000/25001
[INFO] processed 24000/25001
[INFO] processed 25000/25001


In [6]:
# scale the input image pixels to the range [0, 1] and one-hot encode the
# integer labels: each label becomes a vector of length 2 with a `1` at the
# index of its class and `0` elsewhere.
# The pixel matrices are built as float32, which halves the memory needed
# compared with NumPy's default float64.
if train_test_split_data_done:
    trainingdata = np.asarray(trainingdata, dtype="float32") / 255.0
    traininglabels = np_utils.to_categorical(traininglabels, 2)
    testingdata = np.asarray(testingdata, dtype="float32") / 255.0
    testinglabels = np_utils.to_categorical(testinglabels, 2)
else:
    data = np.asarray(data, dtype="float32") / 255.0
    labels = np_utils.to_categorical(labels, 2)

In [7]:
# nothing to do when the data already comes pre-split
if not train_test_split_data_done:
    # partition the data into training and testing splits, using 75%
    # of the data for training and the remaining 25% for testing
    print("[INFO] constructing training/testing split...")
    (trainData, testData, trainLabels, testLabels) = train_test_split(
        data, labels, test_size=0.25, random_state=42)

[INFO] constructing training/testing split...


In [9]:
# False -> build a fresh network, True -> restore model.json / model.h5
load_model = False

if not load_model:
    # define the architecture of the network: two conv/pool stages followed
    # by two dense layers with dropout and a 2-way softmax output
    model = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(64,64,3), padding='same'),
        MaxPooling2D(),
        Conv2D(64, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(),
        Flatten(),
        Dense(128, kernel_initializer="uniform", activation="relu"),
        Dropout(0.4),
        Dense(64, kernel_initializer="uniform", activation="relu"),
        Dropout(0.1),
        Dense(2),
        Activation("softmax"),
    ])
else:
    # load json and create model; `with` closes the file even if reading
    # or parsing fails
    with open('model.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    # load weights into new model
    model.load_weights("model.h5")
    print("Loaded model from disk")

In [10]:
should_train = True

from keras.callbacks import EarlyStopping

# Always compile, even when not training: evaluating the model later
# requires a compiled model.
print("[INFO] compiling model...")
sgd = SGD(lr=0.0001 if should_train else 0.01)
model.compile(loss="binary_crossentropy", optimizer=sgd, metrics=["accuracy"])

if should_train:
    # train the model using SGD; the flat feature vectors are reshaped back
    # to 64x64x3 images because the network starts with a Conv2D layer.
    # -1 lets NumPy infer the number of samples instead of hard-coding it.
    if not train_test_split_data_done:
        model.summary()  # prints the table itself and returns None

        # NOTE(review): Keras releases that report the metric as
        # 'val_accuracy' need the monitor name below updated accordingly.
        model.fit(trainData.reshape(-1, 64, 64, 3), trainLabels, epochs=175*2, batch_size=128, verbose=1, validation_split=0.3,
                  callbacks=[EarlyStopping(monitor='val_acc', min_delta=0, patience=15, verbose=1, mode='max', baseline=None, restore_best_weights=True)])
    else:
        model.fit(trainingdata.reshape(-1, 64, 64, 3), traininglabels, epochs=350, batch_size=256, verbose=1)

[INFO] compiling model...
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 64, 64, 32)        896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 64)        18496     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 16, 16, 64)        0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 16384)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               2097280   
_________________________________________________________________
dropout_1 (Dropout)          (None, 128)          

Epoch 105/350
Epoch 106/350
Epoch 107/350
Epoch 108/350
Epoch 109/350
Epoch 110/350
Epoch 111/350
Epoch 112/350
Epoch 113/350
Epoch 114/350
Epoch 115/350
Epoch 116/350
Epoch 117/350
Epoch 118/350
Epoch 119/350
Epoch 120/350
Epoch 121/350
Epoch 122/350
Epoch 123/350
Epoch 124/350
Epoch 125/350
Epoch 126/350
Epoch 127/350
Epoch 128/350
Epoch 129/350
Epoch 130/350
Epoch 131/350
Epoch 132/350
Epoch 133/350
Epoch 134/350
Epoch 135/350
Epoch 136/350
Epoch 137/350
Epoch 138/350
Epoch 139/350
Epoch 140/350
Epoch 141/350
Epoch 142/350
Epoch 143/350
Epoch 144/350
Epoch 145/350
Epoch 146/350
Epoch 147/350
Epoch 148/350
Epoch 149/350
Epoch 150/350
Epoch 151/350
Epoch 152/350
Epoch 153/350
Epoch 154/350
Epoch 155/350
Epoch 156/350
Epoch 157/350
Epoch 158/350
Epoch 159/350
Epoch 160/350
Epoch 161/350
Epoch 162/350
Epoch 163/350
Epoch 164/350
Epoch 165/350
Epoch 166/350
Epoch 167/350
Epoch 168/350
Epoch 169/350
Epoch 170/350
Epoch 171/350
Epoch 172/350
Epoch 173/350
Epoch 174/350
Epoch 175/350
Epoch 

Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350
Epoch 241/350
Epoch 242/350
Epoch 243/350
Epoch 244/350
Epoch 245/350
Epoch 246/350
Epoch 247/350
Epoch 248/350
Epoch 249/350
Epoch 250/350
Epoch 251/350
Epoch 252/350
Epoch 253/350
Epoch 254/350
Epoch 255/350
Epoch 256/350
Epoch 257/350
Epoch 258/350
Epoch 259/350
Epoch 260/350
Epoch 261/350
Epoch 262/350
Epoch 263/350
Epoch 264/350
Epoch 265/350
Epoch 266/350
Epoch 267/350
Epoch 268/350
Epoch 269/350
Epoch 270/350
Epoch 271/350
Epoch 272/350
Epoch 273/350
Epoch 274/350
Epoch 275/350
Epoch 276/350
Epoch 277/350
Epoch 278/350
Epoch 279/350
Epoch 280/350
Epoch 281/350
Epoch 282/350
Epoch 283/350
Epoch 284/350
Epoch 285/350
Epoch 286/350
Restoring model weights from the end of the best epoch
Epoch 00286: early st

In [21]:
should_test = True

if should_test:
    # pick the held-out set that matches how the data was loaded
    if not train_test_split_data_done:
        eval_data, eval_labels, eval_batch_size = testData, testLabels, 128
    else:
        eval_data, eval_labels, eval_batch_size = testingdata, testinglabels, 256

    # show the accuracy on the testing set; the flat feature vectors are
    # reshaped to 64x64x3 images for the convolutional input layer, with -1
    # letting NumPy infer the number of samples
    print("[INFO] evaluating on testing set...")
    (loss, accuracy) = model.evaluate(eval_data.reshape(-1, 64, 64, 3), eval_labels, batch_size=eval_batch_size, verbose=1)
    print("[INFO] loss={:.4f}, accuracy: {:.4f}%".format(loss, accuracy * 100))

[INFO] evaluating on testing set...
[INFO] loss=0.4550, accuracy: 78.7074%


In [13]:
model_save = True

if model_save:
    # the architecture is serialized to JSON ...
    model_json = model.to_json()
    with open("model.json", "w") as json_file:
        json_file.write(model_json)
    # ... and the weights are serialized to HDF5
    model.save_weights("model.h5")
    print("Saved model to disk")

Saved model to disk


AttributeError: module 'string' has no attribute 'maketrans'