In [1]:
import os
from glob import glob

import numpy as np
from keras.utils import np_utils
from sklearn.datasets import load_files


Using TensorFlow backend.


In [2]:
# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=6):
    """Load image file paths and one-hot encoded targets from a directory.

    Parameters
    ----------
    path : str
        Directory handed to ``sklearn.datasets.load_files``.
    num_classes : int, optional
        Width of each one-hot target vector. Defaults to 6, the value this
        notebook previously hard-coded.

    Returns
    -------
    tuple
        ``(block_files, block_targets)``: an array of file paths and the
        matching one-hot encoded target matrix.
    """
    # Only the paths and integer labels are used below, so do not let
    # load_files read the raw bytes of every image into memory.
    data = load_files(path, load_content=False)
    block_files = np.array(data['filenames'])
    block_targets = np_utils.to_categorical(np.array(data['target']), num_classes)
    return block_files, block_targets

In [3]:
# read (file paths, one-hot targets) for the train, validation and test splits
(train_files, train_targets), (valid_files, valid_targets), (test_files, test_targets) = (
    load_dataset(split_dir)
    for split_dir in ('block_image/train', 'block_image/valid', 'block_image/test')
)

In [4]:
# load list of block category names
# Class folders are named "<index>.<label>" (e.g. "000.none"). Take the folder
# name and drop everything up to the first dot, instead of slicing at a fixed
# character offset that silently breaks when the directory prefix changes.
block_names = [
    os.path.basename(os.path.normpath(item)).split('.', 1)[-1]
    for item in sorted(glob("block_image/train/*/"))
]

# print statistics about the dataset
total_images = len(train_files) + len(valid_files) + len(test_files)
print('There are %d total block categories.' % len(block_names))
print('There are %s total block images.\n' % total_images)
print('There are %d training block images.' % len(train_files))
print('There are %d validation block images.' % len(valid_files))
print('There are %d test block images.'% len(test_files))

There are 6 total block categories.
There are 1080 total block images.

There are 989 training block images.
There are 61 validation block images.
There are 30 test block images.


In [5]:
from keras.preprocessing import image                  
from tqdm import tqdm

def path_to_tensor(img_path, target_size=(28, 28)):
    """Load one image file and return it as a single-item batch tensor.

    Parameters
    ----------
    img_path : str
        Path of the image file to read.
    target_size : tuple of int, optional
        ``(height, width)`` the image is resized to while loading.
        Defaults to ``(28, 28)``, matching the model's input shape.

    Returns
    -------
    numpy.ndarray
        4D array of shape ``(1, height, width, channels)``; with
        ``load_img``'s default colour mode that is ``(1, 28, 28, 3)``.
    """
    # load the image as a PIL.Image.Image, resized to target_size
    img = image.load_img(img_path, target_size=target_size)
    # convert the PIL image to a 3D array (height, width, channels)
    x = image.img_to_array(img)
    # add a leading batch axis so the result can be stacked with other images
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths):
    """Convert every path in ``img_paths`` to a tensor and stack them into one batch array."""
    batch = []
    # tqdm wraps the iterable so a progress bar is shown while loading
    for single_path in tqdm(img_paths):
        batch.append(path_to_tensor(single_path))
    return np.vstack(batch)

In [6]:
# preview the first ten training file paths (shows the <index>.<label> folder layout)
train_files[:10]

array(['block_image/train/000.none/block_010912462113.jpg',
       'block_image/train/000.none/block_010912462615.jpg',
       'block_image/train/000.none/block_0109124517.jpg',
       'block_image/train/000.none/block_01091246264.jpg',
       'block_image/train/000.none/block_010912474417.jpg',
       'block_image/train/005.gray/block_01091246160.jpg',
       'block_image/train/001.red/block_010912473118.jpg',
       'block_image/train/003.blue/block_010912474115.jpg',
       'block_image/train/000.none/block_01091246272.jpg',
       'block_image/train/003.blue/block_01091245211.jpg'], dtype='<U51')

In [7]:
from PIL import ImageFile                            
# allow PIL to load image files that are truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

# pre-process the data for Keras: load each split as float32 and scale pixel values by 1/255
train_tensors, valid_tensors, test_tensors = (
    paths_to_tensor(split_files).astype('float32') / 255
    for split_files in (train_files, valid_files, test_files)
)



100%|██████████| 989/989 [00:00<00:00, 3011.61it/s]
100%|██████████| 61/61 [00:00<00:00, 2536.41it/s]
100%|██████████| 30/30 [00:00<00:00, 1929.75it/s]


In [8]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Small CNN: three conv + max-pool stages (16 -> 32 -> 64 filters), then
# global average pooling and a 6-way softmax output layer.
model = Sequential([
    Conv2D(16, kernel_size=(2, 2), input_shape=(28, 28, 3), activation='relu', strides=1, padding='valid'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Conv2D(32, kernel_size=(2, 2), activation='relu', strides=1, padding='valid'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    Conv2D(64, kernel_size=(2, 2), activation='relu', strides=1, padding='valid'),
    MaxPooling2D(pool_size=(2, 2), strides=2),
    GlobalAveragePooling2D(),
    Dense(units=6, activation='softmax'),
])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 27, 27, 16)        208       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 13, 13, 16)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 12, 12, 32)        2080      
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 6, 6, 32)          0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 5, 5, 64)          8256      
_________________________________________________________________
max_pooling2d_3 (MaxPooling2 (None, 2, 2, 64)          0         
_________________________________________________________________
glob

In [9]:
# configure training: RMSprop optimizer, categorical cross-entropy loss, accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='rmsprop',
    metrics=['accuracy'],
)

In [10]:
from keras.callbacks import ModelCheckpoint  

epochs = 10

checkpoint_path = 'saved_models/weights.best.from_scratch.hdf5'
# Make sure the checkpoint directory exists before training starts; on a fresh
# checkout it is missing and the first save can fail (older Keras releases do
# not create it themselves).
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True keeps only the weights of the epoch with the best
# monitored value (val_loss by default, as the training log shows)
checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

model.fit(train_tensors, train_targets,
          validation_data=(valid_tensors, valid_targets),
          epochs=epochs, batch_size=2, callbacks=[checkpointer], verbose=1)

Instructions for updating:
Use tf.cast instead.
Train on 989 samples, validate on 61 samples
Epoch 1/10

Epoch 00001: val_loss improved from inf to 1.99904, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 2/10

Epoch 00002: val_loss improved from 1.99904 to 1.56544, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 3/10

Epoch 00003: val_loss improved from 1.56544 to 1.35717, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 4/10

Epoch 00004: val_loss improved from 1.35717 to 0.88855, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 5/10

Epoch 00005: val_loss did not improve from 0.88855
Epoch 6/10

Epoch 00006: val_loss improved from 0.88855 to 0.51308, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 7/10

Epoch 00007: val_loss improved from 0.51308 to 0.30369, saving model to saved_models/weights.best.from_scratch.hdf5
Epoch 8/10

Epoch 00008: val_loss did not improve from 0.30369
Epoch 9/10

Epoch

<keras.callbacks.History at 0x1a3fd18be0>

In [11]:

# restore the weights from the checkpoint file written by the ModelCheckpoint callback during training
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

In [12]:
# collect the image paths found under block_image/input
# NOTE(review): load_files is used here only for its 'filenames' entry; it presumably
# needs the images to sit inside a sub-folder of block_image/input — confirm.
data = load_files('block_image/input')
input_files = np.array(data['filenames'])
# same pre-processing as the train/valid/test splits: float32 tensors scaled by 1/255
input_tensors = paths_to_tensor(input_files).astype('float32')/255

100%|██████████| 4/4 [00:00<00:00, 1125.23it/s]


In [13]:
# class probabilities for every input image (one row per image, one column per category)
# `predict` already returns the softmax probabilities; `predict_proba` was only a
# deprecated alias on Sequential and is not available in later Keras/TensorFlow releases.
result = model.predict(input_tensors, batch_size = 2)
result


array([[6.09406754e-07, 5.48371696e-04, 9.68392134e-01, 1.99562754e-03,
        7.57026501e-05, 2.89875828e-02],
       [2.26313250e-06, 2.26825569e-02, 2.00284690e-01, 3.79657256e-03,
        4.40179184e-03, 7.68832088e-01],
       [2.51757548e-09, 6.00552484e-02, 3.50624009e-06, 1.85531723e-09,
        9.39939260e-01, 2.00714157e-06],
       [9.99764502e-01, 1.10290175e-05, 4.78616494e-05, 4.94509732e-05,
        1.26930434e-04, 9.98632643e-08]], dtype=float32)

In [14]:
# get index of the predicted block category for each image in the test set
# (a single batched predict call instead of one model.predict call per image)
block_predictions = list(np.argmax(model.predict(test_tensors), axis=1))

# report test accuracy: percentage of predictions that match the one-hot targets
test_accuracy = 100*np.sum(np.array(block_predictions)==np.argmax(test_targets, axis=1))/len(block_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 90.0000%
