Reference tutorial (Keras blog): https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [2]:
import glob
import os

import numpy as np
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.layers import Dense, Dropout, Flatten
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator

# Augmenting image generator used by the directory iterators further down.
#
# Pixel scaling is delegated to InceptionV3's own `preprocess_input` rather
# than `rescale=1./255`: the ImageNet weights loaded later expect inputs
# prepared by that function, and plain [0, 1] scaling feeds the network a
# value range it was not trained on.
datagen = ImageDataGenerator(
        preprocessing_function=preprocess_input,
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

Using TensorFlow backend.


In [3]:
# Dataset roots, read with `flow_from_directory` / `get_nb_files` below.
train_dir = '../data'
val_dir = '../validation'

# Training hyper-parameters.
nb_epoch = 5
batch_size = 32

# Fixed input size for InceptionV3.
IM_WIDTH = 299
IM_HEIGHT = 299

# NOTE(review): the two constants below are not referenced in the visible
# cells; presumably they configure a later fine-tuning stage -- confirm.
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 249


In [4]:
def get_nb_files(directory):
  """Count the entries in every sub-directory and build one label per entry.

  The tree under `directory` is walked recursively with sub-directories
  visited in sorted order, so the result is deterministic. The k-th visited
  sub-directory (0-based) contributes label `k` once for every path matched
  by `<sub-directory>/*`.

  Args:
    directory: root folder; its sub-directories are treated as classes.

  Returns:
    A `(cnt, labels)` tuple:
      cnt: total number of matched entries over all sub-directories.
      labels: 1-D integer array of length `cnt`, grouped by sub-directory.
    A missing `directory` yields `(0, <empty array>)`, so callers can always
    unpack two values.

  Note:
    Every glob match is counted (not only image files, and nested folders
    count as entries of their parent), so `cnt` can exceed the number of
    images Keras' `flow_from_directory` reports for the same folder --
    verify the lengths agree before pairing `labels` with extracted features.
  """
  if not os.path.exists(directory):
    return 0, np.array([], dtype=int)
  counts = []
  for root, dirs, _ in os.walk(directory):
    # Sorting in place fixes both the traversal order and the label order.
    dirs.sort()
    for name in dirs:
      counts.append(len(glob.glob(os.path.join(root, name, "*"))))
  # Flat label vector: index of each sub-directory repeated once per entry.
  labels = np.repeat(np.arange(len(counts)), np.asarray(counts, dtype=int))
  return sum(counts), labels

In [5]:
# Count the entries (and derive per-entry labels) for each split.
nb_train_samples, train_labels = get_nb_files(train_dir)
nb_val_samples, val_labels = get_nb_files(val_dir)

# Every entry directly under the validation root is counted as one class.
nb_classes = len(glob.glob(os.path.join(val_dir, "*")))

print(
    "Loaded %d training images, %d validation images, spanning over %d classes."
    % (nb_train_samples, nb_val_samples, nb_classes)
)

Loaded 58236 training images, 14517 validation images, spanning over 84 classes.


In [6]:
# ImageNet-pretrained InceptionV3 without its top layers; the cell below
# calls it with `predict_generator` to produce bottleneck features.
model = InceptionV3(include_top=False, weights='imagenet')

In [7]:
# Batch size for feature extraction (overrides the value from the config cell).
batch_size = 16

# NOTE(review): `datagen` applies random augmentation, so the cached features
# (including the validation ones) come from randomly transformed images --
# confirm this is intended, or use a non-augmenting generator here.

generator = datagen.flow_from_directory(
        train_dir,
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=batch_size,
        class_mode=None,  # yield image batches only, no labels
        shuffle=False)  # keep directory order so rows stay grouped by class
# `predict_generator` expects a number of batches (steps), not a number of
# samples. It is derived from the files the generator actually found, which
# can differ from the raw file count (non-image files are skipped).
steps = int(np.ceil(len(generator.filenames) / float(batch_size)))
bottleneck_features_train = model.predict_generator(generator, steps)
# Passing a path lets NumPy open the file in binary mode and close it again.
np.save('bottleneck_features_train.npy', bottleneck_features_train)

generator = datagen.flow_from_directory(
        val_dir,
        # Must match the training size, otherwise the two feature sets have
        # different shapes and cannot share one classifier.
        target_size=(IM_WIDTH, IM_HEIGHT),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
steps = int(np.ceil(len(generator.filenames) / float(batch_size)))
bottleneck_features_validation = model.predict_generator(generator, steps)
np.save('bottleneck_features_validation.npy', bottleneck_features_validation)


Found 58172 images belonging to 84 classes.


  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
Exception in thread Thread-4:
Traceback (most recent call last):
  File "/usr/local/Cellar/python/2.7.13/Fr

ValueError: Cannot feed value of shape () for Tensor u'input_1:0', which has shape '(?, ?, ?, 3)'

In [None]:
# Load the cached bottleneck features. Passing a path lets NumPy open the
# file in binary mode and close it afterwards.
train_data = np.load('bottleneck_features_train.npy')
validation_data = np.load('bottleneck_features_validation.npy')

# The features were saved in directory order (shuffle=False), so each row
# needs the class index of the folder it came from.
# NOTE(review): assumes `train_labels` / `val_labels` are flat integer arrays
# with values in [0, nb_classes), one per feature row -- confirm that the
# label source counts exactly the images the generators found.
assert len(train_labels) == len(train_data), \
    "train labels (%d) and features (%d) differ in length" % (len(train_labels), len(train_data))
assert len(val_labels) == len(validation_data), \
    "validation labels (%d) and features (%d) differ in length" % (len(val_labels), len(validation_data))

# Small fully-connected classifier trained on top of the frozen features.
# NOTE(review): this rebinds `model`, which previously held the InceptionV3 base.
model = Sequential()
model.add(Flatten(input_shape=train_data.shape[1:]))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
# One softmax unit per class: the dataset is multi-class, so a single
# sigmoid unit with binary cross-entropy cannot represent the targets.
model.add(Dense(nb_classes, activation='softmax'))

# The sparse variant of the loss takes integer class indices directly.
model.compile(optimizer='rmsprop',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(train_data, train_labels,
          epochs=50,
          batch_size=batch_size,
          validation_data=(validation_data, val_labels))
model.save_weights('bottleneck_fc_model.h5')