## Source: https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

In [1]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K

import os
import math
import random
import shutil

Using TensorFlow backend.


In [2]:
LABELED_IMAGES_DIRECTORY = 'static/labeled_images/'

WATER_DIRECTORY = os.path.join(LABELED_IMAGES_DIRECTORY, 'water')
NO_WATER_DIRECTORY = os.path.join(LABELED_IMAGES_DIRECTORY, 'no_water')

TRAIN_IMAGES_DIRECTORY = 'model_data/train'
VAL_IMAGES_DIRECTORY = 'model_data/val'
TEST_IMAGES_DIRECTORY = 'model_data/test/test_images'

In [3]:
# dimensions of our images.
img_width, img_height = 1000, 1000

total_train_samples = 1000
total_val_test_samples = 1000
batch_size = 20
epochs = 50

if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

In [4]:
def copy_image_subset(target_directory, file_indices, label, target_has_label=True):
    for ind in file_indices:
        filename = filenames[ind]
        current_filepath = os.path.join(LABELED_IMAGES_DIRECTORY, label, filename)
        
        if target_has_label:
            target_filepath = os.path.join(target_directory, label, filename)
        else:
            target_filepath = os.path.join(target_directory, filename)
            
        shutil.copyfile(current_filepath, target_filepath)

In [5]:
labeled_directories = [(WATER_DIRECTORY, 'water'),
                       (NO_WATER_DIRECTORY, 'no_water')]

for directory, label in labeled_directories:
    filenames = os.listdir(directory)
    num_files = len(filenames)
    all_file_indices = list(range(num_files))
    random.shuffle(all_file_indices)

    num_train_samples = math.floor(num_files * 0.6)
    num_val_test_samples = math.floor((num_files - num_train_samples) * 0.5)

    val_data_start = num_train_samples
    test_data_start = num_train_samples + num_val_test_samples

    copy_image_subset(TRAIN_IMAGES_DIRECTORY, all_file_indices[:val_data_start], label)
    copy_image_subset(VAL_IMAGES_DIRECTORY, all_file_indices[val_data_start:test_data_start], label)
    copy_image_subset(TEST_IMAGES_DIRECTORY, all_file_indices[test_data_start:], label, target_has_label=False)

In [6]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

In [7]:
# this is the augmentation configuration we will use for training
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

# this is the augmentation configuration we will use for testing:
# only rescaling
val_test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    TRAIN_IMAGES_DIRECTORY,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

val_generator = val_test_datagen.flow_from_directory(
    VAL_IMAGES_DIRECTORY,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

test_generator = val_test_datagen.flow_from_directory(
    TEST_IMAGES_DIRECTORY,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 18 images belonging to 2 classes.
Found 12 images belonging to 2 classes.
Found 0 images belonging to 0 classes.


In [None]:
model.fit_generator(
    train_generator,
    steps_per_epoch=total_train_samples // batch_size,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=total_val_test_samples // batch_size)

model.save_weights('weights.h5')

Epoch 1/50


In [None]:
model.evaluate_generator(generator=test_generator)