In [1]:
import gzip
import os
import sys
import urllib

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as n
import numpy as np
from PIL import Image

def img_crop(im, w, h):
    """Cut an image array into a flat list of rectangular patches.

    The array is tiled with a stride (and extent) of ``w`` along axis 0 and
    ``h`` along axis 1. The axis-0 offset varies fastest: all tiles for the
    first axis-1 offset are emitted before moving to the next one. Tiles that
    overhang the array edge are truncated by slicing, not padded or dropped.

    Args:
        im: array with at least two dimensions; any third (channel) axis is
            kept whole in every patch.
        w: patch extent and stride along axis 0.
        h: patch extent and stride along axis 1.

    Returns:
        list with one slice of ``im`` per tile.
    """
    extent0, extent1 = im.shape[0], im.shape[1]
    has_channels = len(im.shape) >= 3
    patches = []
    for off1 in range(0, extent1, h):
        for off0 in range(0, extent0, w):
            rows = slice(off0, off0 + w)
            cols = slice(off1, off1 + h)
            patches.append(im[rows, cols, :] if has_channels else im[rows, cols])
    return patches

def extract_data(filename, num_images, IMG_PATCH_SIZE):
    """Load the training images and cut them into square patches.

    Files named ``satImage_001.png`` ... ``satImage_<num_images>.png`` are
    looked up under the ``filename`` prefix; missing files are reported and
    skipped. Every loaded image is tiled with ``img_crop`` and the patches of
    all images are concatenated in image order.

    Pixel values are returned exactly as ``mpimg.imread`` yields them; no
    rescaling or centring is applied here.

    Args:
        filename: path prefix (directory including the trailing separator)
            to which the image file names are appended.
        num_images: highest image index to try (indices start at 1).
        IMG_PATCH_SIZE: side length, in pixels, of the square patches.

    Returns:
        np.ndarray stacking every patch along the first axis, e.g.
        ``[patch index, row, col, channel]`` when all images are RGB and
        their sides are multiples of ``IMG_PATCH_SIZE``.

    Raises:
        ValueError: if none of the expected image files exists.
    """
    imgs = []
    for i in range(1, num_images+1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            print ('Loading ' + image_filename)
            imgs.append(mpimg.imread(image_filename))
        else:
            print ('File ' + image_filename + ' does not exist')

    # Fail with an explicit message instead of an IndexError further down.
    if not imgs:
        raise ValueError('No images found under ' + filename)

    # Flatten the per-image patch lists directly into one list, then convert
    # once; this avoids building an intermediate 5-D array.
    patches = [patch
               for img in imgs
               for patch in img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE)]
    return np.asarray(patches)

def value_to_class(v):
    """Turn a patch's ground-truth intensity into a one-hot class row.

    Args:
        v: scalar (or array, which is summed) describing how much of the
            patch is foreground; ``extract_labels`` passes the patch mean.

    Returns:
        ``[1, 0]`` (foreground) when the summed value is strictly greater
        than 0.25, otherwise ``[0, 1]`` (background).
    """
    foreground_threshold = 0.25  # summed value must exceed this to count as foreground
    is_foreground = np.sum(v) > foreground_threshold
    return [1, 0] if is_foreground else [0, 1]

def extract_labels(filename, num_images, IMG_PATCH_SIZE):
    """Build one-hot patch labels from the ground-truth images.

    Files named ``satImage_001.png`` ... ``satImage_<num_images>.png`` are
    looked up under the ``filename`` prefix; missing files are reported and
    skipped. Each loaded image is tiled with ``img_crop`` and every patch is
    classified by passing its mean value to ``value_to_class``.

    Args:
        filename: path prefix (directory including the trailing separator)
            to which the image file names are appended.
        num_images: highest image index to try (indices start at 1).
        IMG_PATCH_SIZE: side length, in pixels, of the square patches.

    Returns:
        float32 np.ndarray with one one-hot row per patch, in image order;
        an empty float32 array when no file could be loaded.
    """
    gt_imgs = []
    for idx in range(1, num_images + 1):
        image_filename = filename + ("satImage_%.3d" % idx) + ".png"
        if not os.path.isfile(image_filename):
            print('File ' + image_filename + ' does not exist')
            continue
        print('Loading ' + image_filename)
        gt_imgs.append(mpimg.imread(image_filename))

    # One flat array of patches, image after image.
    data = np.asarray([patch
                       for gt_img in gt_imgs
                       for patch in img_crop(gt_img, IMG_PATCH_SIZE, IMG_PATCH_SIZE)])
    labels = np.asarray([value_to_class(np.mean(patch)) for patch in data])

    # value_to_class already yields one-hot rows; only the dtype changes here.
    return labels.astype(np.float32)

def label_to_img(imgwidth, imgheight, w, h, labels):
    """Paint per-patch labels back onto an image-sized array.

    Tiles are visited in the same order ``img_crop`` produces them (axis-0
    offset varying fastest). A label greater than 0.5 paints its tile with 0;
    any other label paints it with 1.

    Args:
        imgwidth: extent of the output along axis 0.
        imgheight: extent of the output along axis 1.
        w: tile extent and stride along axis 0.
        h: tile extent and stride along axis 1.
        labels: indexable sequence with one value per tile.

    Returns:
        np.ndarray of shape ``(imgwidth, imgheight)`` filled with 0s and 1s.
    """
    canvas = np.zeros([imgwidth, imgheight])
    tile_origins = ((off0, off1)
                    for off1 in range(0, imgheight, h)
                    for off0 in range(0, imgwidth, w))
    for k, (off0, off1) in enumerate(tile_origins):
        canvas[off0:off0 + w, off1:off1 + h] = 0 if labels[k] > 0.5 else 1
    return canvas

In [2]:
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras.models import load_model
from keras.optimizers import SGD
from keras import backend as K

# Highest training image index to try (satImage_001 ... satImage_100)
NUMBER_IMGS = 100
# Fraction of the extracted patches used for training; the rest is held out
TRAIN_RATIO = 0.8
# Colour channels per patch (last axis of input_shape)
NUM_CHANNELS = 3

# Mini-batch size passed to model.fit
batch_size = 128
# Width of the final softmax layer; labels are 2-element one-hot rows
nb_classes = 2


# ********** Tuning parameters (see the network architecture in train_cnn as well)

# Side length, in pixels, of the square image patches fed to the network
# (one class label is produced per patch)
IMG_PATCH_SIZE = 8
# Number of training epochs
nb_epoch = 20
# Number of convolutional filters in every convolution layer
nb_filters = 64
# Size of the pooling window for max pooling
pool_size = (2, 2)
# Convolution kernel size: used as (4, 3) by the first convolution, transposed
# to (3, 4) by the second, and as (4, 4) by the third
kernel_size = (4, 3)

# Shape of one network input: (rows, cols, channels), channels last
input_shape = (IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS)

def _build_model():
    """Assemble and compile the patch classifier: three convolutions, one
    max-pooling stage and three 1024-unit dense layers ahead of the softmax."""
    model = Sequential()

    # Convolution layer with rectified linear activation; 'same' padding
    model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1],
                            border_mode='same',
                            input_shape=input_shape))
    model.add(Activation('relu'))

    # Second convolution (kernel transposed relative to the first)
    model.add(Convolution2D(nb_filters, kernel_size[1], kernel_size[0]))
    model.add(Activation('relu'))

    model.add(Dropout(0.25))

    # Third convolution (square kernel)
    model.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[0]))
    model.add(Activation('relu'))

    # Pooling and dropout
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.25))

    # Fully-connected layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation('relu'))

    model.add(Dense(1024))
    model.add(Activation('relu'))

    # Dropout to avoid overfitting
    model.add(Dropout(0.25))

    model.add(Dense(1024))
    model.add(Activation('relu'))

    # Dropout to avoid overfitting
    model.add(Dropout(0.5))

    # Fully-connected layer to output the resulting class
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    model.compile(loss='binary_crossentropy', optimizer='adadelta', metrics=['fmeasure'])

    # Alternative optimizer that was tried:
    #sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    #model.compile(loss='binary_crossentropy',
    #                    optimizer=sgd,
    #                    metrics=['fmeasure'])

    return model


def train_cnn(model_name='test.h5'):
    """Train the patch classifier on the training set and save it to disk.

    Images are read from ``training/images/`` and ground truth from
    ``training/groundtruth/``. The extracted patches are shuffled and split
    according to ``TRAIN_RATIO``. The held-out part is used both as
    validation data during fitting and for the final evaluation, so the
    reported score is not from data unseen during model selection.

    The train/test arrays are local to this function; nothing is returned.

    Args:
        model_name: file name, inside ``models/``, the trained model is
            saved under.

    Raises:
        ValueError: if the number of image patches and labels differ (e.g. a
            ground-truth file is missing for a loaded image).
    """

    # ***************** HANDLE THE DATA **********************************
    data_dir = 'training/'
    train_data_filename = data_dir + 'images/'
    train_labels_filename = data_dir + 'groundtruth/'

    # Extract data into numpy arrays.
    data = extract_data(train_data_filename, NUMBER_IMGS, IMG_PATCH_SIZE)
    labels = extract_labels(train_labels_filename, NUMBER_IMGS, IMG_PATCH_SIZE)

    # Both loaders skip missing files independently; a mismatch would
    # silently pair patches with the wrong labels.
    if data.shape[0] != labels.shape[0]:
        raise ValueError('Got %d patches but %d labels'
                         % (data.shape[0], labels.shape[0]))

    # Create train and test sets
    idx = np.random.permutation(np.arange(data.shape[0]))
    train_size = int(TRAIN_RATIO*data.shape[0])
    X_train = data[idx[:train_size]]
    Y_train = labels[idx[:train_size]]
    X_test = data[idx[train_size:]]
    Y_test = labels[idx[train_size:]]

    # Disabled experiment: explicit class balancing by subsampling.
    # # Balancing the class VS. class_weight during training?
    # c0 = 0
    # c1 = 0
    # for i in range(len(Y_train)):
    #     if Y_train[i][0] == 1:
    #         c0 = c0 + 1
    #     else:
    #         c1 = c1 + 1
    # print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))
    #
    # print ('Balancing training data...')
    # min_c = min(c0, c1)
    # idx0 = [i for i, j in enumerate(Y_train) if j[0] == 1]
    # idx1 = [i for i, j in enumerate(Y_train) if j[1] == 1]
    # new_indices = idx0[0:min_c] + idx1[0:min_c]
    # print (len(new_indices))
    # print (Y_train.shape)
    # X_train = X_train[new_indices,:,:,:]
    # Y_train = Y_train[new_indices]
    #
    # train_size = Y_train.shape[0]
    #
    # c0 = 0
    # c1 = 0
    # for i in range(len(Y_train)):
    #     if Y_train[i][0] == 1:
    #         c0 = c0 + 1
    #     else:
    #         c1 = c1 + 1
    # print ('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

    # Make sure the output directory exists before spending time on training,
    # so a missing directory cannot discard a finished run at save time.
    model_dir = 'models/'
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # **************** DEFINE THE MODEL ARCHITECTURE *******************
    model = _build_model()

    # NOTE(review): Keras documents class_weight as a dict mapping class
    # index to weight; confirm that the string 'auto' has any effect here.
    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, class_weight='auto', verbose=1, validation_data=(X_test, Y_test))

    # evaluate() returns [loss, metric]; the compiled metric is 'fmeasure'.
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test F-measure:', score[1])

    model.save(model_dir + model_name)

    # Disabled: predict the test-set images patch by patch and save masks.
    # data_dir = 'test_set_images/'
    # pred_dir = 'predictions/'
    # for i in range(1, 51):
    #     imageid = "test_%.1d" % i
    #     image_filename = data_dir + imageid + ".png"
    #     if os.path.isfile(image_filename):
    #         print ('Predicting' + image_filename)
    #         img = mpimg.imread(image_filename)
    #
    #         data = np.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
    #
    #         predictions_patch = model.predict_classes(data, verbose=1)
    #
    #         img_prediction = label_to_img(img.shape[0], img.shape[1],
    #                                       IMG_PATCH_SIZE, IMG_PATCH_SIZE,
    #                                       predictions_patch)
    #
    #         pimg = Image.fromarray((img_prediction*255.0).astype(np.uint8))
    #         pimg.save(pred_dir + "prediction_" + str(i) + ".png")
    #
    #     else:
    #         print ('File ' + image_filename + ' does not exist')

Using TensorFlow backend.


In [10]:
train_cnn()

Loading training/images/satImage_001.png
Loading training/images/satImage_002.png
Loading training/images/satImage_003.png
Loading training/images/satImage_004.png
Loading training/images/satImage_005.png
Loading training/images/satImage_006.png
Loading training/images/satImage_007.png
Loading training/images/satImage_008.png
Loading training/images/satImage_009.png
Loading training/images/satImage_010.png
Loading training/images/satImage_011.png
Loading training/images/satImage_012.png
Loading training/images/satImage_013.png
Loading training/images/satImage_014.png
Loading training/images/satImage_015.png
Loading training/images/satImage_016.png
Loading training/images/satImage_017.png
Loading training/images/satImage_018.png
Loading training/images/satImage_019.png
Loading training/images/satImage_020.png
Loading training/images/satImage_021.png
Loading training/images/satImage_022.png
Loading training/images/satImage_023.png
Loading training/images/satImage_024.png
Loading training

KeyboardInterrupt: 

In [6]:
# NOTE(review): in the cells visible here, X_train is only assigned as a local
# variable inside train_cnn(), so this cell (and the Y_train / X_test / Y_test
# inspection cells below) appears to rely on kernel state left over from code
# that is no longer in the notebook; after Restart & Run All it would raise
# NameError unless these names are defined elsewhere.
X_train.shape

(200000, 8, 8, 3)

In [11]:
X_train[:20,:,:,:]

array([[[[ 0.62352943,  0.60000002,  0.5411765 ],
         [ 0.50196081,  0.47843137,  0.42745098],
         [ 0.59215689,  0.56470591,  0.49803922],
         ..., 
         [ 0.56078434,  0.53725493,  0.47450981],
         [ 0.56862748,  0.54509807,  0.47843137],
         [ 0.53725493,  0.52156866,  0.4627451 ]],

        [[ 0.56470591,  0.53725493,  0.47058824],
         [ 0.47843137,  0.45490196,  0.40000001],
         [ 0.50980395,  0.48235294,  0.42352942],
         ..., 
         [ 0.53725493,  0.51372552,  0.44705883],
         [ 0.53333336,  0.51372552,  0.4509804 ],
         [ 0.53333336,  0.51372552,  0.45490196]],

        [[ 0.61176473,  0.58039218,  0.52156866],
         [ 0.60392159,  0.57254905,  0.51764709],
         [ 0.58431375,  0.5529412 ,  0.49803922],
         ..., 
         [ 0.63529414,  0.60392159,  0.5411765 ],
         [ 0.64705884,  0.61960787,  0.56078434],
         [ 0.60784316,  0.57647061,  0.51764709]],

        ..., 
        [[ 0.01176471,  0.01568628,

In [7]:
Y_train.shape

(200000, 2)

In [13]:
Y_train[:20,:]

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 1.,  0.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.],
       [ 1.,  0.],
       [ 0.,  1.]], dtype=float32)

In [8]:
X_test.shape

(50000, 8, 8, 3)

In [9]:
Y_test.shape

(50000, 2)