<table style="font-size: 1em; padding: 0; margin: 0;">

<tr style="vertical-align: top; padding: 0; margin: 0;background-color: #ffffff">
        <td style="vertical-align: top; padding: 0; margin: 0; padding-right: 15px;">
    <p style="background: #182AEB; color:#ffffff; text-align:justify; padding: 10px 25px;">
        <strong style="font-size: 1.0em;"><span style="font-size: 1.2em;"><span style="color: #ffffff;">Deep Learning </span> for Satellite Image Classification</span> (Manning Publications)<br/>by <em>Daniel Buscombe</em></strong><br/><br/>
        <strong>> Chapter 3: Deliverable Solution </strong><br/>
    </p>
        </td>
</tr>
</table>

Import libraries and test if TensorFlow sees an available GPU

In [None]:
%matplotlib inline
import json, os, glob
import shutil
import zipfile
from random import shuffle

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image, ImageDraw
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.layers import Concatenate, Conv2DTranspose
from tensorflow.keras.models import Model

In [None]:
# tf.test.is_gpu_available() is deprecated in TensorFlow 2.x; listing the physical
# GPU devices is the supported replacement. The cell still displays a boolean.
len(tf.config.list_physical_devices('GPU')) > 0

Use the unzip function from Part 2 to unzip the imagery

In [None]:
def unzip_nwpu(f):
    """
    Extract every member of a zip archive into the current working directory.

    f = path of the zip file to be unzipped
    """
    archive = zipfile.ZipFile(f, 'r')
    try:
        archive.extractall()
    finally:
        # always release the file handle, even if extraction fails
        archive.close()
        
# extract the NWPU imagery archive into the current working directory
unzip_nwpu('NWPU_images.zip')

Use the same workflow as in Part 2 to delete folders of non-lake imagery

In [None]:
# give the extracted folder a descriptive name
os.rename('images', 'nwpu_images')

# every directory below 'nwpu_images'; the first os.walk entry is the root itself, so skip it
subdirecs = [x[0] for x in os.walk('nwpu_images')][1:]

# remove every directory whose path does not contain 'lake'
to_delete = [s for s in subdirecs if 'lake' not in s]
for k in to_delete:
    shutil.rmtree(k, ignore_errors=True)

# the remaining 'lake' folder becomes the 'data' folder used by the rest of the notebook
os.rename(os.path.join('nwpu_images', 'lake'), os.path.join('nwpu_images', 'data'))

Make directories for NWPU label images

In [None]:
# Create 'nwpu_label_images/data' (and its parent) in one call; exist_ok lets the
# cell be re-run without raising FileExistsError when the folders already exist.
os.makedirs(os.path.join('nwpu_label_images', 'data'), exist_ok=True)

These two functions get the X,Y polygon coordinates (`get_data`), then create a mask and save it to file (`write_mask`). They are composed mostly of commands shown in the Part 2 tutorial

In [None]:
def get_data(data):
    """
    Collect the polygon vertex coordinates of every labelled region of one image.

    data = label record for a single image; data['regions'] holds the regions,
           either as a dictionary (region id -> region) or as a list of regions.
           Each region provides ['shape_attributes']['all_points_x'] and
           ['shape_attributes']['all_points_y'].

    Returns the tuple (Y, X): a list of y-coordinate lists followed by a list of
    x-coordinate lists, one entry per region. The order is deliberately swapped
    because image coordinates are flipped relative to json coordinates.
    """
    regions = data['regions']
    # a dictionary is iterated by value; a list of regions is used as it is
    region_records = regions.values() if isinstance(regions, dict) else regions

    X = []; Y = [] #pre-allocate lists to fill in a for loop
    for region in region_records: #cycle through each polygon
        # get the x and y points from the region record
        shape = region['shape_attributes']
        X.append(shape['all_points_x'])
        Y.append(shape['all_points_y'])
    return Y,X #image coordinates are flipped relative to json coordinates


def write_mask(data, images, all_images, i):
    """
    Rasterise the labelled polygons of sample ``i`` into a mask and save it as
    'nwpu_label_images/data/<image name>_mask.jpg'.

    data       = dictionary of label records keyed by image name;
                 data[images[i]] is handed to get_data
    images     = list of image names (keys of data)
    all_images = list of image arrays in the same order as images; each array
                 must be 3-dimensional
    i          = index of the sample to process

    Returns None; the mask is written to disk.
    """
    # get_data returns its coordinate lists in swapped (y, x) order, so X holds
    # the json y-coordinates and Y the json x-coordinates
    X, Y = get_data(data[images[i]])

    # get the dimensions of the image (the third dimension is not needed)
    nx, ny, _ = np.shape(all_images[i])
    mask = np.zeros((ny, nx))

    for x, y in zip(X, Y):
        # the ImageDraw.Draw().polygon function we will use to create the mask
        # requires the x's and y's are interweaved, which is what the following
        # one-liner does
        polygon = np.vstack((x, y)).reshape((-1,), order='F').tolist()

        # A canvas of PIL size (ny, nx) converts to an array of shape (nx, ny);
        # its transpose therefore always has the shape of mask, whether or not
        # the image is square.
        img = Image.new('L', (ny, nx), 0)
        ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)

        # transposing is equivalent to np.flipud(np.rot90(...)) on a 2D array
        mask = mask + np.array(img).T

    mask_path = os.path.join('nwpu_label_images', 'data', images[i] + "_mask.jpg")
    matplotlib.image.imsave(mask_path, mask.astype('uint8'))

Load and merge the three VGG-JSON label files and extract the sorted list of sample image names

In [None]:
# the three VGG-JSON label files live in the same folder
labels_dir = os.path.join('..', '2_Data', 'nwpu_labels')
label_files = ['nwpu_lakes_30samples.json',
               'nwpu_lakes_20samplesA.json',
               'nwpu_lakes_20samplesB.json']

data = []
for label_file in label_files:
    json_file = os.path.join(labels_dir, label_file)
    # the context manager closes each file as soon as it has been parsed
    with open(json_file) as fid:
        data.append(json.load(fid))

# merge the per-file dictionaries into one; a key repeated in a later file
# replaces the earlier entry
data_merged = {}
for d in data:
    data_merged.update(d)

# the dictionary keys are used as the image file names from here on
images = sorted(data_merged.keys())
print(images)
print(len(images))

Load all the image samples into an array, like in the Part 2 tutorial

In [None]:
import rasterio

# read every labelled sample from 'nwpu_images/data' into a list of arrays
all_images = []
for image in images:
    image_path = 'nwpu_images'+os.sep+'data'+os.sep+image
    with rasterio.open(image_path) as dataset:
        pixels = dataset.read()
        # .T reverses the axis order of the array returned by read()
        all_images.append(pixels.T)

Call ```write_mask``` to write the image mask for each image sample, according to the information in ```data_merged```, the merged json label structure

In [None]:
# write one mask file per labelled sample
sample_indices = range(len(images))
for i in sample_indices:
    write_mask(data_merged, images, all_images, i)

Use the same image batch generator we used in Part 3 to generate batches of images and masks (labels)

In [None]:
def image_batch_generator(files, batch_size = 32, sz = (512, 512)):
    """
    Endlessly yield random batches of images and their binary masks.

    files      = list of image file names located in 'nwpu_images/data'; every
                 name needs a matching 'nwpu_label_images/data/<name>_mask.jpg'
    batch_size = number of samples drawn for each batch
    sz         = size handed to PIL's resize for both the image and the mask

    Yields the tuple (batch_x, batch_y): batch_x is the stack of 3-channel images
    divided by 255, batch_y the stack of 0/1 masks with a trailing singleton
    dimension.
    """
    while True: # this is here because it will be called repeatedly by the training function

        #extract a random subset of files of length "batch_size"
        batch = np.random.choice(files, size = batch_size)

        #variables for collecting batches of inputs (x) and outputs (y)
        batch_x = []
        batch_y = []

        #cycle through each image in the batch
        for f in batch:

            #preprocess the raw images; the context manager closes the file
            #handle once the resized pixels have been copied into an array
            rawfile = f'nwpu_images/data/{f}'
            with Image.open(rawfile) as raw_image:
                raw = np.array(raw_image.resize(sz))

            #check the number of channels because some of the images are RGBA or GRAY
            if len(raw.shape) == 2:
                raw = np.stack((raw,)*3, axis=-1)
            else:
                raw = raw[:,:,0:3]

            #get the image dimensions, find the min dimension, then square the image off
            nx, ny, nz = np.shape(raw)
            n = np.minimum(nx,ny)
            raw = raw[:n,:n,:]

            batch_x.append(raw)

            #get the masks.
            maskfile = rawfile.replace('nwpu_images','nwpu_label_images')+'_mask.jpg'
            with Image.open(maskfile) as mask_image:
                # the mask is 3-dimensional so get the max in each channel to flatten to 2D
                mask = np.max(np.array(mask_image.resize(sz)),axis=2)
            # water pixels are always greater than 100
            mask = (mask>100).astype('int')

            mask = mask[:n,:n]

            batch_y.append(mask)

        #preprocess a batch of images and masks
        batch_x = np.array(batch_x)/255. #divide image by 255 to normalize
        batch_y = np.array(batch_y)
        batch_y = np.expand_dims(batch_y,3) #add singleton dimension to batch_y

        yield (batch_x, batch_y) #yield both the image batch and the mask batch

Like in Part 3, specify a batch size, proportion to use for training, and make test and train generators

In [None]:
# samples per batch, and the fraction of labelled images reserved for training
batch_size = 4
prop_train = 0.5

# index of the first test sample in the sorted list of image names
split = int(len(images) * prop_train)

# everything before the split index trains the model, the rest tests it
train_files, test_files = images[:split], images[split:]

train_generator = image_batch_generator(train_files, batch_size=batch_size)
test_generator = image_batch_generator(test_files, batch_size=batch_size)

Using augmented NWPU imagery

Set up mask and image generators like in Part 3, and merge them together

In [None]:
batch_size = 1

# augmentation settings shared by the image and the mask generators
augmentation_options = dict(
    featurewise_center=False,
    featurewise_std_normalization=False,
    shear_range=0,
    zoom_range=0,
    rotation_range=90,
    horizontal_flip=True)

# directory-flow settings shared by both generators; presumably the common seed
# and shuffle=False are what keep the two streams in step
flow_options = dict(
    target_size=(128, 128),
    batch_size=batch_size,
    class_mode=None, seed=42, shuffle=False)

train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
img_generator = train_datagen.flow_from_directory('nwpu_images', **flow_options)

test_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_options)
mask_generator = test_datagen.flow_from_directory('nwpu_label_images', **flow_options)

# NOTE(review): pairing by position assumes both directories list the same samples
# in the same order -- TODO confirm
train_generator = ((img, msk) for img, msk in zip(img_generator, mask_generator))

Check that worked by using the `next` command to get the next augmented sample (the generators do not shuffle), then plot it like in Part 3 to see if the augmentation worked

In [None]:
x, y = next(train_generator)

%matplotlib inline
plt.imshow((x[0]).astype('uint8'), cmap='gray')
plt.imshow(np.max(y[0], axis=2)/255, cmap='gray', alpha=0.5)

Let's generate 500 augmented files

In [None]:
n_aug_files = 500

# merge the two generators together, dividing every (image, mask) pair by 255
train_generator2 = (
    tuple(np.array(pair, dtype='float64')/255)
    for pair in zip(img_generator, mask_generator)
)

# destination folders for the augmented masks and images
label_dir = 'nwpu_label_images'+os.sep+'data'+os.sep
image_dir = 'nwpu_images'+os.sep+'data'+os.sep

# write the augmented mask and image files, one pair per iteration
counter = 0
while counter<n_aug_files:
    x, y = next(train_generator2)
    aug_name = "augimage00"+str(counter)+".jpg"
    matplotlib.image.imsave(label_dir+aug_name+"_mask.jpg", np.squeeze(y[0]))
    matplotlib.image.imsave(image_dir+aug_name, np.squeeze(x[0]))
    counter += 1