In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
np.random.seed(42)

#from keras.layers import merge
from src.models.catdata import *
from src.models.catmodel import *
from src.models.catsegnet import *
from src.data.utils import get_tile_prefix
from src.models.metrics_img import auc_roc, auc_pr, auc_pr_multiclass, dummy_metric
#import rasterio.plot as rioplot
import matplotlib
import matplotlib.pyplot as plt
#import matplotlib.image as mpimg

from pathlib import Path
import os, shutil
import sys
%matplotlib inline

import pandas as pd

import math

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split



from keras.callbacks import RemoteMonitor, TensorBoard, ReduceLROnPlateau, EarlyStopping, History
from keras import backend as keras



Using TensorFlow backend.


In [2]:
def selectData(tNoRoad = 0.95, tPRoad = 0, tURoad = 0, tEmpty = 0):
    df = pd.read_csv('../visualize_imagery/numOfPixPerClassPerTile_256.csv').drop(['Unnamed: 0'], axis = 1)
    sdf = df[((df['relative_noRoad']) < tNoRoad) 
             & ((df['relative_pavedRoad']) > tPRoad)
             & ((df['relative_unpavedRoad']) > tURoad)].reset_index(drop=True)
    
    if tEmpty > 0:
        edf = df[(df['relative_noRoad'] == 1)].sample(frac=tEmpty)
        sdf = pd.concat([sdf, edf]).reset_index(drop=True)
    
    sdf = shuffle(sdf, random_state=42)
    
    train_tmp, test = train_test_split(sdf, test_size=0.2, random_state=42)
    train, valid = train_test_split(train_tmp, test_size=0.2, random_state=42)
    
    return train, valid, test   

In [3]:
train, valid, test = selectData()

In [4]:
print("Train set")
print(train.region.value_counts())
print("")
print("Validation set")
print(valid.region.value_counts())
print("")
print("Test set")
print(test.region.value_counts())

Train set
Harz      568
Borneo     28
Name: region, dtype: int64

Validation set
Harz      143
Borneo      6
Name: region, dtype: int64

Test set
Harz      179
Borneo      8
Name: region, dtype: int64


In [5]:
labeldict = {
    0: train.numPixel_noRoad.mean(),
    1: train.numPixel_pavedRoad.mean(),
    2: train.numPixel_unpavedRoad.mean()
}

In [6]:
def create_class_weight(labels_dict,mu=0.25):
    total = sum(labels_dict.values())
    keys = labels_dict.keys()
    class_weight = dict()
    weights = []
    for key in keys:
        score = math.log(mu*total/float(labels_dict[key]))
        class_weight[key] = score if score > 1.0 else 1.0
        weights.append(score if score > 1.0 else 1.0)

    return weights

In [7]:
create_class_weight(labeldict)

[1.0, 1.4764483023549686, 1.9546758688621175]

In [8]:
def categoricalCrossentropy(y_noisy, y_pred, the0, the1):
    '''
    Calculate the class-weighted categorical cross-entropy for the given
    predicted and true sets.

    y_true [in] The truth set to test against. This is a Tensor with a last
                dimension that contains a set of 1-of-N selections.
    y_pred [in] The predicted set to test against. This is a Tensor with a last
                dimension that contains a set of 1-of-N selections.
    returns     A Tensor function that will calculate the weighted categorical
                cross-entropy on the inputs.
    '''

    P = np.array([[1, the1, the1], 
                  [the0, 1, the1], 
                  [the1, the0, 1]])
    P = np.array([[     1, 0,      0], 
                  [     0, 1,   the0], 
                  [  the1, 0,      1]])
    

    # If weights are defined, multiply the truth values by the class weights.
    #
    if P is not None:
        # Wrap the loss weights in a tensor object.
        #
        p =  np.linalg.inv(P)
        theWeights =  K.constant(p, shape=p.shape)

        y_true = K.dot(y_noisy,theWeights)

    # Get the cross-entropy and return it.
    #
    crossEntropy = K.categorical_crossentropy(y_true, y_pred)

    return crossEntropy

def noisy_loss(the0, the1):    
    def loss(y_true, y_pred):
        return categoricalCrossentropy(y_true, y_pred, the0, the1)
    return loss

In [9]:
original_dataset_dir = "../../data/train_raw"
raw_images_path = "../../data/raw/images"
dirs = []

base_dir = "../../data"

train_dir = os.path.join(base_dir, "train")
dirs.append(train_dir)
validation_dir = os.path.join(base_dir, "validate")
dirs.append(validation_dir)
test_dir = os.path.join(base_dir, "test")
dirs.append(test_dir)


In [10]:
for directory in dirs:
    if not os.path.exists(directory):
        os.mkdir(directory)


In [11]:
def should_make_tiles_from(r_analytic_name):
    is_analytic_tif = r_analytic_name.endswith(
        ('AnalyticMS.tif', 'AnalyticMS_SR.tif', 'AnalyticMS.tiff', 'AnalyticMS_SR.tiff')
    )
    return is_analytic_tif 

def make_datasets():
    file_prefixes = [ get_tile_prefix(r_analytic.name) 
                      for r_analytic in Path(raw_images_path).iterdir()  
                        if  should_make_tiles_from(r_analytic.name)
                    ]
    print(file_prefixes)

    # copy files to train dir
    train_fnames = train.name.values
    for fname in train_fnames:
        for file_type in ["sat", "map", "sat_rgb"]:
            src = os.path.join(original_dataset_dir, file_type, fname)
            dest = os.path.join(train_dir, file_type, fname)
            if(os.path.exists(src)):
                shutil.copy(src, dest)

    # copy files to validation dir
    validation_fnames = valid.name.values
    for fname in validation_fnames:
        for file_type in ["sat", "map", "sat_rgb"]:
            print(fname)
            src = os.path.join(original_dataset_dir, file_type, fname)
            dest = os.path.join(validation_dir, file_type, fname)
            if(os.path.exists(src)):
                shutil.copy(src, dest)
    # copy files to test dir
    test_fnames = test.name.values
    for fname in test_fnames:
        for file_type in ["sat", "map", "sat_rgb"]:
            src = os.path.join(original_dataset_dir, file_type, fname)
            dest = os.path.join(test_dir, file_type, fname)
            if(os.path.exists(src)):
                shutil.copy(src, dest)

    # print overview
    for directory in dirs:
        for file_type in ["sat", "map", "sat_rgb"]:
            target = os.path.join(directory, file_type)
            print(target, ":", len(os.listdir(target)))

    print("Done.")

In [12]:
#make_datasets()

In [13]:
def train_generator(batch_size, data_gen_args,imgdatagen_dir,target_size):
    return trainGenerator(
            batch_size,'../../data/train','sat','map',
            data_gen_args,
            save_to_dir = imgdatagen_dir,
            image_color_mode="rgba",
            target_size=target_size,
            flag_multi_class=True,
            num_class=3)
            

def validation_generator(batch_size, data_gen_args,imgdatagen_dir,target_size):
    return trainGenerator(
            batch_size,'../../data/validate','sat','map',
            data_gen_args, 
            save_to_dir = None, 
            image_color_mode="rgba", 
            target_size=target_size, 
            flag_multi_class=True,
            num_class=3)

In [14]:
# paths to append
sys.path.append("/home/ubuntu/roaddetection/")

# ------------- image characteristics and augmentation -----------------------------

def model_name(model, th0, th1, batch_size, epochs):
    MODELDIR = '../../models/'    


    mname = 'multicat_' + model + '_NL_th0-' + str(th0) \
            + '_th1-' + str(th1) + '_bs-' + str(batch_size) \
            + '_ep-' + str(epochs)
    
    versions = []
    for file in Path(MODELDIR).iterdir():
        print(file.name)
        if file.name.startswith((mname)):
            print('found it')
            versions.append(int(file.name.rsplit(mname+'_r-')[1].split('.')[0]))
            print(versions)
    latest = 1
    if len(versions) > 0:
        latest = np.max(versions) + 1
    model_name =  MODELDIR + mname + '_r-' + str(latest) + '.hdf5'
    pretrained = ''
    if(latest > 1):
        pretrained =  MODELDIR + mname + '_r-' + str(latest-1) + '.hdf5'
    return model_name, pretrained

In [15]:
def go(model = 'unet', th0 = 0.01, th1 = 0.02, target_size = (256, 256), batch_size = 3, epochs = 5, data_aug = True, pretrained = False):
    
    if data_aug:
        data_gen_args = dict(
            data_format="channels_last",
            horizontal_flip=True,
            vertical_flip=True
        )
    

    
    trained_model_fn, pretrained_model_fn = model_name(model, th0, th1, batch_size, epochs)
        
    print("... training model ", trained_model_fn)

    
    steps_per_epoch = len(train) // batch_size

    validation_steps = len(valid) // batch_size
    
    
    optimizer    = Adam(lr=1e-4)
    loss_weights = None
    metrics      = ['accuracy', auc_pr_multiclass]
    
    imgdatagen_dir = None
    
        
    train_gen = train_generator(batch_size, data_gen_args,imgdatagen_dir,target_size)    
    validation_gen = validation_generator(batch_size, data_gen_args,imgdatagen_dir,target_size)    
    
    width, height = target_size
    if model == 'unet':
        model = unet((width, height, 4), 3)
    if model == 'segnet':
        model = segnet((width, height, 4), 3)
        
    model.compile(optimizer=optimizer,
                  loss=noisy_loss(th0, th1),#0.01, 0.08),
                  loss_weights=loss_weights,
                  metrics=metrics)
    #model.summary()
    if (pretrained and len(pretrained_model_fn) > 0):
        print('... loading the pretrained model', pretrained_model_fn)
        model.load_weights(pretrained_model_fn)
    model_checkpoint = ModelCheckpoint(trained_model_fn, monitor='loss',verbose=1, save_best_only=True)
    #RemoteMonitor(root='http://35.240.18.144:9000', path='/publish/epoch/end/', field='data', headers=None, send_as_json=False)
    leaning_rate = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
    early_stop = EarlyStopping(monitor='loss', min_delta=0, patience=10, verbose=1, mode='auto', baseline=None)
    hist = History()

    class_weights = create_class_weight(labeldict)

    history = model.fit_generator(
        train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        class_weight=class_weights,
        callbacks=[model_checkpoint, leaning_rate, early_stop, hist],
        validation_data=validation_gen,
        validation_steps=validation_steps
    )

    return history, model

In [None]:
for t0 in [0., 0.01, 0.02]:
    for t1 in [0., 0.01, 0.02, 0.04, 0.08]:

        for i in range(5):
            history, model = go(batch_size = 3, epochs = 15, pretrained = True, th0=t0, th1=t1)
            del history
            del model
            keras.clear_session()

multiCat_segnet_0907_1115_e-15_r-4.hdf5
multicat_unet_NL_th0-0.01_th1-0.02_bs-3_ep-1_r-2.hdf5
multicat_unet_NL_th0-0.01_th1-0.02_bs-3_ep-1_r-1.hdf5
multiCat_segnet_0907_1115_e-15_r-5.hdf5
multiCat_segnet_0907_1115_e-15_r-1.hdf5
.ipynb_checkpoints
.gitkeep
multiCat_segnet_0907_1115_e-15_r-3.hdf5
multiCat_segnet_0907_1115_e-15_r-2.hdf5
multicat_unet_NL_th0-0.01_th1-0.02_bs-3_ep-1_r-3.hdf5
... training model  ../../models/multicat_unet_NL_th0-0.0_th1-0.0_bs-3_ep-15_r-1.hdf5


  conv10 = layers.Conv2D(nClasses, 1, 1, activation='relu',border_mode='same')(conv9)
  model = Model(input=inputs, output=conv10)


Epoch 1/15
Found 149 images belonging to 1 classes.Found 596 images belonging to 1 classes.

Found 149 images belonging to 1 classes.
Found 596 images belonging to 1 classes.
 13/198 [>.............................] - ETA: 1:40 - loss: 0.6505 - acc: 0.9103 - auc_pr_multiclass: 0.0434

In [None]:
import matplotlib.pyplot as plt

def plot_history(history):
    plt.plot(history["acc"], label="acc")
    plt.plot(history["val_acc"], label="val_acc")
    plt.legend()
    plt.show()
    plt.close()
    
    plt.plot(history["loss"], label="loss")
    plt.plot(history["val_loss"], label="val_loss")
    plt.legend()
    plt.show()
    plt.close()

    plt.plot(history["auc_pr_multiclass"], label="auc_pr_multiclass")
    plt.plot(history["val_auc_pr_multiclass"], label="val_auc_pr_multiclass")
    plt.legend()
    plt.show()
    plt.close()

plot_history(history.history)

In [None]:
#testGene = testGenerator("../../data/test/sat",target_size=(256, 256),as_gray=False)
#n = 0
#for img, name in testGene:
#    results = model.predict(img, batch_size=1)
#    saveResult("../../data/test/predict", results, name, True, 3)
#    n += 1
#    if(n>300):
#        break