In [None]:
# Reload edited modules automatically before each cell is executed, so that
# changes to imported project code are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

# Define training and test data

In [None]:
from pathlib import Path

# Step size handed to FullGenerator when sampling the images
# (presumably one value per image axis -- TODO confirm).
sampling_step_size = (10, 10)

# Square window sizes handed to every feature.
windows = tuple((size, size) for size in (50, 100, 200))

# Input data is expected under ~/DynaSlum/Work.
home = Path.home()
data = home.joinpath('DynaSlum', 'Work')

# Images used to fit the classifier.
train_files = tuple(
    data / name
    for name in ('section_1_multiband.tif', 'section_2_multiband.tif')
)

# Images held back to assess the classifier.
test_files = (data / 'section_3_multiband.tif',)

ground_truth_file = data.joinpath('slum_approved.shp')

# Path where temporary files are saved
work = home.joinpath('satsense_notebook')

# Define the set of features for classification

In [None]:
from satsense.features import (NirNDVI, HistogramOfGradients, Pantex, Sift,
                               Lacunarity, Texton)
from satsense import Image

# The training images are opened here already because Sift and Texton are
# constructed from them via ``from_images``.
train_images = [Image(train_file, 'worldview3') for train_file in train_files]

# Features constructed from the window sizes alone.
ndvi = NirNDVI(windows)
hog = HistogramOfGradients(windows)
pantex = Pantex(windows)
lacunarity = Lacunarity(windows)

# Features constructed from the window sizes and the training images.
sift = Sift.from_images(windows, train_images)
texton = Texton.from_images(windows, train_images)

# The complete feature set used for classification.
features = [ndvi, hog, pantex, lacunarity, sift, texton]


# Compute and save features

In [None]:
import os
from pathlib import Path

from satsense import extract_features
from satsense.generators import FullGenerator

def compute_features(filenames):
    """Compute the features of each image and cache them on disk.

    Every file gets its own cache directory ``<work>/<file stem>/``. When
    that directory already exists the features are assumed to have been
    computed by an earlier run and nothing is recomputed.

    Uses the notebook globals ``work``, ``features`` and
    ``sampling_step_size``.

    Parameters
    ----------
    filenames : iterable of path-like
        Image files to process.

    Returns
    -------
    list of str
        The cache directory of each file (with a trailing path separator),
        in the same order as ``filenames``.

    Raises
    ------
    Whatever the feature extraction raises; the partially filled cache
    directory of the failing file is removed before re-raising.
    """
    import shutil  # local import: only needed to clean up after a failure

    paths = []
    for filename in filenames:
        image = Image(filename, 'worldview3')
        path = str(work / Path(filename).stem) + os.sep
        paths.append(path)
        if os.path.exists(path):
            # Already computed by an earlier run.
            continue

        os.makedirs(path)
        try:
            generator = FullGenerator(image, sampling_step_size)
            for feature_vector in extract_features(features, generator):
                feature_vector.save(path)
        except BaseException:
            # The directory doubles as the "already computed" marker, so a
            # partially filled one must not survive an error or a kernel
            # interrupt: the next run would silently skip the computation.
            shutil.rmtree(path, ignore_errors=True)
            raise
    return paths
        
# Compute the features of the training images (skipped for images whose
# cache directory already exists) and keep the cache directories so the
# features can be loaded from them later on.
train_data_paths = compute_features(train_files)

# Load training data

In [None]:
import numpy as np

from satsense import Image, FeatureVector
from satsense.util.mask import get_ndxi_mask, load_mask_from_shapefile, resample, save_mask2file
from satsense.features import NirNDVI, WVSI
from satsense.generators import FullGenerator

def load_feature_vector(features, path):
    """Load feature values from file.

    Parameters
    ----------
    features : iterable
        The features whose saved values should be loaded.
    path : str
        Location of the saved values, passed on to
        ``FeatureVector.from_file``.

    Returns
    -------
    numpy.ma.MaskedArray
        The values of all features stacked along the third axis, i.e. an
        array with three dimensions whose last dimension holds the
        flattened values of every feature.
    """
    vectors = []
    for feature in features:
        vector = FeatureVector.from_file(feature, path).vector
        # Flatten the window/feature_size dimensions. ``reshape`` is used
        # instead of assigning to ``.shape`` so the loaded array is not
        # modified in place and non-contiguous data is handled as well.
        rows, cols = vector.shape[:2]
        vectors.append(vector.reshape(rows, cols, -1))
    return np.ma.dstack(vectors)

def load_ground_truth(filename, sampling_step_size, path, shape, crs, transform):
    """Load the ground truth mask and resample it with the sampling step size.

    The mask is loaded from the shapefile, written to
    ``<path>/ground_truth_mask.tif``, re-opened as a monochrome image and
    resampled using a ``FullGenerator`` with the given step size.

    Parameters
    ----------
    filename : path-like
        Shapefile containing the ground truth.
    sampling_step_size : tuple
        Step size passed to ``FullGenerator``.
    path : str or pathlib.Path
        Directory in which the intermediate mask file is written.
    shape, transform
        Passed to ``load_mask_from_shapefile``; ``transform`` is also
        passed to ``save_mask2file``.
    crs
        Passed to ``save_mask2file``.

    Returns
    -------
    The result of ``resample``; the callers in this notebook use it as an
    index mask into their label arrays.
    """
    ground_truth = load_mask_from_shapefile(filename, shape, transform)
    # ``path`` may be a plain string (compute_features returns strings), and
    # ``str / str`` raises a TypeError, so convert before joining.
    mask_file = Path(path) / 'ground_truth_mask.tif'
    save_mask2file(ground_truth, mask_file, crs, transform)
    ground_truth_image = Image(mask_file, 'monochrome', normalization_parameters=False)
    return resample(FullGenerator(ground_truth_image, sampling_step_size))

# Class name -> integer label used in the label vectors; the labels are
# numbered consecutively from 0 in the order the names are listed.
labels = {
    name: value
    for value, name in enumerate(
        ('other', 'deprived_neighbourhood', 'vegetation'))
}

# Per-image samples and labels; joined into single arrays after the loop.
x_train = []
y_train = []

for path, image in zip(train_data_paths, train_images):
    print("Processing", image.filename)
    # Load feature vector
    feature_vector = load_feature_vector(features, path)

    # Every sample starts out as 'other'; the label is taken from ``labels``
    # rather than relying on np.zeros happening to match it.
    label_vector = np.full(
        feature_vector.shape[:2], labels['other'], dtype=np.uint8)

    # Create deprived neighbourhood labels
    ground_truth = load_ground_truth(
        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)
    label_vector[ground_truth] = labels['deprived_neighbourhood']

    # Create vegetation labels. They are assigned last, so vegetation wins
    # wherever a sample is covered by both masks.
    generator = FullGenerator(image, sampling_step_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    label_vector[vegetation_mask] = labels['vegetation']

    # Create x_train and y_train: one row per sample. ``reshape`` is used
    # instead of assigning to ``.shape``, which NumPy discourages.
    feature_vector = feature_vector.reshape(-1, feature_vector.shape[2])
    label_vector = label_vector.reshape(-1)

    x_train.append(feature_vector)
    y_train.append(label_vector)

# NOTE(review): the feature vectors are masked arrays and np.concatenate does
# not preserve their masks, so masked samples take part in the training with
# their underlying values -- confirm this is intended.
x_train = np.concatenate(x_train)
y_train = np.concatenate(y_train)

# Train a classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# A fixed random_state makes the fitted model reproducible when the notebook
# is re-run from a fresh kernel.
classifier = GradientBoostingClassifier(verbose=True, random_state=0)

classifier.fit(x_train, y_train)

# Load test data and assess performance

In [None]:
from sklearn.metrics import classification_report, matthews_corrcoef, confusion_matrix

# Compute the features of the test images (skipped for images whose cache
# directory already exists).
test_data_paths = compute_features(test_files)

test_images = [Image(f, 'worldview3') for f in test_files]

# Fixed class order shared by all reports, so that their rows/columns stay
# comparable even when a class is missing from an image.
class_ids = list(labels.values())
class_names = list(labels.keys())

for path, image in zip(test_data_paths, test_images):
    print('Performance on', image.filename)
    # Create x_test: one row per sample. ``reshape`` is used instead of
    # assigning to ``.shape``, which NumPy discourages.
    x_test = load_feature_vector(features, path)
    shape = x_test.shape
    x_test = x_test.reshape(-1, shape[2])

    # Predict the labels
    y_pred = classifier.predict(x_test)

    # Create y_test; every sample starts out as 'other'.
    y_test = np.full(shape[:2], labels['other'], dtype=np.uint8)

    # Create deprived neighbourhood labels
    ground_truth = load_ground_truth(
        ground_truth_file, sampling_step_size, path, image.shape, image.crs, image.transform)
    y_test[ground_truth] = labels['deprived_neighbourhood']

    # Create vegetation labels. They are assigned last, so vegetation wins
    # wherever a sample is covered by both masks.
    generator = FullGenerator(image, sampling_step_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    y_test[vegetation_mask] = labels['vegetation']
    y_test = y_test.reshape(-1)

    # Assess performance

    # Optional: uncomment to merge the vegetation class into 'other' to
    # create a more accurate representation of the performance.
    # y_pred[y_pred == labels['vegetation']] = labels['other']
    # y_test[y_test == labels['vegetation']] = labels['other']

    print(matthews_corrcoef(y_test, y_pred))
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
    # Pass the labels explicitly: otherwise the matrix only covers the
    # classes that occur in y_test/y_pred and its layout varies per image.
    print(confusion_matrix(y_test, y_pred, labels=class_ids))
