In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Define training and test data

In [5]:
# Cell size handed to the generators together with each image.
cell_size = (200, 200)

# Window sizes handed to every feature constructor. Only the first entry is
# enabled; uncomment the other entries to include them as well.
windows = (
    (50, 50),
    # (100, 100),
    # (200, 200),
)

# Directory holding the imagery and the ground truth. This is the single
# place to edit when the data lives somewhere else.
DATA_DIR = '/home/bandela/DynaSlum/Work'

# Images used to fit the classifier.
train_files = (
    DATA_DIR + '/section_1_multiband.tif',
    DATA_DIR + '/section_2_multiband.tif',
)

# Images held out for assessing the classifier.
test_files = (
    DATA_DIR + '/section_3_multiband.tif',
)

# Shapefile from which the deprived-neighbourhood masks are loaded.
ground_truth_file = DATA_DIR + '/slum_approved.shp'

# Define the set of features for classification

In [3]:
from satsense.features import (NirNDVI, HistogramOfGradients, Pantex, Sift,
                               Lacunarity, Texton)
from satsense import Image

# Open every training image up front; Sift and Texton are built from them.
train_images = [Image(path, 'worldview3') for path in train_files]

# Features that are constructed from the window sizes alone.
ndvi, hog, pantex, lacunarity = (
    feature_class(windows)
    for feature_class in (NirNDVI, HistogramOfGradients, Pantex, Lacunarity)
)

# Features that are additionally built from the training images.
sift = Sift.from_images(windows, train_images)
texton = Texton.from_images(windows, train_images)

# The complete feature set; code that loads the saved vectors iterates this
# list in order.
features = [ndvi, hog, pantex, lacunarity, sift, texton]


# Compute and save features

In [7]:
import os
from pathlib import Path

from satsense import extract_features_parallel
from satsense.generators import FullGenerator

def compute_features(filenames):
    """Compute and save the features of every image, reusing earlier results.

    For each file, a directory named after the file's stem (relative to the
    current working directory) holds the saved feature vectors. If that
    directory already exists the file is skipped without being opened;
    otherwise the directory is created and every feature vector yielded by
    ``extract_features_parallel`` is saved into it.

    Because the directory doubles as the "already computed" marker, it is
    removed again when extraction fails or is interrupted, so that the next
    run recomputes instead of silently reusing a partial result.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the images to process.

    Returns
    -------
    list of str
        One output directory (ending in the path separator) per input file,
        in input order.
    """
    import shutil

    paths = []
    for filename in filenames:
        path = Path(filename).stem + os.sep
        paths.append(path)
        if os.path.exists(path):
            # Saved by an earlier run: nothing to open or compute.
            continue

        image = Image(filename, 'worldview3')
        os.makedirs(path)
        try:
            generator = FullGenerator(image, cell_size)
            for feature_vector in extract_features_parallel(features, generator):
                feature_vector.save(path)
        except BaseException:
            # Also covers KeyboardInterrupt (kernel interrupt): a half-filled
            # directory must not be mistaken for a finished one later on.
            shutil.rmtree(path, ignore_errors=True)
            raise
    return paths
        
# Compute the training features (directories that already exist are skipped)
# and keep the directories they are stored in.
train_data_paths = compute_features(train_files)

# Load training data

In [None]:
import numpy as np

# NOTE(review): FeatureVector is used when loading the saved features but was
# never imported; it is assumed to be exported by the top-level satsense
# package — confirm against the installed satsense version.
from satsense import FeatureVector
from satsense import Image
from satsense.features import NirNDVI, WVSI
from satsense.generators import FullGenerator
from satsense.util.mask import get_ndxi_mask, load_mask_from_shapefile, resample

# Integer class codes written into the label arrays.
labels = {
    'other': 0,
    'deprived_neighbourhood': 1,
    'vegetation': 2,
}

x_train = []
y_train = []

for path, image in zip(train_data_paths, [Image(f, 'worldview3') for f in train_files]):
    # Load the saved vector of every feature and join them along the third
    # axis: the code below uses the first two axes as the label grid and the
    # third as the feature axis, so stacking along axis 0 would be wrong.
    feature_vector = np.ma.dstack(
        [FeatureVector.from_file(feature, path).vector for feature in features]
    )

    # One label per position of the first two axes; 0 is 'other'.
    label_vector = np.zeros(feature_vector.shape[:2], dtype=np.uint8)

    # Create deprived neighbourhood labels
    ground_truth = load_mask_from_shapefile(ground_truth_file, image.shape, image.transform)
    ground_truth = resample(FullGenerator(Image(ground_truth), cell_size))
    label_vector[ground_truth] = labels['deprived_neighbourhood']

    # Create vegetation labels. They are written last so that vegetation takes
    # precedence where both masks are set, which is the same precedence the
    # evaluation cell uses when it builds y_test.
    generator = FullGenerator(image, cell_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    label_vector[vegetation_mask] = labels['vegetation']

    # Create x_train and y_train: flatten the grid to one row per position.
    feature_vector.shape = (-1, feature_vector.shape[2])
    label_vector.shape = (-1, )

    x_train.append(feature_vector)
    y_train.append(label_vector)

x_train = np.concatenate(x_train)
y_train = np.concatenate(y_train)

# Train a classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fix the seed so that re-running the notebook trains the same model;
# verbose=True makes fit() report its progress.
classifier = GradientBoostingClassifier(verbose=True, random_state=0)

classifier.fit(x_train, y_train)

# Load test data and assess performance

In [None]:
from sklearn.metrics import classification_report, matthews_corrcoef, confusion_matrix

# Compute the test features (directories that already exist are skipped).
test_data_paths = compute_features(test_files)

for path, filename in zip(test_data_paths, test_files):
    print('Performance on', filename)
    image = Image(filename, 'worldview3')

    # Create x_test: load the saved vector of every feature and join them
    # along the third axis, which is used as the feature axis below.
    x_test = np.ma.dstack(
        [FeatureVector.from_file(feature, path).vector for feature in features]
    )
    shape = x_test.shape
    x_test.shape = (-1, shape[2])

    # Predict the labels
    y_pred = classifier.predict(x_test)

    # Create y_test; 0 is 'other'.
    y_test = np.zeros(shape[:2], dtype=np.uint8)
    # Create deprived neighbourhood labels
    ground_truth = load_mask_from_shapefile(ground_truth_file, image.shape, image.transform)
    ground_truth = resample(FullGenerator(Image(ground_truth), cell_size))
    y_test[ground_truth] = labels['deprived_neighbourhood']
    # Create vegetation labels (written last, so they take precedence)
    generator = FullGenerator(image, cell_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    y_test[vegetation_mask] = labels['vegetation']
    y_test.shape = (-1, )

    # Assess performance

    # Optionally relabel vegetation as 'other' in both prediction and truth
    # to create a more accurate representation of the performance:
    # y_pred[y_pred == labels['vegetation']] = labels['other']
    # y_test[y_test == labels['vegetation']] = labels['other']

    # Pass the class codes explicitly so the report and the confusion matrix
    # always cover all classes, even ones absent from this image.
    class_ids = list(labels.values())
    print(matthews_corrcoef(y_test, y_pred))
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=list(labels.keys())))
    print(confusion_matrix(y_test, y_pred, labels=class_ids))
