In [None]:
%load_ext autoreload
%autoreload 2

# Define training and test data

In [None]:
import os

from satsense import WORLDVIEW3, SatelliteImage

# Directory holding the input rasters and the ground-truth shapefile. Set the
# DYNASLUM_DATA_DIR environment variable to run the notebook on another machine
# without editing the paths below; the default is the original location.
DATA_DIR = os.environ.get('DYNASLUM_DATA_DIR', '/home/bandela/DynaSlum/Work')

# Cell size handed to CellGenerator when features and labels are computed.
cell_size = 20, 20

# Window sizes handed to every feature of the feature set.
windows = (
    (50, 50),
    (100, 100),
    (200, 200),
)

# Images used for clustering (SIFT / Texton) and for fitting the classifier.
train_files = tuple(
    os.path.join(DATA_DIR, name)
    for name in (
        'section_1_multiband.tif',
        'section_2_multiband.tif',
    )
)

# Image held out for assessing the classifier.
test_files = (
    os.path.join(DATA_DIR, 'section_3_multiband.tif'),
)

# Shapefile that is rasterised into the deprived-neighbourhood mask.
ground_truth_file = os.path.join(DATA_DIR, 'slum_approved.shp')

def get_image_iterator(files):
    """Lazily load every file in ``files`` as a WORLDVIEW3 ``SatelliteImage``.

    Nothing is read until the returned iterator is consumed, and each image
    is loaded only when the iteration reaches it. The iterator is single-use:
    call this function again to make another pass over the same files.
    """
    for filename in files:
        yield SatelliteImage.load_from_file(filename, WORLDVIEW3)


# Define the set of features for classification

In [None]:
from satsense.features import (FeatureSet, NirNDVI, HistogramOfGradients, Pantex, Sift,
                               Lacunarity, Texton, sift_cluster, texton_cluster)

# Container for every feature that is extracted for the classification.
features = FeatureSet()

# Features that are configured with the window sizes only.
ndvi = NirNDVI(windows)
hog = HistogramOfGradients(windows)
pantex = Pantex(windows)
lacunarity = Lacunarity(windows)

# SIFT and Texton additionally take a k-means clustering, which is computed
# here from the training images.
sift_kmeans = sift_cluster(get_image_iterator(train_files))
sift = Sift(windows=windows, kmeans=sift_kmeans)

texton_kmeans = texton_cluster(get_image_iterator(train_files))
texton = Texton(windows=windows, kmeans=texton_kmeans)

# Register the features in a fixed order.
for feature in (ndvi, hog, pantex, lacunarity, sift, texton):
    features.add(feature)


# Compute and save features

In [None]:
import os

from satsense import extract_features_parallel, save_features
from satsense.generators import CellGenerator

def compute_features(filenames):
    """Compute and save the features of every image, reusing saved results.

    Each image gets a directory named after the image (base name without
    extension, relative to the current working directory) that is used as
    the filename prefix when saving its features. A directory that already
    exists and is not empty is treated as a cache and the image is skipped.

    Parameters
    ----------
    filenames : iterable of str
        Paths of the images to process.

    Returns
    -------
    list of str
        The feature directory (with trailing path separator) of every image,
        in the order of ``filenames``.
    """
    paths = []
    for image in get_image_iterator(filenames):
        path = os.path.splitext(os.path.basename(image.name))[0] + os.sep
        paths.append(path)

        # Only a non-empty directory counts as cached: an empty one is what
        # an interrupted earlier run may have left behind.
        if os.path.isdir(path) and os.listdir(path):
            continue

        generator = CellGenerator(image, cell_size)
        results = extract_features_parallel(features, generator)

        # Create the directory only once the extraction has succeeded, so a
        # failed or interrupted run cannot masquerade as a finished one.
        os.makedirs(path, exist_ok=True)
        save_features(features, results, filename_prefix=path)
    return paths
        
# Compute the features of the training images (images whose feature directory
# already exists are skipped) and keep the directories they are stored in.
train_data_paths = compute_features(train_files)

# Load training data

In [None]:
import numpy as np

from satsense import load_features
from satsense.image import Image
from satsense.util.mask import get_ndxi_mask, load_mask_from_shapefile, resample
from satsense.features import NirNDVI, WVSI
from satsense.generators import CellGenerator

# Integer class id of every label name used for training and evaluation.
labels = {
    'other': 0,
    'deprived_neighbourhood': 1,
    'vegetation': 2,
}

# Per-image pieces; concatenated into single arrays after the loop.
x_train = []
y_train = []

for path, image in zip(train_data_paths, get_image_iterator(train_files)):
    # Load feature vector
    feature_vector = load_features(features, path)
    # One label per entry of the first two axes of the features;
    # everything starts out as 'other'.
    label_vector = np.zeros(feature_vector.shape[:2], dtype=np.uint8)

    # Create vegetation labels
    generator = CellGenerator(image, cell_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    label_vector[vegetation_mask] = labels['vegetation']

    # Create deprived neighbourhood labels. They are assigned last, so they
    # take precedence over vegetation wherever the two masks overlap.
    ground_truth = load_mask_from_shapefile(ground_truth_file, image.shape, image.transform)
    ground_truth = resample(CellGenerator(Image(ground_truth), cell_size))
    label_vector[ground_truth] = labels['deprived_neighbourhood']

    # Create x_train and y_train: flatten the first two axes. reshape() is
    # used instead of assigning to .shape, which NumPy discourages and which
    # raises for arrays that cannot be reshaped without a copy.
    feature_vector = feature_vector.reshape(-1, feature_vector.shape[2])
    label_vector = label_vector.reshape(-1)

    x_train.append(feature_vector)
    y_train.append(label_vector)

x_train = np.concatenate(x_train)
y_train = np.concatenate(y_train)

# Every feature row must have exactly one label.
assert x_train.shape[0] == y_train.shape[0], 'features and labels are misaligned'

# Train a classifier

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Fix the seed of the estimator's random number generator so that re-running
# the notebook on the same data produces the same model.
classifier = GradientBoostingClassifier(verbose=True, random_state=0)

classifier.fit(x_train, y_train)

# Load test data and assess performance

In [None]:
from sklearn.metrics import classification_report, matthews_corrcoef, confusion_matrix

test_data_paths = compute_features(test_files)

# Use the same class order for the report and the confusion matrix so that
# their rows line up even when a class does not occur in an image.
class_names = list(labels.keys())
class_ids = list(labels.values())

for path, image in zip(test_data_paths, get_image_iterator(test_files)):
    print('Performance on', image.name)
    # Create x_test: flatten the first two axes. reshape() is used instead of
    # assigning to .shape, which NumPy discourages.
    x_test = load_features(features, path)
    shape = x_test.shape
    x_test = x_test.reshape(-1, shape[2])

    # Predict the labels
    y_pred = classifier.predict(x_test)

    # Create y_test in the same way as the training labels: vegetation first,
    # deprived neighbourhood last, so that the ground truth takes precedence
    # where both masks overlap. Building the test labels with the opposite
    # precedence would score the classifier against a different labelling
    # than the one it was trained on.
    y_test = np.zeros(shape[:2], dtype=np.uint8)
    # Create vegetation labels
    generator = CellGenerator(image, cell_size)
    vegetation_mask = get_ndxi_mask(generator, NirNDVI)
    y_test[vegetation_mask] = labels['vegetation']
    # Create deprived neighbourhood labels
    ground_truth = load_mask_from_shapefile(ground_truth_file, image.shape, image.transform)
    ground_truth = resample(CellGenerator(Image(ground_truth), cell_size))
    y_test[ground_truth] = labels['deprived_neighbourhood']
    y_test = y_test.reshape(-1)

    # Assess performance

    # Label the vegetation as buildings to create more accurate representation of the performance
    # y_pred[y_pred == labels['vegetation']] = labels['other']
    # y_test[y_test == labels['vegetation']] = labels['other']

    print(matthews_corrcoef(y_test, y_pred))
    print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
    print(confusion_matrix(y_test, y_pred, labels=class_ids))
