In [1]:
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from skimage.io import imread, imread_collection_wrapper, concatenate_images

In [2]:
import sys
sys.path.append("../")
from helpers.utils import load_image, tif_to_rgb
from config import TRAIN_PATH, VALIDATION_PATH

# Load data

In [3]:
def imread_tiff_rgb_nir(fname, *args, **kwargs):
    """Read a 4-band TIFF and return it with channels ordered R, G, B, NIR.

    ``fname`` and every extra positional/keyword argument are forwarded
    unchanged to ``imread``.
    """
    # The bands are stored as B, G, R, NIR, so select them in the order that
    # yields R, G, B, NIR along the third axis.
    rgb_nir_order = [2, 1, 0, 3]
    raw = imread(fname, *args, **kwargs)
    return raw[:, :, rgb_nir_order]

In [4]:
# Glob pattern matching every TIFF image in the training directory.
img_file_pattern = "*.tif"
train_imgs_path = os.path.join(TRAIN_PATH, img_file_pattern)

# A custom collection loader built on `imread_tiff_rgb_nir` is required in
# order to obtain the raw values from the TIFF images: the default
# `imread_collection` function returns images that are incorrectly scaled
# between 0 and 255.
imread_collection_custom = imread_collection_wrapper(imread_tiff_rgb_nir)
train_imgs = imread_collection_custom(train_imgs_path, conserve_memory=True)

# Feature extraction

## spectral features

In [9]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import FeatureUnion

In [6]:
class BaseFeatureExtractor(TransformerMixin, BaseEstimator):
    """
    Common base class for the image feature extractors in this notebook.

    Inherits ``TransformerMixin`` (which supplies ``fit_transform``) and
    ``BaseEstimator`` (which supplies ``get_params`` / ``set_params`` so the
    extractors can be cloned and composed inside ``FeatureUnion`` and
    ``Pipeline``). The mixin is listed first, as scikit-learn recommends.

    Subclasses must override ``fit`` and ``transform``.
    """
    def __init__(self):
        # Axes reduced when computing per-image statistics. The subclasses
        # index their input as (image, row, column, channel), so axes 1 and 2
        # are the pixel rows and columns of each image.
        self.pixels_axis = (1, 2)

    def fit(self, imgs, y=None):
        """Must be overridden by subclasses; always raises NotImplementedError here."""
        raise NotImplementedError

    def transform(self, imgs, y=None):
        """Must be overridden by subclasses; always raises NotImplementedError here."""
        raise NotImplementedError

class SpectralFeatureExtractor(BaseFeatureExtractor):
    """
    Per-image colour statistics.

    For every image, ``transform`` produces seven columns, in this order:
    the mean of each colour channel (R, G, B), the brightness (the mean of
    those three channel means), and the standard deviation of each colour
    channel (R, G, B).

    Parameters
    ----------
    imgs : numpy.ndarray
           set of images, each with 4 channels (R,G,B,NIR); only the first
           three channels are used
    """
    def __init__(self):
        super().__init__()

    def fit(self, imgs, y=None):
        # Nothing is learned from the data; the extractor is stateless.
        return self

    def transform(self, imgs, y=None):
        # Keep only the first three channels (the colour bands).
        colour = imgs[:, :, :, :3]

        # Statistics over the pixel axes: one row per image, one column per channel.
        channel_means = np.mean(colour, axis=self.pixels_axis)
        channel_sds = np.std(colour, axis=self.pixels_axis)

        # Brightness as a column vector so it can sit between the two blocks.
        brightness = np.mean(channel_means, axis=1, keepdims=True)

        columns = [channel_means, brightness, channel_sds]
        return np.concatenate(columns, axis=1)

class NDVIFeatureExtractor(BaseFeatureExtractor):
    """
    Extracts the normalized difference vegetation index (NDVI) from
    multispectral images.

    NDVI is computed per pixel as ``(nir - red) / (nir + red)``. For every
    image, ``transform`` produces two columns: the mean and the standard
    deviation of the NDVI over all pixels of that image.

    Parameters
    ----------
    imgs : numpy.ndarray
           set of images, each with 4 channels (R,G,B,NIR)
    """
    def __init__(self):
        super().__init__()

    def fit(self, imgs, y=None):
        # Nothing is learned from the data; the extractor is stateless.
        return self

    def transform(self, imgs, y=None):
        # Cast to float before doing arithmetic: with unsigned integer bands
        # `nir - red` wraps around whenever red > nir, and `nir + red` can
        # overflow the integer range.
        red = imgs[:, :, :, 0].astype(np.float64)
        nir = imgs[:, :, :, 3].astype(np.float64)

        difference = nir - red
        total = nir + red

        # NDVI is undefined where both bands are zero. Report 0 for those
        # pixels instead of NaN/inf, which would otherwise propagate into the
        # per-image mean and standard deviation.
        ndvi = np.divide(
            difference,
            total,
            out=np.zeros_like(difference),
            where=total != 0,
        )

        ndvi_means = np.mean(ndvi, axis=self.pixels_axis)
        ndvi_sds = np.std(ndvi, axis=self.pixels_axis)

        # One row per image: [mean, standard deviation].
        return np.column_stack((ndvi_means, ndvi_sds))
        

In [31]:
# Combine both extractors; FeatureUnion places their outputs side by side,
# spectral columns first, then the NDVI columns.
feature_extractor = FeatureUnion(
    transformer_list=[
        ("spectral", SpectralFeatureExtractor()),
        ("ndvi", NDVIFeatureExtractor()),
    ]
)

In [34]:
# Try the extractor on the first training image only: stack that
# one-image slice of the collection into a single array, then extract.
subset = concatenate_images(
    train_imgs[:1]
)
features = feature_extractor.fit_transform(
    subset
)

# model

In [38]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier

In [39]:
# warm_start is required for out-of-core learning (in batches): repeated
# fit() calls keep the trees that were already grown instead of starting over.
# NOTE: per the scikit-learn documentation, new trees are only fitted when
# n_estimators is increased between calls to fit().
classifier = RandomForestClassifier(n_estimators=500, warm_start=True)

# Full system: feature extraction (the FeatureUnion `feature_extractor`
# defined earlier in the notebook) followed by the classifier.
system = Pipeline([
    ('features', feature_extractor),
    ('classifier', classifier)
])