In [None]:
import os
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from skimage.io import imread, imread_collection_wrapper

In [None]:
# Extend the module search path with the parent directory *before* the
# project-local imports below; `helpers` and `config` presumably live one
# level above this notebook -- TODO confirm. The path is relative, so it is
# resolved against the kernel's current working directory.
import sys
sys.path.append("../")
from helpers.utils import load_image, tif_to_rgb
from config import TRAIN_PATH, VALIDATION_PATH

# Load data

In [None]:
# Glob pattern matching every TIFF image inside the training directory.
img_file_pattern = "*.tif"
train_imgs_path = os.path.join(TRAIN_PATH, img_file_pattern)

# A custom collection reader is built on top of the plain `imread` function so
# that the raw pixel values stored in the TIFF files are obtained.
# (Original author's note: the default `imread_collection` function returns
# images that are incorrectly scaled between 0 and 255.)
imread_collection_custom = imread_collection_wrapper(imread)

# `conserve_memory=True` is forwarded unchanged to the collection reader.
train_imgs = imread_collection_custom(
    train_imgs_path,
    conserve_memory=True,
)

# Feature extraction

## Spectral features

In [None]:
def extract_spectral_features(im_rgb):
    """
    Summarise an RGB image by per-channel statistics plus overall brightness.

    Every axis except the last one is reduced, so the last axis is treated as
    the channel axis and must hold exactly three entries (R, G, B); otherwise
    the tuple unpacking below raises ``ValueError``.

    Parameters
    ----------
    im_rgb : numpy.ndarray
             image with 3 channels (RGB) on its last axis

    Returns
    -------
    tuple
        ``(mean_r, mean_g, mean_b, std_r, std_g, std_b, brightness)`` where
        brightness is the mean of the three channel means.
    """
    # All leading axes are pixel axes; only the trailing channel axis survives.
    spatial_axes = tuple(range(im_rgb.ndim - 1))

    channel_means = np.mean(im_rgb, axis=spatial_axes)
    channel_stds = np.std(im_rgb, axis=spatial_axes)

    mean_r, mean_g, mean_b = channel_means
    std_r, std_g, std_b = channel_stds

    brightness = np.mean([mean_r, mean_g, mean_b])

    return mean_r, mean_g, mean_b, std_r, std_g, std_b, brightness

# function to obtain NDVI
def extract_ndvi(im_rgb_nir):
    """
    Compute the mean and standard deviation of the normalized difference
    vegetation index (NDVI) of a multispectral image.

    NDVI is ``(nir - red) / (nir + red)`` per pixel. For non-negative band
    values it already lies in [-1, 1], so no min-max rescaling is applied:
    fitting a scaler on a single 2-D image would rescale every pixel column
    independently and make the statistics incomparable between images.

    Parameters
    ----------
    im_rgb_nir : numpy.ndarray
                 image with 4 channels (RGB + NIR); the bands are read as
                 ``im_rgb_nir[0]`` (red) and ``im_rgb_nir[3]`` (NIR), i.e. the
                 band index is the first axis

    Returns
    -------
    tuple
        ``(ndvi_mean, ndvi_std)`` computed over the pixels where NDVI is
        defined (``nir + red != 0``). ``(nan, nan)`` when no pixel qualifies.
    """
    # Cast to float first: raw TIFF bands are often unsigned integers, for
    # which `nir - red` wraps around when red > nir and `nir + red` can
    # overflow.
    red = np.asarray(im_rgb_nir[0], dtype=np.float64)
    nir = np.asarray(im_rgb_nir[3], dtype=np.float64)

    band_sum = nir + red
    # NDVI is undefined where both bands are zero (0/0); leave those pixels
    # out instead of letting NaN/inf poison the statistics.
    defined = band_sum != 0
    if not np.any(defined):
        return np.nan, np.nan

    ndvi = (nir[defined] - red[defined]) / band_sum[defined]

    ndvi_mean = np.mean(ndvi)
    ndvi_std = np.std(ndvi)
    return ndvi_mean, ndvi_std

In [None]:
# Load one sample image and derive its RGB representation.
# NOTE(review): `load_image` and `tif_to_rgb` come from helpers.utils; how the
# file name is resolved to a path is not visible here -- confirm.
im = load_image('train_1.tif')
im_rgb = tif_to_rgb(im)

In [None]:
# Per-channel mean/SD and brightness of the sample image's RGB representation.
mean_r, mean_g, mean_b, std_r, std_g, std_b, brightness = extract_spectral_features(im_rgb)

In [None]:
# NDVI reads band index 3 (NIR), so it is computed from `im` as returned by
# load_image rather than from the RGB conversion.
# NOTE(review): assumes `im` carries at least four bands -- confirm.
ndvi_mean, ndvi_std = extract_ndvi(im)

# Model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fixed seed: a random forest draws bootstrap samples and feature subsets at
# random, so without `random_state` every kernel restart would train a
# different model and results could not be reproduced.
RANDOM_SEED = 42
rf = RandomForestClassifier(n_estimators=500, random_state=RANDOM_SEED)