In [None]:
## Created By Konstantinos Fokeas
## credits to: @DigitalSreeni

In [None]:
import numpy as np
import cv2
import pandas as pd
from scipy import ndimage as nd
 
import pickle
from matplotlib import pyplot as plt
import os
from skimage.io import imread, imshow
from skimage.filters import roberts, sobel, scharr, prewitt
import glob

In [None]:
####################################################################
## STEP 1:   READ TRAINING IMAGES AND EXTRACT FEATURES  - Sentinel 1
####################################################################
img_path = "/content/S1Hand/" ## CHANGE ME!!!


def extract_s1_features(file_path, tile_shape=(128, 128)):
    """Build the per-pixel Sentinel-1 feature table for one image file.

    The file is decoded once. It is expected to be band-first, with VV in
    band 0 and VH in band 1, each band holding ``tile_shape`` pixels
    (the reshape raises if it does not).

    Parameters
    ----------
    file_path : str
        Path of the image to read with ``skimage.io.imread``.
    tile_shape : tuple of int
        (rows, cols) every band is reshaped to. Defaults to 128 x 128.

    Returns
    -------
    pandas.DataFrame
        One row per pixel with the columns VV, VH, VV/VH, Median_s3,
        Variance_s3 and Roberts.
    """
    bands = imread(file_path)
    # generic_filter returns the input dtype by default, so an integer raster
    # would get its variance truncated; work in floating point instead.
    if not np.issubdtype(bands.dtype, np.floating):
        bands = bands.astype(np.float64)

    vv = bands[0].reshape(tile_shape)
    vh = bands[1].reshape(tile_shape)

    return pd.DataFrame({
        'VV': vv.reshape(-1),                      # pixel value itself as a feature
        'VH': vh.reshape(-1),
        # May contain inf/NaN where VH is zero or missing; not cleaned here.
        'VV/VH': (vv / vh).reshape(-1),
        # 3x3 median of VH (size=3 is the window width, not a sigma)
        'Median_s3': nd.median_filter(vh, size=3).reshape(-1),
        # 3x3 local variance of VH
        'Variance_s3': nd.generic_filter(vh, np.var, size=3).reshape(-1),
        # Roberts cross edge magnitude of VH
        'Roberts': roberts(vh).reshape(-1),
    })


# os.listdir() returns names in arbitrary order. The feature tables and the
# label table are later joined row by row, so iterate in sorted order to make
# the row order deterministic.
s1_frames = [
    extract_s1_features(os.path.join(img_path, image))
    for image in sorted(os.listdir(img_path))
]

# DataFrame.append() no longer exists in pandas >= 2.0; concatenate once instead.
# The per-image index (0..n_pixels-1) is kept, exactly as append() produced it.
image_dataset_s1 = pd.concat(s1_frames) if s1_frames else pd.DataFrame()

In [None]:
####################################################################
## STEP 1:   READ TRAINING IMAGES AND EXTRACT FEATURES - Sentinel 2
####################################################################
img_path = "/content/S2Hand/" # CHANGE ME!!!


def extract_s2_features(file_path, tile_shape=(128, 128)):
    """Build the per-pixel Sentinel-2 feature table for one image file.

    The file is decoded once. It is expected to be band-first with
    band 0 = blue, 1 = green, 2 = red, 3 = NIR, each band holding
    ``tile_shape`` pixels (the reshape raises if it does not).

    Parameters
    ----------
    file_path : str
        Path of the image to read with ``skimage.io.imread``.
    tile_shape : tuple of int
        (rows, cols) every band is reshaped to. Defaults to 128 x 128.

    Returns
    -------
    pandas.DataFrame
        One row per pixel with the columns NDWI, NDVI, Median_s3,
        Variance_s3, Roberts, Blue and Green.
    """
    bands = imread(file_path)
    # Promote integer rasters to float: differences of unsigned integers would
    # wrap around, and generic_filter would truncate the variance to the
    # integer input dtype.
    if not np.issubdtype(bands.dtype, np.floating):
        bands = bands.astype(np.float64)

    blue, green, red, nir = (bands[i].reshape(tile_shape) for i in range(4))

    # NDWI = (B03 - B08) / (B03 + B08) = (GREEN - NIR) / (GREEN + NIR).
    # The previous code used red and SWIR here, which did not match this
    # formula or the column name.
    ndwi = (green - nir) / (green + nir)

    # NDVI = (NIR - RED) / (NIR + RED), where RED is B4 and NIR is B8
    ndvi = (nir - red) / (red + nir)

    # Both indices may contain inf/NaN where the denominator is zero;
    # they are not cleaned here.
    return pd.DataFrame({
        'NDWI': ndwi.reshape(-1),
        'NDVI': ndvi.reshape(-1),
        # 3x3 median of NIR (size=3 is the window width, not a sigma)
        'Median_s3': nd.median_filter(nir, size=3).reshape(-1),
        # 3x3 local variance of NIR
        'Variance_s3': nd.generic_filter(nir, np.var, size=3).reshape(-1),
        # Roberts cross edge magnitude of NIR
        'Roberts': roberts(nir).reshape(-1),
        # Raw band values as features
        'Blue': blue.reshape(-1),
        'Green': green.reshape(-1),
    })


# os.listdir() returns names in arbitrary order. The feature tables and the
# label table are later joined row by row, so iterate in sorted order to make
# the row order deterministic.
s2_frames = [
    extract_s2_features(os.path.join(img_path, image))
    for image in sorted(os.listdir(img_path))
]

# DataFrame.append() no longer exists in pandas >= 2.0; concatenate once instead.
# The per-image index (0..n_pixels-1) is kept, exactly as append() produced it.
image_dataset_s2 = pd.concat(s2_frames) if s2_frames else pd.DataFrame()

In [None]:
###########################################################
# CONCATENATE S2 AND S1 DATAFRAMES
##########################################################
# The two tables are placed side by side, so they must describe the same
# pixels in the same order. Fail early with a clear message otherwise.
if len(image_dataset_s1) != len(image_dataset_s2):
    raise ValueError(
        "Sentinel-1 and Sentinel-2 feature tables have different row counts: "
        f"{len(image_dataset_s1)} vs {len(image_dataset_s2)}"
    )

# Both tables contain 'Median_s3', 'Variance_s3' and 'Roberts'. Prefix every
# column with its sensor so the combined table has unique column names.
frames = [
    image_dataset_s1.add_prefix("S1_"),
    image_dataset_s2.add_prefix("S2_"),
]

image_dataset = pd.concat(frames, axis=1, join='outer')

In [None]:
###########################################################
# STEP 2: READ LABELED IMAGES (MASKS) AND CREATE ANOTHER DATAFRAME
#         WITH LABEL VALUES AND LABEL FILE NAMES
###########################################################
# label_path = "/content/S1Label"  (alternative label directory)
mask_path = "/content/S1OtsuLabelHand/" ### CHANGE ME!!!

# os.listdir() returns names in arbitrary order. The labels are later joined
# to the feature table row by row, so iterate in sorted order to make the
# row order deterministic.
mask_frames = []
for mask in sorted(os.listdir(mask_path)):
    label_values = imread(os.path.join(mask_path, mask)).reshape(-1)
    mask_frames.append(
        pd.DataFrame({
            'Label_Value': label_values,   # one row per mask pixel
            'Mask_Name': mask,             # file name repeated on every row
        })
    )

# DataFrame.append() no longer exists in pandas >= 2.0; concatenate once instead.
# The per-mask index (0..n_pixels-1) is kept, exactly as append() produced it.
mask_dataset = pd.concat(mask_frames) if mask_frames else pd.DataFrame()

In [None]:
################################################################
#  STEP 3: GET DATA READY FOR RANDOM FOREST (or other classifier)
#  COMBINE BOTH DATAFRAMES INTO A SINGLE DATASET
################################################################
# Features and labels are placed side by side (column-wise concatenation).
dataset_raw = pd.concat([image_dataset, mask_dataset], axis=1)

# Ratio/index features can hold +/-inf after a division by zero. fillna() does
# not touch infinities and StandardScaler refuses them, so turn them into NaN
# first and then replace every missing value with the -1 sentinel.
dataset_filled = dataset_raw.replace([np.inf, -np.inf], np.nan).fillna(-1)

# Drop rows whose label equals -1 (a missing label after the fill above, or a
# mask pixel that already carried the value -1).
# The former bare `dataset.dropna()` discarded its result and is removed.
dataset = dataset_filled[dataset_filled["Label_Value"] != -1]

# Training features: everything except the two label-related columns
X = dataset.drop(columns=["Mask_Name", "Label_Value"])

# Target vector: the label of every remaining pixel
Y = dataset["Label_Value"].values

In [None]:
## Sanity check: shape of the feature matrix, then of the label vector
for checked in (X, Y):
    print(checked.shape)

In [None]:
################################################################
#  STEP : Scale The Features
################################################################
from sklearn import preprocessing

# NOTE(review): the scaler statistics are computed on every row of X, i.e.
# before the train/test split performed in the next cell.
scaler = preprocessing.StandardScaler()
scaler.fit(X)

# Standardised copy of the full feature matrix
X_scaled = scaler.transform(X)

In [None]:
## Split data into train (70%) and test (30%) to verify accuracy after fitting the model.
from sklearn.model_selection import train_test_split

# A fixed random_state makes the split, and therefore every score computed
# from it, reproducible across "Restart & Run All".
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, Y, test_size=0.3, random_state=20
)

In [None]:
####################################################################
# STEP 4: Define the classifier and fit a model with our training data
####################################################################

# Import training classifier
from sklearn.ensemble import RandomForestClassifier

## Instantiate model with n number of decision trees.
## random_state fixes the bootstrap samples and feature draws so the fitted
## forest is reproducible; n_jobs=-1 builds the trees on all available cores.
model = RandomForestClassifier(
    n_estimators=20,
    verbose=10,
    n_jobs=-1,
    random_state=20,
)

## Train the model on training data
model.fit(X_train, y_train)

In [None]:
#######################################################
# STEP 5: Accuracy check - BINARY CLASSIFICATION
#######################################################

from sklearn import metrics

# Predict the held-out pixels once and score the result several ways.
prediction_test = model.predict(X_test)

# (caption, scoring function) pairs, evaluated and printed in this order.
# IoU (metrics.jaccard_score) is intentionally not reported in this cell.
binary_scores = (
    ("Accuracy = ", metrics.accuracy_score),
    ("Precision = ", metrics.precision_score),
    ("Recall = ", metrics.recall_score),
    ("F1 score = ", metrics.f1_score),
)
for caption, scorer in binary_scores:
    print(caption, scorer(y_test, prediction_test))

In [None]:
#######################################################
# STEP 5: Accuracy check - MULTILABEL CLASSIFICATION
#######################################################

from sklearn import metrics

# Predict the held-out pixels once and score the result several ways.
prediction_test = model.predict(X_test)

##Check accuracy on test dataset.
print("Accuracy = ", metrics.accuracy_score(y_test, prediction_test))

# The remaining scores are micro-averaged (computed from counts pooled over all classes).
micro_scores = (
    ("IOU = ", metrics.jaccard_score),
    ("Precision = ", metrics.precision_score),
    ("Recall = ", metrics.recall_score),
    ("F1 score = ", metrics.f1_score),
)
for caption, scorer in micro_scores:
    print(caption, scorer(y_test, prediction_test, average="micro"))

In [None]:
#######################################################
# STEP 6: Plot Feature Importance
#######################################################

# Order the features from least to most important.
importances = model.feature_importances_
sorted_idx = importances.argsort()
feature_names = np.asarray(image_dataset.columns)[sorted_idx]

# Draw the bars at numeric positions and attach the names as tick labels.
# Passing the names directly as y-values would stack features that share a
# name onto the same bar.
positions = np.arange(len(sorted_idx))

fig, ax = plt.subplots(figsize=(10, 10))
ax.barh(positions, importances[sorted_idx])
ax.set_yticks(positions)
ax.set_yticklabels(feature_names)
ax.set_xlabel("Random Forest Feature Importance")