In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
# importing the necessary packages
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # for plotting
import tifffile as tiff # for reading tiff images

from tqdm.notebook import tqdm # for the beautiful progress-bars
from myfunctions import *

In [None]:
PATH_ORIGINAL_IMAGES = "../../../Nextcloud2/" # the folder with all the images
MSP_FOLDER = "./data/drone_msp_numpy/" # the folder with preprocessed MSP images
PATH_LABELS = "./data/labels_MSP_4classes.csv" # the file with labelled points
PATH_META_LANDCOVER = "./data/msp_meta_landcover.csv" # the per-image metadata table
PATH_RESULT_JPG = "./data/MSP_masks_jpg/" # output folder handed to plot_clfres
PATH_RESULT_MSP = './data/MSP_masks_tiff/' # output folder handed to save_mask

# Creating the output directories if they do not exist.
# makedirs(exist_ok=True) also creates missing parent folders and does not
# fail when the directory is already there (no check-then-create race).
for result_dir in (PATH_RESULT_MSP, PATH_RESULT_JPG):
    os.makedirs(result_dir, exist_ok=True)

In [None]:
# Recursively collecting the full path of every file below PATH_ORIGINAL_IMAGES.
# A single comprehension avoids re-copying the whole list for each file
# (the previous `drfiles = drfiles + [...]` was quadratic in the file count).
drfiles = [
    os.path.join(dirpath, filename)
    for dirpath, _subdirs, filenames in os.walk(PATH_ORIGINAL_IMAGES)
    for filename in filenames
]
# Keeping only the paths that contain "_nir" (the match is on the whole path,
# so a folder name containing "_nir" also counts).
grefiles = np.array([filepath for filepath in drfiles if "_nir" in filepath])

In [None]:
# Loading the per-image metadata table; its "water", "soil" and "snow" columns
# tell whether that landcover is present on the picture.
df = pd.read_csv(PATH_META_LANDCOVER, index_col=0)
# Names of the images flagged as dark (dark_image == 1)
dark_imgs = df.loc[df["dark_image"] == 1, "name"]
df.head()

In [None]:
# Loading the labelled points (training data). The CSV has no header row, so
# the columns are picked by position and given readable names.
labdat = (
    pd.read_csv(PATH_LABELS, header=None)[[3, 0, 2, 1]]
    .rename(columns={3: "imname", 0: "label", 2: "x", 1: "y"})
)
# The image name is everything before "_false" in the stored name
labdat["imname"] = labdat["imname"].map(lambda stored: stored.split("_false")[0])
# The region is the second "_"-separated token of the image name
labdat["region"] = labdat["imname"].map(lambda imname: imname.split("_")[1])
# Dark images are put into their own "<region>_dark" region
from_dark_img = labdat["imname"].isin(dark_imgs)
labdat.loc[from_dark_img, "region"] = labdat.loc[from_dark_img, "region"] + "_dark"
labdat.head(5)

In [None]:
train_imnames = np.unique(labdat["imname"])
# For every training image: read the band values at its labelled points and
# divide them by the 99th percentile of the whole image array.
for name_im in tqdm(train_imnames):
    msp_names = names_in_folder(name_im, MSP_FOLDER)
    msp = np.load(MSP_FOLDER + msp_names[0])
    inds_im = labdat.imname == name_im
    labelled_pts_im = labdat[inds_im]
    q99_im = np.quantile(msp, 0.99)
    # NOTE(review): x indexes the first array axis and y the second — confirm
    # this matches the convention of the labelling tool.
    labdat.loc[inds_im, ["r", "g", "reg", "nir"]] = (
        msp[labelled_pts_im.x, labelled_pts_im.y, :] / q99_im
    )

# Calculating the indexes
nir_vals = labdat["nir"]
red_vals = labdat["r"]
labdat["ndvi"] = (nir_vals - red_vals) / (nir_vals + red_vals)
labdat["nir_r"] = nir_vals - red_vals
# NOTE(review): the red band is added twice here (nir + r + r). If "sumb" is
# meant to be a sum of distinct bands this may be a typo, but it has to stay in
# sync with calculating_features in myfunctions — confirm before changing.
labdat["sumb"] = nir_vals + red_vals + red_vals
labdat.head(5)

In [None]:
from myfunctions import *

    

In [None]:
def classif_imname_region(name_im, labdat, grefiles,
                          featurenames = ["ndvi", "nir_r", "sumb","r","nir","g", "reg"], excl_low_imp = True,
                          metadata = None):
    """Classify one preprocessed MSP image and save its landcover masks.

    The region is taken from the image name (with a "_dark" suffix for images
    flagged as dark in the metadata), a Random Forest is fitted for that region
    on the labelled points and used to predict a label for every pixel. If the
    metadata sets a positive ``const_water`` or ``const_soil``, the prediction
    is post-processed with superpixels. Finally the masks are saved to
    PATH_RESULT_MSP and an overview picture is written to PATH_RESULT_JPG.

    Parameters
    ----------
    name_im : str
        Name of the image; the second "_"-separated token is the region and
        the part before "_msp" is the name looked up in the metadata.
    labdat : pandas.DataFrame
        Labelled training points, handed to ``fitting_rf_for_region``.
    grefiles : array-like
        Paths of the original files, handed through to ``save_mask``.
    featurenames : list of str
        Features offered to ``fitting_rf_for_region``; the list it returns is
        the one actually used for prediction.
    excl_low_imp : bool
        Forwarded to ``fitting_rf_for_region`` (presumably toggles exclusion of
        low-importance features — confirm in myfunctions).
    metadata : pandas.DataFrame, optional
        Per-image metadata with the columns ``name``, ``dark_image``,
        ``const_water``, ``const_soil``, ``water``, ``soil`` and ``snow``.
        Defaults to the notebook-level ``df``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the metadata does not contain exactly one row for the image.
    """
    if metadata is None:
        metadata = df # falling back to the notebook-level metadata table

    ## Getting information about the image
    regim = name_im.split("_")[1] # determining the region of the image
    nam = name_im.split("_msp")[0]
    metadat = metadata[metadata["name"] == nam]
    if len(metadat) != 1:
        # Every .item() call below needs exactly one row; fail early with a
        # message that names the offending image.
        raise ValueError(
            f"Expected exactly one metadata row for image '{nam}', found {len(metadat)}"
        )
    if metadat.dark_image.item() == 1:
        regim = regim + "_dark"

    ## Reading out an image
    msp_names = names_in_folder(name_im, MSP_FOLDER)
    msp = np.load(os.path.join(MSP_FOLDER, msp_names[0])) # reading out preprocessed image
    msp3 = image_minmax(msp[:,:,[3,0,1]]) # creating falsecolor image (NIR, red, green)

    ## Fitting Random Forest classifier and predicting the labels
    # A copy is passed so that the helper can never modify the shared default
    # list of this function between calls.
    dct = fitting_rf_for_region(labdat, regim, list(featurenames), excl_low_imp) # fitting RF on training data
    clf = dct["clf"]
    featurenames = dct["featurenames"]

    dim = calculating_features(msp, featurenames) # calculating the indexes
    prlab = clf.predict(dim) # predicting labels for the image
    prlab = prlab.reshape((msp.shape[0], msp.shape[1])) # reshaping prediction back to the image shape

    # threshold for taking the superpixel, 0 if there is no water
    const_water = metadat.const_water.item()
    # threshold for taking the superpixel, 0 if we don't want to add superpixels
    const_soil = metadat.const_soil.item()

    if const_water > 0 or const_soil > 0: # if we use the superpixel postprocessing
        img, segm, centers_norm = slic_segm(msp3, n_segments=50000, compactness = 8) # Segmenting the image
        clust_segm1 = spat_segm(msp, img, segm, centers_norm, n_clust1=1000) # Clustering the superpixels

        if const_water > 0: # if there is water
            # we replace water by superpixels, containing (fraction of water) > const_water
            prlab = create_mask(prlab, clust_segm1, const_water, "water", replace = True)

        if const_soil > 0: # if there is soil and we want to add superpixels
            # we add to soil the superpixels, containing (fraction of soil) > const_soil
            prlab = create_mask(prlab, clust_segm1, const_soil, "soil")

    ## Saving the masks
    for maskname in ["water", "soil", "snow"]:
        if metadat[maskname].item() == 1: # if this landcover is present, save the mask
            save_mask(prlab, maskname, nam, grefiles, PATH_RESULT_MSP)
        else: # else - replace it with vegetation
            prlab[prlab == maskname] = "vegetation"

    # Saved last, so it includes the pixels relabelled as vegetation above
    save_mask(prlab, "vegetation", nam, grefiles, PATH_RESULT_MSP)

    plot_clfres(prlab, msp3, name_im, path_result = PATH_RESULT_JPG) # plotting the result and saving image
    

In [None]:
for i, name_im in tqdm(enumerate(os.listdir(MSP_FOLDER))):
    print(i, name_im)
    %time classif_imname_region(name_im, labdat, grefiles)