In [None]:
# Rendering matplotlib figures inline, directly below the cells
%matplotlib inline
# Reloading imported modules automatically, so that edits to local modules
# are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

In [None]:
# importing the necessary packages
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt # for plotting
import tifffile as tiff # for reading tiff images

from tqdm.notebook import tqdm # for the beautiful progress-bars
from myfunctions import *

In [None]:
PATH_ORIGINAL_IMAGES = "../../../Nextcloud2/" # the folder with all the images
RGB_FOLDER = "./data/drone_rgb/" # the folder with preprocessed RGB images
PATH_LABELS = "./data/labels_RGB_4classes.csv" # the file with labelled points
PATH_META_LANDCOVER = "./data/meta_rgb_landcover.csv" # the file with per-image landcover metadata
PATH_RESULT_JPG = "./data/RGB_masks_jpg/" # the output folder for the result plots
PATH_RESULT_RGB = './data/RGB_masks_tiff/' # the output folder for the saved masks

# Creating the output directories if they do not exist.
# makedirs(exist_ok=True) also creates missing parent folders and avoids the
# race between checking for the folder and creating it.
for out_dir in (PATH_RESULT_RGB, PATH_RESULT_JPG):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
# Collecting every file below RGB_FOLDER (sub-folders included)
drfiles = [
    os.path.join(path, name)
    for path, subdirs, files in os.walk(RGB_FOLDER)
    for name in files
]
# Keeping only the RGB images. The test is made on the file name and not on
# the full path: RGB_FOLDER itself contains "_rgb", so testing the full path
# would accept every file in the folder.
grefiles = np.array([f for f in drfiles if "_rgb" in os.path.basename(f)])
len(grefiles)

In [None]:
# Loading the per-image metadata: whether soil, water or snow are present on the picture
df = pd.read_csv(filepath_or_buffer=PATH_META_LANDCOVER, index_col=0)
df.head(5)

In [None]:
# Loading the labelled points (training data); the file has no header row,
# so the columns are picked by position and named afterwards
labdat = pd.read_csv(PATH_LABELS, header=None)[[3, 0, 2, 1]]
labdat.columns = ["imname", "label", "x", "y"]
# Image name = everything before "_rgb"
labdat["imname"] = [full_name.partition("_rgb")[0] for full_name in labdat["imname"]]
# Region = second "_"-separated token of the image name
labdat["region"] = [short_name.split("_")[1] for short_name in labdat["imname"]]
labdat.head()

In [None]:
# Sampling the colour of every labelled point from its image
train_imnames = np.unique(labdat["imname"])
for name_im in tqdm(train_imnames):
    rgb_names = names_in_folder(name_im, RGB_FOLDER)[0]
    im = tiff.imread(RGB_FOLDER + rgb_names) / 255
    inds_im = labdat.imname == name_im  # rows of the points labelled on this image
    points_im = labdat[inds_im]
    # Each image is normalized to its own 99th percentile
    labdat.loc[inds_im, ["r", "g", "b"]] = im[points_im.x, points_im.y, :] / np.quantile(im, 0.99)

# Deriving the indexes from the sampled colours
rgb_total = labdat["r"] + labdat["g"] + labdat["b"]
labdat["rg"] = labdat["r"] / labdat["g"]
labdat["br"] = labdat["b"] / rgb_total
labdat["sumb"] = rgb_total
labdat.head()

In [None]:
def classif_imname_region(name_im, labdat, grefiles,
                          featurenames = ["rg", "br", "sumb","r","g", "b"], excl_low_imp = True):
    """Classify one RGB image into landcover classes and save the masks.

    A Random Forest is fitted on the labelled points of the image's region,
    every pixel of the image is classified, the prediction is optionally
    post-processed with superpixels, and the resulting masks and an overview
    plot are written to disk.

    The function reads the notebook-level metadata frame ``df`` and the
    constants ``RGB_FOLDER``, ``PATH_RESULT_RGB`` and ``PATH_RESULT_JPG``.

    Parameters
    ----------
    name_im : str
        Name of the image. The region is taken from its second
        "_"-separated token, and the part before "_rgb" is the name used
        when saving the masks.
    labdat : pandas.DataFrame
        Labelled training points, handed to ``fitting_rf_for_region``.
    grefiles : array-like
        Handed unchanged to ``save_mask``.
    featurenames : list of str
        Names of the features offered to the classifier. The list is copied
        before use, so the shared default is never modified.
    excl_low_imp : bool
        Handed to ``fitting_rf_for_region`` (presumably excludes the
        features with low importance -- TODO confirm against the helper).

    Raises
    ------
    ValueError
        If ``df`` does not contain exactly one row for ``name_im``.
    """

    ## Getting information about the image
    regim = name_im.split("_")[1] # determining the region of the image
    nam = name_im.split("_rgb")[0]
    metadat = df[df.name == name_im]
    # Failing early and clearly, before the expensive fitting and prediction,
    # instead of with a cryptic .item() error afterwards
    if len(metadat) != 1:
        raise ValueError(
            "expected exactly one metadata row for image %r, found %d"
            % (name_im, len(metadat))
        )

    # threshold for taking the superpixel, 0 if there is no water
    const_water = metadat.const_water.item()
    # threshold for taking the superpixel, 0 if we don't want to add superpixels
    const_soil = metadat.const_soil.item()

    ## Reading out an image
    rgb_names = names_in_folder(name_im, RGB_FOLDER)[0]
    im = tiff.imread(RGB_FOLDER + rgb_names)/255 # reading out preprocessed image

    ## Fitting Random Forest classifier and predicting the labels
    # A copy of the feature list is passed, so that the helper can never
    # alter the default argument shared between calls
    dct = fitting_rf_for_region(labdat, regim, list(featurenames), excl_low_imp) # fitting RF on training data
    clf = dct["clf"]
    featurenames = dct["featurenames"] # the features actually used by the fitted classifier

    dim = calculating_features_rgb(im, featurenames) # calculating the indexes
    prlab = clf.predict(dim) # predicting labels for the image
    prlab = prlab.reshape(im.shape[:2]) # reshaping prediction back to the image shape

    if const_water > 0 or const_soil > 0: # if we use the superpixel postprocessing
        img, segm, centers_norm = slic_segm(im, n_segments=50000, compactness = 8) # Segmenting the image
        clust_segm1 = spat_segm(im, img, segm, centers_norm, n_clust1=1000) # Clustering the superpixels

        if const_water > 0: # if there is water
            # we replace water by superpixels, containing (fraction of water) > const_water
            prlab = create_mask(prlab, clust_segm1, const_water, "water", replace = True)

        if const_soil > 0: # if there is soil and we want to add superpixels
            # we add to soil the superpixels, containing (fraction of soil) > const_soil
            prlab = create_mask(prlab, clust_segm1, const_soil, "soil")

    ## Saving the masks
    for maskname in ["water", "soil", "snow"]:
        if metadat[maskname].item() == 1: # if this landcover is present, save the mask
            save_mask(prlab, maskname, nam, grefiles, PATH_RESULT_RGB)
        else: # else - replace it with vegetation
            prlab[prlab == maskname] = "vegetation"

    # vegetation is saved last: by now it also holds the pixels of absent landcovers
    save_mask(prlab, "vegetation", nam, grefiles, PATH_RESULT_RGB)

    plot_clfres(prlab, im*255, name_im, path_result = PATH_RESULT_JPG) # plotting the result and saving image
    

In [None]:
for i, name_im in tqdm(enumerate(os.listdir(RGB_FOLDER)[0:2])):
    print(i, name_im)
    %time classif_imname_region(name_im, labdat, grefiles)