In [None]:
import tensorflow as tf  
import matplotlib.pyplot as plt
import tifffile as tiff
import numpy as np
from PIL import Image
import fiona
import datetime
from tensorflow.keras import models
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import keras
import tensorflow_addons as tfa
import rasterio
import os
import pandas as pd
import cv2

import geopandas as gpd

from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

from FUNCTIONS_Seedlings import *

## CUDA

In [None]:
# TF1-compat session configuration: ask TensorFlow to grow its GPU memory
# allocation on demand instead of reserving it all up front.
config = ConfigProto()
config.gpu_options.allow_growth = True
# The session object is kept in `session` so it stays alive for the whole notebook.
session = InteractiveSession(config=config)

In [None]:
import tensorflow as tf

# Get the list of physical GPU devices visible to TensorFlow
physical_devices = tf.config.list_physical_devices('GPU')

# Configure memory growth for each physical device.
# TensorFlow refuses to change device settings once the runtime has been
# initialised (RuntimeError) or when they conflict with an existing
# configuration (ValueError); report that instead of aborting the notebook.
try:
    for device in physical_devices:
        tf.config.experimental.set_memory_growth(device, True)
except (RuntimeError, ValueError) as err:
    print('Could not enable GPU memory growth: {}'.format(err))


One possible solution is to set `config.gpu_options.per_process_gpu_memory_fraction` to a larger value.

The other suggested solution is to reinstall CUDA.

https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory

In [None]:
# Cap the first GPU at 4000 MB through a virtual device, with memory growth on.
physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    try:
        tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)
        tf.config.experimental.set_virtual_device_configuration(
            physical_devices[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)])
    except (RuntimeError, ValueError) as err:
        # Raised when the GPU has already been initialised or when this cell is
        # re-run after the virtual device was configured.
        print('Could not (re)configure the GPU: {}'.format(err))

In [None]:
# os.environ['CUDA_VISIBLE_DEVICES'] = '/gpu:0'

https://developer.nvidia.com/rdp/cudnn-archive

In [None]:
def filter_location(tif, tif_array, mask):
    '''Crop the label raster so that it covers exactly the same area as the tile.

    The tile (tif) is usually a small part of the area covered by the label
    layer (mask). Arrays carry no coordinates, so before patches are cut the
    label array has to be cropped to the tile extent using the georeferenced
    bounds of both rasters.

    tif:        open raster dataset of the tile; only its ``.bounds``
                (left, bottom, right, top) are used here
    tif_array:  the tile already read as a numpy array; returned unchanged
    mask:       open raster dataset of the label layer; must provide
                ``.bounds``, ``.res`` and ``.read()`` returning an array of
                shape (bands, rows, cols). It should have the same
                resolution as "tif".

    Returns
    (tif_array, mask_array) where mask_array has shape (bands, height, width)
    and is co-aligned with the tile.

    Raises
    ValueError if the tile starts outside (above / left of) the label layer.
    '''

    ##### Tile bounds are taken from the dataset itself, not from notebook globals
    left, bottom, right, top = tif.bounds

    ##### Label layer bounds (usually a larger area than the tiled "tif" layer)
    mask_left, mask_bottom, mask_right, mask_top = mask.bounds
    print('\nlabel layer bounds: ', mask.bounds)

    ##### Resolution of the label layer (should be the same for tif and mask!)
    resolution = mask.res[0]
    print(resolution)

    ##### Offset and size of the tile inside the label layer, in pixels.
    ##### round() instead of plain int(): float division can give 99.9999 for 100.
    bottom_new = int(round((mask_top - top) / resolution))
    height = int(round((top - bottom) / resolution))
    left_new = int(round((left - mask_left) / resolution))
    width = int(round((right - left) / resolution))
    print('NEW coverage is from {} + {} and from {} to {}'.format(bottom_new, height, left_new, width))

    if bottom_new < 0 or left_new < 0:
        # Negative offsets would silently wrap around when used as slice starts.
        raise ValueError('Tile bounds {} start outside the label layer bounds {}'.format(
            tuple(tif.bounds), tuple(mask.bounds)))

    ##### Reading label layer as numpy array: (bands, rows, cols)
    mask_array = mask.read()
    print('Shape of mask array:', mask_array.shape)

    ##### Cropping rows and columns (the band axis is kept untouched)
    mask_array = mask_array[:, bottom_new:bottom_new+height, left_new:left_new+width]
    print('Shape of mask array:', mask_array.shape)
    return tif_array, mask_array

In [None]:
def label_list(folder, ending_img, ending_label):
    '''Collect training images in a folder and the label path expected for each.

       Every file in "folder" whose name ends with "ending_img" is a training
       image; its label path is the same name with that ending replaced by
       "ending_label". A missing label file is only reported (printed) - its
       path is still added to the output list.

       folder:       directory with both training images and their labels
       ending_img:   ending of training image names, e.g. "rgb_256.png"
       ending_label: ending of label names, e.g. "label_256.png"

       Output:
       rgb_fls:   list of training images (full path)
       label_fls: list of their labels (full path), same order as rgb_fls
       '''

    ##### Training images found in the folder
    img_suffix = str(ending_img)
    rgb_fls = []
    for entry in os.listdir(folder):
        if entry.endswith(img_suffix):
            rgb_fls.append(os.path.join(folder, entry))

    ##### Matching label path for every image
    label_fls = []
    for img_path in rgb_fls:
        stem = os.path.basename(img_path)[:-len(ending_img)]
        name = str(stem) + str(ending_label)
        label_path = os.path.join(folder, name)
        if not os.path.exists(label_path):
            print(name)
            print('Oooops! Check the path!')
        label_fls.append(label_path)
    return rgb_fls, label_fls

def checks(arr, norm = True):
    '''Sanity checks for a patch: returns 1 if any check fails, otherwise 0.

    arr:   numpy array holding the patch
    norm:  when True, also verify that the maximum value does not exceed 1

    A message is printed for every failed check.
    '''
    ##### Was the image normalized?
    too_bright = norm == True and arr.max() > 1
    if too_bright:
        print('Houston, we have got a problem with 255!')

    ##### Is the patch square (first two dimensions equal)?
    not_square = arr.shape[0] != arr.shape[1]
    if not_square:
        print('Houston, we have got a problem with shape!')

    return 1 if (too_bright or not_square) else 0

def norm(path, greyscale=False):
    '''Read an image with cv2 and min-max normalize it to the range 0..1.

    path:      path to the greyscale or rgb image (usually .png)
    greyscale: if False the image is read in colour, otherwise as a single
               greyscale band (its shape is then printed)

    output: normalized image as a float32 array
    '''

    ##### Reading the single image (uint8)
    read_in_colour = greyscale == False
    read_flag = cv2.IMREAD_COLOR if read_in_colour else cv2.IMREAD_GRAYSCALE
    image = cv2.imread(path, read_flag)
    if not read_in_colour:
        print(image.shape)

    ##### Min-max normalization to [0, 1]
    norm_image = cv2.normalize(
        image, None,
        alpha=0, beta=1,
        norm_type=cv2.NORM_MINMAX,
        dtype=cv2.CV_32F,
    )
    print('The max value was {}, we nomalize it to {}'.format(image.max(), norm_image.max()))
    return norm_image

def shp_to_patches(shp, tif, src, size = 256):
    '''
    Turn a shapefile of points into a dataframe of square patches around them,
    keeping only the points that fall inside the raster.

    Input
    shp:     shapefile with (usually random) points; patches are built around them
    tif:     raster read as a numpy array of shape (bands, rows, cols)
    src:     the open raster dataset "tif" was read from; ``.bounds``, ``.res``
             and ``.name`` are used
    size:    size of the output patches in pixels (e.g. 256 pix * 256 pix)

    Output
    result:  pandas dataframe with
             'image_path'              - file name of the raster (from src.name)
             'minx', 'miny'            - point coordinates in map units
             'center_x', 'center_y'    - point position in pixels (col, row)
             'tile_xmin', 'tile_ymin'  - patch corner in pixels
             'label'                   - value of the first band at the point
    '''

    left, bottom, right, top = src.bounds
    resolution = src.res[0]
    print('RGB shape', tif.shape)
    print('Bounds: ', src.bounds)

    # Read shapefile
    gdf = gpd.read_file(shp)

    ##### Filtering points by raster boundaries
    print('Initial number of points: ', len(gdf))
    bounds = gdf.geometry.bounds
    inside = (
        (bounds.minx > left) & (bounds.minx < right) &
        (bounds.miny > bottom) & (bounds.miny < top)
    )
    df = bounds[inside].copy()
    print('Final number of points: ', len(df))

    # add filename (taken from the dataset, not from a notebook-level variable)
    df["image_path"] = os.path.basename(src.name)
    print('Total number of points is ', len(df))

    # Transform project coordinates to image coordinates
    half = int(size/2)
    df["tile_x"] = (df.minx - left) / resolution
    df["center_x"] = df["tile_x"].astype(int)
    df["tile_xmin"] = df["tile_x"]-half
    df["tile_xmax"] = df["tile_x"]+half

    # Map y grows upwards, but the image origin is the top-left corner
    df["tile_y"] = (top - df.miny) / resolution
    df["center_y"] = df["tile_y"].astype(int)
    df["tile_ymin"] = df["tile_y"]-half
    df["tile_ymax"] = df["tile_y"]+half

    print('Filtering tile x by...', tif.shape[2])
    print('Filtering tile y by...', tif.shape[1])

    # .copy() so the column assignment below does not write into a slice
    df = df[(df.tile_x < tif.shape[2]) & (df.tile_y < tif.shape[1])].copy()

    # Raster value (first band) under every point, looked up in one indexing call
    df["label"] = tif[0, df["center_y"].to_numpy(), df["center_x"].to_numpy()]

    # select columns
    result = df[[
        "image_path", "minx", "miny", "center_x", "center_y", 'tile_xmin', 'tile_ymin', 'label'
    ]]

    return result


def patch_crop(array, mask, df, idnumber, size = 256, base_dir = '/media/irro/Irro/Irina/WellPads/wellpads_res05_512'):
    ''' Crop square training patches and their labels and save them as PNG files.

        Input
        array:     raster (e.g. CHM) as a numpy array, either (rows, cols) or
                   (bands, rows, cols)
        mask:      matching label layer as a numpy array with the same layout
        df:        dataframe with 'center_x' / 'center_y' pixel coordinates of
                   the patch centres (as produced by shp_to_patches)
        idnumber:  number used in the file name of the first saved patch; it is
                   increased by one for every saved pair
        size:      patch size in pixels
        base_dir:  directory the patches are written to

        Every saved image patch is stretched to 0..255 and written as uint8;
        the label patch is written with its values unchanged.
        Patches that are not exactly size x size (points too close to the
        raster edge) are skipped.

        Output
        Number of patch pairs written.
    '''

    half = size / 2

    ##### Iterating over dataframe to crop patches of image and label:
    num = 0
    for index, row in df.iterrows():
        rows = slice(int(row.center_y - half), int(row.center_y + half))
        cols = slice(int(row.center_x - half), int(row.center_x + half))

        #### A negative start would wrap around to the other side of the raster
        if rows.start < 0 or cols.start < 0:
            continue

        #### "..." keeps an optional leading band axis intact
        patch = array[..., rows, cols]
        patch_l = mask[..., rows, cols]

        #### Skip incomplete patches: image and label must both be size x size
        if patch.shape[-2:] != (size, size) or patch_l.shape[-2:] != (size, size):
            continue

        #### Stretch to 0..255; a flat patch has no range to stretch, so it becomes all zeros
        patch = patch.astype(np.float64)
        low, high = patch.min(), patch.max()
        if high > low:
            patch = (patch - low) * (255.0 / (high - low))
        else:
            patch = np.zeros_like(patch)
        patch = np.rint(patch).astype(np.uint8)

        #### cv2 expects (rows, cols, bands); 2-D patches are already in the right layout
        if patch.ndim == 3:
            patch = np.moveaxis(patch, 0, -1)
        if patch_l.ndim == 3:
            patch_l = np.moveaxis(patch_l, 0, -1)

        ##### Preparing FILENAMES for our patches! You may want to change this!
        filename_p = "{}/{}_{}_{}.png".format(base_dir, 'line_patches', idnumber, 'rgb')
        filename_l = "{}/{}_{}_{}.png".format(base_dir, 'line_patches', idnumber, 'lab')

        print(filename_p)

        ##### SAVING patches into the folder (individually, training image and label)
        cv2.imwrite(filename_p, patch)
        cv2.imwrite(filename_l, patch_l)

        #### Counting patches and increasing index (so the next file will have +1 idnumber)
        num = num+1
        idnumber = idnumber+1

    print('The number of patches is', num)
    return num

In [None]:
def patch_in_batch(shp, tif_path, mask, size, base_dir, idnumber = 0):
    '''Cut training patches for one tile, from shapefile points to saved PNGs.

    Steps:
    1) open the tile and read it as a numpy array
    2) turn the shapefile points that fall inside the tile into a dataframe of
       patch centres (shp_to_patches)
    3) crop the label layer to the tile extent (filter_location)
    4) show the first 500 x 500 pixels of tile and label as a visual check
    5) crop the patches and save them to base_dir (patch_crop)

    Input
    shp:          shapefile with (usually random) points; patches are built around them
    tif_path:     full path to the tile raster
    mask:         label layer; presumably an open rasterio dataset, because
                  filter_location reads its .bounds, .res and .read()
                  Label should be of the same resolution as the tile
    size:         size of the output patches in pixels (e.g. 256)
    base_dir:     directory to save the patches to
    idnumber:     number used in the file name of the first saved patch

    Output
    Whatever patch_crop returns.
    '''

    with rasterio.open(tif_path) as src:
        tif_array = src.read()

        df = shp_to_patches(shp, tif_array, src, size = size)
        print('\nNumber of points within the shapefile is {}'.format(len(df)))

        rgb_array, mask_array = filter_location(src, tif_array, mask)

    ### IMPORTANT: If Lable != 1, you should change its value to 1 and non-ROI to 0
    ### NOTE(review): this maps 0 -> 1 and everything else -> 0, i.e. it inverts
    ### a 0/1 mask - confirm this is what the label layer needs.
    mask_array = np.where(mask_array==0, 1, 0).astype(np.uint8)

    # Visual check of the first band (arrays are band-first: bands, rows, cols)
    f, axarr = plt.subplots(1,2,figsize=(20,20))
    axarr[0].imshow(rgb_array[0, :500, :500])
    axarr[1].imshow(mask_array[0, :500, :500])

    # patch_crop is given single 2-D bands, as in the main patching loop
    return patch_crop(rgb_array[0], mask_array[0], df,
                      idnumber = idnumber, size = size,
                      base_dir = base_dir)

# Patching

In [None]:
train_dir = '/media/irro/Irro/HumanFootprint/'

##### List of label tif files in the folder.
##### os.listdir returns names in arbitrary order, so sort to keep the list
##### (and everything paired with it by index) reproducible between runs.
mask_list = sorted(
    os.path.join(train_dir, files)
    for files in os.listdir(train_dir)
    if files.startswith("Kirby_DSM_10cm_lab") and files.endswith('.tif')
)

##### Check that you're reading only needed files
print(mask_list)

In [None]:
#### Train images and labels should correspond to each other within the list!
#### That's why sometimes it's easier to read label list and then just add right extension to it:

# Tile path = label path with the 4 characters in front of its last 7 removed
# (x[:-11] + x[-7:]), so tif_list[k] always belongs to mask_list[k].
tif_list = [x[:-11] + x[-7:] for x in mask_list]

# Report derived paths that do not exist on disk (they stay in the list).
for path in tif_list:
    if not os.path.exists(path):
        print('Ooops with {}'.format(path))
print(tif_list)

### Tif List

In [None]:
train_dir = '/media/irro/Irro/HumanFootprint/Training_Images_256'

##### List of tif files in the folder (sorted: os.listdir order is arbitrary)
mask_list = sorted(
    os.path.join(train_dir, files)
    for files in os.listdir(train_dir)
    if files.endswith('.tif')
)

##### Check that you're reading only needed files
print(tif_list)
print(mask_list)

##### tif_list and mask_list are paired by position further down
if len(tif_list) != len(mask_list):
    print('WARNING: {} tif files but {} label files - check the pairing!'.format(
        len(tif_list), len(mask_list)))

In [None]:
# Nband = 1      #### CHANGE if needed
i=0
# Patch size in pixels, output folder for the patches, and the point shapefile
size = 256
patch_dir = '/media/irro/Irro/HumanFootprint/Training_256_cnn'
shp = '/media/irro/Irro/HumanFootprint/random_cnn_lots.shp'

# exist_ok avoids the check-then-create race and makes the cell safe to re-run
os.makedirs(patch_dir, exist_ok=True)

In [None]:
# Cut patches tile by tile. The tile index comes from enumerate(), so the cell
# can be re-run without first resetting a counter in another cell.
for i, (tif_path, mask_path) in enumerate(zip(tif_list, mask_list)):
    print(tif_path)
    print(mask_path)

    ##### Both rasters are closed again as soon as their arrays have been read
    with rasterio.open(tif_path) as tif, rasterio.open(mask_path) as mask:
        ##### Raster bounds (in geo coordinates). Also kept as notebook-level
        ##### names because filter_location() may look them up as globals.
        left, bottom, right, top = tif.bounds

        ##### Reading tif as numpy array
        tif_array = tif.read()#.astype(np.float32)
        print('tif array shape', tif_array.shape)
        print('tif bounds: ', tif.bounds)

        ##### Patch centres inside this tile
        df = shp_to_patches(shp, tif_array, tif, size = size)

        ##### Label layer cropped to the tile extent
        rgb_array, mask_array = filter_location(tif, tif_array, mask)

    ##### Shift values so the minimum of the tile is 0
    normalized_array = (rgb_array-rgb_array.min())

    ##### Each tile gets its own block of 2000 patch ids
    patch = patch_crop(normalized_array[0,:,:], mask_array[0,:,:], df,
            idnumber = i*2000, size = size,
            base_dir = patch_dir)

### Remove bad pairs

In [None]:
import os
from PIL import Image

# Set the directory path where the image pairs are located
directory = '/media/irro/Irro/HumanFootprint/Training_256_cnn/'

# A pair is kept only if its label has at least this many pixels equal to 1
min_label_pixels = 500

n_removed = 0

# Iterate over the files in the directory (os.listdir returns a snapshot list,
# so deleting files inside the loop is safe)
for file_name in os.listdir(directory):
    if not file_name.endswith('_rgb.png'):
        continue

    image_file = os.path.join(directory, file_name)
    # Get the corresponding label image file
    label_file = os.path.join(directory, file_name.replace('_rgb.png', '_lab.png'))

    if not os.path.exists(label_file):
        print('No label found for {}'.format(image_file))
        continue

    # Count the label pixels with a value of 1. The file is closed before it
    # may be deleted below. Labels are expected to be single-band PNGs.
    with Image.open(label_file) as label_image:
        count = int(np.count_nonzero(np.asarray(label_image) == 1))

    # Delete the image pair when the label is (almost) empty
    if count < min_label_pixels:
        os.remove(image_file)
        os.remove(label_file)
        print('Removed')
        n_removed = n_removed+1

print(n_removed)

# U-Net INPUT

This means that the PNG masks are being loaded incorrectly. With a single class, masks should be loaded as greyscale (1 channel) with values between 0 and 1 (most often only 0s and 1s). If a mask contains values greater than 1, binary cross-entropy does not work properly.
Solution: `im = resize_and_crop(Image.open(dir + id + suffix), scale=scale)`

A TensorFlow record (TFRecord) file consists of serialised messages, each of which is a dictionary mapping a feature label to its associated value. To convert images into TFRecord files we use the `tf.train.Example` protocol.

Steps:

    Break down the image into smaller images (tiling)
    Create helper functions to cast datatypes into 1 of the type lists (integer,float and bytes)
    Create a feature dictionary which will be the contents of message. This is how we associate the image to the mask
    Convert the features into to bytes, a process called serialization
    Add the features to a message
    Create a tfrecord file and write the messages (image and its associated features) to it


###### https://www.kaggle.com/code/uysalserkan/uav-dataset-reading-augmentation/notebook?scriptVersionId=53034343

In [None]:
#### Root folder with training data: labels and images
train_folder = patch_dir
print(train_folder)
#### Sorted, because os.listdir order is arbitrary and the later train/val split
#### is only reproducible if the file order is.
train_images = sorted(os.path.join(train_folder, files) for files in os.listdir(train_folder) if files.endswith("rgb.png"))
train_labels = sorted(os.path.join(train_folder, files) for files in os.listdir(train_folder) if files.endswith("lab.png"))

In [None]:
#### Train images and labels should correspond to each other within the list!
#### That's why sometimes it's easier to read label list and then just add right extension to it:

# Image path = label path with its last 7 characters replaced by 'rgb.png',
# which keeps train_images[k] aligned with train_labels[k].
train_images = [x[:-7] + str('rgb.png') for x in train_labels]

In [None]:
#### We should have THE SAME number of labels and train images
#### (the shape is read from the first image, so a one-image dataset works too)
with Image.open(train_images[0]) as first_image:
    first_shape = np.array(first_image).shape
print('We have {} train images and {} trains labels\nwith the size of {}'.format(len(train_images), len(train_labels), first_shape))

In [None]:
#### Checking image-label pairs!
n = np.random.randint(len(train_images))
print(train_images[n])
print(train_labels[n])

#### Report exactly which file of the pair is missing
missing = [p for p in (train_images[n], train_labels[n]) if not os.path.exists(p)]
if missing:
    for p in missing:
        print(p)
    print('Oooops! NO FILE')
else:
    print('\nBoth files exists... check the pair! image and label should have the same idnumber')

In [None]:
# Show one randomly chosen training image (left) next to its label (right).
n = np.random.randint(len(train_images))
print(n)
f, axarr = plt.subplots(1,2,figsize=(20,20))
for axis, png_path in zip(axarr, (train_images[n], train_labels[n])):
    axis.imshow(np.array(Image.open(png_path)))

# Preprocessing from kz-whale-tails

In [None]:
############## INPUT DATA

### CHECK IF YOU NEED RESIZING
### size of the training patch (usually 2**x): your original training images WILL BE RESIZED to this size
size =256
### resizing coefficient; it is only printed in this cell
rs = 1

# rs = int(np.array(Image.open(train_images[1])).shape[0]/size)
print('IF RESIZING, you need to use resizing coefficient rs = {}'.format(rs))

# how many patches model will see during the one step (usually 8-16-32)
batch_size = 8

In [None]:
#### Readings png images to numpy array with the shape: (N of images, size, size)
imgs_list = []
masks_list = []

### RESIZING patches to fit into computational resources
for image, mask in zip(train_images, train_labels):
    with Image.open(image) as img:
        imgs_list.append(np.array(img.resize((size,size))))
    # Labels are class ids: nearest-neighbour resampling never blends two classes
    # into a new in-between value.
    with Image.open(mask) as lab:
        masks_list.append(np.array(lab.resize((size,size), resample=Image.NEAREST)))
imgs_np = np.asarray(imgs_list)
masks_np = np.asarray(masks_list)

print(imgs_np.shape, masks_np.shape)

In [None]:
############ PLOTTING original tif data (e.g. CHM), the corresponding label layer and their overlay!
from keras_unet.utils import plot_imgs
plot_imgs(org_imgs=imgs_np, mask_imgs=masks_np, nm_img_to_plot=10, figsize=6)

# Value ranges before normalization: the images are divided by 255 in the next
# cell, the masks are used as they are.
print(imgs_np.max(), masks_np.max())
#### IMPORTANT: original and ground truth should CORRESPOND to each other!
#### IMPORTANT: original and ground truth should CORRESPOND to each other!

In [None]:
#### Images: float32, divided by 255
x = imgs_np.astype(np.float32) / 255
#### Labels: float32, values unchanged
y = masks_np.astype(np.float32)

#### Add a trailing channel axis of length 1 to fit the model
y = np.reshape(y, (y.shape[0], y.shape[1], y.shape[2], 1))
x = np.reshape(x, (x.shape[0], x.shape[1], x.shape[2], 1))

print(x.shape, y.shape)

### Train-Val Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the patches for validation; random_state fixes the shuffle.
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.1, random_state=0)

print("x_train: ", x_train.shape)
print("y_train: ", y_train.shape)
print("x_val: ", x_val.shape)
print("y_val: ", y_val.shape)

# Number of training patches; used to compute steps_per_epoch when fitting.
len_train = len(x_train)

### Data Augmentation

In [None]:
from keras_unet.utils import get_augmented

# Generator of augmented (image, mask) batches built from the training split:
# random rotation, zoom and flips, with 'constant' fill mode.
train_gen = get_augmented(
    x_train, y_train, batch_size=batch_size,
    data_gen_args = dict(
        rotation_range=360.,
#         width_shift_range=0.05,
#         height_shift_range=0.05,
#         shear_range=40,
        zoom_range=0.2,
        horizontal_flip=True,
        vertical_flip=True,
        fill_mode='constant'
    ))

In [None]:
# Draw one batch from the augmentation generator and plot it as a sanity check.
sample_batch = next(train_gen)
xx, yy = sample_batch
print(xx.shape, yy.shape)
from keras_unet.utils import plot_imgs

plot_imgs(org_imgs=xx, mask_imgs=yy, nm_img_to_plot=10, figsize=5)

# Define the U-Net Model

In [None]:
# Shape of a single training sample; the model below is built with this input shape.
print('Input Shape: ', x_train[0].shape)

In [None]:
from keras_unet.models import custom_unet

# The model input shape is taken from one training sample.
input_shape = x_train[0].shape
# input_shape = (None, None, 1)

# input_shape = (None, 512, 512, 1)

# U-Net with a single output class and sigmoid activation.
model = custom_unet(
    input_shape,
    filters=32,
    use_batch_norm=True,
    dropout=0.3,
#     dropout_change_per_layer=0.0,
    num_classes=1,
    output_activation='sigmoid',
    num_layers=4
)

In [None]:
# model.summary()

### Compile the model

In [None]:
# NOTE(review): this rebinds ModelCheckpoint to the standalone `keras` package,
# while the first cell imported it from tensorflow.keras - confirm which one is intended.
from keras.callbacks import ModelCheckpoint
from keras_unet.metrics import iou, iou_thresholded
from keras_unet.losses import jaccard_distance

# File the checkpoint callback writes to; the name contains the patch size.
model_filename = '/media/irro/Irro/CNN_Models/human_lots_{}.h5'.format(size)

# Checkpoint callback configured to monitor validation loss and save only the best model.
callback_checkpoint = ModelCheckpoint(
    model_filename, 
    verbose=1, 
    monitor='val_loss', 
    save_best_only=True,
)

# Binary cross-entropy loss with IoU metrics; jaccard_distance is imported as an
# alternative loss but is currently commented out.
model.compile(
    optimizer=tf.keras.optimizers.Adam(), 
    #optimizer=SGD(lr=0.01, momentum=0.99),
    loss='binary_crossentropy',
#     loss=jaccard_distance,
    metrics=[iou, iou_thresholded]
)

### Training starts here...

In [None]:
# Train on the augmentation generator. The generator has no length of its own
# here, so the number of steps per epoch is derived from the training-set size.
# NOTE(review): epochs=1 looks like a smoke-test setting - confirm before a real run.
history = model.fit(
    train_gen,
    steps_per_epoch=int(len_train/batch_size),
    epochs=1,
    validation_data=(x_val, y_val),
    callbacks=[callback_checkpoint]
)

### Plot

In [None]:
# from keras_unet.utils import plot_segm_history
# plot_segm_history(history)

### Test

In [None]:
import cv2

path = '/media/irro/Irro/HumanFootprint/test.tif'
# Top-left 256 x 256 window of the test raster
data = tiff.imread(path)[:256, :256]

# Same preprocessing as for the training patches: shift to zero, stretch to
# 0..255, then scale to 0..1 as float32
patch = (data - data.min())
value_range = patch.max() - patch.min()
patch = (patch - patch.min()) * (255.0 / value_range)
data = np.asarray(patch, dtype=np.float32) / 255

# Add the batch axis in front and the channel axis at the end
input_data = data[np.newaxis, ..., np.newaxis]
print(input_data.shape)

# Prediction as integer percent (0..100)
pred = np.squeeze(model.predict(input_data) * 100).astype(np.uint8)

# Prediction visualization
plt.imshow(pred, cmap='gray')
plt.show()

# # Save the prediction as an image
# # cv2.imwrite('/media/irro/Irro/HumanFootprint/test_CNN.png', pred)


## SAVE the Model

In [None]:
# size = 256
# Target file name for saving the model by hand (the save call below is commented out).
model_name = '/media/irro/Irro/CNN_Models/Human_lots_{}_byCNN_test.h5'.format(size)
print(model_name)

### Saving the model! Careful!
# model.save(model_name)
# print(model_name)

## LOAD the Model

In [None]:
# model_name = '/media/irro/Irro/CNN_Models/WellPads_big_batch8_256.h5'
# model.load_weights(model_name)

In [None]:
##### Predict validation images (only the first 10, one image per batch)
y_pred = model.predict(x_val[0:10], batch_size=1)

In [None]:
from keras_unet.utils import plot_imgs
# Image, ground-truth mask and prediction for the 10 validation images predicted above.
plot_imgs(org_imgs=x_val, mask_imgs=y_val, pred_imgs=y_pred, nm_img_to_plot=10)

# Predict

In [None]:
import slidingwindow
from tqdm import tqdm
import rasterio

def resize_arr(arr, rs):
    '''Downscale a one-band array by the factor rs using PIL.

    arr:  one-band numpy array (rows, cols)
    rs:   resizing coefficient (rs = 4 shrinks each side 4 times, e.g. 1024 -> 256)

    Output: resized numpy array of shape (rows / rs, cols / rs)
    '''
    ### Reading numpy array as PIL Image
    img = Image.fromarray(arr)
    ### PIL expects size as (width, height), i.e. (cols, rows) of the array
    new_width = int(arr.shape[1]/rs)
    new_height = int(arr.shape[0]/rs)
    data1 = img.resize(size=(new_width, new_height))
    ### Reading back to numpy array
    data1 = np.array(data1)
    return data1

from PIL import Image

def predict_tif(model, path, size, overlap=0.0):
    '''
    Predict a whole single-band TIFF window by window and save the result as a
    georeferenced raster next to the input.

    The raster is split into size x size windows. Each window is stretched to
    0..1 in the same way as the training patches, passed through the model, and
    the prediction is written back to the same location as integer percent.
    Windows with a single constant value are not predicted and stay 0.

    Inputs:
    - model:   trained segmentation model; its predict() is called with arrays
               of shape (1, size, size, 1)
    - path:    path to the input TIFF file
    - size:    window size in pixels (use the patch size the model was trained with)
    - overlap: overlap between neighbouring windows as a fraction, passed to
               slidingwindow.generate; where windows overlap, the later window
               overwrites the earlier one

    Output:
    - pred: uint8 numpy array with the shape of the input raster; values are
            probabilities in percent (100 = 100% probability of object).
            The same array is written to "<input name>_CNN<size>_3.tif".
    '''

    # Read the whole TIFF file into memory
    data = tiff.imread(path)

    # Generate sliding windows of the specified patch size
    windows = slidingwindow.generate(data, slidingwindow.DimOrder.HeightWidthChannel, size, overlap)

    # Output buffer; uint8 is enough for percent values and is what gets saved
    pred = np.zeros(data.shape, dtype=np.uint8)

    # Iterate over the sliding windows and make predictions
    for window in tqdm(windows):
        location = window.indices()
        patch = data[location]

        # A constant window has no value range to stretch (division by zero)
        low, high = patch.min(), patch.max()
        if not high > low:
            continue

        # Same preprocessing as for the training patches
        patch = (patch - low) * (255.0 / (high - low))
        patch = np.asarray(patch, dtype=np.float32) / 255
        patch = patch.reshape(1, patch.shape[0], patch.shape[1], 1)

        # verbose=0: no Keras progress bar per window, tqdm already shows progress
        pred_patch = model.predict(patch, verbose=0) * 100
        pred[location] = np.squeeze(pred_patch).astype(np.uint8)

    print('Saving...')
    # Save the predicted image as a georeferenced TIFF
    output_path = os.path.splitext(path)[0]+'_CNN{}_3.tif'.format(size)
    with rasterio.open(path) as src:
        profile = src.profile
        profile.update(count=1, dtype='uint8', nodata=0)

        with rasterio.open(output_path, 'w', **profile) as dst:
            dst.write(pred, 1)

    # Visualization
    plt.imshow(pred)

    return pred


In [None]:
# NOTE: the `break` stops after the first tile, so only tif_list[0] is predicted.
for path in tif_list[0:]:
    predict_tif(model, path, 256)
    break

# Raster to Vector

In [None]:
# Display the tile paths the next cells are derived from.
tif_list

In [None]:
#### Paths of the prediction rasters, one per tile in tif_list.
#### predict_tif writes "<tile name>_CNN<size>_3.tif" next to each tile, so the
#### names are rebuilt with the same pattern.

cnn_list=[]
for x in tif_list:
#     print(x)
    path = x[:-4]+'_CNN{}_3.tif'.format(size)
    if not os.path.exists(path):
        print('Ooops with {}'.format(path))
    cnn_list.append(path)
print(cnn_list)

In [None]:
# Turn every prediction raster into a binary raster:
# values below 10 become 0, everything else becomes 1.
for path in cnn_list:

    data = tiff.imread(path)
    print(data.shape)
    data_filt = np.where(data<10, 0, 1)

    print('Saving...')
    output_path = path[:-4]+'_CNN{}_filt.tif'.format(size)

    # Georeferencing is copied from the prediction raster
    with rasterio.open(path) as src:
        profile = src.profile
        profile.update(count=1, dtype='uint8', nodata=0)

    # Write the binary raster as a single uint8 band
    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(data_filt.astype('uint8'), 1)

In [None]:
#### Paths of the thresholded (binary) rasters, one per prediction raster.
#### They are derived from cnn_list with the pattern the thresholding cell uses
#### for its output names, so both lists always refer to the same files.

cnn_list_filt=[]
for x in cnn_list:
#     print(x)
    path = x[:-4]+'_CNN{}_filt.tif'.format(size)
    if not os.path.exists(path):
        print('Ooops with {}'.format(path))
    cnn_list_filt.append(path)
print(cnn_list_filt)

In [None]:
# output_vector = input_raster[:-4]+'.shp'

In [None]:
import rasterio
from shapely.geometry import shape, mapping
import fiona

def raster_to_vector(input_raster):
    '''Polygonize all pixels with value 1 of a raster into a shapefile.

    input_raster: path to the raster; its first band is read
    The shapefile is written next to the raster with the same name and the
    extension ".shp" (last 4 characters of the path replaced). Each polygon
    gets an integer "id" attribute.

    Output: path of the written shapefile
    '''
    # `import rasterio` alone does not load the features submodule
    from rasterio import features

    # Read the input raster
    with rasterio.open(input_raster) as src:
        image = src.read(1)  # Read the raster band
        crs = src.crs  # Get the CRS from the input raster
        transform = src.transform  # Get the transform from the input raster

    # Polygonize only the pixels equal to 1 (mask) instead of all values
    geoms = [
        shape(geometry)
        for geometry, value in features.shapes(image, mask=(image == 1), transform=transform)
        if value == 1
    ]
    output_vector = input_raster[:-4]+'.shp'
    # Create the output vector file
    schema = {'geometry': 'Polygon', 'properties': {'id': 'int'}}
    with fiona.open(output_vector, 'w', 'ESRI Shapefile', schema=schema, crs=crs) as dst:
        # Iterate over the geometries and add them to the vector file
        for i, geom in enumerate(geoms):
            feature = {
                'geometry': mapping(geom),  # Convert geometry to a GeoJSON-like dict
                'properties': {'id': i}
            }
            dst.write(feature)
    return output_vector

In [None]:
# Vectorize every thresholded raster; each call writes a .shp next to its input.
# NOTE: the loop variable reuses the notebook-level name `tif`.
for tif in cnn_list_filt:
    raster_to_vector(tif)

### Merging shapefiles

In [None]:
#### Shapefile paths, one per thresholded raster: same name with ".shp" in
#### place of the last 4 characters (the naming raster_to_vector uses).

shp_list = [x[:-4]+'.shp' for x in cnn_list_filt]

#### Report shapefiles that were not written (they stay in the list)
for path in shp_list:
    if not os.path.exists(path):
        print('Ooops with {}'.format(path))
print(shp_list)

In [None]:
import fiona
from shapely.geometry import shape
from shapely.geometry import mapping

def merge_shapefiles(input_files, output_file):
    '''Write the features of several shapefiles into one shapefile.

    input_files: list of shapefile paths; schema and CRS of the output are
                 taken from the first one
    output_file: path of the merged shapefile to create
    '''
    # Schema and CRS come from the first input file
    with fiona.open(input_files[0], 'r') as first:
        schema = first.schema.copy()
        crs = first.crs

    with fiona.open(output_file, 'w', 'ESRI Shapefile', schema=schema, crs=crs) as merged:
        for shp_path in input_files:
            with fiona.open(shp_path, 'r') as layer:
                for feature in layer:
                    # Geometry goes through shapely and back to a mapping;
                    # properties are copied as they are
                    record = {
                        'geometry': mapping(shape(feature['geometry'])),
                        'properties': feature['properties'],
                    }
                    merged.write(record)

    print("Shapefiles merged successfully!")


In [None]:
# Merge the per-tile shapefiles into a single output shapefile.
merge_shapefiles(shp_list, '/media/irro/Irro/HumanFootprint/Kirby_DSM_10cm_CNN256.shp')

### Pred for Test file

In [None]:
import slidingwindow
from tqdm import tqdm

path = '/media/irro/Irro/HumanFootprint/test.tif'
# 256 x 256 window of the test raster, starting at row/column 100
data = tiff.imread(path)[100:356, 100:356]

# Shift to zero and stretch to 0..255 (same preprocessing as the training patches)
patch = (data-data.min())
patch = (patch - patch.min()) * (255.0 / (patch.max() - patch.min()))

# Scale to 0..1 and add batch and channel axes
data = np.asarray(patch, dtype=np.float32)/255
data = data.reshape(1, data.shape[0], data.shape[1], 1)

# Prediction as integer percent
pred = model.predict(data)*100
pred = (np.squeeze(pred)).astype(np.uint8)

# Prediction vizualization: input window (left) and prediction (right)
f, axarr = plt.subplots(1,2,figsize=(20,20))
axarr[0].imshow(np.squeeze(data))
axarr[1].imshow(pred)

# cv2.imwrite('/media/irro/Irro/HumanFootprint/test_CNN.png', pred)

### Predicting all TIF files in directory

In [None]:
############ Preparing a list of files for predictions
#### Root directory with tif files:
test_dir = '/media/irro/Irro/Irina/WellPads/CHM_testing'

#### Making a list of tifs which NOT end with... "CNN256.tif" - you should put your own extension
#### (sorted: os.listdir returns names in arbitrary order)
test_list = sorted(
    os.path.join(test_dir, files)
    for files in os.listdir(test_dir)
    if files.endswith(".tif") and not files.endswith("CNN256.tif")
)
print(test_list)

In [None]:
################ Predictions for the single tif (with further saving)
#### predict_tif takes (model, path, size); the former `name` / `overla` keyword
#### arguments are not part of its signature and raised a TypeError.
predict_tif(model, '/media/irro/Irro/Irina/WellPads/CHM_testing/CHM_9.tif', size)

In [None]:
######## Predictions for list of tif files (one by one)
#### predict_tif takes (model, path, size); the former `name` / `overla` keyword
#### arguments are not part of its signature and raised a TypeError.
for tif in test_list:
    predict_tif(model, tif, size)
    print('\nDone: {}'.format(tif))

### Vizualization

In [None]:
# Input raster at `path` (set in an earlier cell), values multiplied by 255 for display.
f, axarr = plt.subplots(1,figsize=(20,20))
axarr.imshow(tiff.imread(path)*255)

In [None]:
# Prediction vizualization: the most recent `pred` array left by an earlier cell.
f, axarr = plt.subplots(1,figsize=(20,20))
axarr.imshow(pred)

In [None]:
# path = '/media/irro/Irro/Irina/Rasters/CHMs/CHM_0.tif'
# predict_lines(model, path, 500, 500, resize = True)