# **DeepLandforms - v2**

Author: giacomo.nodjoumi@hyranet.info - g.nodjoumi@jacobs-university.de

## DeepLandforms

With this notebook, users can train [YOLOv8](https://github.com/ultralytics/ultralytics) object detection and instance segmentation models on custom datasets of georeferenced images.
Results can be visualized directly in the notebook using leafmap and a WMS backend.

This notebook includes:
* customizable augmentations using the albumentations package
* data train/valid split

## Usage

* Put or link the dataset into the **DeepLandforms** *.env* file
* Run docker-compose up
* Edit the *configs* section by editing the following parameters:

## Parameters
 ------------------------------------------------------------------
| **Parameter** | **Description** | **Example** |
| ---- | ---- | ---- |
| **data_dir** | local path of the data dir | /home/user/data |
| **device** | device where to run the model | cuda or cpu |
------------------------------------------------------------------
Then just execute the notebook and monitor the training in the **Tensorboard** container.

## Funding
*This study is within the Europlanet 2024 RI and EXPLORE project, and it has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 871149 and No 101004214.*

In [None]:
import albumentations as A
import cv2 as cv
import os
import shutil
from sklearn.model_selection import train_test_split
from ultralytics import YOLO
from utils.DataUtils import get_paths

In [None]:
# Dataset locations. `home_dir` already ends with a slash, so `src_path`
# contains a doubled separator while `image_path` does not.
home_dir = '/home/Giacomo/data/'
basepath = 'BC_n_SQCRP_n_CellSize_10_m__LIM_n_None_px_cog_n_V2/YOLO/'
image_path = home_dir + basepath
src_path = home_dir + '/' + basepath
base_dir = f'{src_path}/train'
#train_dir = f'{image_path}/train'

# Recreate the train directory from scratch, discarding any previous content.
if os.path.isdir(base_dir):
    shutil.rmtree(base_dir)
os.makedirs(base_dir)

In [None]:
# Destination directories of the three dataset splits.
train_path, valid_path, test_path = (
    f'{image_path}/{split_name}' for split_name in ('train', 'val', 'test')
)

# Remove any split directory left over from a previous run.
for split_dir in (train_path, valid_path, test_path):
    if os.path.isdir(split_dir):
        shutil.rmtree(split_dir)

In [None]:
# Build the lists of image ('tiff') and label ('txt') paths found under image_path.
# presumably get_paths returns names relative to image_path, since image_path is
# prepended to each entry - confirm in utils.DataUtils.
# NOTE(review): later cells pair image_list[i] with label_list[i]; this assumes both
# lists come back in the same order and with the same length - TODO confirm.
image_list = [f"{image_path}{image}" for image in get_paths(image_path,'tiff')]
label_list = [f"{image_path}{label}" for label in get_paths(image_path,'txt')]
# Display the number of images found.
len(image_list)

In [None]:
import pandas as pd
# Gather every label file into one table (`class_df`) and record, per file, the
# class id found on its first row (`cls`).
label_columns = ['Type','x0','y0','x1','y1']
cls = []
label_frames = []
for ll in label_list:
    # Label files are space-separated with no header: class id + 4 box values.
    df = pd.read_csv(ll, delimiter=' ', header=None)
    df.columns = label_columns
    label_frames.append(df)
    # Address the first cell positionally in one step; `df.iloc[0][0]` relied on
    # an integer key falling back to positional lookup on a label-indexed row,
    # and upcast the class id to the row's common dtype.
    cls.append(df.iloc[0, 0])

if label_frames:
    # One concat instead of one per file (which re-copied the growing table each
    # time). Reversed so rows keep the order the per-file prepending produced.
    class_df = pd.concat(label_frames[::-1]).reset_index(drop=True)
else:
    # No label files: keep an empty table with the expected columns.
    class_df = pd.DataFrame(columns=label_columns)

In [None]:
# Display the combined annotation table.
class_df

In [None]:
# Display the distinct values collected in `cls` (one value is recorded per label file).
list(set(cls))

In [None]:
import os
import rasterio as rio
from rasterio.plot import reshape_as_image, reshape_as_raster
import albumentations as A
import cv2
import numpy as np
import math
import random
def augment_data(image_file, label_file, img, version, augmentations_list):
    """Apply each configured augmentation to one image/label pair and save the results.

    For every entry of ``augmentations_list`` an augmented raster and a matching
    YOLO label file are written next to ``image_file`` as
    ``<basename>_augmented_<version>_<key><ext>`` and
    ``<basename>_augmented_<version>_<key>.txt``.

    Args:
        image_file: Path of the source raster; only used to build output names.
        label_file: Path of the label file, one box per line as
            ``class x_center y_center width height``.
        img: Open rasterio dataset for ``image_file``; its pixels and metadata
            are read here.
        version: Tag inserted into the output file names.
        augmentations_list: Mapping (despite the name) of augmentation key to
            albumentations transform. Each transform is applied on its own.

    Returns:
        ``(augmented_image, augmented_bboxes, augmented_labels, savename)`` for
        the last augmentation applied, or four ``None`` values when the mapping
        is empty.
    """
    image = reshape_as_image(img.read())

    bboxes = []
    labels = []
    with open(label_file, 'r') as f:
        for annotation in f:
            fields = annotation.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            label, *bbox = map(float, fields)
            bboxes.append(bbox)
            labels.append(label)

    basename, ext = os.path.splitext(image_file)

    # Defined up front so an empty mapping returns cleanly instead of raising.
    augmented_image = augmented_bboxes = augmented_labels = savename = None

    # Iterate the mapping that was passed in (previously a module-level dict was
    # read instead, so entries added by the caller were silently ignored).
    for k, aug in augmentations_list.items():
        augmentation = A.Compose(
            [aug],
            bbox_params=A.BboxParams(format='yolo', label_fields=['category_ids']),
        )
        augmented = augmentation(image=image, bboxes=bboxes, category_ids=labels)
        augmented_image = augmented['image']
        augmented_bboxes = np.array(augmented['bboxes'])
        augmented_labels = np.array(augmented['category_ids'])

        # A transform may change the pixel dimensions, so the output profile must
        # describe the augmented array rather than the source raster.
        # NOTE(review): the geotransform is copied unchanged - confirm whether
        # georeferencing of augmented tiles matters downstream.
        out_meta = img.meta.copy()
        out_meta.update(height=augmented_image.shape[0], width=augmented_image.shape[1])

        savename = f"{basename}_augmented_{version}_{k}{ext}"
        with rio.open(savename, 'w', **out_meta) as dst:
            dst.write(reshape_as_raster(augmented_image))

        with open(f"{basename}_augmented_{version}_{k}.txt", 'w') as f:
            for label, bbox in zip(augmented_labels, augmented_bboxes):
                # Boxes are written as returned by the transform (no rescaling).
                x_center, y_center, width, height = bbox[0], bbox[1], bbox[2], bbox[3]
                f.write(f"{int(label)} {x_center} {y_center} {width} {height}\n")

    return augmented_image, augmented_bboxes, augmented_labels, savename

In [None]:
import tqdm
def parallel_augss(image_files, label_files, augmentations, resize, version, JOBS):
    """Run ``data_augmenter`` over paired image/label files with joblib.

    ``image_files[i]`` is paired with ``label_files[i]``. Work is dispatched to
    ``JOBS`` workers on the "loky" backend, with inner threads capped at 2.
    Returns the list of per-file results produced by ``data_augmenter``.
    """
    from joblib import Parallel, delayed, parallel_backend

    augment_one = delayed(data_augmenter)
    # Lazy task stream; labels are looked up by index to match the images.
    tasks = (
        augment_one(img_file, label_files[idx], version, augmentations, resize)
        for idx, img_file in enumerate(image_files)
    )
    with parallel_backend("loky", inner_max_num_threads=2):
        results = Parallel(n_jobs=JOBS)(tasks)
    return results

In [None]:
def data_augmenter(image_file, label_file, version, augmentations, resize):
    """Open one raster and run every augmentation on it through ``augment_data``.

    Args:
        image_file: Path of the raster to augment.
        label_file: Path of the matching label file.
        version: Tag forwarded to ``augment_data`` for output naming.
        augmentations: Mapping of augmentation key to albumentations transform.
            When ``resize`` is truthy a ``'resize'`` entry is added to (or
            replaced in) this mapping in place.
        resize: When truthy, add a bbox-safe random crop whose output size is
            the image size divided by a random integer factor.

    Returns:
        Whatever ``augment_data`` returns for this file.
    """
    # The context manager closes the dataset once augment_data has read it;
    # previously the handle was left open for every processed file.
    with rio.open(image_file) as img:
        # NOTE(review): a float lower bound is passed to an integer sampler;
        # confirm whether a fractional scale factor was intended.
        rnd = np.random.randint(1.5, 3)
        if resize:
            augmentations['resize'] = A.RandomSizedBBoxSafeCrop(
                height=math.ceil(img.height / rnd),
                width=math.ceil(img.width / rnd),
                erosion_rate=0.0,
                interpolation=1,
                always_apply=False,
            )  # , p=.25)
        return augment_data(
            image_file, label_file, img, version=version, augmentations_list=augmentations
        )

In [None]:
def chunk_creator(item_list, chunksize):
    """Yield consecutive tuples of at most ``chunksize`` items from ``item_list``.

    The final tuple may be shorter; nothing is yielded for an empty input.
    """
    from itertools import islice

    source = iter(item_list)
    # iter(callable, sentinel) keeps slicing until an empty tuple comes back.
    yield from iter(lambda: tuple(islice(source, chunksize)), ())

# Initialize augmentation sets list

In [None]:
# Empty container for augmentation sets.
# NOTE(review): nothing in the visible cells appends to it; the only reference is a
# commented-out loop in the augmentation driver cell.
augmentations_list = []

In [None]:
# Augmentation configuration: one albumentations transform per key. Each
# transform is applied separately and its key becomes part of the output file name.
rnd = np.random.uniform(1,3)
# Was `resize:True`, which is a bare annotation and never bound the name.
resize = True
version=1
augmentation_dict = {
    "clahe":A.CLAHE(),#p=.25),
    #"rotate":A.RandomRotate90(),#p=.5),
    #"transpose":A.Transpose(),#p=.25),
    "shiftscale":A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.30, rotate_limit=25,always_apply=True),#, p=.25),
    "rbc":A.RandomBrightnessContrast(always_apply=True),#p=0.5),
    "blur":A.Blur(blur_limit=4),#, p=.25),
    "optdis":A.OpticalDistortion(distort_limit=0.5, shift_limit=0.35, interpolation=1, border_mode=4, value=None, mask_value=None, always_apply=True),
    "GauNoise":A.GaussNoise(var_limit=(30.0, 60.0), mean=0,always_apply=True),    
    "MNoise":A.MultiplicativeNoise(multiplier=(0.5, 1.5), per_channel=False, elementwise=True,always_apply=True),
    #"Fog":A.RandomFog(fog_coef_lower=0.2, fog_coef_upper=0.9, alpha_coef=0.1,always_apply=True),
    "Sharp":A.Sharpen(alpha=(0.6, 0.9), lightness=(0.5, 1.0),always_apply=True),    
                           }

In [None]:
# Print each configured transform, one per line.
for transform in augmentation_dict.values():
    print(transform)

In [None]:
import psutil
from tqdm import tqdm
# Total RAM in GiB (informational; not used below).
avram=psutil.virtual_memory().total >> 30
# cpu_count(logical=False) returns None when the physical core count cannot be
# determined; fall back to one job so the division below cannot fail.
avcores=psutil.cpu_count(logical=False) or 1
JOBS = avcores
#for aug_set in augmentations_list:
    #version = aug_set[2]
    #resize =aug_set[1]
    #augmentations= aug_set[0]
with tqdm(total=len(image_list),
         desc = 'Generating Images',
         unit='File') as pbar:

    filerange = len(image_list)
    chunksize = round(filerange/JOBS)
    if chunksize <1:
        # Far fewer images than cores: do not start more workers than files.
        chunksize=1
        JOBS = filerange
    # Files are processed in batches of JOBS items, one item per worker.
    image_chunks = list(chunk_creator(image_list, JOBS))
    label_chunks = list(chunk_creator(label_list, JOBS))
    for i, image_files in enumerate(image_chunks):
        label_files = label_chunks[i]
        try:
            # resize=True and version=1 are passed as literals here.
            results=parallel_augss(image_files, label_files, augmentation_dict,  True, 1, JOBS)
        except Exception as e:
            # Best effort: report the failing batch and continue with the next one.
            print(e)
            print(image_files)

        pbar.update(len(image_files))

In [None]:
# List the image and label files again so the lists reflect what is on disk after
# the augmentation cell has written its outputs.
# presumably get_paths returns names relative to image_path - confirm in utils.DataUtils.
image_list = [f"{image_path}{image}" for image in get_paths(image_path,'tiff')]
label_list = [f"{image_path}{label}" for label in get_paths(image_path,'txt')]
# Display the number of images found.
len(image_list)

In [None]:
# Target fractions of the whole dataset for each split.
train_perc=0.7
valid_perc=0.2
test_perc=0.1

# First split: train vs. hold-out (validation + test together).
holdout_perc = valid_perc + test_perc
train_set, holdout_set = train_test_split(image_list, test_size=holdout_perc, random_state=1,shuffle=False)

# Second split: test_size is a fraction of the HOLD-OUT, not of the whole
# dataset, so it must be test_perc / holdout_perc. Passing valid_perc here gave
# the test set only 20% of the hold-out instead of the intended share.
valid_set, test_set = train_test_split(holdout_set, test_size=test_perc / holdout_perc, random_state=1,shuffle=False)

In [None]:
# Display the number of images in the training split.
len(train_set)

In [None]:
# Display the number of images in the validation split.
len(valid_set)

In [None]:
# Display the number of images in the test split.
len(test_set)

In [None]:
def dataMoverYOLO(dst_root, image_list):
    """Copy images and their label files into a fresh YOLO split directory.

    ``dst_root`` is created empty (an existing directory is removed first) with
    ``images`` and ``labels`` sub-directories. For every image path the label
    is expected beside it, with the same base name and a ``.txt`` extension.

    Args:
        dst_root: Destination directory of the split.
        image_list: Iterable of image file paths to copy.
    """
    try:
        os.makedirs(dst_root)
    except FileExistsError:
        # Only an already-existing destination justifies wiping it; any other
        # failure (permissions, bad path) now propagates instead of triggering
        # a recursive delete.
        shutil.rmtree(dst_root)
        os.makedirs(dst_root)

    images_path = f"{dst_root}/images"
    labels_path = f"{dst_root}/labels"
    os.makedirs(images_path)
    os.makedirs(labels_path)

    for image in image_list:
        pathname, ext = os.path.splitext(image)
        name = os.path.basename(pathname)
        shutil.copy(image, f"{images_path}/{name}{ext}")
        shutil.copy(f"{pathname}.txt", f"{labels_path}/{name}.txt")
    print("Done")

In [None]:
# Copy the training images and labels into train_path.
dataMoverYOLO(train_path, train_set)

In [None]:
# Copy the test images and labels into test_path.
dataMoverYOLO(test_path, test_set)

In [None]:
# Copy the validation images and labels into valid_path.
dataMoverYOLO(valid_path, valid_set)

In [None]:
# Load the class names (single 'Type' column, no header row) and write the
# dataset description file consumed by the training cell.
class_file = src_path + "classes.csv"
class_df = pd.read_csv(class_file, header=None, delimiter=' ')
class_df.columns = ['Type']
class_names = class_df['Type'].to_list()  # ['0', '1','2', '3', '4']
classnum = len(class_names)

yaml_file = src_path + "/data.yaml"
lines = [f"train: train/images\nval: val/images\nnc: {classnum}\nnames: {class_names}"]
with open(yaml_file, 'w') as f:
    f.write("".join(lines))

In [None]:
# Training settings, collected in one place and forwarded to model.train().
train_settings = dict(
    data=yaml_file,
    imgsz=640,
    epochs=150,
    batch=4,
    val=True,
    name='yolov8x_custom_640_multiclass_10m_Augm',
    seed=0,
    deterministic=True,
    cache=True,
)

# Start from the 'yolov8x.pt' weights and train on the dataset described by yaml_file.
model = YOLO('yolov8x.pt')
results = model.train(**train_settings)

In [None]:
# NOTE(review): `result` (singular) is not defined in any cell shown here; the
# training cell stores its output in `results`. Confirm where the object this
# call expects is meant to come from before running this cell.
result.export_visuals(export_dir="demo_data/")

# NOTE(review): `Image` is not imported in any cell shown here - TODO confirm
# the intended import.
Image("demo_data/prediction_visual.png")