# **DeepLandforms**

Author: giacomo.nodjoumi@hyranet.info - g.nodjoumi@jacobs-university.de

## DeepLandforms Training

With this notebook, users can train instance segmentation models on custom datasets of georeferenced images.
The models are based on state-of-the-art general purpose architectures, available [here](https://github.com/facebookresearch/detectron2).
Although the above repository supports several types of networks, such as object detection, image segmentation and instance segmentation, this notebook and the complementary **DeepLandforms-Segmentation** notebook are specific to instance segmentation architectures for georeferenced images.

## Usage

* Prepare the dataset in COCO label format, using the provided **LabelMe** container or another tool.
* Put or link the dataset into the **DeepLandforms** *.env* file
* Run docker-compose up
* Edit the *configs* section by editing the following parameters:
------------------------------------------------------------------
| **Parameter** | **Function** | **Common Values** |
| ---- | ---- | ---- |
| **cfg.merge_from_file(model_zoo.get_config_file(""))** | Model Architecture | MASK-R-CNN in this work |
| **cfg.TEST.EVAL_PERIOD** | Number of iterations between two evaluations | depends on SOLVER.MAX_ITER, usually every 1/10 of MAX_ITER, e.g. every 1000 for 10000 iterations |
| **cfg.DATALOADER.NUM_WORKERS** | Number of workers for the dataloader | usually corresponds to the number of CPU cores |
| **cfg.MODEL.WEIGHTS** | model_zoo.get_checkpoint_url("") | Optional, but it is advised to start from a pretrained model from the model zoo; it MUST have the same architecture as the one passed to get_config_file. See the default values for an example. |
| **cfg.SOLVER.IMS_PER_BATCH** | Number of images ingested per batch; depends on the performance of the GPU, especially VRAM |  up to 8 for 8GB VRAM |
| **cfg.SOLVER.BASE_LR** | learning rate | 0.0002 is a good starting point |
| **cfg.SOLVER.MAX_ITER** | Number of training iterations | Raise for low mAP, lower to prevent overfitting |
| **cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE** | parameter to sample a subset of proposals coming out of RPN to calculate cls and reg loss during training. | multiple of 2, commonly 64 |
------------------------------------------------------------------
Then just execute the notebook and monitor the training in the **Tensorboard** container.

## Funding
*This study is within the Europlanet 2024 RI and EXPLORE project, and it has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 871149 and No 101004214.*

------------------------------------------------------------------

In [None]:
import cv2
import detectron2
from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
%matplotlib inline
from matplotlib import pyplot as plt
import os
import random
import torch
from utils.detectron_utils import Trainer
from utils.train_utils import categories_gen, classes_distribution, dataframes_gen, dataMover, getmeta, classDump, label2coco, dsReg, trainaugmenter
from detectron2.evaluation import COCOEvaluator
import labelme2coco
from detectron2.data.datasets import register_coco_instances
from detectron2.data import MetadataCatalog
from colour import Color
import json
from detectron2.data.catalog import Metadata
import pandas as pd
import shutil
from sklearn.model_selection import train_test_split

from utils.GenUtils import get_paths, folder_file_size, chunk_creator
import rasterio as rio
from os.path import exists

In [None]:
# Report the PyTorch build and the CUDA device visible to it.
print(torch.__version__)
if torch.cuda.is_available():
    # With no argument, get_device_name() describes the current CUDA device.
    print(torch.cuda.get_device_name())
else:
    # get_device_name() raises when CUDA cannot be initialised, so report the
    # situation explicitly instead of failing the cell with a traceback.
    print('CUDA is not available: no GPU detected by PyTorch')

In [None]:
# Dataset locations used by the rest of the notebook.
home_dir = '/home/user/data/'
basepath = 'MARSPIT_v2/MixRes/filtered_in/'
# src_path and image_path name the same directory; src_path simply carries a
# doubled '/' because home_dir already ends with one.
src_path = f'{home_dir}/{basepath}'
image_path = f'{home_dir}{basepath}'
base_dir = f'{src_path}/train'
#train_dir = f'{image_path}/train'

# Always start from an empty output folder: delete a previous one, recreate it.
if os.path.isdir(base_dir):
    shutil.rmtree(base_dir)
os.makedirs(base_dir)

In [None]:
# Folders that will hold the train / validation / test splits.
train_path = f'{image_path}/train_data'
valid_path = f'{image_path}/valid_data'
test_path = f'{image_path}/test_data'

# Delete whatever a previous run left behind in any of them.
for stale_dir in (train_path, valid_path, test_path):
    if os.path.isdir(stale_dir):
        shutil.rmtree(stale_dir)


In [None]:
# Annotation file of the complete (unsplit) dataset.
dataset_name = 'dataset.json'
dataset_path = f'{src_path}/{dataset_name}'

# Delete an existing annotation file before calling dsReg.
if exists(dataset_path):
    os.remove(dataset_path)

# NOTE(review): dsReg is a project helper (utils.train_utils); presumably it
# builds the COCO annotations and registers the dataset — confirm.
dataset, dataset_meta, dataset_classes = dsReg(src_path, dataset_name, base_dir)

In [None]:
# Split the full dataset with the project helper dataframes_gen; it returns one
# class-distribution frame and one record collection per split.
# NOTE(review): 0.6 and 0.1 look like split fractions; which split each one
# controls is defined in utils.train_utils — confirm before changing them.
train_df_dis, valid_df_dis, test_df_dis, train, valid, test = dataframes_gen(dataset_classes, dataset, 0.6,0.1)

In [None]:
# Hand the three splits and the image folder to the project helper dataMover.
# NOTE(review): presumably this fills train_data / valid_data / test_data
# under image_path — confirm against utils.train_utils.
dataMover(image_path, train, valid, test)

In [None]:
# Run the project augmentation helper with the train_data folder as both arguments.
# NOTE(review): presumably (source, destination), i.e. augmentation written
# next to the originals — confirm against utils.train_utils.
trainaugmenter(f'{image_path}/train_data',f'{image_path}/train_data')

In [None]:
# Register the training split.
train_name = 'dataset.json'
train_path = f'{image_path}/train_data'
train_json = f'{train_path}/{train_name}'
# Delete an existing dataset.json in the folder before calling dsReg.
if exists(train_json):
    os.remove(train_json)
# 'traindata' is the name later referenced by cfg.DATASETS.TRAIN.
# Note that this rebinds `train`, replacing the value returned by dataframes_gen.
# NOTE(review): dsReg is a project helper; what it writes/registers is not visible here — confirm.
train, train_meta, train_classes = dsReg(train_path, 'traindata', train_path)

In [None]:
# Register the validation split.
valid_name = 'dataset.json'
valid_path = f'{image_path}/valid_data'
valid_json = f'{valid_path}/{valid_name}'
# Delete an existing dataset.json in the folder before calling dsReg.
if exists(valid_json):
    os.remove(valid_json)
# 'valid_data' is the name later referenced by cfg.DATASETS.TEST.
# Note that this rebinds `valid`, replacing the value returned by dataframes_gen.
# NOTE(review): dsReg is a project helper; what it writes/registers is not visible here — confirm.
valid, valid_meta, valid_classes = dsReg(valid_path, 'valid_data', valid_path)

In [None]:
# Register the test split.
test_name = 'dataset.json'
test_path = f'{image_path}/test_data'
test_json = f'{test_path}/{test_name}'
# Delete an existing dataset.json in the folder before calling dsReg.
if exists(test_json):
    os.remove(test_json)
# Note that this rebinds `test`, replacing the value returned by dataframes_gen.
# NOTE(review): dsReg is a project helper; what it writes/registers is not visible here — confirm.
test, test_meta, test_classes = dsReg(test_path, 'test_data', test_path)

In [None]:
import pandas as pd
def clsdis(categories, datatype, classes):
    """Pair the class of every label with the name of its dataset split.

    Args:
        categories: iterable of keys (or indices) into ``classes``, one per label.
        datatype: value written in the ``Dataset`` column of every row.
        classes: mapping or sequence resolving a category to its class.

    Returns:
        pandas.DataFrame with the columns ``Class`` and ``Dataset`` and one
        row per element of ``categories``, in the same order.
    """
    rows = [(classes[category], datatype) for category in categories]
    return pd.DataFrame(rows, columns=['Class', 'Dataset'])

In [None]:
# Rebuild the training class distribution from the `train` records registered above,
# overwriting the train_df_dis returned by dataframes_gen.
# NOTE(review): categories_gen is a project helper; presumably it yields one category per label — confirm.
train_cat = categories_gen(train)
train_df_dis = clsdis(train_cat, 'Train', train_classes)

In [None]:
label = dataset_classes


def _plot_class_pie(df_dis, position, split_name):
    """Draw the per-class label share of one split as a pie chart.

    Args:
        df_dis: DataFrame with a ``Class`` column and one row per label.
        position: three-digit subplot code handed to ``plt.subplot``.
        split_name: split name shown in the chart title.

    Returns:
        The axes created for the chart.
    """
    ax = plt.subplot(position)
    n_labels = len(df_dis)
    df_dis.groupby(['Class']).count().plot(
        kind='pie',
        # figsize has to be a (width, height) pair.
        figsize=(10, 10),
        # p is the slice percentage; convert it back to an absolute count.
        autopct=lambda p: f'{p:.2f}%, \n{p*n_labels/100:.0f} labels',
        startangle=90,
        subplots=True,
        ax=ax,
        fontsize=5,
        legend=False,
    )
    # loc belongs to plt.title, not to str.format.
    plt.title('{} Dataset\n{} Labels'.format(split_name, n_labels), loc='center')
    return ax


plt.figure(figsize = (10,5), facecolor='white',dpi=300)
plt.suptitle('Class-labels distributions', fontsize=15)
ax1 = _plot_class_pie(train_df_dis, 131, 'Train')
ax2 = _plot_class_pie(valid_df_dis, 132, 'Valid')
ax3 = _plot_class_pie(test_df_dis, 133, 'Test')

**CONFIGS - edit before running**

In [None]:
# --- Model architecture ------------------------------------------------------
# Alternatives: 'mask_rcnn_R_50_C4_1x.yaml', 'mask_rcnn_R_101_FPN_3x.yaml'
model_config = 'mask_rcnn_R_50_FPN_1x.yaml'
zoo_entry = "COCO-InstanceSegmentation/" + model_config

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_entry))
# Let training initialize from the model-zoo checkpoint of the same architecture.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_entry)
#cfg.MODEL.MASK_ON = True
# One output class per class found in the training split.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(train_classes)
#cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
#cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE =  1024

# --- Data ----------------------------------------------------------------------
# These names must match the ones passed to dsReg when registering the splits.
cfg.DATASETS.TRAIN = ('traindata',)
cfg.DATASETS.TEST = ('valid_data',)
cfg.DATALOADER.NUM_WORKERS = 4
#cfg.INPUT.MIN_SIZE_TRAIN = 640
#cfg.INPUT.MAX_SIZE_TRAIN = 900
#cfg.INPUT.MIN_SIZE_TEST = 640
#cfg.INPUT.MAX_SIZE_TEST = 900

# --- Solver --------------------------------------------------------------------
cfg.SOLVER.IMS_PER_BATCH = 4   # alternative tried: 8
cfg.SOLVER.BASE_LR = 0.00025   # alternatives tried: 0.001, 0.00015, 0.0001
cfg.SOLVER.MAX_ITER = 10000    # adjust up if val mAP is still rising, adjust down if overfit (alternative: 15000)
cfg.SOLVER.STEPS = (6000, 7000, 8000, 9000)   # alternative tried: (200,400,600, 800)
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.CHECKPOINT_PERIOD = 5000   # alternative tried: 2500
#cfg.SOLVER.GAMMA = 0.05
cfg.SOLVER.AMP.ENABLED = True

# --- Evaluation and output -----------------------------------------------------
cfg.TEST.EVAL_PERIOD = 500
cfg.OUTPUT_DIR = base_dir



**End of configs**

In [None]:
# Sanity check: draw the annotations of one randomly picked training record.
for d in random.sample(train, 1):
    #srpath = f'{image_path}/train_data/'
    img_path = d["file_name"]
    print(img_path)
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports an unreadable or missing file by returning None.
        raise FileNotFoundError(f'Could not read image: {img_path}')
    # cv2 loads images as BGR while Visualizer expects RGB, so the channel
    # axis is reversed (a 1:-1 slice would keep only the middle channel).
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_meta, scale=2)
    out = visualizer.draw_dataset_dict(d)
    fig = plt.figure(figsize=(10,10))
    plt.imshow(out.get_image())

In [None]:
# RUN
# Make sure the output folder exists, then build the project Trainer
# (utils.detectron_utils) from the configuration above.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = Trainer(cfg)

# IPython magics: load the TensorBoard extension and point it at base_dir,
# the folder assigned to cfg.OUTPUT_DIR.
%load_ext tensorboard
%tensorboard --logdir {base_dir}

In [None]:
from PIL import ImageFile
# Let Pillow load truncated image files instead of raising on them.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# NOTE(review): Trainer is a project class, presumably derived from detectron2's
# DefaultTrainer, where resume=False starts from cfg.MODEL.WEIGHTS instead of
# the last checkpoint found in cfg.OUTPUT_DIR — confirm.
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
import json
import matplotlib.pyplot as plt

# Folder whose metrics.json is read below.
# NOTE(review): hard-coded relative path; presumably the same folder as
# base_dir / cfg.OUTPUT_DIR seen from the notebook's working directory — confirm.
experiment_folder = '../data/MARSPIT_v2/MixRes/filtered_in/train'

def load_json_arr(json_path):
    """Read a JSON-lines file and return its records as a list.

    Args:
        json_path: path of a text file holding one JSON document per line.

    Returns:
        list with the decoded object of every non-blank line, in file order.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    with open(json_path, 'r') as f:
        # Blank lines carry no record; skipping them avoids a decode error.
        return [json.loads(line) for line in f if line.strip()]

# Load every record of metrics.json found in experiment_folder.
experiment_metrics = load_json_arr(experiment_folder + '/metrics.json')



In [None]:
# Collect the (iteration, total_loss) pairs used for the training curve.
train_val = []
train_iter = []
for x in experiment_metrics:
    # Not every record carries both keys. Checking them up front keeps the two
    # lists the same length and avoids hiding unrelated errors behind a bare
    # except.
    if 'total_loss' in x and 'iteration' in x:
        train_val.append(x['total_loss'])
        train_iter.append(x['iteration'])

In [None]:
# Plot training loss and validation loss against the iteration counter.
# Validation points are gathered in one pass over the records that have them.
val_points = [
    (x['iteration'], x['validation_loss'])
    for x in experiment_metrics
    if 'validation_loss' in x
]

plt.figure(figsize=(10,5))
plt.plot(train_iter, train_val)
plt.plot(
    [iteration for iteration, _ in val_points],
    [loss for _, loss in val_points])
plt.legend(['total_loss', 'validation_loss'], loc='upper left', fontsize=15)
# The x values come from the records' 'iteration' field, so label them as such.
plt.xlabel('Iterations', fontsize=15)
plt.ylabel('Loss', fontsize=15)
plt.title('Total/Validation Loss', fontsize=15)

# Save next to the metrics file that was read; this must happen before show().
plt.savefig(experiment_folder + '/total_val_loss.png', dpi=300)
plt.show()