# Training model in Google Colab


## Connect to Google Drive and set up the environment (install the packages required by Detectron2)

In [None]:
# from google.colab import drive

# drive.mount("/content/drive")

In [None]:
# !python -m pip install pyyaml==5.1
# import sys, os, distutils.core
# !git clone 'https://github.com/facebookresearch/detectron2'
# dist = distutils.core.run_setup("./detectron2/setup.py")
# !python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# sys.path.insert(0, os.path.abspath('./detectron2'))

# # !python -m pip install "git+https://github.com/facebookresearch/detectron2.git"

## Importing all necessary libraries

In [None]:
import os
import cv2, random
import matplotlib.pyplot as plt
import torch, detectron2

# !nvcc --version
# Report the installed PyTorch release (major.minor), its CUDA build tag and
# the Detectron2 version, so a mismatched installation is easy to spot.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])

# The CUDA tag is the local-version suffix of the torch version string, e.g.
# "cu118" in "2.1.0+cu118". When the string has no "+" suffix, splitting on
# "+" returned the whole torch version as the "CUDA version"; fall back to
# torch.version.cuda (None on builds without CUDA) in that case.
_local_tag = torch.__version__.partition("+")[2]
if _local_tag:
    CUDA_VERSION = _local_tag
elif torch.version.cuda:
    CUDA_VERSION = "cu" + torch.version.cuda.replace(".", "")
else:
    CUDA_VERSION = "cpu"

print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
from detectron2.utils.logger import setup_logger
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

# Set up Detectron2's logger with its default arguments before any of the
# dataset, training or evaluation cells below run.
setup_logger()

## Loading the data

- The data is in COCO format. The `register_coco_instances` function registers the training and test datasets. `MetadataCatalog.get` and `DatasetCatalog.get` then retrieve the metadata and the dataset records, respectively. The metadata of both datasets is printed at the end.

In [None]:
# Image folders and COCO-format annotation files of the two splits.
train_images = "/home/james/Projects/Fenotypizace/data/annotations/Train/images"
train_json = "/home/james/Projects/Fenotypizace/data/annotations/Train/result.json"

test_images = "/home/james/Projects/Fenotypizace/data/annotations/Test/images/"
test_json = "/home/james/Projects/Fenotypizace/data/annotations/Test/result.json"

# Registering a name twice raises "Dataset ... is already registered!", which
# made this cell fail on every re-run. Register only names that are not yet
# known to the catalog.
# NOTE: to point an already registered name at different files, restart the
# kernel (or remove the name from the catalogs) before running this cell.
for dataset_name, json_file, image_root in (
    ("my_trainset", train_json, train_images),
    ("my_testset", test_json, test_images),
):
    if dataset_name not in DatasetCatalog.list():
        register_coco_instances(dataset_name, {}, json_file, image_root)

# Metadata and dataset records of each split; the visualisation cells further
# down read these variables.
metadata = MetadataCatalog.get("my_trainset")
dataset_dicts = DatasetCatalog.get("my_trainset")
metadata_test = MetadataCatalog.get("my_testset")
dataset_dicts_test = DatasetCatalog.get("my_testset")

print(f"\n{metadata}\n \n{metadata_test}")

In [None]:
# Draw the ground-truth annotations of up to five random training images as a
# visual sanity check of the registered dataset.
# random.sample raises ValueError when asked for more items than the list
# holds, so the sample size is capped at the dataset size.
for d in random.sample(dataset_dicts, min(5, len(dataset_dicts))):
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; fail with the offending path rather than a TypeError below.
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")
    # OpenCV loads images as BGR; the slice reverses the channel order before
    # the image is handed to the Visualizer.
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    plt.figure(figsize=(15, 13))
    # Previously the rendered image had its channels reversed and was then
    # converted back with cv2.cvtColor(..., COLOR_BGR2RGB). The two steps
    # cancel out, so the rendered image is shown directly.
    plt.imshow(vis.get_image())
    plt.show()

## Training the model

- The get_cfg function is used to create a new configuration that holds default values for configurations.
- The merge_from_file function is then used to merge the values from a YAML file that contains the pre-defined configurations for the Mask R-CNN model.
- The configuration object is then customized for the specific training task.
- The output directory is created if it doesn't exist. The model is then trained using the `DefaultTrainer` class.

In [None]:
from detectron2.engine import DefaultTrainer
from detectron2.model_zoo import get_config

# Model-zoo entry used twice below: once for the base configuration file and
# once for the checkpoint the training starts from.
MASK_RCNN_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

# Start from the library defaults, then layer the Mask R-CNN settings on top.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MASK_RCNN_CONFIG))

# --- Data -------------------------------------------------------------------
cfg.DATASETS.TRAIN = ("my_trainset",)
cfg.DATASETS.TEST = ()  # no test set is configured for the training run
cfg.DATALOADER.NUM_WORKERS = 4

# --- Model ------------------------------------------------------------------
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MASK_RCNN_CONFIG)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512

# --- Solver -----------------------------------------------------------------
cfg.SOLVER.IMS_PER_BATCH = 8
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1300
cfg.SOLVER.STEPS = []  # empty schedule of learning-rate steps

# --- Run --------------------------------------------------------------------
# Checkpoints and logs go to cfg.OUTPUT_DIR, so make sure it exists first.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# resume=False: do not pick up a previous run.
trainer.resume_or_load(resume=False)
trainer.train()

## Display training curves using TensorBoard

In [None]:
%reload_ext tensorboard
%load_ext tensorboard
%tensorboard --logdir output

## Test sample

- `cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7` sets the threshold for the prediction score. Only regions with a score above this threshold are kept in the final prediction.
- `predictor = DefaultPredictor(cfg)` creates a predictor object from the defined configuration. This object can be used to make predictions on new data.
- `print(inference_on_dataset(trainer.model, test_loader, test_evaluator))` runs inference on the test dataset using the trained model and the test data loader, evaluates the results with the test evaluator, and prints them.

In [None]:
# Re-use the training configuration for inference on the test split.
cfg.DATASETS.TEST = ("my_testset",)

# Score threshold applied at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

# Load the weights file from the training output directory.
final_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = final_weights

predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# Evaluate on the registered test split with COCO metrics.
# Passing the config object as the second positional argument is deprecated in
# current Detectron2 (it is ignored apart from a warning), so the evaluator is
# configured with explicit keyword arguments instead. Results are written to
# cfg.OUTPUT_DIR, the same directory the training run and the weights path use,
# rather than a separately hard-coded "./output/".
test_evaluator = COCOEvaluator(
    "my_testset", distributed=False, output_dir=cfg.OUTPUT_DIR
)
test_loader = build_detection_test_loader(cfg, "my_testset")

# NOTE(review): this evaluates the in-memory model held by `trainer`, so the
# training cell must have been run in this session. That model was built before
# SCORE_THRESH_TEST was changed above - presumably it still uses the earlier
# threshold; confirm this is the intended evaluation setting.
print(inference_on_dataset(trainer.model, test_loader, test_evaluator))

## Visualize the predictions on the test sample

In [None]:
from detectron2.utils.visualizer import ColorMode

# Run the predictor on up to twelve random test images and draw the predicted
# instances on top of each image.
# random.sample raises ValueError when asked for more items than the list
# holds, so the sample size is capped at the dataset size.
for d in random.sample(dataset_dicts_test, min(12, len(dataset_dicts_test))):
    im = cv2.imread(d["file_name"])
    if im is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # read; fail with the offending path rather than an obscure error later.
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")
    outputs = predictor(im)
    # OpenCV loads images as BGR; the slice reverses the channel order before
    # the image is handed to the Visualizer.
    v = Visualizer(
        im[:, :, ::-1], metadata=metadata_test, scale=0.8, instance_mode=ColorMode.IMAGE
    )
    # Predictions are moved to the CPU before drawing.
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.figure(figsize=(15, 13))
    # Previously the rendered image had its channels reversed and was then
    # converted back with cv2.cvtColor(..., COLOR_BGR2RGB). The two steps
    # cancel out, so the rendered image is shown directly.
    plt.imshow(v.get_image())
    plt.show()

## Save the model to a specific directory in Google Drive

In [None]:
# import shutil

# NOTE(review): as written, source_path is the Google Drive folder and
# destination_path is the local "/content/output", i.e. the copy would run from
# Drive to the local disk - the opposite of what the heading of this cell
# says. Confirm the two paths before enabling this code.

# # Source path of the file or folder
# source_path = "/content/drive/Shareddrives/KIT ML/Fenotypizace - vzchazeni/Code/Final/1. Trays segmentation/output"

# # # Destination path on Google Drive
# destination_path = "/content/output"

# # Copy the file or folder
# shutil.copytree(source_path, destination_path)