### Imports

In [None]:
# COMMON LIBRARIES
import os
import cv2
import json

from datetime import datetime

# DATA SET PREPARATION AND LOADING
from detectron2.data.datasets import register_coco_instances
from detectron2.data import DatasetCatalog, MetadataCatalog

# VISUALIZATION
from detectron2.utils.visualizer import Visualizer
from detectron2.utils.visualizer import ColorMode
from google.colab.patches import cv2_imshow

# CONFIGURATION
from detectron2 import model_zoo
from detectron2.config import get_cfg

# EVALUATION
from detectron2.engine import DefaultPredictor

# TRAINING
from detectron2.engine import DefaultTrainer

### For a local dataset

In [None]:
# import os
# from detectron2.data import DatasetCatalog, MetadataCatalog
# from detectron2.data.datasets import register_coco_instances

# # Paths for local dataset
# LOCAL_DATASET_DIR = "/path/to/your/local/dataset"  # Update this with the root directory of your dataset
# ANNOTATIONS_FILE_NAME = "_annotations.coco.json"

# # Paths for training, testing, and validation datasets
# TRAIN_DATASET_NAME = "local-train"
# TRAIN_IMAGES_DIR = os.path.join(LOCAL_DATASET_DIR, "train")
# TRAIN_ANNOTATIONS_PATH = os.path.join(TRAIN_IMAGES_DIR, ANNOTATIONS_FILE_NAME)

# TEST_DATASET_NAME = "local-test"
# TEST_IMAGES_DIR = os.path.join(LOCAL_DATASET_DIR, "test")
# TEST_ANNOTATIONS_PATH = os.path.join(TEST_IMAGES_DIR, ANNOTATIONS_FILE_NAME)

# VALID_DATASET_NAME = "local-valid"
# VALID_IMAGES_DIR = os.path.join(LOCAL_DATASET_DIR, "valid")
# VALID_ANNOTATIONS_PATH = os.path.join(VALID_IMAGES_DIR, ANNOTATIONS_FILE_NAME)

# # Function to register a dataset
# def register_local_dataset(name, image_dir, annotations_path):
#     # Unregister the dataset if it's already registered
#     if name in DatasetCatalog.list():
#         DatasetCatalog.remove(name)
#         MetadataCatalog.remove(name)

#     # Register the dataset
#     register_coco_instances(
#         name=name,
#         metadata={},
#         json_file=annotations_path,
#         image_root=image_dir
#     )

# # Register local datasets
# register_local_dataset(TRAIN_DATASET_NAME, TRAIN_IMAGES_DIR, TRAIN_ANNOTATIONS_PATH)
# register_local_dataset(TEST_DATASET_NAME, TEST_IMAGES_DIR, TEST_ANNOTATIONS_PATH)
# register_local_dataset(VALID_DATASET_NAME, VALID_IMAGES_DIR, VALID_ANNOTATIONS_PATH)

# # Access the metadata and dataset to verify
# train_metadata = MetadataCatalog.get(TRAIN_DATASET_NAME)
# test_metadata = MetadataCatalog.get(TEST_DATASET_NAME)
# valid_metadata = MetadataCatalog.get(VALID_DATASET_NAME)

# print(f"Training dataset registered: {TRAIN_DATASET_NAME}")
# print(f"Testing dataset registered: {TEST_DATASET_NAME}")
# print(f"Validation dataset registered: {VALID_DATASET_NAME}")


### For a dataset hosted on Roboflow

In [None]:
def adjust_category_ids(annotations_file_path):
    """Write a copy of a COCO annotation file whose category ids start at 1.

    All category ids are shifted by one common offset so that the smallest id
    becomes 1. For a 0-based export this is the usual ``+1``; a file whose ids
    already start at 1 is copied with its ids unchanged. The ``category_id``
    of every annotation is remapped accordingly. The input file is never
    modified.

    Args:
        annotations_file_path: Path to a COCO-format JSON file containing
            ``categories`` and ``annotations`` lists.

    Returns:
        Path of the newly written file: the input path with ``_adjusted``
        inserted just before the file extension.

    Raises:
        KeyError: If an annotation references a category id that is not
            listed under ``categories``.
    """
    with open(annotations_file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    categories = data['categories']

    # Shift so the smallest id lands on 1; with no categories there is
    # nothing to shift (default=1 gives an offset of 0).
    offset = 1 - min((category['id'] for category in categories), default=1)

    # Build the complete old -> new mapping before mutating anything, so the
    # lookups below always see the original ids.
    category_mapping = {category['id']: category['id'] + offset for category in categories}

    for category in categories:
        category['id'] = category_mapping[category['id']]

    for annotation in data['annotations']:
        annotation['category_id'] = category_mapping[annotation['category_id']]

    # Only touch the final extension: a plain str.replace(".json", ...) would
    # also rewrite ".json" inside directory names, and would return the input
    # path itself (overwriting the source) when there is no ".json" at all.
    root, extension = os.path.splitext(annotations_file_path)
    updated_annotations_file_path = f"{root}_adjusted{extension}"
    with open(updated_annotations_file_path, 'w') as f:
        json.dump(data, f)

    return updated_annotations_file_path

# Roboflow setup
from roboflow import Roboflow

# Read the API key from the environment instead of embedding it in the
# notebook: a key committed to source is exposed to everyone who can read the
# file. Any key that was previously hard-coded here should be rotated.
ROBOFLOW_API_KEY = os.environ.get("ROBOFLOW_API_KEY")
if not ROBOFLOW_API_KEY:
    raise RuntimeError(
        "Set the ROBOFLOW_API_KEY environment variable before running this cell."
    )

# Download version 3 of the project in COCO segmentation format.
rf = Roboflow(api_key=ROBOFLOW_API_KEY)
project = rf.workspace("abhishek-ogefs").project("weed-8dpr6")
version = project.version(3)
dataset = version.download("coco-segmentation")

DATA_SET_NAME = dataset.name.replace(" ", "-")
ANNOTATIONS_FILE_NAME = "_annotations.coco.json"


def _register_split(split):
    """Register one split of the downloaded dataset with Detectron2.

    The split's annotation file is first passed through
    ``adjust_category_ids``; the resulting file is what gets registered.
    Any earlier registration under the same name is dropped first so the
    cell can be re-run.

    Args:
        split: Sub-directory of ``dataset.location`` holding the split
            ("train", "test" or "valid").

    Returns:
        Tuple ``(dataset_name, images_dir, annotations_path,
        adjusted_annotations_path)``.
    """
    name = f"{DATA_SET_NAME}-{split}"
    images_dir = os.path.join(dataset.location, split)
    annotations_path = os.path.join(dataset.location, split, ANNOTATIONS_FILE_NAME)
    adjusted_annotations_path = adjust_category_ids(annotations_path)

    # Check each catalog on its own: a name can be present in one catalog and
    # absent from the other, and removing a missing name raises.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    if name in MetadataCatalog.list():
        MetadataCatalog.remove(name)

    register_coco_instances(
        name=name,
        metadata={},
        json_file=adjusted_annotations_path,
        image_root=images_dir,
    )
    return name, images_dir, annotations_path, adjusted_annotations_path


# TRAIN SET
(
    TRAIN_DATA_SET_NAME,
    TRAIN_DATA_SET_IMAGES_DIR_PATH,
    TRAIN_DATA_SET_ANN_FILE_PATH,
    TRAIN_DATA_SET_ANN_FILE_PATH_ADJUSTED,
) = _register_split("train")

# TEST SET
(
    TEST_DATA_SET_NAME,
    TEST_DATA_SET_IMAGES_DIR_PATH,
    TEST_DATA_SET_ANN_FILE_PATH,
    TEST_DATA_SET_ANN_FILE_PATH_ADJUSTED,
) = _register_split("test")

# VALID SET
(
    VALID_DATA_SET_NAME,
    VALID_DATA_SET_IMAGES_DIR_PATH,
    VALID_DATA_SET_ANN_FILE_PATH,
    VALID_DATA_SET_ANN_FILE_PATH_ADJUSTED,
) = _register_split("valid")

loading Roboflow workspace...
loading Roboflow project...


In [None]:
# Names known to MetadataCatalog that start with DATA_SET_NAME.
[name for name in MetadataCatalog.list() if name.startswith(DATA_SET_NAME)]

['Weed-train', 'Weed-test', 'Weed-valid']

**TRAINING**

In [None]:
def get_weed_detection_config():
    """Build the Detectron2 config used to train the weed segmentation model.

    Starts from the model-zoo Mask R-CNN R50-FPN 3x config and its pretrained
    checkpoint, then overrides datasets, solver schedule, ROI/RPN sampling,
    input sizes and test settings.

    Returns:
        The populated config node returned by ``get_cfg()``.
    """
    cfg = get_cfg()

    # Base Configuration
    ARCHITECTURE = "mask_rcnn_R_50_FPN_3x"  # ResNet-50 backbone for memory efficiency
    CONFIG_FILE_PATH = f"COCO-InstanceSegmentation/{ARCHITECTURE}.yaml"
    cfg.merge_from_file(model_zoo.get_config_file(CONFIG_FILE_PATH))
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(CONFIG_FILE_PATH)

    # Dataset Configuration
    cfg.DATASETS.TRAIN = ("Weed-train",)
    cfg.DATASETS.TEST = ("Weed-test",)
    # NOTE(review): DATASETS.VAL does not appear to be part of Detectron2's
    # default config schema, so the stock trainer will not read it and a
    # dumped config containing it may not merge back into a fresh get_cfg().
    # Kept because other cells may read it - confirm.
    cfg.DATASETS.VAL = ("Weed-valid",)

    # Model Architecture
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4  # Ensure this matches your number of classes
    cfg.MODEL.MASK_ON = True  # Enable mask predictions
    cfg.MODEL.DEVICE = "cuda"  # Use GPU for faster processing

    # Training Hyperparameters
    cfg.SOLVER.IMS_PER_BATCH = 4  # Reduced batch size to fit in memory
    cfg.SOLVER.BASE_LR = 0.002  # Slightly adjusted for smaller batch size
    cfg.SOLVER.MAX_ITER = 15000  # Number of iterations for training
    cfg.SOLVER.STEPS = (10000, 13000)  # Iterations at which the LR is multiplied by GAMMA
    cfg.SOLVER.GAMMA = 0.1  # LR decay factor
    cfg.SOLVER.WARMUP_FACTOR = 1.0 / 1000  # Gradual warm-up
    cfg.SOLVER.WARMUP_ITERS = 1000
    cfg.SOLVER.WARMUP_METHOD = "linear"
    cfg.SOLVER.WEIGHT_DECAY = 0.0001
    cfg.SOLVER.MOMENTUM = 0.9

    # ROI Head Configuration
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # Adjusted for memory usage
    cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.5  # Balanced sampling of positive/negative samples
    cfg.MODEL.ROI_HEADS.NMS_THRESH_TEST = 0.5
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # Lower threshold for detection confidence

    # RPN Configuration
    cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE = 128  # Reduced for memory efficiency
    cfg.MODEL.RPN.POSITIVE_FRACTION = 0.5  # Balanced sampling
    cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 10000
    cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 1500
    cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 5000
    cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 800
    cfg.MODEL.RPN.NMS_THRESH = 0.7  # Standard NMS threshold

    # Anchor Generator
    # NOTE(review): the stock FPN configs use one size per feature level
    # ([[32], [64], [128], [256], [512]]). A single inner list is presumably
    # applied to every level, which would change the anchor count per location
    # and could prevent the pretrained RPN head weights from loading. Left
    # as-is so existing checkpoints stay loadable - confirm this is intended.
    cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[32, 64, 128, 256, 512]]
    cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.5, 1.0, 2.0]]  # Standard aspect ratios

    # Loss Weights
    cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_LOSS_WEIGHT = 1.0  # Default loss weight
    # NOTE(review): the next two keys do not appear to exist in Detectron2's
    # default config schema and are likely ignored by the model - confirm.
    cfg.MODEL.ROI_MASK_HEAD.LOSS_WEIGHT = 1.0
    cfg.MODEL.ROI_HEADS.CLS_LOSS_WEIGHT = 1.0

    # Input Configuration
    cfg.INPUT.MIN_SIZE_TRAIN = (640, 672, 704, 736, 768, 800)  # Multi-scale training sizes
    cfg.INPUT.MAX_SIZE_TRAIN = 1200
    cfg.INPUT.MIN_SIZE_TEST = 800
    cfg.INPUT.MAX_SIZE_TEST = 1200
    cfg.INPUT.MASK_FORMAT = "bitmask"

    # Data Augmentation
    cfg.INPUT.RANDOM_FLIP = "horizontal"  # Horizontal flip as augmentation

    # Dataloader
    cfg.DATALOADER.NUM_WORKERS = 4
    cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = True

    # Evaluation
    # NOTE(review): periodic evaluation needs a trainer that provides an
    # evaluator; with the plain DefaultTrainer this probably only logs a
    # warning every EVAL_PERIOD iterations - confirm.
    cfg.TEST.EVAL_PERIOD = 500  # Evaluate every 500 iterations
    cfg.TEST.DETECTIONS_PER_IMAGE = 100  # Maximum detections per image

    # Test-Time Augmentation (TTA) settings
    # NOTE(review): these presumably take effect only when the model is
    # wrapped for TTA explicitly; the stock trainer/predictor may not use them.
    cfg.TEST.AUG.ENABLED = True
    cfg.TEST.AUG.MIN_SIZES = (400, 500, 600, 700, 800)  # Variety of scales
    cfg.TEST.AUG.MAX_SIZE = 1200
    cfg.TEST.AUG.FLIP = True  # Flip testing enabled

    # Mixed Precision
    # SOLVER.AMP.ENABLED is the documented switch for automatic mixed
    # precision training. The previously used SOLVER.MIXED_PRECISION is not a
    # Detectron2 config key, so mixed precision was never actually turned on.
    cfg.SOLVER.AMP.ENABLED = True

    return cfg

# Usage
cfg = get_weed_detection_config()


In [None]:
# Create the output directory up front: checkpoints and event logs are written
# there, and not every Detectron2 version creates it automatically.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
# resume=True asks the trainer to continue from an existing checkpoint in the
# output directory when there is one, rather than starting over.
trainer.resume_or_load(resume=True)
trainer.train()

### TensorBoard training curves

In [None]:
# Look at training curves in tensorboard:
%load_ext tensorboard
%tensorboard --logdir $/content/drive/MyDrive/ColabOutputs

In [None]:
# Inference setup: keep only detections scoring at least 0.7 and load the
# final weights from the training output directory.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
predictor = DefaultPredictor(cfg)

### Save config

In [None]:
# Dump the current config to YAML. The context manager closes the file even
# if cfg.dump() or the write raises.
with open('config.yaml', 'w') as f:
    f.write(cfg.dump())