In [None]:
%%capture
# Install detectron2 from a fresh git checkout and make it importable in this kernel.
# NOTE: the %%capture cell magic captures this cell's stdout, so the print() calls
# below produce no visible output.
# NOTE(review): distutils was removed from the standard library in Python 3.12 —
# confirm the kernel's Python version still provides it.
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
# Run the checkout's setup.py to get its distribution object; only `install_requires`
# is read from it below.
dist = distutils.core.run_setup("./detectron2/setup.py")
# pip-install every requirement listed in install_requires, each one shell-quoted.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
print(' '.join([f"'{x}'" for x in dist.install_requires]))
print(dist)
# Put the checkout first on sys.path so `import detectron2` resolves to it.
sys.path.insert(0, os.path.abspath('./detectron2'))

In [None]:
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.utils.logger import setup_logger
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
import detectron2.data.transforms as T
from detectron2.data import detection_utils as utils
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader, DatasetMapper
from detectron2.utils.visualizer import Visualizer
from detectron2.structures import BoxMode
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm  # progress bar
import matplotlib.pyplot as plt
import json
import cv2
import copy
from typing import Optional


from IPython.display import FileLink
import sys
# torch
import torch

import gc

import warnings
# Suppress ALL Python warnings (this includes FutureWarning and pandas slicing warnings).
warnings.filterwarnings('ignore')

setup_logger()

In [None]:
### Data load
# Paths used by the rest of the notebook: input images, COCO annotation file,
# training output directory and the final checkpoint inside it.
torch.cuda.empty_cache()
from pathlib import Path

# Directory containing the training images referenced by the COCO file.
TRAIN_IMG_DIR = Path("/kaggle/input/banlad2599-bangla-newspaper-layout-dataset/All_Crumpled_Images")

# COCO-format annotation JSON for the training images.
TRAIN_COCO_PATH=Path("/kaggle/input/banlad2599-bangla-newspaper-layout-dataset/merged_coco.json")

# Training output directory
OUTPUT_DIR = Path("./output")
OUTPUT_MODEL = OUTPUT_DIR/"model_final.pth"

# Path to your pretrained model weights
# NOTE(review): left empty here and reassigned in a later cell — confirm which value
# downstream code is expected to see.
PRETRAINED_PATH = Path("")

In [None]:
### Coco Annotation
from pycocotools.coco import COCO

# Raw annotation file parsed into a plain dict (categories / images / annotations).
train_dict = json.loads(TRAIN_COCO_PATH.read_text())

# The same file loaded through the pycocotools COCO helper.
train_coco_labels = COCO(annotation_file=TRAIN_COCO_PATH)

print("#### LABELS AND METADATA LOADED ####")

In [None]:
#Decisions
# Run-mode switches for the notebook. Each flag only has an effect where a later cell
# actually reads it.

from datetime import datetime

# if False, model is set to `PRETRAINED_PATH` model
is_train = True

# if True, evaluate on validation dataset
# NOTE(review): in the visible cells this flag is only consulted when registering the
# validation dataset — confirm the evaluation cells are meant to run unconditionally.
is_evaluate = False

# if True, run inference on test dataset
# NOTE(review): no visible cell reads this flag — confirm.
is_inference = True

# if True and `is_train` == True, `PRETRAINED_PATH` model is trained further
is_resume_training = False

# Perform augmentation
# NOTE(review): no visible cell reads this flag; the training mapper always augments — confirm.
is_augment = True

SEED = 1234

# Model path based on Decisions
# Evaluated once, here, from the current values of `is_train` and `PRETRAINED_PATH`.
MODEL_PATH = OUTPUT_MODEL if is_train else PRETRAINED_PATH

In [None]:
# Quick size summary of the loaded COCO dict.
print(f"There are {len(train_dict['categories'])} categories.\n")
# print("There are " + str(len(test_dict['images']) + len(train_dict['images'])) + " images in the dataset.")
print(f"There are {len(train_dict['images'])} images in the train set.")
# print("There are " + str(len(test_dict['images'])) + " images in the test set.\n")
print(f"There are {len(train_dict['annotations'])} annotations in the train set.\n")

print("We will focus on mainly categories, images and annotations.")

In [None]:
def organize_coco_data(data_dict: dict) -> tuple[list[str], list[dict], list[dict]]:
    """Split a COCO-style dict into class names, image metadata and flat annotations.

    Args:
        data_dict: dict with 'categories', 'images' and 'annotations' lists.

    Returns:
        A 3-tuple ``(thing_classes, images_metadata, data_annotations)``:
        * thing_classes: category names in the order they appear in 'categories';
        * images_metadata: the 'images' list itself (not copied);
        * data_annotations: one dict per annotation with keys id, gt_masks, image_id,
          category_id, x_min, y_min, x_max, y_max (the box corners are derived from the
          COCO ``[left, top, width, height]`` bbox).
    """
    # (output key, COCO key) pairs copied over unchanged, in output order.
    copied_fields = (
        ("id", "id"),
        ("gt_masks", "segmentation"),  # segmentation polygon coordinates
        ("image_id", "image_id"),      # image this annotation belongs to
        ("category_id", "category_id"),
    )

    thing_classes: list[str] = [category['name'] for category in data_dict['categories']]
    images_metadata: list[dict] = data_dict['images']

    data_annotations = []
    for coco_ann in data_dict['annotations']:
        flat = {out_key: coco_ann[coco_key] for out_key, coco_key in copied_fields}
        box = coco_ann['bbox']
        flat["x_min"] = box[0]           # left
        flat["y_min"] = box[1]           # top
        flat["x_max"] = box[0] + box[2]  # left + width
        flat["y_max"] = box[1] + box[3]  # top + height
        data_annotations.append(flat)

    return thing_classes, images_metadata, data_annotations

In [None]:
# Split the training COCO dict into class names, per-image metadata and flat annotations.
thing_classes, images_metadata, data_annotations = organize_coco_data(train_dict)

# thing_classes_test, images_metadata_test, _ = organize_coco_data(test_dict)

print(thing_classes)
# thing_classes = [cls for cls in thing_classes if cls != 'newspaper']

# Number of class names found in the annotation file.
print(len(thing_classes))

In [None]:
# One row per image: keep the four columns used downstream and rename `id` -> `image_id`.
train_metadata = (
    pd.DataFrame(images_metadata)
    .loc[:, ['id', 'file_name', 'width', 'height']]
    .rename(columns={"id": "image_id"})
)
print("train_metadata size=", len(train_metadata))
train_metadata.head(5)

In [None]:
# One row per annotation, built from the flat annotation dicts.
train_annot_df = pd.DataFrame(data_annotations)
print("train_annot_df size=", len(train_annot_df))
train_annot_df.head(5)

In [None]:
import pandas as pd

# Class balance check: how many annotations each category id has in `train_annot_df`.

# Count occurrences of each category ID (value_counts orders by descending count)
category_counts = train_annot_df['category_id'].value_counts()

# Print the result as a two-column, tab-separated table
print("Category ID\tNumber of Instances")
for category_id, count in category_counts.items():
    print(f"{category_id}\t\t{count}")

In [None]:
#Formatting Data for detectron2
def convert_coco_to_detectron2_format(
    imgdir: Path,
    metadata_df: pd.DataFrame,
    annot_df: Optional[pd.DataFrame] = None,
    target_indices: Optional[np.ndarray] = None,
) -> list[dict]:
    """Build detectron2 dataset records from image metadata and flat annotations.

    Args:
        imgdir: directory that contains the image files.
        metadata_df: one row per image; the row values are unpacked positionally as
            (image_id, file_name, width, height).
        annot_df: optional annotation table with columns image_id, category_id,
            gt_masks, x_min, y_min, x_max, y_max. When None, every record gets an
            empty "annotations" list.
        target_indices: optional positional row indices into ``metadata_df``; when
            given, only those images are converted, in the given order.

    Returns:
        A list of dicts with keys file_name, image_id, width, height and annotations
        (each annotation carries bbox in XYXY_ABS mode, segmentation and category_id).
    """
    # Subset BEFORE converting so only the requested images are processed (the result
    # is the same as converting everything and indexing afterwards).
    if target_indices is not None:
        metadata_df = metadata_df.iloc[list(target_indices)]

    # Group annotations once instead of scanning the whole table for every image.
    annotations_by_image = {}
    if annot_df is not None:
        annotations_by_image = {
            key: group for key, group in annot_df.groupby("image_id", sort=False)
        }

    dataset_dicts = []
    for _, train_meta_row in tqdm(metadata_df.iterrows(), total=len(metadata_df)):
        # Iterate over each image
        image_id, filename, width, height = train_meta_row.values

        annotations = []
        image_annotations = annotations_by_image.get(image_id)
        if image_annotations is not None:
            for _, ann in image_annotations.iterrows():
                annotations.append({
                    "bbox": [
                        float(ann["x_min"]),
                        float(ann["y_min"]),
                        float(ann["x_max"]),
                        float(ann["y_max"]),
                    ],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": ann["gt_masks"],
                    "category_id": ann["category_id"],
                })

        # coco format -> detectron2 format dict
        dataset_dicts.append({
            "file_name": str(imgdir/filename),
            "image_id": image_id,
            "width": width,
            "height": height,
            "annotations": annotations,
        })

    return dataset_dicts

In [None]:
def custom_mapper(dataset_dict):
    """Turn one dataset record into a model input with augmentation applied.

    Works on a deep copy of ``dataset_dict``. The image is read as BGR, passed through
    random brightness, random contrast and a resize to 800x800, and stored under
    "image" as a float32 CHW tensor. Non-crowd annotations are transformed with the
    same transforms and stored under "instances" (the "annotations" key is removed).
    """
    record = copy.deepcopy(dataset_dict)

    # (RandomFlip variants are currently disabled.)
    augmentations = [
        T.RandomBrightness(0.9, 1.0),
        T.RandomContrast(0.8, 1.4),
        T.Resize((800, 800)),
    ]

    raw_image = utils.read_image(record["file_name"], format="BGR")
    aug_image, applied = T.apply_transform_gens(augmentations, raw_image)
    out_hw = aug_image.shape[:2]

    # HWC -> CHW float32 tensor.
    chw = aug_image.transpose(2, 0, 1).astype("float32")
    record["image"] = torch.as_tensor(chw)

    kept = []
    for obj in record.pop("annotations"):
        if obj.get("iscrowd", 0) != 0:
            continue
        kept.append(utils.transform_instance_annotations(obj, applied, out_hw))

    instances = utils.annotations_to_instances(kept, out_hw)
    record["instances"] = utils.filter_empty_instances(instances)

    return record

In [None]:
class AugTrainer(DefaultTrainer):
    """DefaultTrainer variant whose training data loader uses `custom_mapper`."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Build the training loader from `cfg`, mapping every record with `custom_mapper`."""
        return build_detection_train_loader(cfg, mapper=custom_mapper)

In [None]:
# Fraction of the images assigned to the training split; the remainder is validation.
TRAIN_SPLIT = 0.95

In [None]:
# Random train/validation split over image positions in `train_metadata`.
n_dataset = len(train_metadata)
n_train = int(n_dataset * TRAIN_SPLIT)
print("n_dataset", n_dataset, "n_train", n_train, "n_val", n_dataset-n_train)

# Use the notebook-wide SEED (1234) instead of repeating the literal, so the split
# follows the value set in the decisions cell.
np.random.seed(SEED)

inds = np.random.permutation(n_dataset)
train_inds, valid_inds = inds[:n_train], inds[n_train:]

# The two index sets must partition the dataset.
assert len(train_inds) + len(valid_inds) == n_dataset

In [None]:
# Drop every dataset registered so far so the registration cells below can be re-run.
DatasetCatalog.clear()

In [None]:
#Registering and Loading Data for detectron2
# Names under which the splits are registered in DatasetCatalog / MetadataCatalog.
DATA_REGISTER_TRAINING = "badlad_train"
DATA_REGISTER_VALID    = "badlad_valid"
# NOTE(review): no visible cell registers or reads the test name — confirm.
DATA_REGISTER_TEST     = "badlad_test"

In [None]:
# Register Training data
if is_train:
    # Registering an existing name raises, so drop a stale entry first; this keeps the
    # cell re-runnable on its own.
    if DATA_REGISTER_TRAINING in DatasetCatalog.list():
        DatasetCatalog.remove(DATA_REGISTER_TRAINING)

    # The lambda is evaluated lazily, each time the dataset is requested.
    DatasetCatalog.register(
        DATA_REGISTER_TRAINING,
        lambda: convert_coco_to_detectron2_format(
            TRAIN_IMG_DIR,
            train_metadata,
            train_annot_df,
            target_indices=train_inds,
        ),
    )

    # Set Training data categories
    MetadataCatalog.get(DATA_REGISTER_TRAINING).set(thing_classes=thing_classes)

    # Materialise the records once for inspection and keep the metadata handle.
    dataset_dicts_train = DatasetCatalog.get(DATA_REGISTER_TRAINING)
    metadata_dicts_train = MetadataCatalog.get(DATA_REGISTER_TRAINING)

    print("dicts training size=", len(dataset_dicts_train))
    print("################")

In [None]:
# Register Validation data
if is_train or is_evaluate:
    # Registering an existing name raises, so drop a stale entry first; this keeps the
    # cell re-runnable on its own.
    if DATA_REGISTER_VALID in DatasetCatalog.list():
        DatasetCatalog.remove(DATA_REGISTER_VALID)

    # The lambda is evaluated lazily, each time the dataset is requested.
    DatasetCatalog.register(
        DATA_REGISTER_VALID,
        lambda: convert_coco_to_detectron2_format(
            TRAIN_IMG_DIR,
            train_metadata,
            train_annot_df,
            target_indices=valid_inds,
        ),
    )

    # Set Validation data categories
    MetadataCatalog.get(DATA_REGISTER_VALID).set(thing_classes=thing_classes)

    # Materialise the records once (used later for visualisation) and keep the metadata.
    dataset_dicts_valid = DatasetCatalog.get(DATA_REGISTER_VALID)
    metadata_dicts_valid = MetadataCatalog.get(DATA_REGISTER_VALID)

    print("dicts valid size=", len(dataset_dicts_valid))
    print("################")

In [None]:
# !pip install gdown
# Fetch a file by its Google Drive id with the gdown CLI.
# NOTE(review): presumably this is the checkpoint the next cell points PRETRAINED_PATH
# at — confirm the downloaded file name and location.
!gdown 1XsjWuZjnQFxrYu9hgWJ1_qIf22q9P2EX

In [None]:
# Point PRETRAINED_PATH at a checkpoint in the Kaggle working directory.
# NOTE(review): MODEL_PATH was derived from PRETRAINED_PATH in an earlier cell and is
# not recomputed here — confirm that is intended when `is_train` is False.
PRETRAINED_PATH=Path("/kaggle/working/model_final.pth")

In [None]:
# Build the training configuration: start from detectron2 defaults, merge the model-zoo
# Mask R-CNN R50-FPN 3x config, then override dataset, solver and ROI-head settings.
# The overrides must come after merge_from_file, otherwise the merge would overwrite them.
if is_train:
    cfg = get_cfg()

    # config_name = "COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"
    config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg.merge_from_file(model_zoo.get_config_file(config_name))

    cfg.DATASETS.TRAIN = (DATA_REGISTER_TRAINING,)
#     cfg.DATASETS.TEST = (DATA_REGISTER_VALID,)

    # to evaluate during training, you have to implement `build_evaluator()` method of the trainer.
    # https://github.com/facebookresearch/detectron2/blob/94113be6e12db36b8c7601e13747587f19ec92fe/detectron2/engine/defaults.py#L561
    # cfg.TEST.EVAL_PERIOD = 500
    cfg.DATALOADER.NUM_WORKERS = 2

    # cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_name)
    if (is_resume_training):
        print("#### SETTING PRETRAINED WEIGHTS TO RESUME TRAINING ####")
        cfg.MODEL.WEIGHTS = str(PRETRAINED_PATH)
    else:
        # NOTE(review): cfg.MODEL.WEIGHTS keeps whatever the merged config file
        # specifies — confirm whether that really means training "from scratch".
        print("#### TRAINING MODEL FROM SCRATCH ####")

    # Mixed-precision training.
    cfg.SOLVER.AMP.ENABLED = True
    cfg.SOLVER.IMS_PER_BATCH = 4
    cfg.SOLVER.BASE_LR = 0.001

    cfg.SOLVER.WARMUP_ITERS = 5

    # Maximum number of iterations
    cfg.SOLVER.MAX_ITER = 30000

    # cfg.SOLVER.STEPS = (500, 1000) # must be less than MAX_ITER

    # NOTE(review): SOLVER.STEPS is not overridden above, so GAMMA only matters at the
    # step values inherited from the merged config — confirm the intended LR schedule.
    cfg.SOLVER.GAMMA = 0.09
    # Small value == Frequent save need a lot of storage.
    cfg.SOLVER.CHECKPOINT_PERIOD = 10000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
    # NOTE(review): hard-coded; presumably must equal len(thing_classes) — confirm.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 6

    # Create Output Directory
    cfg.OUTPUT_DIR = str(OUTPUT_DIR)
    print("creating cfg.OUTPUT_DIR -> ", cfg.OUTPUT_DIR)
    OUTPUT_DIR.mkdir(exist_ok=True)

In [None]:
# Train with the augmenting trainer, then expose the final checkpoint as a download link.
if is_train:
    # Imported here so this cell does not depend on `display` being injected by the frontend.
    from IPython.display import display

    trainer = AugTrainer(cfg)

    # resume=True continues from the last checkpoint in cfg.OUTPUT_DIR when one exists;
    # otherwise cfg.MODEL.WEIGHTS is loaded.
    trainer.resume_or_load(resume=is_resume_training)

    trainer.train()

    print("#### TRAINING COMPLETE ####")
    _ = trainer.model.train(False)  # turn off training

    # An expression nested inside an `if` block is not the cell's last top-level
    # expression, so it is never rendered on its own; display it explicitly.
    display(FileLink(str(OUTPUT_MODEL)))

In [None]:
# !pip install gdown
# !gdown 1ulMedVIsY-WjaH3_DLnipfMngNx8JT4v
# Fetch a file by its Google Drive id with the gdown CLI.
# NOTE(review): the inference config below loads /kaggle/working/output/model_final.pth —
# confirm whether this download is meant to provide or replace that file.
!gdown 16jl3AXYa91c3OXr3EoSV-ymQJ4ZpGhSx

In [None]:
# Inference configuration: same model-zoo base config as training, with ROI-head settings
# repeated so the architecture matches the trained checkpoint.
inf_cfg = get_cfg()

config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

inf_cfg.merge_from_file(model_zoo.get_config_file(config_name))
inf_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
# Must match the value used in the training config.
inf_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 6
# Detections scoring below this are dropped at test time.
inf_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# NOTE(review): hard-coded GPU device — this cell's models will not build on a CPU-only runtime.
inf_cfg.MODEL.DEVICE = "cuda"

inf_cfg.DATALOADER.NUM_WORKERS = 2  # lower this if CUDA overflow occurs
# NOTE(review): absolute path is hard-coded rather than taken from MODEL_PATH / OUTPUT_MODEL — confirm.
inf_cfg.MODEL.WEIGHTS = str("/kaggle/working/output/model_final.pth")

In [None]:
BATCH = 1  # lower if CUDA overflow occurs
# NOTE(review): `test_loader` is reassigned in the evaluation-setup cell further down
# before any visible cell consumes this loader — confirm whether this one is still needed.
test_loader = build_detection_test_loader(inf_cfg, DATA_REGISTER_VALID, batch_size=BATCH)

In [None]:
#Test Data Inference
#Building Inference Model

def rebuild_model():
    """Build the model described by `inf_cfg`, load its weights and return it in eval mode.

    Returns:
        The model built from ``inf_cfg`` with ``inf_cfg.MODEL.WEIGHTS`` loaded.
    """
    model = build_model(inf_cfg)
    _ = DetectionCheckpointer(model).load(inf_cfg.MODEL.WEIGHTS)
    # build_model() hands back a module in training mode; an inference model must be
    # switched to eval mode before it is called on images.
    model.eval()
    return model

In [None]:
# Instantiate the inference model from `inf_cfg`.
# NOTE(review): no later visible cell reads `model` — confirm it is still needed.
model = rebuild_model()


In [None]:
def build_model_from_cfg(cfg):
    """Build the model described by `cfg` and load the weights at `cfg.MODEL.WEIGHTS`.

    Args:
        cfg: detectron2 config node describing the architecture and weights path.

    Returns:
        The constructed model with the checkpoint loaded.
    """
    net = build_model(cfg)
    DetectionCheckpointer(net).load(cfg.MODEL.WEIGHTS)
    return net

In [None]:
print("### EVALUATING ON VALIDATION DATA ####")


def eval_mapper(dataset_dict):
    """Deterministic mapper for evaluation.

    Applies only the fixed 800x800 resize that the training mapper also uses. The
    random brightness/contrast of the training mapper is left out so that repeated
    evaluations of the same weights see the same pixels.
    """
    dataset_dict = copy.deepcopy(dataset_dict)
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    image, _ = T.apply_transform_gens([T.Resize((800, 800))], image)
    # HWC -> CHW float32 tensor.
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))
    # Ground truth is read by the evaluator from the registered dataset, not from here.
    dataset_dict.pop("annotations", None)
    return dataset_dict


# trained model weights
evaluation_model = build_model_from_cfg(inf_cfg)

# Passing a config as the second positional argument is deprecated; leave `tasks`
# unset so the evaluator infers them from the predictions.
evaluator = COCOEvaluator(
    DATA_REGISTER_VALID,
    distributed=False,
    output_dir=inf_cfg.OUTPUT_DIR,
    use_fast_impl=True,
)

test_loader = build_detection_test_loader(inf_cfg, DATA_REGISTER_VALID, mapper=eval_mapper)

In [None]:
def plot_metric_curves(metrics, curves, title, ylabel=None):
    """Plot logged metrics against the training iteration on a fresh figure.

    Args:
        metrics: DataFrame with an "iteration" column plus the metric columns.
        curves: iterable of ``(column, label, color)`` tuples; rows where the column
            is NaN are skipped for that curve.
        title: axes title.
        ylabel: optional y-axis label.
    """
    fig, ax = plt.subplots()
    for column, label, color in curves:
        logged = metrics[metrics[column].notna()]
        ax.plot(logged["iteration"], logged[column], c=color, label=label)
    ax.legend()
    ax.set_title(title)
    ax.set_xlabel("iteration")
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    plt.show()


# metrics.json holds one JSON record per line, written during training.
metrics_df = pd.read_json(
        "/kaggle/working/output/metrics.json", orient="records", lines=True)
mdf = metrics_df.sort_values("iteration")
print(mdf.head(10).T)

# Plot loss (the validation curve is only drawn when that column was logged)
loss_curves = [("total_loss", "train", "C0")]
if "validation_loss" in mdf.columns:
    loss_curves.append(("validation_loss", "validation", "C1"))
plot_metric_curves(mdf, loss_curves, "Loss curve", ylabel="loss")

# Plot Accuracy
plot_metric_curves(
    mdf, [("fast_rcnn/cls_accuracy", "train", "C0")], "Accuracy curve", ylabel="accuracy")

# Plot Bounding Box regressor loss
plot_metric_curves(mdf, [("loss_box_reg", "train", "C0")], "loss_box_reg", ylabel="loss")

In [None]:
# NOTE(review): no visible cell reads ACCEPTANCE_THRESHOLD; the score threshold set on
# `inf_cfg` is inf_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST — confirm the two are meant to agree.
ACCEPTANCE_THRESHOLD = 0.5  # for all categories
print(f"#### MODEL: {inf_cfg.MODEL.WEIGHTS} FOR INFERENCE ####")


In [None]:
# Single-image predictor built from the inference config; called on cv2-loaded images
# in the visualisation cells below.
predictor = DefaultPredictor(inf_cfg)


In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from detectron2.utils.visualizer import Visualizer

# Qualitative check: draw the predictor's output on randomly chosen validation images.
NUM_TEST_SAMPLES = 8  # number of images to show

# Never ask for more images than exist, and size the grid from the sample count
# instead of a second hard-coded 8.
n_cols = 2
n_samples = min(NUM_TEST_SAMPLES, len(dataset_dicts_valid))
n_rows = max(1, -(-n_samples // n_cols))  # ceiling division

fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 10 * n_rows), squeeze=False)
indices = list(ax.flat)  # axes in row-major order

# Sample positions without replacement so the same image is not shown twice.
sample_positions = np.random.choice(len(dataset_dicts_valid), size=n_samples, replace=False)
samples = [dataset_dicts_valid[pos] for pos in sample_positions]

for i, sample in enumerate(samples):
    img = cv2.imread(sample["file_name"])
    outputs = predictor(img)
    visualizer = Visualizer(img, metadata=metadata_dicts_valid, scale=0.5)
    vis_output = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Reverse the channel order of the rendered image before handing it to matplotlib.
    display_img = vis_output.get_image()[:, :, ::-1]
    indices[i].grid(False)
    indices[i].imshow(display_img)

# Hide any grid cells that received no image.
for unused_axis in indices[n_samples:]:
    unused_axis.axis("off")

plt.tight_layout()
plt.show()

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt
from detectron2.utils.visualizer import Visualizer

# Qualitative check (fresh random draw): draw the predictor's output on randomly
# chosen validation images.
NUM_TEST_SAMPLES = 8  # number of images to show

# Never ask for more images than exist, and size the grid from the sample count
# instead of a second hard-coded 8.
n_cols = 2
n_samples = min(NUM_TEST_SAMPLES, len(dataset_dicts_valid))
n_rows = max(1, -(-n_samples // n_cols))  # ceiling division

fig, ax = plt.subplots(n_rows, n_cols, figsize=(20, 10 * n_rows), squeeze=False)
indices = list(ax.flat)  # axes in row-major order

# Sample positions without replacement so the same image is not shown twice.
sample_positions = np.random.choice(len(dataset_dicts_valid), size=n_samples, replace=False)
samples = [dataset_dicts_valid[pos] for pos in sample_positions]

for i, sample in enumerate(samples):
    img = cv2.imread(sample["file_name"])
    outputs = predictor(img)
    visualizer = Visualizer(img, metadata=metadata_dicts_valid, scale=0.5)
    vis_output = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    # Reverse the channel order of the rendered image before handing it to matplotlib.
    display_img = vis_output.get_image()[:, :, ::-1]
    indices[i].grid(False)
    indices[i].imshow(display_img)

# Hide any grid cells that received no image.
for unused_axis in indices[n_samples:]:
    unused_axis.axis("off")

plt.tight_layout()
plt.show()

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
# Run `evaluation_model` over every batch of `test_loader`, let `evaluator` score the
# predictions, and keep the returned metrics in `results`.
results = inference_on_dataset(
    evaluation_model, test_loader, evaluator=evaluator
)

In [None]:
# Archive the whole output directory for download.
# NOTE(review): this runs before the cleanup cell that deletes the intermediate
# checkpoints, so the archive still contains them — confirm the intended order.
!zip -r /kaggle/working/output1.zip /kaggle/working/output


In [None]:
# Delete three named intermediate checkpoint files from the output directory
# (presumably the periodic saves written during training — confirm the file names).
!rm -rf /kaggle/working/output/model_0019999.pth /kaggle/working/output/model_0029999.pth /kaggle/working/output/model_0009999.pth