In [None]:
import os
import json
import cv2
from pathlib import Path
import matplotlib.pyplot as plt
import shutil
import random
import torch
import pandas as pd
import seaborn as sns

In [None]:
COCO_PATH = Path("/kaggle/input/dlsprint2/badlad/labels/coco_format/train/badlad-train-coco.json")
# Parse the whole training annotation file into a plain dict.
with COCO_PATH.open("r") as coco_file:
    coco_data = json.load(coco_file)

In [None]:
# Size check of the raw dataset: number of images first, then number of annotations.
for coco_section in ('images', 'annotations'):
    td = pd.DataFrame(coco_data[coco_section])
    print(len(td))

In [None]:
# Image aspect ratio (width / height) distribution.
train_images = pd.DataFrame(coco_data["images"])
train_images['aspect_ratio'] = train_images['width'] / train_images['height']
train_images = train_images.rename(columns={"id": "image_id"})

# Number of distinct images seen at each aspect ratio, most frequent first.
train_images_aspect_ratio_count = (
    train_images.groupby('aspect_ratio', as_index=False)['image_id']
    .nunique()
    .rename(columns={'image_id': 'image_count'})
    .sort_values(by='image_count', ascending=False)
)

plt.figure(figsize=(15, 6))
# The x axis is the aspect ratio, not an area, so the title now says so.
plt.title('Image Aspect Ratio vs Image Count Line Plot')
sns.lineplot(x=train_images_aspect_ratio_count['aspect_ratio'], y = train_images_aspect_ratio_count['image_count'])

In [None]:
# Bounding-box aspect ratio (bbox[3] / bbox[2], i.e. height / width) distribution.
train_annotations = pd.DataFrame(coco_data['annotations']).rename(columns={"id": "annotation_id"})

# Expand the per-row bbox lists into numeric columns once instead of indexing
# the frame row by row in a Python loop.
bbox_columns = pd.DataFrame(train_annotations['bbox'].tolist(), index=train_annotations.index)
bbox_width = bbox_columns[2]
bbox_height = bbox_columns[3]
# Degenerate boxes (width <= 0) become NaN and are left out of the group-by
# below, instead of aborting the cell with ZeroDivisionError.
bbox_aspect_ratio = bbox_height / bbox_width.where(bbox_width > 0)
train_annotations['bbox_aspect_ratio'] = bbox_aspect_ratio

# Number of annotations observed at each aspect ratio.
bbox_aspect_ratio_count = (
    train_annotations.groupby('bbox_aspect_ratio', as_index=False)['annotation_id']
    .count()
    .rename(columns={'annotation_id': 'annotation_count'})
)

plt.figure(figsize=(15, 6))
plt.title('bbox aspect ratio vs Annotation Count Line Plot')
sns.lineplot(x=bbox_aspect_ratio_count['bbox_aspect_ratio'], y = bbox_aspect_ratio_count['annotation_count'])

In [None]:
# Prefer the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Filtering thresholds:
#   - images whose width / height exceeds high_aspect_ratio_threshold are dropped;
#   - annotations whose bbox height / width exceeds
#     high_bbox_aspect_ratio_threshold are dropped.
# low_aspect_ratio_threshold = 0.1
high_aspect_ratio_threshold = 2.5
high_bbox_aspect_ratio_threshold= 15.0

In [None]:
# Image ids / annotation ids sorted into "keep" and "remove" buckets by the
# aspect-ratio filters.
images_to_keep = []
images_to_remove = []
anno_to_keep = []
anno_to_remove = []

In [None]:
# Sort every image id into keep / remove by its width / height ratio.
# The buckets are rebuilt here so that re-running this cell does not append
# the same ids a second time.
images_to_keep = []
images_to_remove = []

for image_info in coco_data["images"]:
    image_id = image_info["id"]
    image_aspect_count = image_info["width"] / image_info["height"]

    # A plain float comparison is enough; building a device tensor per image
    # only added a host-to-device copy and a sync for every single element.
    if image_aspect_count > high_aspect_ratio_threshold:
        images_to_remove.append(image_id)
    else:
        images_to_keep.append(image_id)


In [None]:
# Sort every annotation id into keep / remove by its bbox height / width ratio.
# The buckets are rebuilt here so that re-running this cell does not append
# the same ids a second time.
anno_to_keep = []
anno_to_remove = []

for anno in coco_data["annotations"]:
    anno_id = anno['id']
    bbox_width, bbox_height = anno['bbox'][2], anno['bbox'][3]
    # A zero-width box has an unbounded ratio: treat it as infinitely tall (and
    # therefore removed) instead of crashing with ZeroDivisionError.
    bbox_aspect_ratio = bbox_height / bbox_width if bbox_width > 0 else float("inf")
    if bbox_aspect_ratio > high_bbox_aspect_ratio_threshold:
        anno_to_remove.append(anno_id)
    else:
        anno_to_keep.append(anno_id)

In [None]:
# print(len(anno_to_remove))
# print(anno_to_remove[0:100])

In [None]:
# One four-slot counter per kept image; each image gets its own list object.
image_category_counts = {kept_id: [0, 0, 0, 0] for kept_id in images_to_keep}

In [None]:
# Count annotations per (kept image, category).
# image_category_counts has exactly one key per kept image, so a dict lookup
# replaces the linear `in images_to_keep` list scan for every annotation.
for annotation in coco_data['annotations']:
    category_counts = image_category_counts.get(annotation['image_id'])
    if category_counts is not None:
        # category_id is used directly as the slot index of the counter list.
        category_counts[annotation['category_id']] += 1

In [None]:
# One row per kept image id, one column per counter slot (slot i -> i-th column name).
# NOTE(review): presumes category ids 0..3 correspond to these names in this order — confirm against coco_data['categories'].
df_image_category_counts = pd.DataFrame.from_dict(image_category_counts, orient='index', columns=['Paragraph', 'Text-Box', 'Image', 'Table'])

In [None]:
# print(df_image_category_counts)

In [None]:
# Images that contain paragraphs and text boxes but no image or table regions.
has_paragraph = df_image_category_counts['Paragraph'].ne(0)
has_text_box = df_image_category_counts['Text-Box'].ne(0)
lacks_image = df_image_category_counts['Image'].eq(0)
lacks_table = df_image_category_counts['Table'].eq(0)
filtered_images = df_image_category_counts[has_paragraph & has_text_box & lacks_image & lacks_table]

In [None]:
# print(filtered_images)

In [None]:
# Of those, keep only the very dense pages: > 100 paragraphs or > 200 text boxes.
is_dense_page = filtered_images['Paragraph'].gt(100) | filtered_images['Text-Box'].gt(200)
filtered_images_final = filtered_images[is_dense_page]

In [None]:
# print(filtered_images_final)

In [None]:
# print(len(filtered_images))
# print(len(filtered_images_final))
# print(list(filtered_images_final.index))

In [None]:
# print(len(images_to_remove))
# print(images_to_remove[0:100])

In [None]:
# Dense text-only pages first, followed by the images rejected for their aspect ratio.
image_ids_to_delete = [*filtered_images_final.index, *images_to_remove]

In [None]:
# print(len(image_ids_to_delete))
# print(image_ids_to_delete[0:100])

In [None]:
# Drop duplicate ids (round-trip through a set).
image_ids_to_delete = list(set(image_ids_to_delete))

In [None]:
# Ascending order of ids.
image_ids_to_delete = sorted(image_ids_to_delete, reverse=False)

In [None]:
# print(len(image_ids_to_delete))
# print(image_ids_to_delete[0:100])

In [None]:
# Drop the rejected images and every annotation that belongs to them.
# Membership is tested against a set: testing against the list made each of
# these comprehensions quadratic in the dataset size.
image_ids_to_delete_lookup = set(image_ids_to_delete)
coco_data['images'] = [
    image_info
    for image_info in coco_data['images']
    if image_info['id'] not in image_ids_to_delete_lookup
]
coco_data['annotations'] = [
    annotation
    for annotation in coco_data['annotations']
    if annotation['image_id'] not in image_ids_to_delete_lookup
]

In [None]:
# Drop the annotations rejected for their bbox aspect ratio.
# A set lookup replaces the linear scan of the anno_to_remove list per annotation.
anno_ids_to_remove_lookup = set(anno_to_remove)
coco_data['annotations'] = [
    annotation
    for annotation in coco_data['annotations']
    if annotation['id'] not in anno_ids_to_remove_lookup
]

In [None]:
# Report what is left after filtering: annotations first, then images.
for report_label, coco_section in (("-----Train Anotation: ", 'annotations'), ("-----Train Image: ", 'images')):
    td = pd.DataFrame(coco_data[coco_section])
    print(report_label, len(td))

In [None]:
filtered_json_file_path = 'filtered_badlad-train-coco.json'
# Serialise the filtered dataset next to the notebook.
with open(filtered_json_file_path, 'w') as json_out:
    json_out.write(json.dumps(coco_data))

In [None]:
kaggle_working_directory = '/kaggle/working/'
# Relocate the filtered JSON into the Kaggle working directory.
filtered_json_destination = Path(kaggle_working_directory) / filtered_json_file_path
shutil.move(filtered_json_file_path, filtered_json_destination)

In [None]:
print("Preprocessing Done")

In [None]:
%%capture
# Clone detectron2, install the requirements its setup.py declares, and make
# the clone importable. (%%capture must stay on the first line of the cell.)
import sys, os, distutils.core
!git clone 'https://github.com/facebookresearch/detectron2'
# run_setup returns the distribution object from which install_requires is read.
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Put the cloned source tree at the front of the import path.
sys.path.insert(0, os.path.abspath('./detectron2'))

In [None]:
from datetime import datetime
# Switches that gate the cells below.
is_train = True             # register the train split, build the cfg, train, plot metrics
is_evaluate = False         # run the COCO evaluation cell on the validation split
is_inference = True         # write submission.csv from the test loader
is_resume_training = False  # passed to trainer.resume_or_load; also selects PRETRAINED_PATH as weights
is_augment = False          # True -> AugTrainer (custom_mapper), False -> DefaultTrainer
SEED = 1081983              # seeds np.random before the train/validation permutation

In [None]:
from pathlib import Path

# Training images and the filtered COCO file written by the preprocessing cells.
TRAIN_IMG_DIR = Path("/kaggle/input/dlsprint2/badlad/images/train")
TRAIN_COCO_PATH = Path("/kaggle/working/filtered_badlad-train-coco.json")

# Test images and their metadata file.
TEST_IMG_DIR = Path("/kaggle/input/dlsprint2/badlad/images/test")
TEST_METADATA_PATH = Path("/kaggle/input/dlsprint2/badlad/badlad-test-metadata.json")

# Training output directory and the checkpoint path used for inference after training.
OUTPUT_DIR = Path("./output")
OUTPUT_MODEL = OUTPUT_DIR/"model_final.pth"

# Weights used when resuming training or when is_train is False; empty here.
PRETRAINED_PATH = Path("")

In [None]:
# Weights for evaluation/inference: the freshly trained checkpoint when training
# runs in this session, otherwise the pretrained path.
MODEL_PATH = OUTPUT_MODEL if is_train else PRETRAINED_PATH

In [None]:
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.utils.logger import setup_logger
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
import detectron2.data.transforms as T
from detectron2.data import detection_utils as utils
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader, build_detection_train_loader, DatasetMapper
from detectron2.utils.visualizer import Visualizer
from detectron2.structures import BoxMode
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm 
import matplotlib.pyplot as plt
import json
import cv2
import copy
from typing import Optional

from IPython.display import FileLink
import torch
import gc

import warnings
warnings.filterwarnings('ignore')

setup_logger()

In [None]:
# Parse the filtered training annotations and the test metadata.
train_dict = json.loads(TRAIN_COCO_PATH.read_text())
test_dict = json.loads(TEST_METADATA_PATH.read_text())

In [None]:
def organize_coco_data(data_dict: dict) -> tuple[list[str], list[dict], list[dict]]:
    """Split a COCO-style dict into class names, image metadata and flat annotations.

    Args:
        data_dict: Parsed COCO JSON. Must contain ``categories`` and ``images``.
            ``annotations`` is optional so that metadata-only files (e.g. a
            test split without labels) can be passed as well.

    Returns:
        A tuple ``(thing_classes, images_metadata, data_annotations)`` where
        ``thing_classes`` lists category names in file order,
        ``images_metadata`` is the untouched ``images`` list, and
        ``data_annotations`` holds one dict per annotation with the bbox
        converted from ``[x, y, width, height]`` to corner coordinates.
    """
    thing_classes: list[str] = [cat['name'] for cat in data_dict['categories']]

    images_metadata: list[dict] = data_dict['images']

    data_annotations = []
    # A missing (or null) "annotations" section yields an empty list instead of a KeyError.
    for ann in data_dict.get('annotations') or []:
        x, y, box_width, box_height = ann['bbox'][0], ann['bbox'][1], ann['bbox'][2], ann['bbox'][3]
        data_annotations.append({
            "id": ann['id'],
            "gt_masks": ann['segmentation'],
            "image_id": ann['image_id'],
            "category_id": ann['category_id'],

            # Corner form: top-left plus extent gives bottom-right.
            "x_min": x,
            "y_min": y,
            "x_max": x + box_width,
            "y_max": y + box_height,
        })
    return thing_classes, images_metadata, data_annotations

In [None]:
# Training split: class names, image metadata and flattened annotations.
train_parts = organize_coco_data(train_dict)
thing_classes, images_metadata, data_annotations = train_parts

# Test split: only class names and image metadata are used.
test_parts = organize_coco_data(test_dict)
thing_classes_test, images_metadata_test, _ = test_parts

In [None]:
print(thing_classes)

In [None]:
# Per-image table for the training set, restricted to the four columns used downstream.
train_metadata = (
    pd.DataFrame(images_metadata)[['id', 'file_name', 'width', 'height']]
    .rename(columns={"id": "image_id"})
)
print("train_metadata size=", len(train_metadata))
train_metadata.head(5)

In [None]:
# Per-annotation table (one row per dict produced by organize_coco_data).
train_annot_df = pd.DataFrame(data_annotations)
print("train_annot_df size=", len(train_annot_df))
train_annot_df.head(5)

In [None]:
# Per-image table for the test set, same column layout as train_metadata.
test_metadata = (
    pd.DataFrame(images_metadata_test)[['id', 'file_name', 'width', 'height']]
    .rename(columns={"id": "image_id"})
)
print("test_metadata size=", len(test_metadata))
test_metadata.head(5)

In [None]:
# Fraction of the images assigned to the training split; the rest is validation.
TRAIN_SPLIT = 0.90

In [None]:
# Seeded random train/validation split over row positions of train_metadata.
n_dataset = len(train_metadata)
n_train = int(n_dataset * TRAIN_SPLIT)
print("n_dataset", n_dataset, "n_train", n_train, "n_val", n_dataset-n_train)

np.random.seed(SEED)
inds = np.random.permutation(n_dataset)

# The first n_train shuffled positions train, the remainder validates.
train_inds = inds[:n_train]
valid_inds = inds[n_train:]

In [None]:
def convert_coco_to_detectron2_format(
    imgdir: Path,
    metadata_df: pd.DataFrame,
    annot_df: Optional[pd.DataFrame] = None,
    target_indices: Optional[np.ndarray] = None,
):
    """Build the list of per-image record dicts that detectron2 datasets return.

    Args:
        imgdir: Directory that contains the image files.
        metadata_df: One row per image; the columns are unpacked positionally
            as (image_id, file_name, width, height).
        annot_df: Optional annotation table with ``image_id``, ``category_id``,
            ``gt_masks`` and ``x_min``/``y_min``/``x_max``/``y_max`` columns.
            When omitted every record gets an empty ``annotations`` list.
        target_indices: Optional row positions into ``metadata_df``; when given,
            only those rows are converted, in the order listed.

    Returns:
        A list of dicts with ``file_name``, ``image_id``, ``width``, ``height``
        and ``annotations`` (boxes in ``BoxMode.XYXY_ABS``).
    """
    # Select the requested rows up front so records are only built for the
    # images of this split (same result as building all and indexing after).
    if target_indices is not None:
        metadata_df = metadata_df.iloc[target_indices]

    # Group the annotations by image once; a DataFrame.query per image rescans
    # the entire annotation table for every image.
    annots_by_image = {}
    if annot_df is not None:
        annots_by_image = {
            group_key: group_rows
            for group_key, group_rows in annot_df.groupby("image_id", sort=False)
        }

    dataset_dicts = []
    for _, train_meta_row in tqdm(metadata_df.iterrows(), total=len(metadata_df)):
        image_id, filename, width, height = train_meta_row.values

        annotations = []
        image_annots = annots_by_image.get(image_id)
        if image_annots is not None:
            for _, ann in image_annots.iterrows():
                annotations.append({
                    "bbox": [
                        float(ann["x_min"]),
                        float(ann["y_min"]),
                        float(ann["x_max"]),
                        float(ann["y_max"]),
                    ],
                    "bbox_mode": BoxMode.XYXY_ABS,
                    "segmentation": ann["gt_masks"],
                    "category_id": ann["category_id"],
                })

        dataset_dicts.append({
            "file_name": str(imgdir/filename),
            "image_id": image_id,
            "width": width,
            "height": height,
            "annotations": annotations
        })

    return dataset_dicts

In [None]:
# Names under which the three splits are registered in DatasetCatalog / MetadataCatalog.
DATA_REGISTER_TRAINING = "badlad_train"
DATA_REGISTER_VALID    = "badlad_valid"
DATA_REGISTER_TEST     = "badlad_test"

In [None]:
if is_train:
    def _load_badlad_train():
        """Build the training-split records when the catalog asks for them."""
        return convert_coco_to_detectron2_format(
            TRAIN_IMG_DIR,
            train_metadata,
            train_annot_df,
            target_indices=train_inds,
        )

    # Register the loader and attach the class names to the split's metadata.
    DatasetCatalog.register(DATA_REGISTER_TRAINING, _load_badlad_train)
    MetadataCatalog.get(DATA_REGISTER_TRAINING).set(thing_classes=thing_classes)

    # Materialise the split once so its size can be reported.
    dataset_dicts_train = DatasetCatalog.get(DATA_REGISTER_TRAINING)
    metadata_dicts_train = MetadataCatalog.get(DATA_REGISTER_TRAINING)

    print("dicts training size=", len(dataset_dicts_train))
    print("-----------------------")
    print("-----------------------")

In [None]:
if is_train or is_evaluate:
    def _load_badlad_valid():
        """Build the validation-split records when the catalog asks for them."""
        return convert_coco_to_detectron2_format(
            TRAIN_IMG_DIR,
            train_metadata,
            train_annot_df,
            target_indices=valid_inds,
        )

    # Register the loader and attach the class names to the split's metadata.
    DatasetCatalog.register(DATA_REGISTER_VALID, _load_badlad_valid)
    MetadataCatalog.get(DATA_REGISTER_VALID).set(thing_classes=thing_classes)

    # Materialise the split once so its size can be reported.
    dataset_dicts_valid = DatasetCatalog.get(DATA_REGISTER_VALID)
    metadata_dicts_valid = MetadataCatalog.get(DATA_REGISTER_VALID)

    print("dicts valid size=", len(dataset_dicts_valid))
    print("-------------------------")

In [None]:
def _load_badlad_test():
    """Build the test-split records (no annotations) when the catalog asks for them."""
    return convert_coco_to_detectron2_format(
        TEST_IMG_DIR,
        test_metadata,
    )


# Register the loader and attach the test file's class names to the split's metadata.
DatasetCatalog.register(DATA_REGISTER_TEST, _load_badlad_test)
MetadataCatalog.get(DATA_REGISTER_TEST).set(thing_classes=thing_classes_test)

# Materialise the split once so its size can be reported.
dataset_dicts_test = DatasetCatalog.get(DATA_REGISTER_TEST)
metadata_dicts_test = MetadataCatalog.get(DATA_REGISTER_TEST)
print("dicts test size=", len(dataset_dicts_test))
print("-------------------------")

In [None]:
print("---------Data is Ready-----------")

In [None]:
def custom_mapper(dataset_dict):
    """Load one dataset record, augment it, and return it in model-input form.

    The input record is deep-copied, so the caller's dict is left untouched.
    The returned dict carries the augmented image as a float32 CHW tensor under
    ``"image"`` and the transformed, non-crowd annotations as ``"instances"``;
    its ``"annotations"`` entry is removed.
    """
    record = copy.deepcopy(dataset_dict)
    bgr_image = utils.read_image(record["file_name"], format="BGR")

    # Active augmentations: small rotation, then vertical and horizontal flips.
    augmentations = [
        T.RandomRotation(angle=[-7, 7]),
#         T.RandomScale(scale=(0.2, 1.2)),
        T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
        T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
#         T.RandomBrightness(0.5, 1.2),
#         T.RandomContrast(0.5, 1.2),
#         T.RandomSaturation(0.5, 1.2),
#         T.RandomHue(0.1)
    ]

    augmented_image, applied_transforms = T.apply_transform_gens(augmentations, bgr_image)
    output_hw = augmented_image.shape[:2]

    # HWC -> CHW float32 tensor.
    chw_image = augmented_image.transpose(2, 0, 1).astype("float32")
    record["image"] = torch.as_tensor(chw_image)

    # Apply the same geometric transforms to every non-crowd annotation.
    transformed_annos = []
    for obj in record.pop("annotations"):
        if obj.get("iscrowd", 0) != 0:
            continue
        transformed_annos.append(
            utils.transform_instance_annotations(obj, applied_transforms, output_hw)
        )

    instances = utils.annotations_to_instances(transformed_annos, output_hw)
    record["instances"] = utils.filter_empty_instances(instances)

    return record

In [None]:
class AugTrainer(DefaultTrainer):
    """DefaultTrainer whose training loader maps samples through ``custom_mapper``."""

    @classmethod
    def build_train_loader(cls, cfg):
        """Return the training data loader built with the augmenting mapper."""
        augmented_loader = build_detection_train_loader(cfg, mapper=custom_mapper)
        return augmented_loader

In [None]:
if is_train:
    # Start from detectron2's defaults and overlay the model-zoo Mask R-CNN config.
    cfg = get_cfg()
    
    config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

    cfg.merge_from_file(model_zoo.get_config_file(config_name))

    # Train on the registered training split, test on the validation split.
    cfg.DATASETS.TRAIN = (DATA_REGISTER_TRAINING,)
    cfg.DATASETS.TEST = (DATA_REGISTER_VALID,)

    cfg.DATALOADER.NUM_WORKERS = 2

    if (is_resume_training):
        print("#### SETTING PRETRAINED WEIGHTS TO RESUME TRAINING ####")
        cfg.MODEL.WEIGHTS = str(PRETRAINED_PATH)
    else:
        # NOTE(review): MODEL.WEIGHTS is left at whatever the merged config file
        # specifies here — confirm that this really is training "from scratch".
        print("#### TRAINING MODEL FROM SCRATCH ####")

    # Solver / head hyper-parameters for this run.
    cfg.SOLVER.AMP.ENABLED = True
    cfg.SOLVER.IMS_PER_BATCH = 8
    cfg.SOLVER.BASE_LR = 0.01
    cfg.SOLVER.WARMUP_ITERS = 20
    cfg.SOLVER.MAX_ITER = 18000
    cfg.SOLVER.GAMMA = 0.005
    cfg.SOLVER.CHECKPOINT_PERIOD = 500
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 180
    # Four classes, matching the four counters used during preprocessing.
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4

    cfg.OUTPUT_DIR = str(OUTPUT_DIR)
    print("creating cfg.OUTPUT_DIR -> ", cfg.OUTPUT_DIR)
    # Only the last path component is created (no parents=True).
    OUTPUT_DIR.mkdir(exist_ok=True)

In [None]:
if is_train:
    # Plain trainer by default; the augmenting trainer when is_augment is set.
    trainer = AugTrainer(cfg) if is_augment else DefaultTrainer(cfg)

    trainer.resume_or_load(resume=is_resume_training)

    trainer.train()

    print("----Training Finish----")
    # Switch the trained model to evaluation mode.
    _ = trainer.model.train(False)

    # A bare expression nested inside `if` is not auto-displayed by the
    # notebook, so the download link has to be displayed explicitly.
    display(FileLink(str(OUTPUT_MODEL)))

In [None]:
if is_train:
    # Training metrics are read back from the JSON-lines file in OUTPUT_DIR.
    metrics_df = pd.read_json(
        OUTPUT_DIR/"metrics.json", orient="records", lines=True
    )
    mdf = metrics_df.sort_values("iteration")
    print(mdf.head(10).T)

    # --- total loss (plus validation loss when that column was logged) ---
    fig, ax = plt.subplots()
    mdf1 = mdf[mdf["total_loss"].notna()]
    ax.plot(mdf1["iteration"], mdf1["total_loss"], c="C0", label="train")
    if "validation_loss" in mdf.columns:
        mdf2 = mdf[mdf["validation_loss"].notna()]
        ax.plot(mdf2["iteration"], mdf2["validation_loss"],
                c="C1", label="validation")
    ax.legend()
    ax.set_title("Loss curve")
    plt.show()

    # --- classification accuracy of the box head ---
    fig, ax = plt.subplots()
    mdf1 = mdf[mdf["fast_rcnn/cls_accuracy"].notna()]
    ax.plot(mdf1["iteration"], mdf1["fast_rcnn/cls_accuracy"],
            c="C0", label="train")
    ax.legend()
    ax.set_title("Accuracy curve")
    plt.show()

    # --- box regression loss ---
    fig, ax = plt.subplots()
    mdf1 = mdf[mdf["loss_box_reg"].notna()]
    ax.plot(mdf1["iteration"], mdf1["loss_box_reg"], c="C0", label="train")
    ax.legend()
    ax.set_title("loss_box_reg")
    plt.show()

In [None]:
if is_evaluate:
    # COCO-style evaluation of the model on the validation split.
    # NOTE(review): this cell reads `cfg` and `trainer`, which are only created
    # by the is_train cells; with is_train=False it raises NameError. The path
    # assigned to cfg.MODEL.WEIGHTS below is not loaded into trainer.model here.
    print("------Evaluating on Validation data-----------")
    cfg.MODEL.WEIGHTS = str(MODEL_PATH)
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

    cfg.SOLVER.IMS_PER_BATCH = 64

    evaluator = COCOEvaluator(
        DATA_REGISTER_VALID, cfg, False, output_dir=cfg.OUTPUT_DIR, use_fast_impl=True
    )

    val_loader = build_detection_test_loader(cfg, DATA_REGISTER_VALID)

    # Runs the in-memory trainer model over the loader and feeds the evaluator.
    results = inference_on_dataset(
        trainer.model, val_loader, evaluator=evaluator
    )

In [None]:
# Separate config for inference: same architecture and head sizes as training.
inf_cfg = get_cfg()

config_name = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

inf_cfg.merge_from_file(model_zoo.get_config_file(config_name))
inf_cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 180
inf_cfg.MODEL.ROI_HEADS.NUM_CLASSES = 4
inf_cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Fall back to the CPU when no GPU is visible, instead of hard-coding "cuda"
# (mirrors the device detection used in the preprocessing section).
inf_cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

inf_cfg.DATALOADER.NUM_WORKERS = 2
# Checkpoint chosen earlier: trained output or the pretrained path.
inf_cfg.MODEL.WEIGHTS = str(MODEL_PATH)

In [None]:
# Number of test images per inference batch; also used to pace the periodic flush to submission.csv.
BATCH = 10
test_loader = build_detection_test_loader(inf_cfg, DATA_REGISTER_TEST, batch_size=BATCH)

In [None]:
# Minimum prediction score for a mask to be included in the submission (applied in get_masks).
ACCEPTANCE_THRESHOLD = 0.6

In [None]:
print(f"------------ MODEL: {inf_cfg.MODEL.WEIGHTS} FOR INFERENCE --------------")

In [None]:
# Single-image predictor built from the inference config; used for the qualitative preview.
predictor = DefaultPredictor(inf_cfg)

In [None]:
fig, ax = plt.subplots(2, 2, figsize=(20, 20))
# Panels are filled column by column.
indices = [ax[0][0], ax[1][0], ax[0][1], ax[1][1]]

# Qualitative check: draw the predictions for a few randomly drawn test images.
NUM_TEST_SAMPLES = 4
samples = np.random.choice(dataset_dicts_test, NUM_TEST_SAMPLES)

for panel, sample in zip(indices, samples):
    img = cv2.imread(sample["file_name"])
    outputs = predictor(img)
    cpu_instances = outputs["instances"].to("cpu")
    visualizer = Visualizer(img, metadata=metadata_dicts_test, scale=0.5,)
    visualizer = visualizer.draw_instance_predictions(cpu_instances)
    # Reverse the channel axis before handing the image to matplotlib.
    display_img = visualizer.get_image()[:, :, ::-1]
    panel.grid(False)
    panel.imshow(display_img)

In [None]:
def rebuild_model():
    """Build a fresh model from ``inf_cfg`` and load ``inf_cfg.MODEL.WEIGHTS`` into it."""
    net = build_model(inf_cfg)
    DetectionCheckpointer(net).load(inf_cfg.MODEL.WEIGHTS)
    return net

In [None]:
# Batched-inference model (separate from `predictor`), loaded from the inference checkpoint.
model = rebuild_model()

In [None]:
# `!export VAR=...` runs in a throwaway subshell, so the variable never reaches
# the kernel process. Set the variables on the kernel's own environment instead.
os.environ["LRU_CACHE_CAPACITY"] = "1"
# NOTE(review): the CUDA allocator presumably reads this setting when CUDA is
# first initialised — confirm, and move this cell before the first CUDA use if so.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:512"

In [None]:
# Drop large objects that are no longer needed before batched inference.
vars_to_del = ["trainer", "predictor", "outputs"]

for v in vars_to_del:
    # At cell (module) scope locals() is the same dict as globals(), so the
    # former `elif v in locals()` branch could never run and has been removed.
    if v in globals():
        print(f"Deleting {v}")
        del globals()[v]

In [None]:
def rle_encode(mask):
    """Run-length encode a 2-D mask as a ``"start length start length ..."`` string.

    Pixels are read in column-major order (the mask is transposed before it is
    flattened) and start positions are 1-based. A mask with no foreground
    pixels yields an empty string.
    """
    flat = mask.T.flatten()

    # A run touching either end has no value change at that border, so the
    # sequence is framed with zeros to make both of its edges detectable.
    touches_border = bool(flat[0] or flat[-1])
    if touches_border:
        framed = np.zeros([len(flat) + 2], dtype=flat.dtype)
        framed[1:-1] = flat
        flat = framed

    # Change positions as 1-based pixel numbers; the frame shifts them by one.
    position_offset = 1 if touches_border else 2
    runs = np.where(flat[1:] != flat[:-1])[0] + position_offset

    # Odd slots hold run ends: turn each into a length (end - start).
    runs[1::2] = runs[1::2] - runs[:-1:2]
    return ' '.join(map(str, runs))

In [None]:
@retry_if_cuda_oom
def get_masks(prediction):
    """Return one RLE string per class for the confident masks of one prediction.

    Instances scoring below ``ACCEPTANCE_THRESHOLD`` are discarded. For each
    class index in ``thing_classes`` the remaining masks of that class are
    OR-ed into a single mask, which is then run-length encoded.
    """
    confident = prediction.scores >= ACCEPTANCE_THRESHOLD
    binary_masks = prediction.pred_masks[confident] != 0
    class_ids = prediction.pred_classes[confident]

    def _encode_class(cat):
        # Union of every accepted mask predicted for this class.
        merged = torch.any(binary_masks[class_ids == cat], dim=0)
        return rle_encode(merged.short().to("cpu").numpy())

    return [_encode_class(cat) for cat in range(len(thing_classes))]

In [None]:
def run_inference(data):
    """Run the model on one batch and return its submission lines.

    Args:
        data: A batch from the test loader; each element is indexed by
            ``'image_id'`` to build the row ids.

    Returns:
        A list of ``"<image_id>_<class index>,<rle>\\n"`` strings, one per
        image and class, in batch order.
    """
    results = []
    with torch.no_grad():
        outputs = model(data)
        if torch.cuda.is_available():
            # Wait for the GPU work to finish before reading the predictions.
            torch.cuda.synchronize()

        for idx, output in enumerate(outputs):
            rles = get_masks(output["instances"])
            results.extend(
                f"{data[idx]['image_id']}_{cat},{rles[cat]}\n"
                for cat in range(len(thing_classes))
            )

    # The former `del outputs, output` is gone: it raised NameError when the
    # batch produced no outputs, and locals are released on return anyway.
    return results

In [None]:
print("---------Inferance Start----------")

In [None]:
# Release cached GPU memory and run a garbage-collection pass before inference.
torch.cuda.empty_cache()
gc.collect()

In [None]:
if is_inference:
    model.eval()

    results: list[str] = []
    # Flush roughly every 500 images; max() keeps the modulus non-zero when
    # BATCH is larger than 500.
    flush_every = max(1, 500 // BATCH)

    # The context manager closes the file even if inference fails part-way,
    # so the rows written so far are not lost in an unflushed handle.
    with open("submission.csv", "w") as submission_file:
        submission_file.write("Id,Predicted\n")

        for i, data in enumerate(tqdm(test_loader)):
            res = run_inference(data)
            results.extend(res)

            if i % flush_every == 0:
                print(f"Inference on batch {i}/{len(test_loader)} done")
                submission_file.writelines(results)
                results = []

        # Write whatever is left after the last periodic flush.
        submission_file.writelines(results)

In [None]:
# `.exists` without parentheses is a bound method and therefore always truthy;
# call it so the link is only shown when the file is really there.
if Path("submission.csv").exists():
    display(FileLink("submission.csv"))

In [None]:
# Remove the cloned detectron2 source tree from the working directory.
# NOTE(review): sys.path was pointed at this directory by the install cell —
# confirm nothing needs to import from it after this point.
!rm -r detectron2/