In [1]:
from data_gradients.datasets.detection import COCOFormatDetectionDataset
from data_gradients.managers.detection_manager import DetectionAnalysisManager
import os
from data_gradients.managers.detection_manager import DetectionAnalysisManager

from data_gradients.feature_extractors import (
    SummaryStats,
    ImagesResolution,
    ImageColorDistribution,
    ImagesAverageBrightness,
    ImageDuplicates,
    DetectionSampleVisualization,
    DetectionClassHeatmap,
    DetectionBoundingBoxArea,
    DetectionBoundingBoxPerImageCount,
    DetectionBoundingBoxSize,
    DetectionClassFrequency,
    DetectionClassesPerImageCount,
    DetectionBoundingBoxIoU,
)


def _get_all_report_features(train_image_dir: str, valid_image_dir: str):
    """Build the list of feature extractors used for the report.

    The extractors are listed by hand so that `ImageDuplicates` can be constructed with
    the image directories received as arguments.

    :param train_image_dir: Forwarded to `ImageDuplicates(train_image_dir=...)`.
    :param valid_image_dir: Forwarded to `ImageDuplicates(valid_image_dir=...)`.
    :return:                List of feature extractor instances; the `Detection*` extractors come last.
    """
    # Extractors whose class name does not carry the `Detection` prefix.
    common_extractors = [
        SummaryStats(),
        ImagesResolution(),
        ImageColorDistribution(),
        ImagesAverageBrightness(),
        ImageDuplicates(
            train_image_dir=train_image_dir,
            valid_image_dir=valid_image_dir,
        ),
    ]

    # `Detection*` extractors, each with its explicit display settings.
    detection_extractors = [
        DetectionSampleVisualization(n_rows=3, n_cols=4, stack_splits_vertically=True),
        DetectionClassHeatmap(n_rows=6, n_cols=2, heatmap_shape=(200, 200)),
        DetectionBoundingBoxArea(topk=30, prioritization_mode="train_val_diff"),
        DetectionBoundingBoxPerImageCount(),
        DetectionBoundingBoxSize(),
        DetectionClassFrequency(topk=30, prioritization_mode="train_val_diff"),
        DetectionClassesPerImageCount(topk=30, prioritization_mode="train_val_diff"),
        DetectionBoundingBoxIoU(num_bins=10, class_agnostic=True),
    ]

    return common_extractors + detection_extractors

In [None]:
# Dataset location and split, defined once so every call below agrees on them.
dataset_path = "/home/surajkumarsureshbhai/data-gradients/data/hygiene_v3_dataset_20_08_24"
IMAGES_SUBDIR = "valid/images"
ANNOTATION_FILE = "coco_annotations/valid_gt_cleaned.json"

# NOTE(review): both the "train" and the "val" side point at the same `valid` split
# (same images sub-directory, same annotation file) — confirm this is intentional.
train_data = COCOFormatDetectionDataset(
    root_dir=dataset_path,
    images_subdir=IMAGES_SUBDIR,
    annotation_file_path=ANNOTATION_FILE,
)
val_data = COCOFormatDetectionDataset(
    root_dir=dataset_path,
    images_subdir=IMAGES_SUBDIR,
    annotation_file_path=ANNOTATION_FILE,
)

# NOTE(review): `class_names`, `train_data` and `val_data` are not referenced again in the
# visible cells; the analysis call below is configured from paths only.
class_names = ['cavity', 'gap', 'root_recession', 'swollen_gums', 'tartar']  # [<class-1>, <class-2>, ...]

# Directory handed to `ImageDuplicates` for both sides (trailing slash kept).
images_dir = f"{dataset_path}/{IMAGES_SUBDIR}/"

analyzer = DetectionAnalysisManager.analyze_coco_format(
    root_dir=dataset_path,
    feature_extractors=_get_all_report_features(train_image_dir=images_dir, valid_image_dir=images_dir),
    train_images_subdir=IMAGES_SUBDIR,
    train_annotation_file_path=ANNOTATION_FILE,
    val_images_subdir=IMAGES_SUBDIR,
    val_annotation_file_path=ANNOTATION_FILE,
    report_title="train_test_report",
)
# analyzer.run()

In [None]:
import json

def check_and_remove_zero_area_bounding_boxes(annotation_file_path, output_file_path):
    """Remove annotations whose bounding-box area truncates to zero and save the result.

    The JSON document at ``annotation_file_path`` is loaded, every entry of
    ``data["annotations"]`` is checked, and the document is written to
    ``output_file_path`` with only the surviving annotations. All other top-level
    keys are written back unchanged.

    The area is computed as ``int(width * height)``, so besides boxes with a zero
    width or height, any box whose area lies strictly between -1 and 1 is removed too.

    Args:
        annotation_file_path: Path of the JSON file to read.
        output_file_path: Path of the JSON file to write (indented with 4 spaces).

    Returns:
        The removed annotation dicts, in their original order.

    Raises:
        KeyError: If the document has no ``"annotations"`` key or an annotation has no ``"bbox"``.
        IndexError: If a ``bbox`` has fewer than four entries.
    """
    # JSON is UTF-8 encoded; be explicit instead of relying on the platform's locale default.
    with open(annotation_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    zero_area_boxes = []
    kept_annotations = []

    for annotation in data['annotations']:
        # bbox format is [x, y, width, height]
        bbox = annotation['bbox']
        area = int(bbox[2] * bbox[3])

        # Route each annotation to exactly one of the two lists.
        target = zero_area_boxes if area == 0 else kept_annotations
        target.append(annotation)

    data['annotations'] = kept_annotations

    with open(output_file_path, 'w', encoding='utf-8') as file:
        json.dump(data, file, indent=4)

    return zero_area_boxes

# Input annotation file and the destination for the cleaned copy.
annotation_file_path = "source.json"
output_file_path = "output.json"

zero_area_boxes = check_and_remove_zero_area_bounding_boxes(
    annotation_file_path=annotation_file_path,
    output_file_path=output_file_path,
)

# Report what was dropped, one annotation per line.
if not zero_area_boxes:
    print("No bounding boxes with zero area found.")
else:
    print(f"Found and removed {len(zero_area_boxes)} bounding boxes with zero area:")
    for box in zero_area_boxes:
        print(box)