In [None]:
import os
import argparse
import matplotlib.pyplot as plt

import torch
import torchvision.transforms as transforms

from os2d.modeling.model import build_os2d_from_config
from os2d.config import cfg
import  os2d.utils.visualization as visualizer
from os2d.structures.feature_map import FeatureMapSize
from os2d.utils import setup_logger, read_image, get_image_size_after_resize_preserving_aspect_ratio

In [None]:
import os
import argparse

import torch

from os2d.data import dataloader
from os2d.modeling.model import build_os2d_from_config

from os2d.data.dataloader import build_eval_dataloaders_from_cfg, build_train_dataloader_from_config
from os2d.engine.train import trainval_loop
from os2d.utils import set_random_seed, get_trainable_parameters, mkdir, save_config, setup_logger, get_data_path
from os2d.engine.optimization import create_optimizer
from os2d.config import cfg
from os2d.utils.visualization import *
import random
import os2d.utils.visualization as visualizer
from pathlib import Path
import cv2
import numpy as np
from main import parse_opts, init_logger
from os2d.utils import get_image_size_after_resize_preserving_aspect_ratio
from src.util.detection import generate_detection_boxes
from src.util.visualize import visualize_boxes_on_image
from src.util.filter import DataLoaderDB


### INIT

In [None]:
# --- Device check ---------------------------------------------------------
# Fail fast when the config requests CUDA but no GPU is visible.
if cfg.is_cuda:
    assert torch.cuda.is_available(), "Do not have available GPU, but cfg.is_cuda == 1"
    torch.backends.cudnn.benchmark = True

# --- Reproducibility ------------------------------------------------------
set_random_seed(cfg.random_seed, cfg.is_cuda)

# --- Model ----------------------------------------------------------------
(net,
 box_coder,
 criterion,
 img_normalization,
 optimizer_state) = build_os2d_from_config(cfg)

# --- Optimizer ------------------------------------------------------------
parameters = get_trainable_parameters(net)
optimizer = create_optimizer(parameters, cfg.train.optim, optimizer_state)

# --- Data -----------------------------------------------------------------
data_path = get_data_path()
dataloader_train, datasets_train_for_eval = build_train_dataloader_from_config(
    cfg,
    box_coder,
    img_normalization,
    data_path=data_path,
)
dataloaders_eval = build_eval_dataloaders_from_cfg(
    cfg,
    box_coder,
    img_normalization,
    datasets_for_eval=datasets_train_for_eval,
    data_path=data_path,
)

# CSV-backed store built on top of the training dataloader.
db = DataLoaderDB(path="./src/db/data.csv", dataloader=dataloader_train)

### Write database from dataloader

In [None]:
# Rebuild the ground-truth table: one database row per annotated object, with
# the box corners divided by the image width/height.
db.initialize_csv()
map_of_classes_per_image_id = {}

# Handle on the underlying dataset (not used further in this cell).
dataset = dataloader_train.dataset

# Number of training batches to export.
# NOTE(review): hard-coded; confirm it matches the number of batches the
# dataloader actually provides.
num_batches_to_export = 149

for k in range(num_batches_to_export):
    batch = dataloader_train.get_batch(k)
    # The seventh field is bound to `batch_transforms` rather than `transforms`
    # so the torchvision.transforms module imported at the top of the notebook
    # is not shadowed by batch data.
    (images, class_images, loc_targets, cls_targets, class_ids,
     class_sizes, batch_transforms, boxes, img_sizes) = batch

    # Ids of all images that belong to the current batch.
    image_ids = dataloader_train.get_image_ids_for_batch_index(k)

    for img_id in image_ids:
        # Per-image histogram: class id -> number of annotated objects.
        class_counts = {}
        map_of_classes_per_image_id[img_id] = class_counts

        annotation = dataloader_train.get_image_annotation_for_imageid(img_id)
        img = dataloader_train._get_dataset_image_by_id(img_id)
        w, h = img.size
        print( w , h )

        for obj_idx in range(len(annotation)):
            # Bounding box as (x_min, y_min, x_max, y_max).
            bbox = annotation.bbox_xyxy[obj_idx].tolist()

            class_id = annotation.get_field('labels')[obj_idx].item()
            class_counts[class_id] = class_counts.get(class_id, 0) + 1

            # Corners expressed as fractions of the image width / height.
            point1 = (bbox[0] / w, bbox[1] / h)  # (x_min, y_min)
            point2 = (bbox[2] / w, bbox[3] / h)  # (x_max, y_max)

            # Only image_id, class_id and the two corners are stored.
            db.write_to_db(
                image_id=img_id,
                class_id=class_id,
                point1=point1,
                point2=point2
            )

        visualize_boxes_on_image(
            image_id=img_id,
            boxes_one_image=annotation,
            dataloader=dataloader_train,
            cfg=cfg,
            class_ids=class_ids,
            showfig=True,
        )

Image 0 size FeatureMapSize(w=3264, h=2448) has 30 boxes
Image 1 size FeatureMapSize(w=2448, h=3264) has 28 boxes
Image 2 size FeatureMapSize(w=3264, h=2448) has 35 boxes
Image 3 size FeatureMapSize(w=2448, h=3264) has 22 boxes
Image 0 size FeatureMapSize(w=3264, h=2448) has 30 boxes
3264 2448
Image 1 size FeatureMapSize(w=2448, h=3264) has 28 boxes
2448 3264
Image 2 size FeatureMapSize(w=3264, h=2448) has 35 boxes
3264 2448
Image 3 size FeatureMapSize(w=2448, h=3264) has 22 boxes
2448 3264
Image 4 size FeatureMapSize(w=3264, h=2448) has 16 boxes
Image 5 size FeatureMapSize(w=2448, h=3264) has 34 boxes
Image 7 size FeatureMapSize(w=3264, h=2448) has 28 boxes
Image 8 size FeatureMapSize(w=3264, h=2448) has 36 boxes
Image 4 size FeatureMapSize(w=3264, h=2448) has 16 boxes
3264 2448
Image 5 size FeatureMapSize(w=2448, h=3264) has 34 boxes
2448 3264
Image 7 size FeatureMapSize(w=3264, h=2448) has 28 boxes
3264 2448
Image 8 size FeatureMapSize(w=3264, h=2448) has 36 boxes
3264 2448
Image 9 

### Write detection boxes to db

In [18]:
# Every image id known to the database, as ints in ascending order.
image_ids = [int(raw_id) for raw_id in db.get_image_ids()]
sorted_image_ids = sorted(image_ids)
print(sorted_image_ids)

[0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 19, 20, 21, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 127, 128, 129, 131, 132, 133, 134, 135, 136, 137, 138, 139, 141, 142, 143, 144, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 201, 202, 203, 207, 209, 210, 212, 213, 214, 215, 216, 217, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 2

In [19]:
# Show what the database returns for image 0: a mapping from class id to the
# number of objects of that class.
print( db.get_class_ids_by_image_id(0))

{'0': 3, '1': 3, '2': 2, '3': 4, '4': 4, '5': 3, '6': 3, '7': 2, '1055': 6}


In [20]:
# Number of entries returned by db.get_value_by_id(10, 42).
# NOTE(review): presumably the arguments are (image_id, class_id) — confirm
# against DataLoaderDB.
print( len(db.get_value_by_id( 10 , 42 )) )

13


In [21]:
def convert_tensor_to_box_list(tensor):
    """Turn rows of (x1, y1, x2, y2) into a list of ((x1, y1), (x2, y2)) pairs.

    Each row is indexed at positions 0-3 and every entry is converted to a
    plain Python number with ``.item()``.
    """
    box_list = []
    for row in tensor:
        top_left = (row[0].item(), row[1].item())
        bottom_right = (row[2].item(), row[3].item())
        box_list.append((top_left, bottom_right))
    return box_list

def covert_point_back_by_ratio(boxes, w, h, verbose=True):
    """
    Normalise box corner coordinates by an image width and height.

    Every x coordinate is divided by ``w`` and every y coordinate by ``h``, so
    boxes given in pixels of a ``w`` x ``h`` image come out as fractions of
    that image's size.

    Args:
        boxes: iterable of boxes in the form [((x1, y1), (x2, y2)), ...].
        w: divisor for the x coordinates (image width); must be > 0.
        h: divisor for the y coordinates (image height); must be > 0.
        verbose: when True (the default) print the converted list.

    Returns:
        list: boxes in the same ((x1, y1), (x2, y2)) layout with the scaled
        coordinates.

    Raises:
        ValueError: if ``w`` or ``h`` is not strictly positive.
    """
    # Reject degenerate sizes up front instead of failing mid-list with a
    # ZeroDivisionError or silently producing sign-flipped coordinates.
    if w <= 0 or h <= 0:
        raise ValueError(f"w and h must be positive, got w={w}, h={h}")

    converted_boxes = [
        ((x1 / w, y1 / h), (x2 / w, y2 / h))
        for (x1, y1), (x2, y2) in boxes
    ]
    if verbose:
        print( converted_boxes )
    return converted_boxes

In [None]:
# For every (image, class) pair stored in the database: run the detector,
# normalise the returned boxes by the resized image size, store them, and
# print the resulting IoU list.

# Imported once here instead of on every inner-loop iteration. The name is not
# used in this cell; it is kept so it stays available in the notebook namespace.
from os2d.modeling.box_coder import BoxList

# Target size handed to the aspect-ratio-preserving resize helper.
# NOTE(review): presumably this must match the size generate_detection_boxes
# resizes to internally — confirm.
DETECTION_TARGET_SIZE = 1500

for image_id in sorted_image_ids:
    # Mapping of class id (string keys) -> number of ground-truth objects.
    class_counts = db.get_class_ids_by_image_id(image_id)
    class_ids = list( map(int, class_counts.keys()) )
    for class_id in class_ids:
        count = class_counts[str(class_id)]
        print(f"Image ID: {image_id}, Class ID: {class_id}, Count: {count}")

        # Ask for twice as many detections as there are ground-truth objects.
        detected_boxes, labels, scores = generate_detection_boxes(
            dataloader_train, net, img_normalization, box_coder,
            image_id, class_id, cfg, class_num=count*2,
        )

        # Size of the image after the aspect-ratio-preserving resize; the
        # detections are divided by this size below.
        original_image = dataloader_train._get_dataset_image_by_id(image_id)
        image_height, image_width = get_image_size_after_resize_preserving_aspect_ratio(
            h=original_image.size[1],
            w=original_image.size[0],
            target_size=DETECTION_TARGET_SIZE,
        )

        convert_boxes = convert_tensor_to_box_list(detected_boxes)
        converted_boxes = covert_point_back_by_ratio(
            convert_boxes,
            w=image_width,
            h=image_height
        )

        db.write_detect_point_to_db_by_ids(
            image_id=image_id,
            class_id=class_id,
            values=converted_boxes
        )

        print(f"IOU for image {image_id} with class {class_id}: {db.get_ioU_list_by_ids(image_id, class_id)}")


Image ID: 0, Class ID: 0, Count: 3
[((0.20428406889389072, 0.35483546914092573), (0.37387032101391093, 0.599112730195102)), ((0.8074024367828017, 0.018803785175796287), (0.9654388075337399, 0.29137363683452416)), ((0.3524978320538034, 0.36543972846450395), (0.5056563238624062, 0.6204486286025308)), ((0.0, 0.0), (0.09690095756125505, 0.23445901143908776)), ((0.11878050172026108, 0.0), (0.2400835843339528, 0.2559559787576982)), ((0.22302009987776053, 0.0), (0.3891012002359086, 0.25988515805427254))]
IOU for image 0 with class 0: [0.8367523860814454, 0.8253786561656425, 0.9186545080780981]
Image ID: 0, Class ID: 1, Count: 3
[((0.9489214679110133, 0.0), (1.0, 0.27018135194139725)), ((0.8094493698578233, 0.1089148517752538), (0.9772331819247979, 0.2929382559150067)), ((0.342850400999567, 0.0), (0.8304584318165416, 0.30583824057869036)), ((0.6400449545774394, 0.04206425268160003), (0.7686661859032188, 0.29148539366219206)), ((0.3988902243951176, 0.006622103014939377), (0.5256152075928299, 0.

In [24]:
from src.lcp.ct_aoi_align import ContextAoiAlign
import torchvision.transforms as transforms
# Image preprocessing: convert to a tensor, then normalise with the mean/std
# that came with the model.
normalize_step = transforms.Normalize(img_normalization["mean"], img_normalization["std"])
transform_image = transforms.Compose([transforms.ToTensor(), normalize_step])

context_aoi_align = ContextAoiAlign(db, dataloader_train, transform_image, net, cfg)

In [25]:
# Invoke the ROI-region computation on the ContextAoiAlign instance.
# NOTE(review): what is produced or stored is defined in
# src/lcp/ct_aoi_align.py and is not visible here — confirm before relying on it.
context_aoi_align.compute_roi_region_for_all()

In [None]:
# Visualise detections for every class of a single image.
# NOTE(review): this cell reads `img_id`, `class_ids` and
# `map_of_classes_per_image_id` left behind by earlier cells' loops, so it only
# works after those cells have run — consider passing the image id explicitly.

# Imported once instead of on every loop iteration.
from os2d.modeling.box_coder import BoxList

for class_id, count in map_of_classes_per_image_id[img_id].items():
    print(f"Image ID: {img_id}, Class ID: {class_id}, Count: {count}")
    detected_boxes, labels, scores = generate_detection_boxes(
        dataloader_train, net, img_normalization, box_coder,
        img_id, class_id, cfg, class_num=count,
    )

    # Size of the image after the aspect-ratio-preserving resize.
    # The result is unpacked as (height, width): that is the order used by the
    # detection-to-db cell, whose normalised outputs stay within [0, 1].
    # Unpacking it as (width, height) swapped the two dimensions.
    # TODO confirm against os2d.utils.
    original_image = dataloader_train._get_dataset_image_by_id(img_id)
    image_height, image_width = get_image_size_after_resize_preserving_aspect_ratio(
        h=original_image.size[1],
        w=original_image.size[0],
        target_size=1500,
    )

    # Wrap the detections so they can be drawn.
    # NOTE(review): confirm BoxList accepts a plain (width, height) tuple here.
    box_list = BoxList(detected_boxes, (image_width, image_height), mode="xyxy")
    box_list.add_field("labels", labels)
    box_list.add_field("scores", scores)  # scores are needed for detection visualisation
    visualize_boxes_on_image(
        image_id=img_id,
        boxes_one_image=box_list,
        dataloader=dataloader_train,
        cfg=cfg,
        class_ids=class_ids,
        path="detection",
        is_detection=True,  # mark this as a detection visualisation
        showfig=True,
    )


In [None]:
# Dump dataloader metadata and the ground-truth annotations of the first five
# batches to text2.txt, drawing ground truth and detections for every image.

# Imported once instead of inside the innermost loop.
from os2d.modeling.box_coder import BoxList

map_of_classes_per_image_id = {}
with open("text2.txt", "w") as f:
    # 1. General dataloader information.
    f.write("="*80 + "\n")
    f.write("Dataloader API Information\n")
    f.write("="*80 + "\n")
    f.write(f"Total batches: {len(dataloader_train)}\n")
    f.write(f"Batch size: {dataloader_train.batch_size}\n")
    f.write(f"Image normalization: {dataloader_train.img_normalization}\n")
    f.write(f"Class batch size: {dataloader_train.max_batch_labels}\n")
    f.write(f"Data augmentation: {'Enabled' if dataloader_train.data_augmentation else 'Disabled'}\n\n")

    # Dataset handle, used below to look up class names.
    dataset = dataloader_train.dataset

    # 2. Process the first five batches.
    for k in range(5):
        batch = dataloader_train.get_batch(k)
        # The seventh field is bound to `batch_transforms` rather than
        # `transforms` so the torchvision.transforms module imported at the top
        # of the notebook is not shadowed by batch data.
        (images, class_images, loc_targets, cls_targets, class_ids,
         class_sizes, batch_transforms, boxes, img_sizes) = batch

        # 3. Batch header.
        f.write("\n" + "="*80 + "\n")
        f.write(f"Batch {k} - Image Annotations\n")
        f.write("="*80 + "\n")

        # 4. Ids of all images in this batch.
        image_ids = dataloader_train.get_image_ids_for_batch_index(k)

        # 5. Batch-level information.
        f.write(f"\nBatch-Level Information:\n")
        f.write(f"Class IDs in batch: {class_ids}\n")
        f.write(f"Image tensor shape: {images.shape}\n")
        f.write(f"Localization targets shape: {loc_targets.shape}\n")
        f.write(f"Classification targets shape: {cls_targets.shape}\n")

        # 6. Per-image annotation details.
        for img_id in image_ids:
            map_of_classes_per_image_id[img_id] = {}
            annotation = dataloader_train.get_image_annotation_for_imageid(img_id)

            # Image header.
            f.write(f"\n[Image ID: {img_id}]\n")
            f.write(f"Object count: {len(annotation)}\n")

            # One line per annotated object.
            for obj_idx in range(len(annotation)):
                # Bounding box as (x_min, y_min, x_max, y_max).
                bbox = annotation.bbox_xyxy[obj_idx].tolist()
                print( bbox )
                # Class id of the object; also update the per-image histogram.
                class_id = annotation.get_field('labels')[obj_idx].item()
                map_of_classes_per_image_id[img_id][class_id] = map_of_classes_per_image_id[img_id].get(class_id, 0) + 1
                # Class name, when the dataset offers a way to look it up.
                class_name = "N/A"
                if hasattr(dataset, 'get_class_name'):
                    try:
                        class_name = dataset.get_class_name(class_id)
                    except Exception:
                        # Best-effort lookup: fall back to a placeholder, but do
                        # not swallow KeyboardInterrupt/SystemExit the way a
                        # bare `except:` would.
                        class_name = f"Unknown (ID: {class_id})"
                elif hasattr(dataset, 'class_id_to_name'):
                    class_name = dataset.class_id_to_name.get(class_id, f"Unknown (ID: {class_id})")

                # "Difficult" flag, when the annotation carries one.
                difficult = ""
                if annotation.has_field('difficult'):
                    is_difficult = annotation.get_field('difficult')[obj_idx].item()
                    difficult = f", Difficult: {bool(is_difficult)}"

                # Single-line object summary.
                f.write(f"  Object {obj_idx+1}: Class={class_id} ({class_name}), "
                        f"BBox=[{bbox[0]:.1f}, {bbox[1]:.1f}, {bbox[2]:.1f}, {bbox[3]:.1f}]"
                        f"{difficult}\n")

            # Draw the ground-truth boxes.
            visualize_boxes_on_image(
                image_id=img_id,
                boxes_one_image=annotation,
                dataloader=dataloader_train,
                cfg=cfg,
                class_ids=class_ids,
                showfig=True,
            )

            # Run the detector once per class present in the image and draw
            # the detections.
            for class_id, count in map_of_classes_per_image_id[img_id].items():
                print(f"Image ID: {img_id}, Class ID: {class_id}, Count: {count}")
                detected_boxes, labels, scores = generate_detection_boxes(
                    dataloader_train, net, img_normalization, box_coder,
                    img_id, class_id, cfg, class_num=count,
                )

                # Size of the image after the aspect-ratio-preserving resize.
                # The result is unpacked as (height, width): that is the order
                # used by the detection-to-db cell, whose normalised outputs
                # stay within [0, 1]. Unpacking it as (width, height) swapped
                # the two dimensions. TODO confirm against os2d.utils.
                original_image = dataloader_train._get_dataset_image_by_id(img_id)
                image_height, image_width = get_image_size_after_resize_preserving_aspect_ratio(
                    h=original_image.size[1],
                    w=original_image.size[0],
                    target_size=1500,
                )

                # Wrap the detections so they can be drawn.
                # NOTE(review): confirm BoxList accepts a plain (width, height)
                # tuple here.
                box_list = BoxList(detected_boxes, (image_width, image_height), mode="xyxy")
                box_list.add_field("labels", labels)
                box_list.add_field("scores", scores)  # scores are needed for detection visualisation
                visualize_boxes_on_image(
                    image_id=img_id,
                    boxes_one_image=box_list,
                    dataloader=dataloader_train,
                    cfg=cfg,
                    class_ids=class_ids,
                    path="detection",
                    is_detection=True,  # mark this as a detection visualisation
                    showfig=True,
                )

        print(f"Processed batch {k} with {len(image_ids)} images")

# Reported only after the `with` block has closed the file.
print("Dataloader information saved to text2.txt")
