In [21]:
# 예: 다음 패키지들이 설치되어 있어야 합니다.
# !pip install torch torchvision torchaudio
# !pip install git+https://github.com/facebookresearch/detectron2.git
# !pip install git+https://github.com/aim-uofa/AdelaiDet.git
# (셀 1) AdelaiDet 레포 클론
# !git clone https://github.com/aim-uofa/AdelaiDet.git

# !pip install onnx
# !pip install onnxruntime

In [7]:
# # (셀 2) 설치 (editable install)
# %cd AdelaiDet
# !pip install -e .
# %cd ..


In [None]:
import os
import torch

# Detectron2 & AdelaiDet 관련 import
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from adet.config import get_cfg as get_adet_cfg
# from adet.config.defaults import get_cfg as get_adet_cfg



## FCOS2onnx

In [None]:
import torch
import torch.nn as nn

# Detectron2 + AdelaiDet 관련 import
from detectron2.config import get_cfg
from adet.config import get_cfg as get_adet_cfg
from detectron2.engine import DefaultTrainer
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model

# AdelaiDet FCOS 모델 로드하는 예시 함수
def load_fcos_model(config_path, weight_path, device="cpu"):
    """
    config_path: FCOS_RT/MS_DLA_34_4x_syncbn.yaml 경로
    weight_path: 학습 완료된 model_final.pth 경로

    """
   
    cfg = get_adet_cfg()  # AdelaiDet 전용 cfg
    cfg.merge_from_file("./AdelaiDet/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml")
    cfg.MODEL.WEIGHTS = "./model_final.pth"
    cfg.MODEL.DEVICE = "cpu"

    #  # 1) 커스텀 데이터셋 사용 설정
    # cfg.DATASETS.TRAIN = ("custom_train",)
    # cfg.DATASETS.TEST = ("custom_validation",)

    cfg.MODEL.FCOS.NUM_CLASSES = 3
    cfg.MODEL.FPN.IN_FEATURES = ["level1", "level2", "level3", "level4", "level5"]
    cfg.MODEL.FCOS.IN_FEATURES = ["p1", "p2", "p3"]
    cfg.MODEL.FCOS.FPN_STRIDES = [2, 4, 8]
    cfg.MODEL.FCOS.SIZES_OF_INTEREST = [6, 20]
    
    cfg.MODEL.FCOS.NMS_TH = 0.8  # NMS IoU 임계값
    cfg.INPUT.MIN_SIZE_TEST = 640
    cfg.INPUT.MAX_SIZE_TEST = 640
    
    # 모델 생성
    # DefaultTrainer 내부 로직을 이용해 모델만 빌드
    model = build_model(cfg)
    
    # 가중치 로드
    DetectionCheckpointer(model).load("./model_final.pth")
    # DetectionCheckpointer(model).load(weight_path)
    model.eval()
    
    return model


class FCOSInferenceWrapper(nn.Module):
    """
    후처리(NMS 등)를 배제하고, FCOS head의 raw prediction만 반환하는 래퍼.
    """
    def __init__(self, fcos_model, input_shape=(3, 640, 640)):
        super().__init__()
        self.fcos_model = fcos_model
        self.input_shape = input_shape

    def forward(self, images):
        """
        images: (B, 3, H, W) 이미 전처리된 이미지 텐서
        반환값:
          - class_logits (List[Tensor]): 각 FPN 레벨별 (B, C, H, W)
          - box_reg (List[Tensor]): 각 FPN 레벨별 (B, 4, H, W)
          - centerness (List[Tensor]): 각 FPN 레벨별 (B, 1, H, W)
        """
        # 1) Backbone + FPN + Shared Layers 통과한 feature 추출
        # detectron2는 ImageList 구조를 많이 사용하지만, 여기서는 간단히 features만 받아옴
        # 내부적으로 모델 추론 파이프라인을 조금 수정해 사용합니다.
        # 공식적으로는 detectron2의 DefaultPredictor를 그대로 쓰기 어렵기 때문에
        # backbone -> proposal_generator (FCOS) -> forward_head 등을 직접 호출해야 합니다.

        # Feature 추출
        # self.fcos_model.backbone -> features
        # 하지만 AdelaiDet의 FCOS 모델 구조상, 
        # self.fcos_model 는 GeneralizedRCNN 형태일 수 있으므로 
        # 아래와 같이 접근합니다.
        
        # images는 detectron2에서 처리하는 ImageList 형태로 만들어줘야 합니다.
        from detectron2.modeling import build_backbone
        from detectron2.structures import ImageList
        
        # 이미지를 ImageList로 감싸기
        # 크기가 동일한 배치라고 가정
        im_list = ImageList(images, [(self.input_shape[1], self.input_shape[2])]*images.shape[0])
        
        # im_list = ImageList(images, [(images.shape[2], images.shape[3])]*images.shape[0])
        
        
        # backbone 추론
        features = self.fcos_model.backbone(im_list.tensor)
        
        # proposal_generator(FCOS)
        # 여기서 postprocessing이 일어나는 부분을 대신, raw output만 뽑도록 모델 내부 함수를 수정할 수 있어야 합니다.
        # 일반적으로 fcos_model.proposal_generator.forward_head(features, top_module=None)을 호출하면
        # (pred_class_logits, pred_deltas, pred_centerness, top_feats, bbox_towers)가 반환됩니다.
        
        # postprocessing이 없는, FCOSHead의 원본 결과만 받아오기
        pred_class_logits, pred_deltas, pred_centerness, _, _ = \
            self.fcos_model.proposal_generator.forward_head(features, top_module=None)
        
        # List[Tensor] 형태: [P1, P2, P3 … 레벨별 Feature]
        # 각각의 shape: (B, C, H, W)
        return pred_class_logits, pred_deltas, pred_centerness

import os
import torch

def export_fcos_to_onnx(config_path, 
                        weight_path, 
                        onnx_output_path="fcos_raw.onnx",
                        input_height=640, 
                        input_width=640,
                        device="cpu"):

    # 1) 학습된 FCOS 모델 로드
    fcos_model = load_fcos_model(config_path, weight_path, device=device)

    # 2) Wrapper 정의
    wrapper = FCOSInferenceWrapper(fcos_model, 
                                   input_shape=(3, input_height, input_width))
    wrapper.to(device)
    wrapper.eval()

    # 3) 더미 입력(batch_size=1, 3, H, W)
    dummy_input = torch.randn(1, 3, input_height, input_width).to(device)

    # 4) ONNX 변환
    torch.onnx.export(
        wrapper, 
        dummy_input, 
        onnx_output_path,
        input_names=["input"],
        output_names=["class_logits", "box_reg", "centerness"],
        export_params=True,
        opset_version=11,    # opset_version은 필요에 따라 조정
        do_constant_folding=True
    )
    print(f"[INFO] Exported FCOS model (no postprocessing) to ONNX: {onnx_output_path}")


# 실행 예시
if __name__ == "__main__":
    config_path = "./AdelaiDet/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml"
    weight_path = "./model_final.pth"
    export_fcos_to_onnx(config_path, weight_path,
                        onnx_output_path="fcos_raw.onnx",
                        device="cpu")  # GPU에서 수행할 경우

[32m[01/08 18:52:15 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


[INFO] Exported FCOS model (no postprocessing) to ONNX: fcos_raw.onnx


## onnx2tf

In [105]:
!pip install onnx onnx2tf




In [107]:
!onnx2tf -i fcos_raw.onnx -o tensorflow_model



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/opt/anaconda3/envs/detectron2/bin/onnx2tf", line 5, in <module>
    from onnx2tf import main
  File "/opt/anaconda3/envs/detectron2/lib/python3.9/site-packages/onnx2tf/__init__.py", line 1, in <module>
    from onnx2tf.onnx2tf import convert, main
  File "/opt/anaconda3/envs/detectron2/lib/python3.9/site-packages/onnx2tf/onnx2tf.py", line 26, in <module>
    import tensorflow as tf
  File "/opt/anaconda3/envs/detectron2/lib/python3.9/site-packages/tensorflow/__init__.py", line 49, in <module>
    from tensorflow._

## 변환 비교 확인

In [116]:
import os
import cv2
import numpy as np
import torch
import torch.nn as nn

# Detectron2 + AdelaiDet import
from detectron2.config import get_cfg
from adet.config import get_cfg as get_adet_cfg
from detectron2.modeling import build_model
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.structures import ImageList

import onnxruntime as ort
import tensorflow as tf



#############################################
# 실제 이미지 로드 & 전처리
#############################################
def load_and_preprocess_image(image_path, target_size=640):
    """
    - image_path에 있는 이미지를 로드하여 (640, 640) 크기로 리사이즈
    - BGR->RGB 변환 (OpenCV는 BGR로 로드)
    - (1, 3, H, W) PyTorch 텐서로 변환
    """
    # 1) 이미지 로드 (OpenCV: BGR)
    img_bgr = cv2.imread(image_path)
    if img_bgr is None:
        raise FileNotFoundError(f"Image not found: {image_path}")
    
    # 2) 리사이즈
    img_bgr = cv2.resize(img_bgr, (target_size, target_size))

    # 3) BGR -> RGB
    img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    # 4) (H, W, 3) -> (3, H, W)
    img_rgb = np.transpose(img_rgb, (2, 0, 1))  # (3,640,640)

    # 5) float32 변환
    img_rgb = img_rgb.astype(np.float32)

    # (선택) 정규화: ex) 이미지 픽셀(0~255)을 [0,1] 범위로 맞춤
    # img_rgb /= 255.0

    # 6) 배치 차원 추가 -> (1,3,640,640)
    img_tensor = torch.from_numpy(img_rgb).unsqueeze(0)
    
    return img_tensor  # PyTorch 텐서 반환


#############################################
#  MSE 계산 함수
#############################################
def mse(a, b):
    return np.mean((a - b)**2)



In [None]:

#############################################
# 메인 테스트 예시
#############################################
if __name__ == "__main__":
    # ------------------------------------------------------
    # (A) 모델 로드
    # ------------------------------------------------------
    config_path = "./AdelaiDet/configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml"
    weight_path = "./model_final.pth"
    device = "cpu"  # or "cuda"

    # Detectron2+AdelaiDet FCOS 모델 로드
    base_fcos_model = load_fcos_model(config_path, weight_path, device=device)

    # 후처리 없이 raw head만 추론하는 래퍼
    fcos_wrapper = FCOSInferenceWrapper(base_fcos_model).to(device)
    fcos_wrapper.eval()

    # ------------------------------------------------------
    # (B) 실제 이미지 로드 & 전처리
    # ------------------------------------------------------
    image_path = "./Dataset/IMG_8457.JPG"  # 실제 테스트할 이미지 경로
    image_tensor = load_and_preprocess_image(image_path, target_size=640)
    image_tensor = image_tensor.to(device)  # GPU 사용 시 .to("cuda")
    
    # ------------------------------------------------------
    # (C) PyTorch로 Raw Output 추론
    # ------------------------------------------------------
    with torch.no_grad():
        pt_class_logits_list, pt_box_reg_list, pt_centerness_list = fcos_wrapper(image_tensor)

    pt_class_np = [x.cpu().numpy() for x in pt_class_logits_list]
    pt_box_np   = [x.cpu().numpy() for x in pt_box_reg_list]
    pt_center_np= [x.cpu().numpy() for x in pt_centerness_list]

    # ------------------------------------------------------
    # (D) ONNX Runtime 추론
    # ------------------------------------------------------
    onnx_session = ort.InferenceSession("fcos_raw.onnx", providers=["CPUExecutionProvider"])

    # ONNX 입력은 (1,3,640,640) 형태
    onnx_inputs = {onnx_session.get_inputs()[0].name: image_tensor.cpu().numpy()}
    onnx_outputs = onnx_session.run(None, onnx_inputs)
    # 보통 9개 텐서가 [0:3], [3:6], [6:9] 으로 class, box, center
    
    onnx_class_np   = onnx_outputs[0:3]
    onnx_box_np     = onnx_outputs[3:6]
    onnx_center_np  = onnx_outputs[6:9]

    # ------------------------------------------------------
    # (E) TFLite 추론
    # ------------------------------------------------------
    tflite_path = "./tensorflow_model/fcos_raw_float32.tflite"
    interpreter = tf.lite.Interpreter(model_path=tflite_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # TFLite는 (1,640,640,3)로 입력 가정
    # PyTorch 텐서 (1,3,640,640)를 NumPy로 꺼내고 (1,640,640,3)로 transpose
    tfl_input = image_tensor.cpu().numpy()  # shape (1,3,640,640)
    tfl_input = np.transpose(tfl_input, (0, 2, 3, 1))  # -> (1,640,640,3)
    tfl_input = tfl_input.astype(np.float32)

    # 추론
    interpreter.set_tensor(input_details[0]['index'], tfl_input)
    interpreter.invoke()

    # 출력(9개)
    tflite_outputs = []
    for i in range(len(output_details)):
        out_data = interpreter.get_tensor(output_details[i]['index'])
        tflite_outputs.append(out_data)

    tfl_class_np   = tflite_outputs[0:3]
    tfl_box_np     = tflite_outputs[3:6]
    tfl_center_np  = tflite_outputs[6:9]


    # ------------------------------------------------------
    # (F) 결과 비교: PyTorch vs. ONNX / PyTorch vs. TFLite
    # ------------------------------------------------------
    num_levels = len(pt_class_np)  # 예: 3개 레벨
    for lvl in range(num_levels):
        # --- PyTorch vs. ONNX ---
        mse_class_pt_onnx  = mse(pt_class_np[lvl], onnx_class_np[lvl])
        mse_box_pt_onnx    = mse(pt_box_np[lvl],   onnx_box_np[lvl])
        mse_center_pt_onnx = mse(pt_center_np[lvl],onnx_center_np[lvl])
        
        # --- PyTorch vs. TFLite ---
        # TFLite 출력은 (B,H,W,C) => (B,C,H,W)로 transpose
        tfl_c = tfl_class_np[lvl].transpose(0, 3, 1, 2)
        tfl_b = tfl_box_np[lvl].transpose(0, 3, 1, 2)
        tfl_ce= tfl_center_np[lvl].transpose(0, 3, 1, 2)

        mse_class_pt_tfl  = mse(pt_class_np[lvl], tfl_c)
        mse_box_pt_tfl    = mse(pt_box_np[lvl],   tfl_b)
        mse_center_pt_tfl = mse(pt_center_np[lvl],tfl_ce)
        
        print(f"[Level {lvl+1}]")
        print(f"  PyTorch vs. ONNX:   class={mse_class_pt_onnx:.6f}, "
              f"box={mse_box_pt_onnx:.6f}, centerness={mse_center_pt_onnx:.6f}")
        print(f"  PyTorch vs. TFLite: class={mse_class_pt_tfl:.6f}, "
              f"box={mse_box_pt_tfl:.6f}, centerness={mse_center_pt_tfl:.6f}")
        print()

    ## 출력이 잘 나오는지 확인 (선택)
    # print(pt_class_np[0])


[32m[01/09 12:00:39 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./model_final.pth ...


  return torch.load(f, map_location=torch.device("cpu"))


[Level 1]
  PyTorch vs. ONNX:   class=0.000000, box=0.000000, centerness=0.000000
  PyTorch vs. TFLite: class=0.000000, box=0.000000, centerness=0.000000

[Level 2]
  PyTorch vs. ONNX:   class=0.000000, box=0.000000, centerness=0.000000
  PyTorch vs. TFLite: class=0.000000, box=0.000000, centerness=0.000000

[Level 3]
  PyTorch vs. ONNX:   class=0.000000, box=0.000000, centerness=0.000000
  PyTorch vs. TFLite: class=0.000000, box=0.000000, centerness=0.000000

[[[[-3.7475986 -3.4489274 -3.8326697 ... -4.4988065 -4.2263637
    -4.4829097]
   [-4.420302  -3.927745  -4.3192916 ... -4.5970197 -4.645439
    -5.096771 ]
   [-4.5329003 -4.0292034 -4.0936317 ... -4.634769  -4.646837
    -5.2706413]
   ...
   [-6.1862917 -5.4706616 -6.1654353 ... -4.295063  -4.6587205
    -4.8525605]
   [-5.51345   -5.173307  -5.68625   ... -4.224737  -4.4019866
    -4.849878 ]
   [-5.1971006 -5.0337186 -5.1431746 ... -4.288286  -4.118121
    -4.536551 ]]

  [[-4.3797736 -4.7616587 -4.7479086 ... -4.2496867 -4.

In [122]:
# print(pt_class_np[0])

diff_class = pt_class_np[lvl] - tfl_c  # 예: PyTorch vs. TFLite class logits 비교
print("Max abs diff:", np.max(np.abs(diff_class)))
print("Min abs diff:", np.min(np.abs(diff_class)))


Max abs diff: 4.9114227e-05
Min abs diff: 0.0
