# Task 2: Training and testing the object detection models Faster R-CNN and YOLO V3 on the VOC dataset

## 1. Install MMDetection

Reference: https://mmdetection.readthedocs.io/en/latest/get_started.html

In [None]:
# Step 1. Install MMEngine and MMCV using MIM.
# openmim supplies the `mim` command used by the two lines that follow.
!pip3 install openmim
!mim install mmengine
# The version specifier keeps MMCV inside the 2.0.x series.
!mim install "mmcv>=2.0.0,<2.1.0"

In [None]:
# Step 2. Install MMDetection from the source.
!git clone https://github.com/open-mmlab/mmdetection.git
# Switch the notebook's working directory to the cloned repository;
# presumably the relative paths in later cells (e.g. tools/train.py)
# are resolved from here.
%cd mmdetection
# Editable install of the repository in the current directory.
%pip install -e .

In [1]:
# Step 3. Verification.
# Importing the package and printing its version confirms the install
# is usable from this kernel.
import mmdet
print(mmdet.__version__)
# Example output: 3.0.0, or another version.

3.3.0


## 2. Load the VOC dataset

In [5]:
import tarfile
import os

# Pascal VOC archives (2007 trainval, 2007 test, 2012 trainval) to unpack.
files_to_extract = [
    "/root/autodl-tmp/VOCtrainval_06-Nov-2007.tar",
    "/root/autodl-tmp/VOCtest_06-Nov-2007.tar",
    "/root/autodl-tmp/VOCtrainval_11-May-2012.tar",
]

# Every archive is unpacked into this single folder.
destination_folder = "/root/autodl-tmp/mmdetection_voc/data/"

# Create the folder (and any missing parents). ``exist_ok=True`` makes the
# call idempotent and removes the gap between "check" and "create" that the
# separate ``os.path.exists`` test left open.
os.makedirs(destination_folder, exist_ok=True)

def extract_tar_file(file_path, dest_folder):
    """Unpack every member of the tar archive at ``file_path`` into ``dest_folder``.

    A confirmation line is printed once all members have been written.
    The archive handle is always closed, even if extraction fails.
    """
    archive = tarfile.open(file_path, 'r')
    try:
        archive.extractall(path=dest_folder)
        print(f"Extracted {file_path} to {dest_folder}")
    finally:
        archive.close()

# Unpack the archives one after another into the shared destination folder.
for archive_path in files_to_extract:
    extract_tar_file(file_path=archive_path, dest_folder=destination_folder)

print("All files have been extracted successfully.")

Extracted /root/autodl-tmp/VOCtrainval_06-Nov-2007.tar to /root/autodl-tmp/mmdetection_voc/data/
Extracted /root/autodl-tmp/VOCtest_06-Nov-2007.tar to /root/autodl-tmp/mmdetection_voc/data/
Extracted /root/autodl-tmp/VOCtrainval_11-May-2012.tar to /root/autodl-tmp/mmdetection_voc/data/
All files have been extracted successfully.


## 3. Train and test the models

In [11]:
# Launch Faster R-CNN training with the custom VOC config; the log that
# follows is the (truncated) output of this command.
!python tools/train.py configs/my_configs/VOC_faster-rcnn.py

06/02 14:38:57 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.8.10 (default, Jun  4 2021, 15:09:15) [GCC 7.5.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 2097683153
    GPU 0: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
    PyTorch: 2.0.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute

In [13]:
# Launch YOLO V3 training with the custom VOC config; the log that
# follows is the (truncated) output of this command.
!python tools/train.py configs/my_configs/VOC_yolov3.py

06/02 16:00:31 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.8.10 (default, Jun  4 2021, 15:09:15) [GCC 7.5.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 1028003349
    GPU 0: NVIDIA GeForce RTX 4090
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.8, V11.8.89
    GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.1) 9.4.0
    PyTorch: 2.0.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v2.7.3 (Git Hash 6dbeffbae1f23cbbeae17adb7b5b13f1f37c080e)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture flags: -gencode;arch=compute

## 4. Inference

### 4.1 Generate proposal boxes for Faster R-CNN

In [None]:
import os
import numpy as np
import torch
import mmcv
from mmcv.transforms import Compose
from mmdet.apis import init_detector, inference_detector, show_result_pyplot
from mmdet.utils import get_test_pipeline_cfg
import matplotlib.pyplot as plt
import cv2
import warnings

# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Faster R-CNN config and the trained weights that go with it.
config_file = './configs/pascal_voc/faster-rcnn.py'
checkpoint_file = './work_dirs/faster-rcnn/faster_rcnn.pth'

# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Build the detector from the config and load the checkpoint into it.
model = init_detector(config_file, checkpoint_file, device=device)

def get_proposal_boxes(model, img, rescale=True):
    """Return the first-stage (RPN) proposals of a Faster R-CNN for one image.

    Args:
        model: Detector returned by ``init_detector``. It must expose
            ``cfg``, ``data_preprocessor``, ``extract_feat`` and
            ``rpn_head``.
        img: Either the path of an image file or an already-loaded image
            as ``np.ndarray``.
        rescale: Forwarded to ``model.rpn_head.predict``. With ``True``
            (default) the boxes are mapped back to the original image so
            they can be drawn on it directly; with ``False`` they stay in
            the frame of the resized network input.

    Returns:
        The first element of the list returned by
        ``model.rpn_head.predict``, i.e. the proposals for this image;
        the boxes are available through its ``bboxes`` attribute.
    """
    cfg = model.cfg.copy()
    pipeline_cfg = get_test_pipeline_cfg(cfg)

    if isinstance(img, np.ndarray):
        # The configured test pipeline starts with a file loader that reads
        # ``img_path``. For in-memory images swap it for the ndarray loader,
        # the same substitution ``mmdet.apis.inference_detector`` performs.
        pipeline_cfg[0].type = 'mmdet.LoadImageFromNDArray'
        data = dict(img=img, img_id=0)
    else:
        data = dict(img_path=img, img_id=0)

    test_pipeline = Compose(pipeline_cfg)
    data = test_pipeline(data)
    # Wrap the single sample as a batch of one for the data preprocessor.
    data['inputs'] = [data['inputs']]
    data['data_samples'] = [data['data_samples']]

    with torch.no_grad():
        data = model.data_preprocessor(data, False)
        # Pass the whole batched tensor: indexing ``[0]`` here would strip
        # the batch dimension that the backbone's layers require.
        x = model.extract_feat(data['inputs'])
        proposal_list = model.rpn_head.predict(
            x, data['data_samples'], rescale=rescale)

    return proposal_list[0]

def save_image_with_proposals_and_predictions(image_path, output_path_proposals,
                                              output_path_final, model,
                                              num_proposals=5, score_thr=0.3):
    """Save two visualisations of one image: RPN proposals and final detections.

    Args:
        image_path: Path of the input image.
        output_path_proposals: Destination file for the image with the
            first ``num_proposals`` proposal boxes drawn in green.
        output_path_final: Destination file for the image with the final
            predictions scoring at least ``score_thr``.
        model: Detector returned by ``init_detector``.
        num_proposals: Number of proposals to draw. Defaults to 5, the
            value that used to be hard-coded.
        score_thr: Score threshold for the final predictions. Defaults to
            0.3, the value that used to be hard-coded.
    """
    # ``show_result_pyplot`` belongs to the MMDetection 2.x API and is not
    # exported by ``mmdet.apis`` in 3.x (this notebook reports 3.3.0), so the
    # final result is rendered with the 3.x visualizer instead.
    # NOTE(review): the stale ``show_result_pyplot`` name in the import at
    # the top of this cell must also be dropped for the cell to run on 3.x.
    from mmdet.registry import VISUALIZERS

    image = mmcv.imread(image_path)  # BGR, as loaded by mmcv
    canvas = image.copy()

    # Pass the path rather than the array so the helper's file-loading
    # pipeline is used as configured.
    proposals = get_proposal_boxes(model, image_path).bboxes.cpu().numpy()

    for x1, y1, x2, y2 in proposals[:num_proposals]:
        cv2.rectangle(canvas, (int(x1), int(y1)), (int(x2), int(y2)),
                      (0, 255, 0), 2)

    # matplotlib expects RGB channel order.
    plt.imsave(output_path_proposals, cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))

    result = inference_detector(model, image_path)

    visualizer = VISUALIZERS.build(model.cfg.visualizer)
    # The visualizer needs the class names/palette stored with the model.
    visualizer.dataset_meta = model.dataset_meta
    visualizer.add_datasample(
        'result',
        cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
        data_sample=result,
        draw_gt=False,
        show=False,
        pred_score_thr=score_thr,
        out_file=output_path_final,
    )

# Location of the input image and of the two kinds of output.
input_dir = './demo/in'
output_dir_proposals = './img/in/first/vis'
output_dir_final = './img/in/second/vis'

# Make sure both output folders exist before anything is written.
for out_dir in (output_dir_proposals, output_dir_final):
    os.makedirs(out_dir, exist_ok=True)

image_name = '000001.jpg'  # assume inference is run on this single image only
img_path = os.path.join(input_dir, image_name)
output_path_proposals = os.path.join(output_dir_proposals, image_name)
output_path_final = os.path.join(output_dir_final, image_name)

save_image_with_proposals_and_predictions(
    image_path=img_path,
    output_path_proposals=output_path_proposals,
    output_path_final=output_path_final,
    model=model,
)


### 4.2 Generate detection results

In [None]:
import mmcv
from mmdet.apis import init_detector, inference_detector, show_result_pyplot

# Config and checkpoint of the Faster R-CNN model to visualise.
# NOTE(review): these paths differ from the config used for training earlier
# in this notebook (configs/my_configs/VOC_faster-rcnn.py) — confirm they
# point at the model that was actually trained.
config_file = 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_voc0712.py'
checkpoint_file = 'work_dirs/faster_rcnn_r50_fpn_1x_voc0712/latest.pth'

# Unlike the previous cell, the device is fixed to the first GPU here.
model = init_detector(config_file, checkpoint_file, device='cuda:0')

# Pick 4 images.
# NOTE(review): the trailing ``...`` is a placeholder (Python's Ellipsis
# object), not a path; replace it with the remaining image paths before
# running, otherwise the loop below passes it to inference_detector.
img_files = ['data/VOCdevkit/VOC2007/JPEGImages/000001.jpg', ...]

# Visualise the detections for each image.
# NOTE(review): show_result_pyplot appears to be MMDetection 2.x API and the
# notebook reports mmdet 3.3.0 — confirm it is importable, or switch to the
# 3.x visualizer.
for img in img_files:
    result = inference_detector(model, img)
    show_result_pyplot(model, img, result, score_thr=0.3)

In [None]:
# Initialise the YOLO V3 model.
# NOTE(review): this is a COCO config/checkpoint path, whereas training
# earlier in this notebook used configs/my_configs/VOC_yolov3.py — confirm
# which model is meant to be compared.
config_file_yolo = 'configs/yolo/yolov3_d53_mstrain-608_273e_coco.py'
checkpoint_file_yolo = 'work_dirs/yolov3_d53_mstrain-608_273e_coco/latest.pth'
model_yolo = init_detector(
    config_file_yolo, checkpoint_file_yolo, device='cuda:0')

# Three images that are not part of VOC (placeholder paths to be replaced).
non_voc_imgs = ['path/to/image1.jpg',
                'path/to/image2.jpg', 'path/to/image3.jpg']

# Visualise both detectors on each image for a side-by-side comparison.
# ``model`` is the Faster R-CNN detector created in a previous cell.
# NOTE(review): show_result_pyplot appears to be MMDetection 2.x API and the
# notebook reports mmdet 3.3.0 — confirm it is importable, or switch to the
# 3.x visualizer.
for img in non_voc_imgs:
    result_faster_rcnn = inference_detector(model, img)
    result_yolo = inference_detector(model_yolo, img)

    print(f"Results for {img} with Faster R-CNN:")
    show_result_pyplot(model, img, result_faster_rcnn, score_thr=0.3)

    print(f"Results for {img} with YOLO V3:")
    show_result_pyplot(model_yolo, img, result_yolo, score_thr=0.3)