# Checking Versions

In [1]:
!nvcc --version
!gcc --version
!python --version
!nvidia-smi

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:41:10_Pacific_Daylight_Time_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
gcc (tdm64-1) 10.3.0
Copyright (C) 2020 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Python 3.10.16
Fri Dec  6 23:57:37 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.94                 Driver Version: 560.94         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         | 

# Installing Libraries

In [None]:
!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0 --index-url https://download.pytorch.org/whl/cu118
!pip install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu118/torch2.1.0/index.html
!pip install mmdet

!git clone https://github.com/open-mmlab/mmdetection.git

In [None]:
from torch import __version__ as torch_version
print(torch_version)

from mmcv import __version__ as mmcv_version
print(mmcv_version)

from mmdet import __version__ as mmdet_version
print(mmdet_version)

from mmyolo import __version__ as mmyolo_version
print(mmyolo_version)

2.0.0+cu118
2.0.1
3.3.0
0.6.0


# Checking the inference

In [12]:
from mmdet.apis import init_detector, inference_detector

model = init_detector("/home/jayant/Projects/mmdetection/latest/mmdetection/configs/yolo/yolov3_d53_8xb8-ms-608-273e_coco.py", "/home/jayant/Projects/mmdetection/latest/mmdetection/checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth")

Loads checkpoint by local backend from path: /home/jayant/Projects/mmdetection/latest/mmdetection/checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth


In [16]:
results = inference_detector(model, "/home/jayant/Projects/mmdetection/latest/images/5851546454_302f9aa261_b.jpg",)

In [25]:
from mmdet.apis import DetInferencer

model1 = DetInferencer(model="yolov3_d53_mstrain-608_273e_coco", weights="mmdetection/checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth")

Loads checkpoint by local backend from path: mmdetection/checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth




In [27]:
res1 = model1("images/5851546454_302f9aa261_b.jpg",out_dir="/home/jayant/Projects/mmdetection/latest/outputs", show=True, )

# Traning on custom data

## modifying the configuration file

In [23]:
config = """
_base_ = './yolov3_d53_8xb8-ms-608-273e_coco.py'

model = dict(
    type='YOLOV3',
    data_preprocessor=_base_.data_preprocessor,
    backbone=dict(
        type='Darknet',
        depth=53,
        out_indices=(3, 4, 5),
        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')),
    neck=dict(
        type='YOLOV3Neck',
        num_scales=3,
        in_channels=[1024, 512, 256],
        out_channels=[512, 256, 128]),
    bbox_head=dict(
        type='YOLOV3Head',
        num_classes=1,
        in_channels=[512, 256, 128],
        out_channels=[1024, 512, 256],
        anchor_generator=dict(
            type='YOLOAnchorGenerator',
            base_sizes=[[(116, 90), (156, 198), (373, 326)],
                        [(30, 61), (62, 45), (59, 119)],
                        [(10, 13), (16, 30), (33, 23)]],
            strides=[32, 16, 8]),
        bbox_coder=dict(type='YOLOBBoxCoder'),
        featmap_strides=[32, 16, 8],
        loss_cls=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_conf=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0,
            reduction='sum'),
        loss_xy=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=2.0,
            reduction='sum'),
        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),
    # training and testing settings
    train_cfg=dict(
        assigner=dict(
            type='GridAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0)),
    test_cfg=dict(
        nms_pre=1000,
        min_bbox_size=0,
        score_thr=0.05,
        conf_thr=0.005,
        nms=dict(type='nms', iou_threshold=0.45),
        max_per_img=100))

data_root = '/home/jayant/Projects/mmdetection/latest/mmdetection/data/boxes/'

metainfo = {
    'classes': ('Box', ),
    'palette': [
        (220, 20, 60),
    ]
}

train_dataloader = dict(
    batch_size=4,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    batch_sampler=dict(type='AspectRatioBatchSampler'),
    dataset=dict(
        type=_base_.dataset_type,
        data_root=data_root,
        metainfo=metainfo,
        ann_file='train.json',
        data_prefix=dict(img='train/'),
        filter_cfg=dict(filter_empty_gt=True, min_size=32),
        pipeline=_base_.train_pipeline,
        backend_args=_base_.backend_args))

val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=_base_.dataset_type,
        data_root=data_root,
        metainfo=metainfo,
        ann_file='val.json',
        data_prefix=dict(img='valid/'),
        test_mode=True,
        pipeline=_base_.test_pipeline,
        backend_args=_base_.backend_args))

test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=_base_.dataset_type,
        data_root=data_root,
        metainfo=metainfo,
        ann_file='test.json',
        data_prefix=dict(img='test/'),
        test_mode=True,
        pipeline=_base_.test_pipeline,
        backend_args=_base_.backend_args))

val_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'val.json',
    metric='bbox',
    backend_args=_base_.backend_args)

test_evaluator = dict(
    type='CocoMetric',
    ann_file=data_root + 'test.json',
    metric='bbox',
    backend_args=_base_.backend_args)

train_cfg = dict(max_epochs=200, val_interval=7)

optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005),
    clip_grad=dict(max_norm=35, norm_type=2))

default_hooks = dict(
                checkpoint=dict(
                    type='CheckpointHook', 
                    interval=7,
                    max_keep_ckpts=2,  # only keep latest 2 checkpoints
                    save_best='auto'
                    ),
                logger=dict(type='LoggerHook', interval=1)
                )

auto_scale_lr = dict(base_batch_size=8)

load_from = '/home/jayant/Projects/mmdetection/latest/mmdetection/checkpoints/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth'

visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')])

"""
with open('./configs/yolo/yolo_d53_1xb8-320-273e_boxes.py', 'w') as f:
    f.write(config)

In [24]:
!python tools/train.py configs/yolo/yolo_d53_1xb8-320-273e_boxes.py --work-dir work_dirs/yolo_d53_custom_boxes

03/01 00:50:42 - mmengine - [4m[97mINFO[0m - 
------------------------------------------------------------
System environment:
    sys.platform: linux
    Python: 3.10.12 (main, Nov 20 2023, 15:14:05) [GCC 11.4.0]
    CUDA available: True
    MUSA available: False
    numpy_random_seed: 1459617158
    GPU 0: NVIDIA GeForce RTX 3060 Laptop GPU
    CUDA_HOME: /usr/local/cuda
    NVCC: Cuda compilation tools, release 11.7, V11.7.99
    GCC: x86_64-linux-gnu-gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
    PyTorch: 2.1.0+cu118
    PyTorch compiling details: PyTorch built with:
  - GCC 9.3
  - C++ Version: 201703
  - Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - LAPACK is enabled (usually provided by MKL)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 11.8
  - NVCC architecture f

In [2]:
!pwd
import os 
os.chdir('/home/jayant/Projects/mmdetection/latest/mmdetection')

/home/jayant/Projects/mmdetection/latest


In [29]:
from mmdet.apis import DetInferencer

cfg  = '/home/jayant/Projects/mmdetection/latest/mmdetection/configs/yolo/yolo_d53_1xb8-320-273e_boxes.py'
checkpoint = '/home/jayant/Projects/mmdetection/latest/mmdetection/work_dirs/yolo_d53_custom_boxes/epoch_200.pth'

model = DetInferencer(model=cfg, weights=checkpoint, device='cuda:0')

model("/home/jayant/Projects/mmdetection/latest/images/test.jpg", show=True, out_dir="/home/jayant/Projects/mmdetection/latest/outputs")

Loads checkpoint by local backend from path: /home/jayant/Projects/mmdetection/latest/mmdetection/work_dirs/yolo_d53_custom_boxes/epoch_200.pth


Output()

{'predictions': [{'labels': [0, 0],
   'scores': [0.6768184900283813, 0.2996211051940918],
   'bboxes': [[37.557640075683594,
     28.643756866455078,
     190.65536499023438,
     311.2362060546875],
    [251.84432983398438,
     23.60582733154297,
     394.109619140625,
     317.5643615722656]]}],
 'visualization': [array([[[251, 251, 253],
          [251, 251, 253],
          [251, 251, 253],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
  
         [[251, 251, 253],
          [251, 251, 253],
          [251, 251, 253],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
  
         [[251, 251, 253],
          [251, 251, 253],
          [251, 251, 253],
          ...,
          [255, 255, 255],
          [255, 255, 255],
          [255, 255, 255]],
  
         ...,
  
         [[255, 255, 255],
          [255, 255, 255],
          [255, 255, 255],
          ...,
          [255, 255, 255],