In [1]:
import torch, torchvision
import mmdet
from mmdet.apis import inference_detector, init_detector, show_result_pyplot
import random
import numpy as np

def set_random_seed(seed, deterministic=False):
    """Set random seed.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        
set_random_seed(123)

In [2]:
def check_dataset(dataset):
    print('checking dataset')
    
    drop_sample= []
    for i in range(len(dataset)):

        data= dataset[i]
        img_shape= Image.open(data['image_path']).size
        bbox= data['bbox']

        drop_indx= []
        for j in range(len(bbox)):
            if bbox[j][2]>=img_shape[0]: bbox[j][2]= img_shape[0] - 10
            if bbox[j][3]>=img_shape[1]: bbox[j][3]= img_shape[1] - 10
            if bbox[j][0]<=0: bbox[j][0]= 0
            if bbox[j][1]<=0: bbox[j][1]= 0
            if bbox[j][2]<=bbox[j][0] or bbox[j][3]<=bbox[j][1]:
                drop_indx.append(j)

        data['bbox']= np.delete(data['bbox'], drop_indx, axis= 0)
        data['label']= np.delete(data['label'], drop_indx, axis= 0)

        if len(data['bbox'])==0:
            drop_sample.append(i)

    dataset= np.delete(dataset, drop_sample, axis= 0)
    if drop_sample!=[]: print('remove empty bboxes data: {}'.format(len(drop_sample)))
    
    return dataset

In [3]:
import copy
import json
import os.path as osp
from PIL import Image

import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module()
class DefectDataset(CustomDataset):

    CLASSES = ('1', '2', '3', '4', '5')

    def load_annotations(self, ann_file):
        
        all_dataset= []
        with open('Data/train_1.json', 'r', encoding="utf-8") as f:
            label= json.load(f)
        for i in range(len(label['images'])):
            data= {}
            data['image_path']= 'Data/train_img_1/' + label['images'][i]['file_name']
            data['bbox']= []
            data['label']= []
            id= label['images'][i]['id']

            for j in range(len(label['annotations'])):
                if id==label['annotations'][j]['image_id']:
                    box= label['annotations'][j]['bbox']
                    box[2]+= box[0]
                    box[3]+= box[1]
                    data['bbox'].append(box)
                    data['label'].append(label['annotations'][j]['category_id']-1)
            all_dataset.append(data)
            
        with open('Data/train_2.json', 'r', encoding="utf-8") as f:
            label= json.load(f)
        for i in range(len(label['images'])):
            data= {}
            data['image_path']= 'Data/train_img_2/' + label['images'][i]['file_name']
            data['bbox']= []
            data['label']= []
            id= label['images'][i]['id']

            for j in range(len(label['annotations'])):
                if id==label['annotations'][j]['image_id']:
                    box= label['annotations'][j]['bbox']
                    box[2]+= box[0]
                    box[3]+= box[1]
                    data['bbox'].append(box)
                    data['label'].append(label['annotations'][j]['category_id']-1)
            all_dataset.append(data)
            
        # check dataset
        all_dataset= check_dataset(all_dataset)
            
        # remove too many box img
        drop_indx= []
        for i, data in enumerate(all_dataset):

            bbox= data['bbox']
            bbox= np.array(bbox).astype(np.int)

            if len(bbox)>100:
                drop_indx.append(i)
                continue

        vali_dataset= np.array(all_dataset)[drop_indx]
        train_dataset= np.delete(all_dataset, drop_indx, axis= 0)
        
        if self.ann_file=='val.txt':
            train_dataset= vali_dataset
            
        # make mmdetection custom data format
        data_infos= []
        for data in train_dataset:
            (width, height)= Image.open(data['image_path']).size
            info= dict(filename= data['image_path'], width= width, height= height)
            
            info['ann']= {}
            info['ann']['bboxes']= np.array(data['bbox']).astype(np.float16)
            info['ann']['labels']= np.array(data['label']).astype(np.int)
            data_infos.append(info)

        return data_infos

In [4]:
from mmcv import Config
cfg = Config.fromfile('configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py')

from mmdet.apis import set_random_seed

# Modify dataset type and path
cfg.dataset_type = 'DefectDataset'
cfg.data_root = './'

cfg.data.train.type = 'DefectDataset'
cfg.data.train.data_root = ''
cfg.data.train.ann_file = 'train.txt'
cfg.data.train.img_prefix = ''


cfg.data.val.type = 'DefectDataset'
cfg.data.val.data_root = ''
cfg.data.val.ann_file = 'val.txt'
cfg.data.val.img_prefix = ''

# modify num classes of the model in box head
cfg.model.roi_head.bbox_head[0].num_classes = 5
cfg.model.roi_head.bbox_head[1].num_classes = 5
cfg.model.roi_head.bbox_head[2].num_classes = 5

cfg.load_from = 'model/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth'

# Set up working dir to save files and logs.
cfg.work_dir = './tutorial_exps'

# The original learning rate (LR) is set for 8-GPU training.
# We divide it by 8 since we only use one GPU.
cfg.optimizer= dict(type='Adam', lr=0.0003, weight_decay=0)
cfg.optimizer.lr = 3e-5
cfg.lr_config.warmup = None
cfg.lr_config.min_lr=3e-5
cfg.log_config.interval = 100

# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = 'mAP'
# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 1
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 1

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

cfg.data.samples_per_gpu= 1
cfg.data.workers_per_gpu= 1
cfg.runner.max_epochs= 200

# custom setting
# cfg.model.rpn_head.loss_cls= dict(type='FocalLoss', use_sigmoid=True, loss_weight=1.0)
# cfg.model.roi_head.bbox_head[0].loss_cls= dict(type='FocalLoss', use_sigmoid=True, loss_weight=1.0)
# cfg.model.roi_head.bbox_head[1].loss_cls= dict(type='FocalLoss', use_sigmoid=True, loss_weight=1.0)
# cfg.model.roi_head.bbox_head[2].loss_cls= dict(type='FocalLoss', use_sigmoid=True, loss_weight=1.0)

cfg.model.rpn_head.reg_decoded_bbox= True
cfg.model.rpn_head.loss_bbox= dict(type='GIoULoss', loss_weight=5.0)
cfg.model.roi_head.bbox_head[0].reg_decoded_bbox= True
cfg.model.roi_head.bbox_head[0].loss_bbox= dict(type='GIoULoss', loss_weight=5.0)
cfg.model.roi_head.bbox_head[1].reg_decoded_bbox= True
cfg.model.roi_head.bbox_head[1].loss_bbox= dict(type='GIoULoss', loss_weight=5.0)
cfg.model.roi_head.bbox_head[2].reg_decoded_bbox= True
cfg.model.roi_head.bbox_head[2].loss_bbox= dict(type='GIoULoss', loss_weight=5.0)

# cfg.model.train_cfg.rcnn[0].sampler= dict(type='OHEMSampler',
#                                     num=512,
#                                     pos_fraction=0.25,
#                                     neg_pos_ub=-1,
#                                     add_gt_as_proposals=True)
# cfg.model.train_cfg.rcnn[1].sampler= dict(type='OHEMSampler',
#                                     num=512,
#                                     pos_fraction=0.25,
#                                     neg_pos_ub=-1,
#                                     add_gt_as_proposals=True)
# cfg.model.train_cfg.rcnn[2].sampler= dict(type='OHEMSampler',
#                                     num=512,
#                                     pos_fraction=0.25,
#                                     neg_pos_ub=-1,
#                                     add_gt_as_proposals=True)

standard_img_size= 1200
cfg.train_pipeline[2].img_scale= [(4096, standard_img_size+100), (4096, standard_img_size-100)]
cfg.train_pipeline[2].multiscale_mode='range'
cfg.test_pipeline[1].img_scale= [(4096, standard_img_size+100),
                                 (4096, standard_img_size),
                                 (4096, standard_img_size-100)]

cfg.data.train.pipeline= cfg.train_pipeline
cfg.data.val.pipeline= cfg.test_pipeline
cfg.data.test.pipeline= cfg.test_pipeline

# cfg.model.backbone.norm_cfg= dict(type='GN', num_groups=32, requires_grad=True)
cfg.model.test_cfg.rcnn.max_per_img= 300
cfg.model.rpn_head.anchor_generator.scales= [4]
cfg.model.rpn_head.anchor_generator.ratios= [0.5, 1.0, 2.0]
cfg.model.backbone.dcn= dict(type='DCN', deformable_groups=1, fallback_on_stride=False)
cfg.model.backbone.stage_with_dcn=(False, True, True, True)
# cfg.model.backbone.gcb=dict(ratio=1./ 4.)
# cfg.model.backbone.stage_with_gcb= (False, True, True, True)

print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='CascadeRCNN',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'),
        groups=32,
        base_width=4,
        dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[4],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYW

In [5]:
albu_train_transforms = [
    dict(type="RandomRotate90", p=1.0),
    dict(
        type='ShiftScaleRotate',
        shift_limit=0.2,
        scale_limit=0.2,
        rotate_limit=180,
        interpolation=1,
        p=0.7),
    dict(
        type="OneOf",
        transforms=[
            dict(type="HueSaturationValue", hue_shift_limit=10, sat_shift_limit=35, val_shift_limit=25),
            dict(type="RandomGamma"),
            dict(type="CLAHE"),
        ],
        p=0.5,
    ),
    dict(
        type="OneOf",
        transforms=[
            dict(type="RandomBrightnessContrast", brightness_limit=0.4, contrast_limit=0.4),
            dict(type="RGBShift", r_shift_limit=15, g_shift_limit=15, b_shift_limit=15),
        ],
        p=0.5,
    ),
    dict(
        type="OneOf",
        transforms=[
            dict(type="Blur"),
            dict(type="MotionBlur"),
            dict(type="GaussNoise"),
            dict(type="ImageCompression", quality_lower=75),
        ],
        p=0.4,
    ),
]


transforms= dict(
        type='Albu',
        transforms= albu_train_transforms,
        bbox_params=dict(
            type='BboxParams',
            format='pascal_voc',
            label_fields=['gt_labels'],
            min_visibility=0.0,
            filter_lost_elements=True),
        keymap={
            'img': 'image',
            'gt_masks': 'masks',
            'gt_bboxes': 'bboxes',
        })
cfg.data.train.pipeline.insert(4, transforms)
# mixup= dict(type='MixUp',p=0.5, lambd=0.5)
# cfg.data.train.pipeline.insert(2, mixup)

print(f'Config:\n{cfg.pretty_text}')

Config:
model = dict(
    type='CascadeRCNN',
    backbone=dict(
        type='ResNeXt',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        init_cfg=dict(
            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d'),
        groups=32,
        base_width=4,
        dcn=dict(type='DCN', deformable_groups=1, fallback_on_stride=False),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[4],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYW

In [6]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build dataset
datasets = [build_dataset(cfg.data.train)]

# Build the detector
model = build_detector(
    cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
# Add an attribute for visualization convenience
model.CLASSES = datasets[0].CLASSES

# Create work_dir
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
train_detector(model, datasets, cfg, distributed=False, validate=True)

checking dataset


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  bbox= np.array(bbox).astype(np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  info['ann']['labels']= np.array(data['label']).astype(np.int)
    Found GPU%d %s which is of cuda capability %d.%d.
    PyTorch no longer supports this GPU because it is too old.
    The minimum cuda capability supported by this library is %d.%d.
    
2021-10-01 15:39:03,167 - mmdet - INFO - load checkpoint from model/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth
2021-10-01 15:39:03,167 - mmdet - INFO - Use load_from_local loader


checking dataset



size mismatch for roi_head.bbox_head.0.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([6, 1024]).
size mismatch for roi_head.bbox_head.0.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([6]).
size mismatch for roi_head.bbox_head.1.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([6, 1024]).
size mismatch for roi_head.bbox_head.1.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([6]).
size mismatch for roi_head.bbox_head.2.fc_cls.weight: copying a param with shape torch.Size([81, 1024]) from checkpoint, the shape in current model is torch.Size([6, 1024]).
size mismatch for roi_head.bbox_head.2.fc_cls.bias: copying a param with shape torch.Size([81]) from checkpoint, the shape in current model is torch.Size([6]

RuntimeError: CUDA out of memory. Tried to allocate 160.00 MiB (GPU 0; 10.76 GiB total capacity; 7.59 GiB already allocated; 56.44 MiB free; 8.14 GiB reserved in total by PyTorch)

# Inference

In [None]:
import matplotlib.pyplot as plt
import cv2
import os
from tqdm import tqdm

color= [
    (255, 0, 0), #紅
    (0, 255, 0), #綠
    (0, 0, 255), #藍
    (255, 97, 0), #澄
    (255, 0, 255), #紫
]

checkpoint_file = 'tutorial_exps/epoch_72.pth'

cfg.data.test.pipeline[1].flip= True
cfg.model.test_cfg.rcnn= dict(score_thr=0.0001,
                              nms=dict(type='soft_nms',
                                       iou_thr=0.5,
                                       min_score=0.0001),
                              max_per_img=300)
# build the model from a config file and a checkpoint file
model = init_detector(cfg, checkpoint_file, device='cuda:0')

test_name= os.listdir('Data/test_img')
test_name= ['Data/test_img/'+name for name in test_name]

pred= []
for name in tqdm(test_name[:]):
    p= {}
    p['img_name']= name
    # test a single image
    result = inference_detector(model, name)

    img= cv2.imread(name)
    img= cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    all_bbox= []
    all_label= []
    all_score= []
    for i in range(len(result)):
        for box in result[i]:
            score= box[-1]
            all_score.append(score)
            box= np.array(box[:4]).astype(np.int)
            all_bbox.append(box)
            all_label.append(i+1)
#             if score>=0.1:
#                 cv2.rectangle(img,
#                               (box[0], box[1]),
#                               (box[2], box[3]),
#                               color[i], 10)

#     plt.imshow(img)
#     plt.show()
    
    p['bbox']= all_bbox
    p['label']= all_label
    p['score']= all_score
    pred.append(p)

In [None]:
import pandas as pd

submit= pd.DataFrame()

keep_classes= [1,2,3,4,5]
for pred in pred:
    name= pred['img_name'].split('/')[-1]
    bbox= pred['bbox']
    label= pred['label']
    score= pred['score']
    for i in range(len(bbox)):
        if label[i] not in keep_classes: continue
        r= []
        bbox[i][2]-= bbox[i][0]
        bbox[i][3]-= bbox[i][1]
        r+= [name]
        r+= [label[i]]
        r+= bbox[i].astype(np.int).tolist()
        r+= [score[i]]
        submit= submit.append([r], ignore_index= True)


submit.columns= ['image_filename', 
              'label_id',
              'x',
              'y',
              'w',
              'h',
              'confidence']
submit.to_csv('submission.csv', index= False)
submit