# Thanks to 
- https://github.com/open-mmlab/mmdetection/blob/master/demo/MMDet_Tutorial.ipynb
- https://www.kaggle.com/sreevishnudamodaran/siim-effnetv2-l-cascadercnn-mmdetection-infer

In [None]:
## MMDetection compatible torch installation
# Install the torch / torchvision / torchaudio wheels shipped in the attached
# Kaggle dataset. --no-deps keeps pip from installing their dependencies
# (presumably because the notebook runs without internet access - confirm).
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torch-1.7.0+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchvision-0.8.1+cu110-cp37-cp37m-linux_x86_64.whl' --no-deps
!pip install '/kaggle/input/pytorch-170-cuda-toolkit-110221/torchaudio-0.7.0-cp37-cp37m-linux_x86_64.whl' --no-deps

## Compatible Cuda Toolkit installation
# The package is copied to /kaggle/tmp under a .tar.bz2 name before being
# handed to `conda install --offline`.
!mkdir -p /kaggle/tmp && cp /kaggle/input/pytorch-170-cuda-toolkit-110221/cudatoolkit-11.0.221-h6bb024c_0 /kaggle/tmp/cudatoolkit-11.0.221-h6bb024c_0.tar.bz2 && conda install /kaggle/tmp/cudatoolkit-11.0.221-h6bb024c_0.tar.bz2 -y --offline

## MMDetection Offline Installation
# Dependencies of MMDetection 2.14.0 (addict, yapf, terminal, terminaltables,
# mmcv-full, pycocotools, mmpycocotools), each installed from local files.
!pip install '/kaggle/input/mmdetectionv2140/addict-2.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/yapf-0.31.0-py2.py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminal-0.4.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/terminaltables-3.1.0-py3-none-any.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmcv_full-1_3_8-cu110-torch1_7_0/mmcv_full-1.3.8-cp37-cp37m-manylinux1_x86_64.whl' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/pycocotools-2.0.2/pycocotools-2.0.2' --no-deps
!pip install '/kaggle/input/mmdetectionv2140/mmpycocotools-12.0.3/mmpycocotools-12.0.3' --no-deps

# Copy the MMDetection source tree into the working directory, rename it to
# `mmdetection`, and install it in editable mode from inside that directory.
!cp -r /kaggle/input/mmdetectionv2140/mmdetection-2.14.0 /kaggle/working/
!mv /kaggle/working/mmdetection-2.14.0 /kaggle/working/mmdetection
%cd /kaggle/working/mmdetection
!pip install -e . --no-deps
# Return to the working directory so later relative paths start from there.
%cd /kaggle/working/

In [1]:
# Load IPython's autoreload extension and use mode 2, which reloads all
# modules before executing code.
%load_ext autoreload
%autoreload 2

In [None]:
# 아래를 수행하기 전에 kernel을 restart 해야 함. 
from mmdet.apis import init_detector, inference_detector
import mmcv

## 간략히 이미지 먼저 보기

In [None]:

import matplotlib.pyplot as plt
import cv2

# Read one training image (OpenCV loads it in BGR channel order) and convert
# it to RGB so matplotlib displays the channels correctly.
img = cv2.cvtColor(cv2.imread('/kaggle/input/siim-covid19-resized-to-512px-png/train/000a312787f2.png'), cv2.COLOR_BGR2RGB)
# Small 3x3 inch figure - this is only a quick visual check.
plt.figure(figsize=(3, 3))
plt.imshow(img)

### COVID Dataset

In [None]:
# Our dataset has a single class, 'opacity' - short example code.
CLASSES = ('opacity',)
# Map every class name to its integer label, i.e. its position in CLASSES.
opacity2label = dict(zip(CLASSES, range(len(CLASSES))))
print(opacity2label)
opacity2label['opacity']


In [None]:
import pandas as pd
# Display the last rows of the training annotation/meta CSV.
pd.read_csv('/kaggle/input/all-ann-meta/train_meta.csv').tail()

In [None]:
IMG_SIZE = 512

def get_bbox(row):
    """Parse the bounding boxes out of a row's ``label`` string.

    The label is a whitespace-separated sequence of 6-token groups. In every
    group the first two tokens are skipped and the remaining four are
    converted to floats (the caller treats them as xmin, ymin, xmax, ymax).

    Args:
        row: Any object exposing a ``label`` string attribute
            (e.g. a pandas row).

    Returns:
        list[list[float]]: One 4-element list per complete 6-token group.
        A trailing incomplete group is ignored.

    Raises:
        ValueError: If a coordinate token cannot be converted to float.
    """
    # split() without an argument tolerates repeated/leading/trailing
    # whitespace, which split(' ') would turn into empty tokens.
    tokens = row.label.split()

    bboxes = []
    # Walk over complete groups of six tokens only.
    for start in range(0, len(tokens) - 5, 6):
        coords = tokens[start + 2:start + 6]
        bboxes.append([float(value) for value in coords])

    return bboxes

# Scale the bounding boxes according to the size of the resized image.
def scale_bbox(row, bboxes, img_size=None):
    """Rescale boxes from original-image coordinates to the resized image.

    Args:
        row: Object exposing ``dim0`` and ``dim1``. x coordinates are scaled
            by ``img_size / row.dim1`` and y coordinates by
            ``img_size / row.dim0`` (presumably dim0 is the original height
            and dim1 the original width - confirm against the meta CSV).
        bboxes: Iterable of ``[xmin, ymin, xmax, ymax]`` sequences.
        img_size: Side length of the resized square image. Defaults to the
            module-level ``IMG_SIZE`` when omitted.

    Returns:
        list[list[int]]: Scaled ``[xmin, ymin, xmax, ymax]`` boxes.
    """
    if img_size is None:
        img_size = IMG_SIZE

    # Get scaling factor
    scale_x = img_size / row.dim1
    scale_y = img_size / row.dim0

    def _to_pixel(value, scale):
        # Round to 4 decimals first, then int() truncates toward zero
        # (kept exactly as in the original implementation).
        return int(np.round(value * scale, 4))

    scaled_bboxes = []
    for bbox in bboxes:
        scaled_bboxes.append([
            _to_pixel(bbox[0], scale_x),  # xmin
            _to_pixel(bbox[1], scale_y),  # ymin
            _to_pixel(bbox[2], scale_x),  # xmax
            _to_pixel(bbox[3], scale_y),  # ymax
        ])

    return scaled_bboxes


In [None]:
import copy
import os.path as osp
import cv2

import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module(force=True)
class CovidDataset(CustomDataset):
    """MMDetection dataset for the single-class ('opacity') COVID annotations.

    Annotations are read from a CSV file with (at least) the columns
    ``image_id``, ``label``, ``dim0`` and ``dim1``; images are PNG files named
    ``<image_id>.png`` under ``img_prefix``.
    """

    CLASSES = ('opacity',)

    def load_annotations(self, ann_file):
        """Load the CSV annotations into a list of per-image info dicts.

        Rows whose label equals ``'none 1 0 0 1 1'`` are skipped, so only
        images with at least one annotated box are returned.

        Args:
            ann_file (str): Path of the annotation CSV,
                e.g. all-ann-meta/total_meta.csv under data_root /kaggle/input/.

        Returns:
            list[dict]: One dict per image with keys ``filename``, ``width``,
            ``height`` and ``ann`` (``bboxes``, ``labels``, ``bboxes_ignore``,
            ``labels_ignore`` as numpy arrays).

        Raises:
            FileNotFoundError: If an image referenced by the CSV cannot be
                read by OpenCV.
        """
        print('##### self.data_root:', self.data_root, 'self.ann_file:', self.ann_file, 'self.img_prefix:', self.img_prefix)
        print('#### ann_file:', ann_file)
        # Integer label of the only class: its position in CLASSES.
        opacity_label = self.CLASSES.index('opacity')

        df = pd.read_csv(ann_file)

        data_infos = []
        for _, row in df.iterrows():
            # Images without any opacity carry this placeholder label.
            if row.label == 'none 1 0 0 1 1':
                continue

            image_id = row.image_id
            filename = '{0:}/{1:}.png'.format(self.img_prefix, image_id)
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError('Cannot read image: {}'.format(filename))
            height, width = image.shape[:2]  # expected to be 512 anyway

            # Parse the boxes and rescale them to the resized image.
            scaled_bboxes = scale_bbox(row, get_bbox(row))

            # Build the annotation once per image (not once per box), so an
            # image never inherits the annotation of a previous row.
            # np.int64 replaces the deprecated np.long alias.
            data_anno = {
                'bboxes': np.array(scaled_bboxes, dtype=np.float32).reshape(-1, 4),
                'labels': np.full(len(scaled_bboxes), opacity_label, dtype=np.int64),
                'bboxes_ignore': np.zeros((0, 4), dtype=np.float32),
                'labels_ignore': np.zeros((0,), dtype=np.int64),
            }

            data_infos.append({
                'filename': str(image_id) + '.png',
                'width': width,
                'height': height,
                'ann': data_anno,
            })
        return data_infos

In [None]:

import copy
import os.path as osp
import cv2

import mmcv
import numpy as np

from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset

@DATASETS.register_module(force=True)
class CovidDataset_debug(CustomDataset):
    """Debug variant of the COVID dataset that only loads the annotations.

    ``__init__`` deliberately does not call ``CustomDataset.__init__``; it
    just resolves the paths and runs ``load_annotations`` so the parsed
    ``data_infos`` can be inspected directly.
    """

    CLASSES = ('opacity',)

    def __init__(self, data_root, ann_file, img_prefix):
        """Resolve paths against ``data_root`` and load the annotations.

        Args:
            data_root (str): Root directory of the data.
            ann_file (str): Annotation CSV path relative to ``data_root``.
            img_prefix (str): Image directory relative to ``data_root``.
        """
        self.data_root = data_root
        self.ann_file = osp.join(data_root, ann_file)
        self.img_prefix = osp.join(data_root, img_prefix)
        self.data_infos = self.load_annotations(self.ann_file)

    def load_annotations(self, ann_file):
        """Load the CSV annotations into a list of per-image info dicts.

        Rows whose label equals ``'none 1 0 0 1 1'`` are skipped, so only
        images with at least one annotated box are returned.

        Args:
            ann_file (str): Path of the annotation CSV.

        Returns:
            list[dict]: One dict per image with keys ``filename``, ``width``,
            ``height`` and ``ann`` (``bboxes``, ``labels``, ``bboxes_ignore``,
            ``labels_ignore`` as numpy arrays).

        Raises:
            FileNotFoundError: If an image referenced by the CSV cannot be
                read by OpenCV.
        """
        print('##### self.data_root:', self.data_root, 'self.ann_file:', self.ann_file, 'self.img_prefix:', self.img_prefix)
        print('#### ann_file:', ann_file)
        # Integer label of the only class: its position in CLASSES.
        opacity_label = self.CLASSES.index('opacity')
        df = pd.read_csv(ann_file)

        data_infos = []
        for _, row in df.iterrows():
            # Images without any opacity carry this placeholder label.
            if row.label == 'none 1 0 0 1 1':
                continue

            image_id = row.image_id
            filename = '{0:}/{1:}.png'.format(self.img_prefix, image_id)
            image = cv2.imread(filename)
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError('Cannot read image: {}'.format(filename))
            height, width = image.shape[:2]  # expected to be 512 anyway

            # Parse the boxes and rescale them to the resized image.
            scaled_bboxes = scale_bbox(row, get_bbox(row))

            # Build the annotation once per image (not once per box), so an
            # image never inherits the annotation of a previous row.
            # np.int64 replaces the deprecated np.long alias.
            data_anno = {
                'bboxes': np.array(scaled_bboxes, dtype=np.float32).reshape(-1, 4),
                'labels': np.full(len(scaled_bboxes), opacity_label, dtype=np.int64),
                'bboxes_ignore': np.zeros((0, 4), dtype=np.float32),
                'labels_ignore': np.zeros((0,), dtype=np.int64),
            }

            data_infos.append({
                'filename': str(image_id) + '.png',
                'width': width,
                'height': height,
                'ann': data_anno,
            })
        return data_infos

In [None]:
# For debugging: load the training annotations with the debug dataset class
# and print the first ten parsed entries.
# (The class is named CovidDataset_debug; `CovidDataset_deb` raised NameError.)
train_ds = CovidDataset_debug(data_root='/kaggle/input', ann_file='all-ann-meta/train_meta.csv', img_prefix='siim-covid19-resized-to-512px-png/train')
print(train_ds.data_infos[:10])

In [None]:
# Paths of the base config file and of the downloaded pretrained model.
config_file = '/kaggle/working/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'
checkpoint_file = '/kaggle/working/mmdetection/checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

In [None]:
from mmcv import Config

# Load the base Faster R-CNN config and print it so the defaults that are
# overridden further down can be inspected.
cfg = Config.fromfile(config_file)
print(cfg.pretty_text)

In [None]:
from mmdet.apis import set_random_seed

# Update the dataset-level settings.
cfg.dataset_type = 'CovidDataset'
cfg.data_root = '/kaggle/input/'

# Update type, data_root, ann_file and img_prefix of the train, val and test datasets.
cfg.data.train.type = 'CovidDataset'
cfg.data.train.data_root = '/kaggle/input/'
cfg.data.train.ann_file = 'all-ann-meta/train_meta.csv'
cfg.data.train.img_prefix = 'siim-covid19-resized-to-512px-png/train'

cfg.data.val.type = 'CovidDataset'
cfg.data.val.data_root = '/kaggle/input/'
cfg.data.val.ann_file = 'all-ann-meta/val_meta.csv'
cfg.data.val.img_prefix = 'siim-covid19-resized-to-512px-png/train'

# The test split reuses the validation annotations.
cfg.data.test.type = 'CovidDataset'
cfg.data.test.data_root = '/kaggle/input/'
cfg.data.test.ann_file = 'all-ann-meta/val_meta.csv'
cfg.data.test.img_prefix = 'siim-covid19-resized-to-512px-png/train'

# Number of classes.
cfg.model.roi_head.bbox_head.num_classes = 1
# Pretrained model (relative path: resolved against the current directory).
cfg.load_from = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'

# Directory where training weights and logs are saved. It must be writable:
# /kaggle/input is a read-only mount on Kaggle, so use the working directory.
cfg.work_dir = '/kaggle/working/mmdetection/tutorial_exps'

# Learning-rate settings: the base value 0.02 is divided by 8.
cfg.optimizer.lr = 0.02 / 8

cfg.lr_config.warmup = None
cfg.log_config.interval = 10

cfg.runner.max_epochs = 15           # train for 15 epochs (the base schedule default is 12)
# Set explicitly because of a bug where the policy value disappears every time the config is run.
cfg.lr_config.policy = 'step'

# Change the evaluation metric since we use customized dataset.
cfg.evaluation.metric = 'mAP'
# We can set the evaluation interval to reduce the evaluation times
cfg.evaluation.interval = 1
# We can set the checkpoint saving interval to reduce the storage cost
cfg.checkpoint_config.interval = 1

# Set seed thus the results are more reproducible
cfg.seed = 0
set_random_seed(0, deterministic=False)
cfg.gpu_ids = range(1)

# Training batch size (this is a per-GPU value).
# cfg.data.samples_per_gpu = 4        # with 2 GPUs the effective batch size would be 8; here there is a single T4 GPU.

# With roughly 3300 training images and a batch size of 4, one epoch is about 800 iterations.
# A larger batch size trains faster, because the loss is computed over more images at once,
# but it also needs more GPU memory and may run out of it.


# We can initialize the logger for training and have a look
# at the final config used for training
print(f'Config:\n{cfg.pretty_text}')

### Config에서 설정한 Dataset, Model, Pipeline 등에 따라 모델 학습 수행. 

* train용 Dataset을 생성하고 이를 이용하여 학습 수행. 

In [None]:
from mmdet.datasets import build_dataset
from mmdet.models import build_detector
from mmdet.apis import train_detector

# Build the training dataset; it is wrapped in a list because that is the
# form later passed to train_detector().
datasets = [build_dataset(cfg.data.train)]

In [None]:
# Display the class names exposed by the training dataset.
datasets[0].CLASSES

In [None]:
# Build the detector from the config; train_cfg/test_cfg are looked up on the
# top-level config with cfg.get().
model = build_detector(cfg.model, train_cfg=cfg.get('train_cfg'), test_cfg=cfg.get('test_cfg'))
# Attach the dataset's class names to the model.
model.CLASSES = datasets[0].CLASSES

In [None]:
# Note: the pretrained model is given in the config as a relative path
# (cfg.load_from = 'checkpoints/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'),
# so we have to change into the mmdetection directory as below.
 
%cd mmdetection 

# Make sure the work directory exists before training starts.
mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))

# The number of epochs is set through the config's runner parameter (default 12).
train_detector(model, datasets, cfg, distributed=False, validate=True)

In [None]:
# !ls /kaggle/working/mmdetection/tutorial_exps/

In [None]:
# !cp /kaggle/working/mmdetection/tutorial_exps/epoch_10.pth /kaggle/working
# !cp /kaggle/working/mmdetection/tutorial_exps/epoch_20.pth /kaggle/working
# !cp /kaggle/working/mmdetection/tutorial_exps/epoch_30.pth /kaggle/working

### 학습된 model 로드하여 inference 수행. 

In [None]:
# Lower the R-CNN score threshold so that low-confidence detections are kept.
cfg.model.test_cfg.rcnn.score_thr = 0.001

# Trained weights (epoch 10) provided through an attached Kaggle dataset.
WEIGHTS_FILE = '/kaggle/input/model-faster-rcnn/epoch_10.pth'
# A one-element tuple needs the trailing comma; without it this was a plain string.
options = dict(classes=("Covid_Abnormality",))
# Build the detector from the updated config and load the trained weights.
model = init_detector(cfg, WEIGHTS_FILE, device='cuda:0')

In [None]:
from mmdet.apis import inference_detector, init_detector, show_result_pyplot

# Use a BGR image: the array is passed on exactly as cv2.imread returns it.
img = cv2.imread('/kaggle/input/siim-covid19-resized-to-512px-png/train/000a312787f2.png')

# Make sure the model carries the updated config.
model.cfg = cfg

# Run the detector on the image and plot the detections.
result = inference_detector(model, img)
show_result_pyplot(model, img, result)

In [None]:
# Display the test-dataset section of the config.
cfg.data.test

In [None]:
from mmdet.datasets import build_dataloader, build_dataset, replace_ImageToTensor


# Build the Dataset and DataLoader used for evaluation (validation split).
# Unlike the training dataset, the result of build_dataset() is not wrapped in a list.

cfg.data.samples_per_gpu = 1

dataset = build_dataset(cfg.data.val)
data_loader = build_dataloader(
        dataset,
        # samples_per_gpu must be 1 here
        samples_per_gpu=cfg.data.samples_per_gpu,
        workers_per_gpu=cfg.data.workers_per_gpu,
        dist=False,
        shuffle=False)

# The 'img' key must come out as a tensor when running the line below.
# next(iter(data_loader))

from mmdet.apis import inference_detector, init_detector, show_result_pyplot
from mmdet.apis import multi_gpu_test, single_gpu_test
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel


# Absolute path: the earlier `%cd mmdetection` changes the working directory,
# which would make a '../input/...' relative path point to the wrong place.
checkpoint_file = '/kaggle/input/model-faster-rcnn/epoch_10.pth'

# Create the model from the saved checkpoint, using the config updated above.
model_ckpt = init_detector(cfg, checkpoint_file, device='cuda:0')


model_ckpt = MMDataParallel(model_ckpt, device_ids=[0])
# Run inference over the whole dataset with single_gpu_test(); the batch size must be 1.
# The third argument (show) is False, so no visualisation images are produced.
outputs = single_gpu_test(model_ckpt, data_loader, False)

# Compute mAP of the predictions against the dataset annotations.
metric = dataset.evaluate(outputs, metric='mAP')
print(metric)