In [1]:
import argparse
import os
import os.path as osp
import tempfile
import warnings

import mmcv
import numpy as np
import pandas as pd
import torch
from df2coco import df2coco
from mmcv import Config, DictAction
from mmcv.cnn import fuse_conv_bn
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import get_dist_info, init_dist, load_checkpoint, wrap_fp16_model
from mmdet.apis import multi_gpu_test, single_gpu_test
from mmdet.datasets import build_dataloader, build_dataset, replace_ImageToTensor
from mmdet.models import build_detector
from tqdm.notebook import tqdm

In [56]:
# Root locations. Every path below is derived from these two constants, so
# repointing the notebook at another run or dataset copy is a two-line change.
WORK_DIR = "/home/semyon/projects/mmdetection/work_dirs/config_2x"
DATA_DIR = "/home/semyon/data/VinBigData"

# Model config and the checkpoint to run inference with.
config_path = f"{WORK_DIR}/config_2x.py"
checkpoint_path = f"{WORK_DIR}/epoch_12.pth"

# CSV to run inference on (also passed to the mAP computation further down).
csv_path = f"{DATA_DIR}/custom_dfs/weighted_boxes_fusion_iou-0.20_fold-0.csv"
# Image root; the trailing slash is kept because the value is used as a prefix.
prefix = f"{DATA_DIR}/train/"

In [57]:
# Pathology name -> class id, as used by the challenge.
CLASS_NAME_TO_ID = {
    "Aortic enlargement": 0,
    "Atelectasis": 1,
    "Calcification": 2,
    "Cardiomegaly": 3,
    "Consolidation": 4,
    "ILD": 5,
    "Infiltration": 6,
    "Lung Opacity": 7,
    "No finding": 14,
    "Nodule/Mass": 8,
    "Other lesion": 9,
    "Pleural effusion": 10,
    "Pleural thickening": 11,
    "Pneumothorax": 12,
    "Pulmonary fibrosis": 13,
}


def _predictions_to_df(image_ids, outputs, class_names):
    """Flatten per-image, per-class detection arrays into one DataFrame.

    :param image_ids: one id per image, aligned with ``outputs``
    :param outputs: per image, a sequence of per-class arrays whose rows are
        box coordinates followed by the score in the last column
    :param class_names: class names, aligned with the per-class arrays
    :return: DataFrame with columns image_id, class_name, class_id, rad_id,
        x_min, y_min, x_max, y_max (``rad_id`` holds the detection score)
    :raises KeyError: if a class with at least one detection is missing from
        ``CLASS_NAME_TO_ID``
    """
    columns = dict(
        image_id=[],
        class_name=[],
        class_id=[],
        rad_id=[],
        x_min=[],
        y_min=[],
        x_max=[],
        y_max=[],
    )

    for image_id, result in zip(image_ids, outputs):
        for class_name, per_class_result in zip(class_names, result):
            scores = per_class_result[:, -1]
            # Round to whole pixels. Clip at 0 and use a signed dtype: casting a
            # negative coordinate to an unsigned integer would wrap around to a
            # huge value instead of failing.
            bboxes = np.clip(np.around(per_class_result[:, :-1]), 0, None).astype(
                np.int64
            )
            for score, bbox in zip(scores, bboxes):
                columns["image_id"].append(image_id)
                columns["class_name"].append(class_name)
                columns["class_id"].append(CLASS_NAME_TO_ID[class_name])
                columns["rad_id"].append(score)
                columns["x_min"].append(bbox[0])
                columns["y_min"].append(bbox[1])
                columns["x_max"].append(bbox[2])
                columns["y_max"].append(bbox[3])

    return pd.DataFrame(columns)


def inference_on_csv(csv, img_root, config, checkpoint_path, output_path):
    """Run a detector over the images listed in a CSV and save the predictions.

    The CSV is passed to ``df2coco``, which writes the annotation JSON that the
    test dataset is then built from. All intermediate files live in a temporary
    directory that is removed once inference has finished.

    Note: when ``config`` is a ``Config`` object it is modified in place
    (``data.test.ann_file``, ``data.test.img_prefix`` and ``model.train_cfg``
    are overwritten). The ``ann_file`` it is left with points into the removed
    temporary directory, so do not reuse that config to build a dataset.

    :param csv: path to a csv file or a pandas.DataFrame object
    :param img_root: image directory, used as ``img_prefix`` for both
        ``df2coco`` and the test dataset
    :param config: path to a config file or an mmcv ``Config`` object
    :param checkpoint_path: path to the model checkpoint
    :param output_path: where the resulting predictions csv is written
    :return: df with predicted bboxes in the format of the challenge; the
        detection score is stored in the ``rad_id`` column
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        if isinstance(csv, pd.DataFrame):
            tmp_csv_path = osp.join(tmp_dir, "annotations.csv")
            csv.to_csv(tmp_csv_path, index=False)
            csv = tmp_csv_path

        json_path = osp.join(tmp_dir, "annotations.json")
        df2coco(csv, json_path, img_prefix=img_root)

        if not isinstance(config, Config):
            config = Config.fromfile(config)

        config.merge_from_dict(
            dict(data=dict(test=dict(ann_file=json_path, img_prefix=img_root)))
        )
        cfg = config

        # ======================= predict ==============
        dataset = build_dataset(cfg.data.test)
        data_loader = build_dataloader(
            dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=False,
            shuffle=False,
        )

        # build the model and load checkpoint
        cfg.model.train_cfg = None
        model = build_detector(cfg.model, test_cfg=cfg.get("test_cfg"))
        if cfg.get("fp16", None) is not None:
            wrap_fp16_model(model)
        checkpoint = load_checkpoint(model, checkpoint_path, map_location="cpu")
        # Old versions did not save class info in checkpoints; fall back to the
        # dataset classes. `.get` also tolerates checkpoints with no "meta".
        meta = checkpoint.get("meta", {})
        if "CLASSES" in meta:
            model.CLASSES = meta["CLASSES"]
        else:
            model.CLASSES = dataset.CLASSES
        class_names = model.CLASSES

        model = MMDataParallel(model, device_ids=[0])
        # The temporary directory stays alive until inference is done, in case
        # the dataset re-reads the annotation file lazily.
        outputs = single_gpu_test(model, data_loader)

        image_ids = [
            info["filename"].split(".npy")[0]
            for info in data_loader.dataset.data_infos
        ]
    # ===================end========================

    new_df = _predictions_to_df(image_ids, outputs, class_names)
    new_df.to_csv(output_path, index=False)

    return new_df

In [58]:
# Run inference on the fold CSV; predictions are also written to tmp.csv.
inference_on_csv(
    csv=csv_path,
    img_root=prefix,
    config=config_path,
    checkpoint_path=checkpoint_path,
    output_path='tmp.csv',
)

100%|██████████| 885/885 [00:05<00:00, 175.59it/s]


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!


2021-02-17 11:26:54,752 - mmdet - INFO - load model from: torchvision://resnet50

unexpected key in source state_dict: fc.weight, fc.bias



[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 885/885, 17.0 task/s, elapsed: 52s, ETA:     0s

Unnamed: 0,image_id,class_name,class_id,rad_id,x_min,y_min,x_max,y_max
0,1c32170b4af4ce1a3030eb8167753b06,Aortic enlargement,0,0.742828,1242,745,1494,1037
1,1c32170b4af4ce1a3030eb8167753b06,Cardiomegaly,3,0.148044,1042,1593,1905,2215
2,1c32170b4af4ce1a3030eb8167753b06,ILD,5,0.107955,230,468,1127,2390
3,1c32170b4af4ce1a3030eb8167753b06,ILD,5,0.062743,334,1272,958,2148
4,1c32170b4af4ce1a3030eb8167753b06,Pleural thickening,11,0.174364,485,443,653,696
...,...,...,...,...,...,...,...,...
17543,1224f07d895107573588225f692e94f9,Aortic enlargement,0,0.098755,924,670,1316,1133
17544,1224f07d895107573588225f692e94f9,Cardiomegaly,3,0.684686,727,1329,1543,1672
17545,1224f07d895107573588225f692e94f9,Pleural thickening,11,0.059977,1151,395,1417,471
17546,1224f07d895107573588225f692e94f9,Pleural thickening,11,0.057828,627,391,862,477


In [59]:
from map_at_th import get_mean_average_precision

In [60]:
# Score the predictions saved in tmp.csv against the CSV they were produced from.
get_mean_average_precision(csv_path, 'tmp.csv')

(0.26065609737323636,
 {'Aortic enlargement': (0.9259009144523968, 620.0),
  'Atelectasis': (0.13100775193798453, 43.0),
  'Calcification': (0.042668325246989026, 142.0),
  'Cardiomegaly': (0.928416919560368, 464.0),
  'Consolidation': (0.27810105737228985, 82.0),
  'ILD': (0.1291875373052414, 125.0),
  'Infiltration': (0.1635433656860694, 172.0),
  'Lung Opacity': (0.20503180012040106, 368.0),
  'Nodule/Mass': (0.11512736682510119, 388.0),
  'Other lesion': (0.0209477213938567, 323.0),
  'Pleural effusion': (0.37163289452730724, 285.0),
  'Pleural thickening': (0.16837390919731493, 679.0),
  'Pneumothorax': (0.005291005291005291, 21.0),
  'Pulmonary fibrosis': (0.16395479430898377, 539.0)})

### Check that the two sets of outputs match

In [13]:
import pickle

RESULT_FILE = "/tmp/tmpzgzncoat.pkl"
# NOTE: unpickling can execute arbitrary code; only load result files that
# you produced yourself.
# The `with` block closes the file handle, which a bare open() call leaked.
with open(RESULT_FILE, "rb") as result_file:
    outputs2 = pickle.load(result_file)

In [17]:
import numpy as np

In [23]:
def _stack_all(results):
    """Concatenate every array of every item in ``results`` into one array."""
    return np.concatenate([np.concatenate(per_item) for per_item in results])


# NOTE(review): in the cells above, `outputs` only exists as a local variable
# inside inference_on_csv, so this cell relies on kernel state that a fresh
# Restart & Run All will not have - confirm where `outputs` should come from.
o1 = _stack_all(outputs)
o2 = _stack_all(outputs2)

In [24]:
# Element-wise closeness within numpy's default tolerances, reduced to one bool.
bool(np.isclose(o1, o2).all())

True

In [26]:
# Exact element-wise equality (no tolerance).
np.all(np.equal(o1, o2))

True