In [93]:
import os
import json
import numpy as np
from tifffile import imread
import cv2
import skimage.io as sio

import albumentations as A
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.detection import MaskRCNN, FasterRCNN_ResNet50_FPN_Weights, MaskRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models import ResNet50_Weights
from torchvision.ops import box_convert
import torchvision.transforms as T
import torch.nn.functional as F

import torch
from torch.optim import SGD, lr_scheduler

import pathlib
import json

from tqdm.auto import tqdm
from pycocotools.coco import COCO
from pycocotools import mask as coco_mask
from pycocotools.cocoeval import COCOeval

from PIL import Image

In [94]:
# !gdown https://drive.google.com/file/d/1B0qWNzQZQmfQP7x7o4FDdgb9GvPDoFzI/view --fuzzy
# !mkdir ../dataset
# !tar -xzf hw3-data-release.tar.gz
# !mv test_release/ ../dataset
# !mv train/ ../dataset/
# !mv test_image_name_to_ids.json ../dataset/

In [95]:
import numpy as np
import skimage.io as sio
from pycocotools import mask as mask_utils


def decode_maskobj(mask_obj):
    return mask_utils.decode(mask_obj)


def encode_mask(binary_mask):
    arr = np.asfortranarray(binary_mask).astype(np.uint8)
    rle = mask_utils.encode(arr)
    rle['counts'] = rle['counts'].decode('utf-8')
    return rle


def read_maskfile(filepath):
    mask_array = sio.imread(filepath)
    return mask_array

In [96]:
class MedicalDataset(Dataset):
    def __init__(self, root_dir, transform=None, data_type='Train'):
        self.root = root_dir
        self.transform = transform
        self.data_type = data_type
        if self.data_type not in ['Train', 'Valid', 'Test']:
            raise ValueError('Data type should be in [Train, Valid, Test]')
        self.samples = self._load_samples()

        self.train_coco_path = os.path.join(pathlib.Path(root_dir).parent, 'train_coco.json')
        self.val_coco_path = os.path.join(pathlib.Path(root_dir).parent, 'val_coco.json')
        if not os.path.exists(self.train_coco_path) or not os.path.exists(self.val_coco_path):
            # self.generate_coco(self.train_coco_path)
            self.generate_coco_split(self.train_coco_path, self.val_coco_path, split_ratio=0.8)
        self.train_coco = COCO(self.train_coco_path)
        self.val_coco = COCO(self.val_coco_path)
        self.num_classes = len(self.train_coco.loadCats(self.train_coco.getCatIds()))

    def _load_samples(self):
        samples = []
        for img_dir in os.listdir(self.root):
            tmp_dir = os.path.join(self.root, img_dir)

            if self.data_type == 'Train' or self.data_type == 'Valid':
                img_path = os.path.join(tmp_dir, 'image.tif')

                mask_paths = [
                    entry.name for entry in pathlib.Path(tmp_dir).iterdir()
                    if entry.name.startswith("class") and entry.is_file()
                ]

                samples.append({'image': img_path, 'masks': mask_paths})
            elif self.data_type == 'Test':
                test_img_json_path = os.path.join(pathlib.Path(self.root).parent, 'test_image_name_to_ids.json')
                with open(test_img_json_path, 'r') as f:
                    samples = json.load(f)

            else:
                raise ValueError('Wrong data type')

                # for idx in range(len(samples)):
                #     samples[idx]['file_name'] = os.path.join(self.root, samples[idx]['file_name'])
        return samples

    def mask_to_polygons(self, mask, epsilon=1.0):
        contours,_ = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
        polygons = []
        for contour in contours:
            if len(contour) > 2:
                poly = contour.reshape(-1).tolist()
                if len(poly) > 4: #Ensures valid polygon
                    polygons.append(poly)
        return polygons

    def generate_coco(self, output_dir, train=True):
        annotations = []
        images = []
        categories = []
        all_labels = []
        ann_id = 0

        for img_id, sample in enumerate(self.samples):
            print(f'({img_id}/{len(self.samples)})')
            img_path, mask_paths = sample['image'], sample['masks']
            img = cv2.imread(img_path)
            masks = [cv2.imread(os.path.join(pathlib.Path(img_path).parent, mask_path), cv2.IMREAD_UNCHANGED) for mask_path in mask_paths]

            images.append({
                "id": img_id,
                "file_name": img_path,
                "height": img.shape[0],
                "width": img.shape[1]
            })

            for mask in masks:
                unique_values = np.unique(mask)
                all_labels.append(unique_values)
                for value in unique_values:
                    if value == 0:  # Ignore background
                        continue

                    object_mask = (mask == value).astype(np.uint8) * 255
                    polygons = self.mask_to_polygons(object_mask)

                    for poly in polygons:
                        ann_id += 1
                        annotations.append({
                            "id": ann_id,
                            "image_id": img_id,
                            "category_id": 1,  # Only one category: Nuclei
                            "segmentation": [poly],
                            "area": cv2.contourArea(np.array(poly).reshape(-1, 2)),
                            "bbox": list(cv2.boundingRect(np.array(poly).reshape(-1, 2))),
                            "iscrowd": 0
                        })

        all_labels = np.unique(np.concatenate(all_labels).tolist())

        for idx, label in enumerate(all_labels):
            categories.append({"id": idx+1, "name": int(label)})

        coco_input = {
            "images": images,
            "annotations": annotations,
            "categories": categories
        }

        print(f'Saving train coco json')

        with open(output_dir, 'w') as f:
            json.dump(coco_input, f)

    def generate_coco_split(self, train_coco_path, val_coco_path, split_ratio=0.8):
        train_data = {"images": [], "annotations": [], "categories": []}
        val_data = {"images": [], "annotations": [], "categories": []}
        all_labels = []
        ann_id = 0
        train_ann = 0
        val_ann = 0

        # 隨機分離樣本索引
        indices = list(range(len(self.samples)))
        import random
        seed = 123
        random.Random(seed).shuffle(indices)
        split_point = int(len(indices) * split_ratio)
        train_indices = indices[:split_point]
        val_indices = indices[split_point:]

        # 類別統一管理 (避免訓練/驗證類別不一致)
        global_categories = {}

        for dataset_type, indices in [("train", train_indices), ("val", val_indices)]:
            target_data = train_data if dataset_type == "train" else val_data

            for idx in indices:
                sample = self.samples[idx]
                img_path, mask_paths = sample['image'], sample['masks']
                img = cv2.imread(img_path)
                masks = [cv2.imread(os.path.join(pathlib.Path(img_path).parent, mask_path), cv2.IMREAD_UNCHANGED) for mask_path in mask_paths]

                image_entry = {
                    "id": idx,
                    "file_name": img_path,
                    "height": img.shape[0],
                    "width": img.shape[1]
                }
                target_data["images"].append(image_entry)

                for mask in masks:
                    unique_values = np.unique(mask)
                    all_labels.append(unique_values)
                    for value in unique_values:
                        if value == 0:  # Ignore background
                            continue

                        object_mask = (mask == value).astype(np.uint8) * 255
                        polygons = self.mask_to_polygons(object_mask)

                        for poly in polygons:
                            # ann_id += 1
                            if dataset_type == 'train':
                                train_ann += 1
                                ann_id = train_ann
                            else:
                                val_ann += 1
                                ann_id = val_ann

                            target_data["annotations"].append({
                                "id": ann_id,
                                "image_id": idx,
                                "category_id": int(value),  # Only one category: Nuclei
                                "segmentation": [poly],
                                "area": cv2.contourArea(np.array(poly).reshape(-1, 2)),
                                "bbox": list(cv2.boundingRect(np.array(poly).reshape(-1, 2))),
                                "iscrowd": 0
                            })

        all_labels = np.unique(np.concatenate(all_labels).tolist())

        categories = []
        for idx, label in enumerate(all_labels):
            categories.append({"id": idx+1, "name": int(label)})
        train_data["categories"] = categories
        val_data["categories"] = categories


        # coco_input = {
        #     "images": images,
        #     "annotations": annotations,
        #     "categories": categories
        # }

        print(f'Saving  coco json')

        with open(train_coco_path, 'w') as f:
            json.dump(train_data, f)
        with open(val_coco_path, 'w') as f:
            json.dump(val_data, f)


    def poly2mask(self, segmentation, img_size):
        """
        多邊形標註轉二值掩碼
        :param segmentation: COCO格式的多邊形坐標列表 [[x1,y1,x2,y2,...]]
        :param img_size: 目標圖像尺寸 (height, width)
        """
        # 自動檢測標註類型
        if isinstance(segmentation, dict):
            # 處理RLE格式
            return coco_mask.decode(segmentation)
        else:
            # 處理多邊形格式
            rle = coco_mask.frPyObjects(segmentation, img_size[0], img_size[1])
            return coco_mask.decode(rle)

    def __getitem__(self, index):
        if self.data_type == 'Train' or self.data_type == 'Valid':
            coco_file = self.train_coco if self.data_type == 'Train' else self.val_coco
            img_id = coco_file.dataset['images'][index]['id']
            img_ids = coco_file.getImgIds(imgIds=img_id)
            img_info = coco_file.loadImgs(img_ids)
            # image = cv2.imread(img_info[0]['file_name']) / 255.0
            # print(img_info)
            image = Image.open(img_info[0]['file_name']).convert("RGB")
            image = self.transform(image) if self.transform is not None else image
            img_size = [img_info[0]['height'], img_info[0]['width']]


            boxes = []
            masks = []
            labels = []
            ann_ids = coco_file.getAnnIds(imgIds=img_ids)
            annotations = coco_file.loadAnns(ann_ids)
            for ann in annotations:
                boxes.append(ann['bbox'])
                tmp_mask = self.poly2mask(ann['segmentation'], img_size).squeeze()
                # mask_ = F.interpolate(
                #     tmp_mask,
                #     size=(224, 224),
                #     mode='nearest-exact'  # PyTorch 1.10+ 專用選項
                # )
                mask_ = cv2.resize(
                    tmp_mask,
                    (224, 224),
                    interpolation=cv2.INTER_NEAREST_EXACT  # 精確最近鄰算法
                )
                masks.append(mask_)
                labels.append(ann["category_id"])

            boxes = self.resize_box(boxes, img_size, target_size=[224,224])
            boxes = box_convert(torch.tensor(boxes, dtype=torch.float32), in_fmt='xywh', out_fmt='xyxy')
            masks = torch.as_tensor(np.array(masks), dtype=torch.bool)

            target = {'boxes': torch.as_tensor(boxes, dtype=torch.float32),
                      'masks': masks,
                      'labels': torch.as_tensor(np.array(labels), dtype=torch.int64)}

            return img_id, image, target
        else:
            raise ValueError('This is test, not yet implement')

    def resize_box(self, boxes, orig_size, target_size):
        # Eat xywh
        scale_w = target_size[1] / orig_size[1]
        scale_h = target_size[0] / orig_size[0]

        for box in boxes:
            box[0] *= scale_w  # x
            box[1] *= scale_h  # y
            box[2] *= scale_w  # w
            box[3] *= scale_h  # h

        return boxes

    def __len__(self):
        coco_file = self.train_coco if self.data_type == 'Train' else self.val_coco
        return len(coco_file.dataset['images'])

In [97]:
project_root = '..'
train_dir = os.path.join(project_root, 'dataset/train')
test_dir = os.path.join(project_root, 'dataset/test_release')

In [98]:
# train_coco_path = f'/home/bhg/visual_dl/lab3/dataset'
# val_coco_path = f'/home/bhg/visual_dl/lab3/dataset'
# train_set = MedicalDataset(root_dir=train_dir, data_type='Train')
# val_transform=T.Compose([
#     T.ToTensor(),
#     T.Resize(size=[224,224], antialias=True),
#     # T.CenterCrop(size=224),
#     # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# ])
# val_set = MedicalDataset(root_dir=train_dir, data_type='Valid', transform=val_transform)

# print(val_set[1])

In [99]:
# train_transform=T.Compose([
#     T.ToTensor(),
#     T.Resize(size=[224,224], antialias=True),
#     # T.CenterCrop(size=224),
#     # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# ])
# train_set = MedicalDataset(root_dir=train_dir, transform=train_transform)
# img, target = train_set[1]
# print(f"box: {target['boxes'].shape}")
# print(f"mask: {target['masks'].shape}")
# print(f"label: {target['labels'].shape}")
# print(target['boxes'][0])

# print(img, img.shape)

In [100]:
# train_transform = A.Compose([
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.3),
#     A.Rotate(limit=15, p=0.4),
#     A.CLAHE(p=0.5),
#     A.GridDistortion(p=0.2),
#     A.RandomBrightnessContrast(p=0.3)
# ], additional_targets={'mask': 'mask'})
train_transform=T.Compose([
    T.ToTensor(),
    T.Resize(size=[224, 224], antialias=True),
    # T.CenterCrop(size=224),
    # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = train_transform

train_set = MedicalDataset(root_dir=train_dir, transform=train_transform, data_type='Train')
val_set = MedicalDataset(root_dir=train_dir, transform=val_transform, data_type='Valid')
img_id, img, tar = val_set[0]
print(img_id, tar)

loading annotations into memory...
Done (t=0.94s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.30s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
122 {'boxes': tensor([[193.0353,  34.9504, 210.8235,  82.6099],
        [ 11.8588, 162.0426,  30.3059, 209.7021]]), 'masks': tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, Fals

In [101]:
max_choices = 100

def custom_collate(batch):
    img_ids = []
    images = []
    targets = []

    # print(batch[0][1])

    for img_id, img, target in batch:
        img_ids.append(img_id)
        images.append(img)
        # print(type(target['boxes']))
        # keep_idx = torch.randperm(target['boxes'].shape[0])[:max_choices]
        n = target['boxes'].shape[0]
        targets.append({
            'boxes': target['boxes'][torch.randperm(n)[:max_choices]],
            'labels': target['labels'][torch.randperm(n)[:max_choices]],
            'masks': target['masks'][torch.randperm(n)[:max_choices]]
        })

    images = torch.stack(images, dim=0)
    return img_ids, images, targets


BATCH_SIZE = 8
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=8, pin_memory=False, collate_fn=custom_collate)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=8, pin_memory=False, collate_fn=custom_collate)

In [102]:
def evaluate_model(model, val_loader, val_coco, epoch, device):
    # 初始化 COCO 格式儲存器
    coco_gt = val_coco  # 需提前加載驗證集註解文件
    coco_results = []

    model.eval()
    with torch.no_grad():
        bar = tqdm(val_loader, desc='Eval', leave=False)
        for img_ids, images, targets in bar:
            images = [img.to(device) for img in images]
            outputs = model(images)

            # 轉換預測結果到 COCO 格式
            for i in range(len(outputs)):
                image_id = img_ids[i]
                output = outputs[i]

                # 處理每個實例預測
                for j in range(len(output["boxes"])):
                    box = output["boxes"][j].cpu().numpy()
                    score = output["scores"][j].item()
                    label = output["labels"][j].item()
                    mask = output["masks"][j][0].cpu().numpy()  # (H,W)

                    # 生成 RLE 編碼 (COCO 要求格式)
                    rle = encode_mask(mask > 0.5)  # 閾值處理

                    coco_results.append({
                        "image_id": image_id,
                        "category_id": label,
                        "segmentation": rle,
                        "bbox": [box[0], box[1], box[2]-box[0], box[3]-box[1]],  # xywh
                        "score": score
                    })
            bar.update()

        bar.close()

    with open(f'../results/{epoch}_res.json', 'w') as f:
        json.dump(coco_results, f)

    # 評估計算
    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = COCOeval(coco_gt, coco_dt, 'segm')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    return coco_eval.stats  # 返回 AP 系列指標

In [103]:
num_classes=train_set.num_classes
print(num_classes)

773


In [104]:
def build_model(num_classes):
    from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
    from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT)

    # 2. 替換分類器
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)  # 自定義類別數

    # 3. 替換掩碼分類器
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes
    )

    return model

In [105]:
os.makedirs('../ckpt', exist_ok=True)
os.makedirs('../results', exist_ok=True)

In [106]:
val_coco = COCO('../dataset/val_coco.json')

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


In [107]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

In [108]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = build_model(num_classes).to(device)

params = [p for p in model.parameters() if p.requires_grad]
# optimizer = SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=0.0005)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
num_epochs = 1000

for epoch in tqdm(range(num_epochs), desc="Epochs"):
    model.train()
    bar = tqdm(train_loader, desc=f"Training")

    loss_per_epoch = []
    for img_ids, images, targets in bar:
        # print("1")
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # print(targets[0]['boxes'].shape)
        # print(img_ids)

        loss_dict = model(images, targets)
        sum_of_loss = sum(loss for loss in loss_dict.values())
        loss_per_epoch.append(sum_of_loss.detach().cpu())

        optimizer.zero_grad()
        sum_of_loss.backward()
        optimizer.step()
        lr_scheduler.step()

        bar.set_postfix(loss=np.mean(loss_per_epoch))
        bar.update()


    bar.close()

    # eval = evaluate_model(model, val_loader, val_coco, epoch, device)
    # print(eval)
    # with open(f'../results/{epoch}.json', 'w') as f:
    #     json.dump(eval, f)

    if epoch % 10 == 0:
      torch.save(model.state_dict(), f'../ckpt/{epoch}.pth')

torch.save(model.state_dict(), '../ckpt/last.pth')

Epochs:   0%|          | 0/1000 [00:00<?, ?it/s]

Training:   0%|          | 0/21 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 556.00 MiB. GPU 0 has a total capacity of 22.16 GiB of which 159.38 MiB is free. Process 22111 has 22.00 GiB memory in use. Of the allocated memory 21.19 GiB is allocated by PyTorch, and 584.27 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
ckpt_path = f'../ckpt/last.pth'
model = build_model(num_classes).to(device)
model.load_state_dict(torch.load(ckpt_path))
model.eval()

from tqdm import tqdm
from torchvision.ops import box_convert

bar = tqdm(train_loader, desc="Inference", total=len(train_loader))
for img_ids, images, targets in bar:
    images = list(image.to(device) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    with torch.no_grad():
        predictions = model(images)

    print(predictions)

    # for i, prediction in enumerate(predictions):
    #     boxes = prediction['boxes'].cpu().numpy()
    #     masks = prediction['masks'].cpu().numpy()
    #     labels = prediction['labels'].cpu().numpy()

    #     # Process the predictions as needed
    #     print(f"Image {i}:")
    #     print("Boxes:", boxes)
    #     print("Masks:", masks)
    #     print("Labels:", labels)

    bar.update()

Inference:   2%|▏         | 1/42 [00:00<00:29,  1.38it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:   7%|▋         | 3/42 [00:00<00:10,  3.76it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  12%|█▏        | 5/42 [00:02<00:22,  1.64it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  14%|█▍        | 6/42 [00:03<00:18,  1.99it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  19%|█▉        | 8/42 [00:03<00:15,  2.14it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  21%|██▏       | 9/42 [00:04<00:13,  2.46it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  24%|██▍       | 10/42 [00:07<00:40,  1.26s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  29%|██▊       | 12/42 [00:10<00:39,  1.32s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  31%|███       | 13/42 [00:11<00:38,  1.32s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  33%|███▎      | 14/42 [00:12<00:34,  1.24s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  36%|███▌      | 15/42 [00:14<00:37,  1.37s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  40%|████      | 17/42 [00:16<00:28,  1.14s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  43%|████▎     | 18/42 [00:16<00:22,  1.09it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  45%|████▌     | 19/42 [00:17<00:19,  1.17it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  48%|████▊     | 20/42 [00:19<00:25,  1.15s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  50%|█████     | 21/42 [00:21<00:28,  1.35s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  55%|█████▍    | 23/42 [00:21<00:15,  1.24it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  57%|█████▋    | 24/42 [00:24<00:23,  1.33s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  60%|█████▉    | 25/42 [00:24<00:17,  1.04s/it]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  62%|██████▏   | 26/42 [00:24<00:13,  1.21it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  64%|██████▍   | 27/42 [00:24<00:09,  1.53it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  67%|██████▋   | 28/42 [00:25<00:10,  1.35it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  71%|███████▏  | 30/42 [00:26<00:05,  2.19it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  74%|███████▍  | 31/42 [00:26<00:05,  2.19it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  76%|███████▌  | 32/42 [00:26<00:03,  2.53it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  79%|███████▊  | 33/42 [00:28<00:05,  1.54it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  81%|████████  | 34/42 [00:28<00:04,  1.89it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  83%|████████▎ | 35/42 [00:28<00:03,  2.27it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  86%|████████▌ | 36/42 [00:28<00:02,  2.66it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  90%|█████████ | 38/42 [00:30<00:02,  1.88it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  93%|█████████▎| 39/42 [00:31<00:01,  1.66it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  95%|█████████▌| 40/42 [00:31<00:01,  1.99it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference:  98%|█████████▊| 41/42 [00:31<00:00,  2.06it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 100%|██████████| 42/42 [00:32<00:00,  2.13it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 43it [00:32,  2.52it/s]                        

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 44it [00:33,  1.68it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 45it [00:33,  2.06it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 47it [00:33,  3.18it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 48it [00:34,  3.37it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 49it [00:34,  3.55it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]


Inference: 51it [00:36,  1.85it/s]

[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}, {'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'masks': tensor([], device='cuda:0', size=(0, 1, 224, 224))}]
[{'boxes': tensor([], device='cuda:0', size=(0, 4)), 'labels': tensor([], device='cuda:0', dtype=torch.int64), 'scores': tensor([], device='cuda:0'), 'mask

Inference: 100%|██████████| 42/42 [00:36<00:00,  1.16it/s]


In [None]:
model = torchvision.models.get_model(
        args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs
    )
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names),bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names)*4,bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names),kernel_size=(1,1),stride=(1,1))

model.to(device)

In [None]:
def masks_to_coco(results, image_ids):
    coco_results = []
    for img_id, output in zip(image_ids, results):
        for score, mask, label in zip(output['scores'], output['masks'], output['labels']):
            rle = binary_mask_to_rle(mask)
            coco_results.append({
                "image_id": img_id,
                "category_id": label.item(),
                "segmentation": rle,
                "score": score.item()
            })
    return coco_results

def binary_mask_to_rle(mask):
    # RLE編碼實現
    pixels = mask.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return {'size': list(mask.shape[-2:]), 'counts': runs.tolist()}


In [None]:
model.eval()
test_loader = DataLoader(test_set, batch_size=2, shuffle=False)

results = []
with torch.no_grad():
    for batch in test_loader:
        outputs = model(batch.to(device))
        results.extend(outputs)

# 生成最終提交文件
with open('test-results.json', 'w') as f:
    json.dump(masks_to_coco(results, test_set.image_ids), f)

print("Submission file generated!")