In [20]:
import os
import json
import numpy as np
from tifffile import imread
import cv2
import skimage.io as sio

import albumentations as A
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision.models.detection import MaskRCNN, FasterRCNN_ResNet50_FPN_Weights, MaskRCNN_ResNet50_FPN_Weights
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor
from torchvision.models import ResNet50_Weights
from torchvision.ops import box_convert
import torchvision.transforms as T
import torch.nn.functional as F

import torch
from torch.optim import SGD, lr_scheduler

import pathlib
import json

from tqdm.auto import tqdm
from pycocotools.coco import COCO
from pycocotools import mask as coco_mask
from pycocotools.cocoeval import COCOeval

from PIL import Image

In [21]:
# !gdown https://drive.google.com/file/d/1B0qWNzQZQmfQP7x7o4FDdgb9GvPDoFzI/view --fuzzy
# !mkdir ../dataset
# !tar -xzf hw3-data-release.tar.gz
# !mv test_release/ ../dataset
# !mv train/ ../dataset/
# !mv test_image_name_to_ids.json ../dataset/

In [22]:
import numpy as np
import skimage.io as sio
from pycocotools import mask as mask_utils


def decode_maskobj(mask_obj):
    return mask_utils.decode(mask_obj)


def encode_mask(binary_mask):
    arr = np.asfortranarray(binary_mask).astype(np.uint8)
    rle = mask_utils.encode(arr)
    rle['counts'] = rle['counts'].decode('utf-8')
    return rle


def read_maskfile(filepath):
    mask_array = sio.imread(filepath)
    return mask_array

In [23]:
all_img_size = [446, 512]

In [24]:
class MedicalDataset(Dataset):
    def __init__(self, root_dir, transform=None, data_type='Train'):
        self.root = root_dir
        self.transform = transform
        self.data_type = data_type
        if self.data_type not in ['Train', 'Valid', 'Test']:
            raise ValueError('Data type should be in [Train, Valid, Test]')
        self.samples = self._load_samples()

        self.train_coco_path = os.path.join(pathlib.Path(root_dir).parent, 'train_coco.json')
        self.val_coco_path = os.path.join(pathlib.Path(root_dir).parent, 'val_coco.json')
        if not os.path.exists(self.train_coco_path) or not os.path.exists(self.val_coco_path):
            # self.generate_coco(self.train_coco_path)
            self.generate_coco_split(self.train_coco_path, self.val_coco_path, split_ratio=0.8)
        self.train_coco = COCO(self.train_coco_path)
        self.val_coco = COCO(self.val_coco_path)
        self.num_classes = len(self.train_coco.loadCats(self.train_coco.getCatIds()))

    def _load_samples(self):
        samples = []
        for img_dir in os.listdir(self.root):
            tmp_dir = os.path.join(self.root, img_dir)

            if self.data_type == 'Train' or self.data_type == 'Valid':
                img_path = os.path.join(tmp_dir, 'image.tif')

                mask_paths = [
                    entry.name for entry in pathlib.Path(tmp_dir).iterdir()
                    if entry.name.startswith("class") and entry.is_file()
                ]

                samples.append({'image': img_path, 'masks': mask_paths})
            elif self.data_type == 'Test':
                test_img_json_path = os.path.join(pathlib.Path(self.root).parent, 'test_image_name_to_ids.json')
                with open(test_img_json_path, 'r') as f:
                    samples = json.load(f)

            else:
                raise ValueError('Wrong data type')

                # for idx in range(len(samples)):
                #     samples[idx]['file_name'] = os.path.join(self.root, samples[idx]['file_name'])
        return samples

    def mask_to_polygons(self, mask, epsilon=1.0):
        contours,_ = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
        polygons = []
        for contour in contours:
            if len(contour) > 2:
                poly = contour.reshape(-1).tolist()
                if len(poly) > 4: #Ensures valid polygon
                    polygons.append(poly)
        return polygons

    def generate_coco(self, output_dir, train=True):
        annotations = []
        images = []
        categories = []
        all_labels = []
        ann_id = 0

        for img_id, sample in enumerate(self.samples):
            print(f'({img_id}/{len(self.samples)})')
            img_path, mask_paths = sample['image'], sample['masks']
            img = cv2.imread(img_path)
            masks = [cv2.imread(os.path.join(pathlib.Path(img_path).parent, mask_path), cv2.IMREAD_UNCHANGED) for mask_path in mask_paths]

            images.append({
                "id": img_id,
                "file_name": img_path,
                "height": img.shape[0],
                "width": img.shape[1]
            })

            for mask in masks:
                unique_values = np.unique(mask)
                all_labels.append(unique_values)
                for value in unique_values:
                    if value == 0:  # Ignore background
                        continue

                    object_mask = (mask == value).astype(np.uint8) * 255
                    polygons = self.mask_to_polygons(object_mask)

                    for poly in polygons:
                        ann_id += 1
                        annotations.append({
                            "id": ann_id,
                            "image_id": img_id,
                            "category_id": 1,  # Only one category: Nuclei
                            "segmentation": [poly],
                            "area": cv2.contourArea(np.array(poly).reshape(-1, 2)),
                            "bbox": list(cv2.boundingRect(np.array(poly).reshape(-1, 2))),
                            "iscrowd": 0
                        })

        all_labels = np.unique(np.concatenate(all_labels).tolist())

        for idx, label in enumerate(all_labels):
            categories.append({"id": idx+1, "name": int(label)})

        coco_input = {
            "images": images,
            "annotations": annotations,
            "categories": categories
        }

        print(f'Saving train coco json')

        with open(output_dir, 'w') as f:
            json.dump(coco_input, f)

    def generate_coco_split(self, train_coco_path, val_coco_path, split_ratio=0.8):
        train_data = {"images": [], "annotations": [], "categories": []}
        val_data = {"images": [], "annotations": [], "categories": []}
        all_labels = []
        ann_id = 0
        train_ann = 0
        val_ann = 0

        # 隨機分離樣本索引
        indices = list(range(len(self.samples)))
        import random
        seed = 123
        random.Random(seed).shuffle(indices)
        split_point = int(len(indices) * split_ratio)
        train_indices = indices[:split_point]
        val_indices = indices[split_point:]

        # 類別統一管理 (避免訓練/驗證類別不一致)
        global_categories = {}

        for dataset_type, indices in [("train", train_indices), ("val", val_indices)]:
            target_data = train_data if dataset_type == "train" else val_data

            for idx in indices:
                sample = self.samples[idx]
                img_path, mask_paths = sample['image'], sample['masks']
                img = cv2.imread(img_path)
                masks = [cv2.imread(os.path.join(pathlib.Path(img_path).parent, mask_path), cv2.IMREAD_UNCHANGED) for mask_path in mask_paths]

                image_entry = {
                    "id": idx,
                    "file_name": img_path,
                    "height": img.shape[0],
                    "width": img.shape[1]
                }
                target_data["images"].append(image_entry)

                for mask in masks:
                    unique_values = np.unique(mask)
                    # all_labels.append(unique_values)
                    for value in unique_values:
                        if value == 0:  # Ignore background
                            continue

                        object_mask = (mask == value).astype(np.uint8) * 255
                        polygons = self.mask_to_polygons(object_mask)

                        for poly in polygons:
                            # ann_id += 1
                            if dataset_type == 'train':
                                train_ann += 1
                                ann_id = train_ann
                            else:
                                val_ann += 1
                                ann_id = val_ann

                            target_data["annotations"].append({
                                "id": ann_id,
                                "image_id": idx,
                                # "category_id": int(value),  # Only one category: Nuclei
                                "category_id": 1,  # Only one category: Nuclei
                                "segmentation": [poly],
                                "area": cv2.contourArea(np.array(poly).reshape(-1, 2)),
                                "bbox": list(cv2.boundingRect(np.array(poly).reshape(-1, 2))),
                                "iscrowd": 0
                            })

        # all_labels = np.unique(np.concatenate(all_labels).tolist())

        categories = [{"id": 1, "name": "Nuclei"}]
        # for idx, label in enumerate(all_labels):
        #     categories.append({"id": idx+1, "name": int(label)})
        train_data["categories"] = categories
        val_data["categories"] = categories


        # coco_input = {
        #     "images": images,
        #     "annotations": annotations,
        #     "categories": categories
        # }

        print(f'Saving  coco json')

        with open(train_coco_path, 'w') as f:
            json.dump(train_data, f)
        with open(val_coco_path, 'w') as f:
            json.dump(val_data, f)


    def poly2mask(self, segmentation, img_size):
        """
        多邊形標註轉二值掩碼
        :param segmentation: COCO格式的多邊形坐標列表 [[x1,y1,x2,y2,...]]
        :param img_size: 目標圖像尺寸 (height, width)
        """
        # 自動檢測標註類型
        if isinstance(segmentation, dict):
            # 處理RLE格式
            return coco_mask.decode(segmentation)
        else:
            # 處理多邊形格式
            rle = coco_mask.frPyObjects(segmentation, img_size[0], img_size[1])
            return coco_mask.decode(rle)

    def __getitem__(self, index):
        if self.data_type == 'Train' or self.data_type == 'Valid':
            coco_file = self.train_coco if self.data_type == 'Train' else self.val_coco
            img_id = coco_file.dataset['images'][index]['id']
            img_ids = coco_file.getImgIds(imgIds=img_id)
            img_info = coco_file.loadImgs(img_ids)
            # image = cv2.imread(img_info[0]['file_name']) / 255.0
            # print(img_info)
            image = Image.open(img_info[0]['file_name']).convert("RGB")
            image = self.transform(image) if self.transform is not None else image
            img_size = [img_info[0]['height'], img_info[0]['width']]


            boxes = []
            masks = []
            labels = []
            ann_ids = coco_file.getAnnIds(imgIds=img_ids)
            annotations = coco_file.loadAnns(ann_ids)
            for ann in annotations:
                boxes.append(ann['bbox'])
                tmp_mask = self.poly2mask(ann['segmentation'], img_size).squeeze()

                mask_ = cv2.resize(
                    tmp_mask,
                    (all_img_size[0], all_img_size[1]),
                    interpolation=cv2.INTER_NEAREST_EXACT  # 精確最近鄰算法
                )
                masks.append(mask_)
                labels.append(ann["category_id"])

            boxes = self.resize_box(boxes, img_size, target_size=all_img_size)
            boxes = box_convert(torch.tensor(boxes, dtype=torch.float32), in_fmt='xywh', out_fmt='xyxy')
            masks = torch.as_tensor(np.array(masks), dtype=torch.bool)

            target = {'boxes': torch.as_tensor(boxes, dtype=torch.float32),
                      'masks': masks,
                      'labels': torch.as_tensor(np.array(labels), dtype=torch.int64)}

            return img_id, image, target
        else:
            raise ValueError('This is test, not yet implement')

    def resize_box(self, boxes, orig_size, target_size):
        # Eat xywh
        scale_w = target_size[1] / orig_size[1]
        scale_h = target_size[0] / orig_size[0]

        for box in boxes:
            box[0] *= scale_w  # x
            box[1] *= scale_h  # y
            box[2] *= scale_w  # w
            box[3] *= scale_h  # h

        return boxes

    def __len__(self):
        coco_file = self.train_coco if self.data_type == 'Train' else self.val_coco
        return len(coco_file.dataset['images'])

In [25]:
project_root = '..'
train_dir = os.path.join(project_root, 'dataset/train')
test_dir = os.path.join(project_root, 'dataset/test_release')

In [26]:
# train_coco_path = f'/home/bhg/visual_dl/lab3/dataset'
# val_coco_path = f'/home/bhg/visual_dl/lab3/dataset'
# train_set = MedicalDataset(root_dir=train_dir, data_type='Train')
# val_transform=T.Compose([
#     T.ToTensor(),
#     T.Resize(size=[224,224], antialias=True),
#     # T.CenterCrop(size=224),
#     # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# ])
# val_set = MedicalDataset(root_dir=train_dir, data_type='Valid', transform=val_transform)

# print(val_set[1])

In [27]:
# train_transform=T.Compose([
#     T.ToTensor(),
#     T.Resize(size=[224,224], antialias=True),
#     # T.CenterCrop(size=224),
#     # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
# ])
# train_set = MedicalDataset(root_dir=train_dir, transform=train_transform)
# img, target = train_set[1]
# print(f"box: {target['boxes'].shape}")
# print(f"mask: {target['masks'].shape}")
# print(f"label: {target['labels'].shape}")
# print(target['boxes'][0])

# print(img, img.shape)

In [28]:
# train_transform = A.Compose([
#     A.HorizontalFlip(p=0.5),
#     A.VerticalFlip(p=0.3),
#     A.Rotate(limit=15, p=0.4),
#     A.CLAHE(p=0.5),
#     A.GridDistortion(p=0.2),
#     A.RandomBrightnessContrast(p=0.3)
# ], additional_targets={'mask': 'mask'})
train_transform=T.Compose([
    T.ToTensor(),
    T.Resize(size=all_img_size, antialias=True),
    # T.CenterCrop(size=224),
    # T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

val_transform = train_transform

train_set = MedicalDataset(root_dir=train_dir, transform=train_transform, data_type='Train')
val_set = MedicalDataset(root_dir=train_dir, transform=val_transform, data_type='Valid')
img_id, img, tar = val_set[0]
print(img_id, tar)

loading annotations into memory...
Done (t=0.69s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
loading annotations into memory...
Done (t=0.29s)
creating index...
index created!
loading annotations into memory...
Done (t=0.05s)
creating index...
index created!
122 {'boxes': tensor([[441.2235,  69.5887, 481.8824, 164.4823],
        [ 27.1059, 322.6383,  69.2706, 417.5319]]), 'masks': tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, Fals

In [29]:
max_choices = 100

def custom_collate(batch):
    img_ids = []
    images = []
    targets = []

    # print(batch[0][1])

    for img_id, img, target in batch:
        img_ids.append(img_id)
        images.append(img)
        # print(type(target['boxes']))
        # keep_idx = torch.randperm(target['boxes'].shape[0])[:max_choices]
        n = target['boxes'].shape[0]
        targets.append({
            'boxes': target['boxes'][torch.randperm(n)[:max_choices]],
            'labels': target['labels'][torch.randperm(n)[:max_choices]],
            'masks': target['masks'][torch.randperm(n)[:max_choices]]
        })

    images = torch.stack(images, dim=0)
    return img_ids, images, targets


BATCH_SIZE = 8
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=6, pin_memory=False, persistent_workers=True, collate_fn=custom_collate)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=6, pin_memory=False, persistent_workers=True, collate_fn=custom_collate)

In [30]:
def evaluate_model(model, val_loader, val_coco, epoch, device):
    # 初始化 COCO 格式儲存器
    coco_gt = val_coco  # 需提前加載驗證集註解文件
    coco_results = []

    model.eval()
    with torch.no_grad():
        bar = tqdm(val_loader, desc='Eval', leave=False)
        for img_ids, images, targets in bar:
            images = [img.to(device) for img in images]
            outputs = model(images)

            # 轉換預測結果到 COCO 格式
            for i in range(len(outputs)):
                image_id = img_ids[i]
                output = outputs[i]

                # 處理每個實例預測
                for j in range(len(output["boxes"])):
                    box = output["boxes"][j].cpu().numpy()
                    score = output["scores"][j].item()
                    label = output["labels"][j].item()
                    mask = output["masks"][j][0].cpu().numpy()  # (H,W)

                    # 生成 RLE 編碼 (COCO 要求格式)
                    rle = encode_mask(mask > 0.5)  # 閾值處理

                    coco_results.append({
                        "image_id": image_id,
                        "category_id": label,
                        "segmentation": rle,
                        "bbox": [box[0], box[1], box[2]-box[0], box[3]-box[1]],  # xywh
                        "score": score
                    })
            bar.update()

        bar.close()

    print(coco_results)
    # with open(f'../results/{epoch}_res.txt', 'w') as f:
    #     json.dump(coco_results, f)

    # 評估計算
    coco_dt = coco_gt.loadRes(coco_results)
    coco_eval = COCOeval(coco_gt, coco_dt, 'segm')
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    with open(f'../results/{epoch}_coco_stats.json', 'w') as f:
      json.dump(coco_eval.stats.tolist(), f)

    return coco_eval.stats  # 返回 AP 系列指標

In [31]:
# num_classes=train_set.num_classes
num_classes = 2
print(num_classes)

2


In [32]:
def build_model(num_classes):
    from torchvision.models.detection import MaskRCNN_ResNet50_FPN_V2_Weights
    from torchvision.models.detection import MaskRCNN_ResNet50_FPN_Weights
    model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2(weights=MaskRCNN_ResNet50_FPN_V2_Weights.DEFAULT)

    # 2. 替換分類器
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)  # 自定義類別數

    # 3. 替換掩碼分類器
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, hidden_layer, num_classes
    )

    return model

In [33]:
os.makedirs('../ckpt', exist_ok=True)
os.makedirs('../results', exist_ok=True)

In [34]:
val_coco = COCO('../dataset/val_coco.json')

loading annotations into memory...
Done (t=0.05s)
creating index...
index created!


In [35]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

In [36]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
model = build_model(num_classes).to(device)

params = [p for p in model.parameters() if p.requires_grad]
# optimizer = SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0005)
# optimizer = torch.optim.Adam(params, lr=0.001, weight_decay=0.0005)
# lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


lr = 5e-4
num_epochs = 1000
optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=lr, total_steps=num_epochs*len(train_loader))

for epoch in tqdm(range(num_epochs), desc="Epochs"):
    model.train()
    bar = tqdm(train_loader, desc=f"Training")

    loss_per_epoch = []
    for img_ids, images, targets in bar:
        # print("1")
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # print(targets[0])
        # print(img_ids)

        loss_dict = model(images, targets)
        sum_of_loss = sum(loss for loss in loss_dict.values())
        loss_per_epoch.append(sum_of_loss.detach().cpu().item())

        optimizer.zero_grad()
        sum_of_loss.backward()
        optimizer.step()
        lr_scheduler.step()

        bar.set_postfix(loss=np.mean(loss_per_epoch))
        bar.update()


    bar.close()

    eval = evaluate_model(model, val_loader, val_coco, epoch, device)
    # print(eval)
    # with open(f'../results/{epoch}.txt', 'w') as f:
    #     f.write(eval)

    if epoch % 10 == 0:
      torch.save(model.state_dict(), f'../ckpt/{epoch}.pth')

torch.save(model.state_dict(), '../ckpt/last.pth')

Epochs:   0%|          | 0/1000 [00:00<?, ?it/s]

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'k^b2130O010V>1eNOUC0O0o=0PB20O:OG0j<0aC0`=0_O0mm34idKLS10c<0ZB2_<4aCJo00h<3jN6`CGO0g00\\<0mB100O0106Od<1VCO010O010O010OT=1kB01O10O1O0010OX=OjB0O1O10Of=O\\B0O0ocQ4'}, 'bbox': [np.float32(189.84465), np.float32(137.35875), np.float32(25.292679), np.float32(47.612488)], 'score': 0.6940698623657227}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'oQh5120R>0kb20SoL1O0ck0Olc20doL2`MO^COf=0ZB0i=1WBO00c=4\\BM1Og00YO0o<1PC01O?0A0g<0]C4KMa0Od<0VC0F040\\=2_BOh=OYB0Qgj0'}, 'bbox': [np.float32(422.1725), np.float32(258.71695), np.float32(29.96051), np.float32(45.37909)], 'score': 0.6600767374038696}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'ea]31^=0eB90GiY11VfN0[Y1OPV21e`L050KOSY10mfN21N31LOd<0W10YC0mN0f=0ZB0_<0[10hB0K0C0_<0hD0[;1n0ObB0`00b<0lB0O0d=0]B0O1Q10];1bCO00O10Ob=OkB0E0;0l<0iB0dk00iVR3'}, 'bbox': [np.float32(250.80685), np.float

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(251.95941), np.float32(205.25465), np.float32(34.672974), np.float32(52.78853)], 'score': 0.7354900240898132}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(289.871), np.float32(299.69083), np.float32(28.424774), np.float32(41.11847)], 'score': 0.6984439492225647}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(166.92725), np.float32(237.26418), np.float32(31.560364), np.float32(46.148453)], 'score': 0.680638313293457}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(185.50043), np.float32(289.8038), np.float32(13.84729), np.float32(27.256744)], 'score': 0.6741438508033752}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float3

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(290.54346), np.float32(296.78784), np.float32(27.709229), np.float32(44.626526)], 'score': 0.4130929410457611}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(231.36699), np.float32(245.22665), np.float32(16.819107), np.float32(16.904053)], 'score': 0.3582439720630646}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(434.78778), np.float32(172.3403), np.float32(35.55841), np.float32(76.74931)], 'score': 0.325084924697876}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(346.12943), np.float32(121.44677), np.float32(11.9149475), np.float32(32.691902)], 'score': 0.30993276834487915}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.f

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(290.9564), np.float32(296.4887), np.float32(27.645355), np.float32(46.12393)], 'score': 0.3035275936126709}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(434.9537), np.float32(170.77356), np.float32(35.539062), np.float32(77.96895)], 'score': 0.29267269372940063}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(14.871169), np.float32(175.48468), np.float32(34.269764), np.float32(68.09558)], 'score': 0.28032466769218445}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(423.9331), np.float32(255.97815), np.float32(28.31549), np.float32(55.380157)], 'score': 0.27326682209968567}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.floa

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(249.25844), np.float32(212.7692), np.float32(16.369888), np.float32(31.537064)], 'score': 0.23573412001132965}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(434.65024), np.float32(171.17003), np.float32(35.69867), np.float32(77.129364)], 'score': 0.20153695344924927}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(291.29367), np.float32(298.19208), np.float32(29.21637), np.float32(51.749817)], 'score': 0.19815777242183685}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(422.7368), np.float32(256.81348), np.float32(29.46112), np.float32(54.980286)], 'score': 0.19428418576717377}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(247.76886), np.float32(210.74937), np.float32(15.15802), np.float32(30.837479)], 'score': 0.33593419194221497}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(195.15878), np.float32(389.91672), np.float32(14.491623), np.float32(29.142426)], 'score': 0.30556243658065796}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(253.23033), np.float32(198.93277), np.float32(46.06743), np.float32(99.496155)], 'score': 0.2772934138774872}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(232.27313), np.float32(211.5787), np.float32(17.943207), np.float32(28.860458)], 'score': 0.2620117962360382}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(443.22668), np.float32(56.9747), np.float32(34.206757), np.float32(104.04033)], 'score': 0.35559919476509094}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(247.7022), np.float32(211.45801), np.float32(15.540604), np.float32(30.098053)], 'score': 0.31925177574157715}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(253.12132), np.float32(200.31384), np.float32(47.44969), np.float32(99.53046)], 'score': 0.27181416749954224}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(31.505466), np.float32(335.56757), np.float32(29.171612), np.float32(71.50177)], 'score': 0.25929778814315796}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.f

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(31.198753), np.float32(339.32434), np.float32(29.03347), np.float32(68.45767)], 'score': 0.19154290854930878}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(21.347277), np.float32(295.75177), np.float32(58.09836), np.float32(150.24823)], 'score': 0.14056119322776794}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(252.75128), np.float32(201.05865), np.float32(46.71576), np.float32(98.94147)], 'score': 0.13821296393871307}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(442.66058), np.float32(55.28145), np.float32(32.798615), np.float32(92.51198)], 'score': 0.12905988097190857}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.fl

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(31.33803), np.float32(341.2113), np.float32(29.435884), np.float32(66.62152)], 'score': 0.08649846166372299}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(82.799164), np.float32(18.094896), np.float32(91.90579), np.float32(248.66374)], 'score': 0.08330031484365463}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(443.56375), np.float32(56.705082), np.float32(33.76703), np.float32(111.31131)], 'score': 0.08040495216846466}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(251.9782), np.float32(200.65681), np.float32(48.79213), np.float32(101.781296)], 'score': 0.0752425342798233}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.fl

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(154.06662), np.float32(369.59073), np.float32(18.711868), np.float32(44.292603)], 'score': 0.07098302990198135}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(84.55734), np.float32(23.173544), np.float32(116.65546), np.float32(252.43285)], 'score': 0.06657369434833527}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(193.21327), np.float32(0.0), np.float32(207.78633), np.float32(282.38217)], 'score': 0.06507224589586258}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(242.17575), np.float32(213.49846), np.float32(29.240204), np.float32(34.90004)], 'score': 0.06452033668756485}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.flo

Training:   0%|          | 0/21 [00:00<?, ?it/s]

Eval:   0%|          | 0/6 [00:00<?, ?it/s]

[{'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(272.76025), np.float32(17.063604), np.float32(239.23975), np.float32(414.74805)], 'score': 0.14177528023719788}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(15.225337), np.float32(4.5438004), np.float32(206.59714), np.float32(281.87207)], 'score': 0.10424094647169113}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(220.29057), np.float32(8.585514), np.float32(144.78526), np.float32(436.7616)], 'score': 0.09458280354738235}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np.float32(5.851088), np.float32(288.91748), np.float32(70.28726), np.float32(157.08252)], 'score': 0.05468738451600075}, {'image_id': 122, 'category_id': 1, 'segmentation': {'size': [446, 512], 'counts': 'PPo6'}, 'bbox': [np

Training:   0%|          | 0/21 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
ckpt_path = f'../ckpt/last.pth'
model = build_model(num_classes).to(device)
model.load_state_dict(torch.load(ckpt_path))
model.eval()

from tqdm import tqdm
from torchvision.ops import box_convert

bar = tqdm(train_loader, desc="Inference", total=len(train_loader))
for img_ids, images, targets in bar:
    images = list(image.to(device) for image in images)
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    with torch.no_grad():
        predictions = model(images)

    print(predictions)

    # for i, prediction in enumerate(predictions):
    #     boxes = prediction['boxes'].cpu().numpy()
    #     masks = prediction['masks'].cpu().numpy()
    #     labels = prediction['labels'].cpu().numpy()

    #     # Process the predictions as needed
    #     print(f"Image {i}:")
    #     print("Boxes:", boxes)
    #     print("Masks:", masks)
    #     print("Labels:", labels)

    bar.update()

In [None]:
model = torchvision.models.get_model(
        args.model, weights=args.weights, weights_backbone=args.weights_backbone, num_classes=num_classes, **kwargs
    )
model.roi_heads.box_predictor.cls_score = nn.Linear(in_features=1024, out_features=len(class_names),bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(in_features=1024, out_features=len(class_names)*4,bias=True)
model.roi_heads.mask_predictor.mask_fcn_logits = nn.Conv2d(256, len(class_names),kernel_size=(1,1),stride=(1,1))

model.to(device)

In [None]:
def masks_to_coco(results, image_ids):
    coco_results = []
    for img_id, output in zip(image_ids, results):
        for score, mask, label in zip(output['scores'], output['masks'], output['labels']):
            rle = binary_mask_to_rle(mask)
            coco_results.append({
                "image_id": img_id,
                "category_id": label.item(),
                "segmentation": rle,
                "score": score.item()
            })
    return coco_results

def binary_mask_to_rle(mask):
    # RLE編碼實現
    pixels = mask.flatten()
    pixels = np.concatenate([[0], pixels, [0]])
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    runs[1::2] -= runs[::2]
    return {'size': list(mask.shape[-2:]), 'counts': runs.tolist()}


In [None]:
model.eval()
test_loader = DataLoader(test_set, batch_size=2, shuffle=False)

results = []
with torch.no_grad():
    for batch in test_loader:
        outputs = model(batch.to(device))
        results.extend(outputs)

# 生成最終提交文件
with open('test-results.json', 'w') as f:
    json.dump(masks_to_coco(results, test_set.image_ids), f)

print("Submission file generated!")