In [1]:
import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
import numpy as np
import json
import os
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from tqdm import tqdm
from PIL import Image

In [10]:
model_path = "../../model/detection.pth"
val_json = "../../data/detection/labeled/train/_annotations.coco.json"
val_img_dir = "../../data/detection/labeled/train"
with open("../../model/label_mapping.json", "r") as f:
    labels = json.load(f)
num_classes = len(labels.keys()) + 1

In [3]:
class CustomDataset(Dataset):
    def __init__(self, json_file, img_dir, transforms=None):
        with open(json_file) as f:
            self.data = json.load(f) 
        self.img_dir = img_dir
        self.transforms = transforms

    def __getitem__(self, idx):
        img_info = self.data['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])
        img = Image.open(img_path).convert("L")
        
        img = img.convert("RGB")

        annotations = [ann for ann in self.data['annotations'] if ann['image_id'] == img_info['id']]
        
        boxes = []
        labels = []
        
        for ann in annotations:
            xmin, ymin, width, height = ann['bbox']
            xmax = xmin + width
            ymax = ymin + height
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(ann['category_id']) 

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.data['images'])

In [11]:
import torch
from torch.utils.data import Dataset
from PIL import Image
import os
import json
import numpy as np
from torchvision import transforms as T

class CustomDataset(Dataset):
    def __init__(self, json_file, img_dir, transforms=None):
        with open(json_file) as f:
            self.data = json.load(f) 
        self.img_dir = img_dir
        self.transforms = transforms if transforms is not None else T.ToTensor()

    def __getitem__(self, idx):
        img_info = self.data['images'][idx]
        img_path = os.path.join(self.img_dir, img_info['file_name'])

        # Baca dalam grayscale mode
        img = Image.open(img_path).convert("L")  # Tetap grayscale, 1 channel

        if self.transforms:
            img = self.transforms(img)  # Akan jadi shape [1, H, W]

        annotations = [ann for ann in self.data['annotations'] if ann['image_id'] == img_info['id']]
        
        boxes = []
        labels = []

        for ann in annotations:
            xmin, ymin, width, height = ann['bbox']
            xmax = xmin + width
            ymax = ymin + height
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(ann['category_id']) 

        boxes = torch.tensor(boxes, dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([img_info['id']])
        }

        return img, target

    def __len__(self):
        return len(self.data['images'])


In [12]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)

device = torch.device("cuda") if False else torch.device("cpu")
checkpoint = torch.load('../../model/detection.pth', map_location=device)
model.load_state_dict(checkpoint)
model.eval()

  model.load_state_dict(torch.load(model_path, map_location=device))
  checkpoint = torch.load('../../model/detection.pth', map_location=device)


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [15]:
from torchvision import transforms as T
transform = T.Compose([T.Resize((512, 512)), T.ToTensor()])
val_dataset = CustomDataset(json_file=val_json, img_dir=val_img_dir, transforms=transform)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, collate_fn=lambda x: tuple(zip(*x)))

In [16]:
coco_gt = COCO(val_json)

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [17]:
results = []
img_ids_used = set()

for images, targets in tqdm(val_loader):
    images = list(img.to(device) for img in images)
    outputs = model(images)

    for target, output in zip(targets, outputs):
        image_id = int(target["image_id"].item()) if "image_id" in target else None
        if image_id is None:
            continue
        
        boxes = output["boxes"].detach().cpu().numpy()
        scores = output["scores"].detach().cpu().numpy()
        labels = output["labels"].detach().cpu().numpy()

        for box, score, label in zip(boxes, scores, labels):
            x_min, y_min, x_max, y_max = box
            width = x_max - x_min
            height = y_max - y_min

            results.append({
                "image_id": image_id,
                "category_id": int(label),
                "bbox": [x_min, y_min, width, height],
                "score": float(score)
            })
        img_ids_used.add(image_id)

100%|██████████| 40/40 [06:10<00:00,  9.25s/it]


In [19]:
print(results)

[{'image_id': 0, 'category_id': 1, 'bbox': [np.float32(181.91368), np.float32(169.91357), np.float32(95.75285), np.float32(57.334366)], 'score': 0.5261618494987488}, {'image_id': 0, 'category_id': 4, 'bbox': [np.float32(176.51573), np.float32(166.70598), np.float32(112.90837), np.float32(59.799957)], 'score': 0.32127314805984497}, {'image_id': 0, 'category_id': 2, 'bbox': [np.float32(181.1362), np.float32(168.10925), np.float32(96.38019), np.float32(58.180817)], 'score': 0.2529548704624176}, {'image_id': 0, 'category_id': 4, 'bbox': [np.float32(170.50964), np.float32(150.51573), np.float32(134.93808), np.float32(101.730545)], 'score': 0.18643422424793243}, {'image_id': 0, 'category_id': 1, 'bbox': [np.float32(166.37701), np.float32(152.65195), np.float32(126.19647), np.float32(97.724335)], 'score': 0.18637646734714508}, {'image_id': 0, 'category_id': 1, 'bbox': [np.float32(408.56876), np.float32(4.454829), np.float32(103.431244), np.float32(49.10884)], 'score': 0.18514007329940796}, {'

In [25]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import json
import os

# Path ke JSON dan direktori gambar
json_path = "../../data/detection/labeled/train/_annotations.coco.json"
img_dir = "../../data/detection/labeled/train"

# Load data COCO
with open(json_path) as f:
    data = json.load(f)

# Mapping image_id ke file_name
image_id = 0  # ganti dengan image_id dari hasil prediksi kamu
img_info = next(img for img in data['images'] if img['id'] == image_id)
img_path = os.path.join(img_dir, img_info['file_name'])

# Load gambar
img = Image.open(img_path).convert("L")  # kalau kamu pakai grayscale

# Prediksi hasil dari model kamu

for result in results:

    prediction = result
    
    if(prediction['score'] > 0.8):

        # Load label mapping (jika kamu punya label_mapping.json)
        with open("../../model/label_mapping.json") as f:
            label_map = json.load(f)

        # Plot gambar dan bounding box
        fig, ax = plt.subplots(1, figsize=(8, 6))
        ax.imshow(img, cmap='gray')  # cmap='gray' untuk grayscale

        # Bounding box format: [xmin, ymin, width, height]
        bbox = prediction['bbox']
        xmin, ymin, w, h = bbox
        rect = patches.Rectangle((xmin, ymin), w, h, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # Tampilkan label dan skor
        label_id = str(prediction['category_id'])
        label_name = label_map.get(label_id, f"class {label_id}")
        score = prediction['score']
        ax.text(xmin, ymin - 5, f"{label_name} ({score:.2f})", color='red', fontsize=12, backgroundcolor='white')

        plt.axis('off')
        plt.tight_layout()
        plt.show()


In [27]:
def convert(obj):
    if isinstance(obj, np.float32) or isinstance(obj, np.float64):
        return float(obj)
    elif isinstance(obj, list):
        return [convert(i) for i in obj]
    elif isinstance(obj, dict):
        return {k: convert(v) for k, v in obj.items()}
    else:
        return obj

converted_results = [convert(pred) for pred in results]

In [30]:
with open("../../model/predictions.json", "w") as f:
    json.dump(converted_results, f)

coco_dt = coco_gt.loadRes("../../model/predictions.json")
coco_eval = COCOeval(coco_gt, coco_dt, iouType="bbox")
coco_eval.params.imgIds = list(img_ids_used)
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()


Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.07s).
Accumulating evaluation results...
DONE (t=0.05s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=1