In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

In [2]:
!pip install pycocotools

Collecting pycocotools
  Downloading pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Downloading pycocotools-2.0.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (426 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m426.2/426.2 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pycocotools
Successfully installed pycocotools-2.0.7


In [3]:
import json
def combine_coco_jsons(json_path1, json_path2, output_path):
    # Load the first JSON file
    with open(json_path1, 'r') as f:
        data1 = json.load(f)

    # Load the second JSON file
    with open(json_path2, 'r') as f:
        data2 = json.load(f)
    
    # Initialize the combined data structure
    combined_data = {
        "images": [],
        "annotations": [],
        "categories": data1["categories"]
    }

    # Combine images
    image_id_map = {}
    new_image_id = 1
    for image in data1["images"]:
        image_id_map[image["id"]] = new_image_id
        image["id"] = new_image_id
        combined_data["images"].append(image)
        new_image_id += 1
    
    for image in data2["images"]:
        image_id_map[image["id"]] = new_image_id
        image["id"] = new_image_id
        combined_data["images"].append(image)
        new_image_id += 1

    # Combine annotations
    new_annotation_id = 1
    for annotation in data1["annotations"]:
        annotation["id"] = new_annotation_id
        annotation["image_id"] = image_id_map[annotation["image_id"]]
        combined_data["annotations"].append(annotation)
        new_annotation_id += 1
    
    for annotation in data2["annotations"]:
        annotation["id"] = new_annotation_id
        annotation["image_id"] = image_id_map[annotation["image_id"]]
        combined_data["annotations"].append(annotation)
        new_annotation_id += 1

    # Save the combined JSON
    with open(output_path, 'w') as f:
        json.dump(combined_data, f, indent=4)

# Example usage
json_path1 = '/kaggle/input/semis-od-coco-10/instances_train2017_labeled.json'
json_path2 = '/kaggle/input/semis-od-coco-10/yolov9semi/instances_train2017_unlabeled_predicted.json'
output_path = '/kaggle/working/train_semi2017.json'

combine_coco_jsons(json_path1, json_path2, output_path)

In [4]:
import torch
import torchvision
from torchvision import transforms
from pycocotools.coco import COCO
from PIL import Image
import os

class COCODataset(torch.utils.data.Dataset):
    def __init__(self, img_dir, ann_file, transform=None):
        self.img_dir = img_dir
        self.coco = COCO(ann_file)
        self.ids = list(self.coco.imgs.keys())
        self.transform = transform

    def __getitem__(self, index):
        img_id = self.ids[index]
        ann_ids = self.coco.getAnnIds(imgIds=img_id)
        coco_annotations = self.coco.loadAnns(ann_ids)

        img_path = os.path.join(self.img_dir, self.coco.imgs[img_id]['file_name'])
        image = Image.open(img_path).convert("RGB")

        boxes = []
        labels = []
        for annotation in coco_annotations:
            x, y, w, h = annotation['bbox']
            boxes.append([x, y, x + w, y + h])
            labels.append(annotation['category_id'])

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels

        if self.transform:
            image, target = self.transform(image, target)

        return image, target, img_id

    def __len__(self):
        return len(self.ids)


class ToTensor(object):
    def __call__(self, image, target):
        image = transforms.ToTensor()(image)
        return image, target

class RandomHorizontalFlip(object):
    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

    def __call__(self, image, target):
        if torch.rand(1) < self.flip_prob:
            image = transforms.functional.hflip(image)
            bbox = target["boxes"]
            bbox[:, [0, 2]] = image.size(2) - bbox[:, [2, 0]]
            target["boxes"] = bbox
        return image, target


In [5]:
from torchvision.transforms import functional as F

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target
    
def get_transform(train):
    transforms = []
    transforms.append(ToTensor())
    if train:
        transforms.append(RandomHorizontalFlip(0.5))
    return Compose(transforms)

train_dataset = COCODataset('/kaggle/input/coco-2017-dataset/coco2017/train2017',
                            '/kaggle/working/train_semi2017.json',
                            transform=get_transform(train=True))

# train_dataset = COCODataset('/kaggle/input/coco-2017-dataset/coco2017/train2017',
#                             '/kaggle/input/semis-od-coco-10/instances_train2017_labeled.json',
#                             transform=get_transform(train=True))

val_dataset = COCODataset('/kaggle/input/coco-2017-dataset/coco2017/train2017',
                          '/kaggle/input/semis-od-coco-10/instances_val2017.json',
                          transform=get_transform(train=False))

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False, num_workers=4, collate_fn=lambda x: tuple(zip(*x)))


loading annotations into memory...
Done (t=0.49s)
creating index...
index created!
loading annotations into memory...
Done (t=0.04s)
creating index...
index created!


In [6]:
from torchvision.models.detection import ssd300_vgg16

model = ssd300_vgg16(pretrained=True)
model.head.classification_head.num_classes = 9  # classes + background

Downloading: "https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth" to /root/.cache/torch/hub/checkpoints/ssd300_vgg16_coco-b556d3b4.pth
100%|██████████| 136M/136M [00:03<00:00, 43.0MB/s]


In [7]:
import torch.optim as optim

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

params = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

num_epochs = 15

for epoch in range(num_epochs):
    total_loss = 0.0
    model.train()
    for images, targets, image_ids in train_loader:
        images = list(image.to(device) for image in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        loss_dict = model(images, targets)
        #print(loss_dict)
        #print(loss_dict.values())
        losses = sum(loss for loss in loss_dict.values())
        total_loss += losses.item()

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
    print(f"Epoch #{epoch + 1} loss: {total_loss / len(train_loader)}")


torch.save(model.state_dict(), 'ssd_vgg16.pth')

Epoch #1 loss: 4.013419828536061
Epoch #2 loss: 3.300997937491181
Epoch #3 loss: 3.028830558965975
Epoch #4 loss: 2.8565238184212123
Epoch #5 loss: 2.6956853338253692
Epoch #6 loss: 2.583518311349792
Epoch #7 loss: 2.4639912302761577
Epoch #8 loss: 2.403932278730033
Epoch #9 loss: 2.31091904059715
Epoch #10 loss: 2.2300804149288482
Epoch #11 loss: 2.1481233589566755
Epoch #12 loss: 2.0997883832698654
Epoch #13 loss: 2.0399920302709393
Epoch #14 loss: 2.003468743073915
Epoch #15 loss: 1.9364942769055309


In [8]:
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [9]:
def evaluate_model(model, data_loader, device):
    model.eval()
    results = []
    with torch.no_grad():
        for images, targets, image_ids in data_loader:
            images = list(img.to(device) for img in images)
            outputs = model(images)
            for img_id, output in zip(image_ids, outputs):
                boxes = output['boxes'].cpu().numpy()
                scores = output['scores'].cpu().numpy()
                labels = output['labels'].cpu().numpy()
                for box, score, label in zip(boxes, scores, labels):
                    result = {
                        'image_id': img_id,
                        'category_id': label,
                        'bbox': [box[0], box[1], box[2] - box[0], box[3] - box[1]],
                        'score': score
                    }
                    results.append(result)
    return results

results = evaluate_model(model, val_loader, device)

# Load ground truth annotations
coco_gt = COCO('/kaggle/input/semis-od-coco-10/instances_val2017.json')
coco_dt = coco_gt.loadRes(results)

# Initialize COCOeval
coco_eval = COCOeval(coco_gt, coco_dt, iouType='bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.67s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=17.66s).
Accumulating evaluation results...
DONE (t=2.89s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.267
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.466
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.276
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.074
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.206
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.436
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.254
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.363
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDet

In [10]:
import torch
import torchvision.transforms as T
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from torchvision.models.detection import ssd300_vgg16

# Load the trained model
model = ssd300_vgg16(pretrained=False)
model.head.classification_head.num_classes = 9  # classes + background
model.load_state_dict(torch.load('/kaggle/input/weightsod/ssd_vgg16_coco.pth'))
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
model.eval()

COCO_INSTANCE_CATEGORY_NAMES = [
    '__background__', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat'
]

# Function to preprocess the image
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    transform = T.Compose([
        T.ToTensor()
    ])
    return transform(image).unsqueeze(0)

# Function to make predictions on a single image
def predict_image(image_path, model, device):
    image = preprocess_image(image_path).to(device)
    with torch.no_grad():
        outputs = model(image)
    return outputs

# Function to display the image with bounding boxes and labels
def display_predictions(image_path, predictions, threshold=0.5):
    image = Image.open(image_path).convert("RGB")
    plt.figure(figsize=(12, 12))
    plt.imshow(image)
    ax = plt.gca()

    boxes = predictions[0]['boxes'].cpu().numpy()
    scores = predictions[0]['scores'].cpu().numpy()
    labels = predictions[0]['labels'].cpu().numpy()
    labels -= 1
    for box, score, label in zip(boxes, scores, labels):
        if score >= threshold:  # Apply confidence threshold for visualization
            x_min, y_min, x_max, y_max = box
            width, height = x_max - x_min, y_max - y_min
            rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='r', facecolor='none')
            ax.add_patch(rect)
            ax.text(x_min, y_min, f'{COCO_INSTANCE_CATEGORY_NAMES[label]}: {score:.2f}', bbox=dict(facecolor='yellow', alpha=0.5), fontsize=12, color='black')
    
    plt.axis('off')
    plt.show()

# Path to the image
image_path = "/kaggle/input/testimage/boat.jpg"

# Make predictions
predictions = predict_image(image_path, model, device)

# Display the image with predictions
display_predictions(image_path, predictions, threshold=0.5)


Downloading: "https://download.pytorch.org/models/vgg16_features-amdegroot-88682ab5.pth" to /root/.cache/torch/hub/checkpoints/vgg16_features-amdegroot-88682ab5.pth
100%|██████████| 528M/528M [00:08<00:00, 69.0MB/s]


FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/testimage/boat.jpg'