In [None]:
# # This Python 3 environment comes with many helpful analytics libraries installed
# # It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# # For example, here's several helpful packages to load

# import numpy as np # linear algebra
# import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# # Input data files are available in the read-only "../input/" directory
# # For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# # You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# # You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Root folder of the COCO 2017 dataset; every other path in the notebook is built from it.
root_datapath = "/kaggle/input/coco-2017-dataset/coco2017"

In [None]:
import os
# Print the name of every file found under the annotations folder (sub-folders included).
annotations_root = f'{root_datapath}/annotations'
for walk_entry in os.walk(annotations_root):
    # os.walk yields (dirpath, dirnames, filenames); only the file names are of interest.
    for annotation_file in walk_entry[-1]:
        print(annotation_file)

In [None]:
import json

# Path to the "instances" annotation file of the train2017 split
ann_file = f"{root_datapath}/annotations/instances_train2017.json"

# Load the annotations. The encoding is given explicitly so decoding does not
# depend on the platform's default locale encoding.
with open(ann_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

In [None]:
# Lookup table: category id -> category name
coco_id_map = {}
for category in coco_data['categories']:
    coco_id_map[category['id']] = category['name']

# Quick check of the lookup; .get() yields None when the id is not in the table.
print(coco_id_map.get(333))

In [None]:
from collections import defaultdict

# Resolve the id of a single category by its name.
category_name = 'hot dog'
category_list = coco_data["categories"]
matching_ids = (entry["id"] for entry in category_list if entry["name"] == category_name)
category_id = next(matching_ids)

# Collect, per image id, the boxes of every annotation that belongs to that category.
img_to_bboxes = defaultdict(list)
for ann in coco_data["annotations"]:
    if ann["category_id"] != category_id:
        continue
    img_to_bboxes[ann["image_id"]].append(ann["bbox"])


from collections import defaultdict

# Index all annotations (any category) by the id of the image they belong to.
img_to_anns = defaultdict(list)
for annotation in coco_data['annotations']:
    owner_image_id = annotation['image_id']
    img_to_anns[owner_image_id].append(annotation)


from pathlib import Path

# One-time lookup table from image file name to image id, so resolving a path
# does not rescan every entry of coco_data['images'] on each call.
# setdefault keeps the first id seen for a file name, which matches what a
# front-to-back search over the list would return.
_file_name_to_img_id = {}
for _img_info in coco_data['images']:
    _file_name_to_img_id.setdefault(_img_info['file_name'], _img_info['id'])


def get_annotations(img_path: Path) -> dict:
    """Return the boxes and category ids annotated for one image.

    Args:
        img_path: Path of the image. Only its stem is used; the annotation
            entry is looked up under ``<stem>.jpg``.

    Returns:
        A dict ``{"boxes": [...], "labels": [...]}`` holding the ``bbox`` and
        ``category_id`` values of the image's annotations, in matching order.
        Both lists are empty when the file name is unknown or the image has
        no annotations.
    """
    img_file_name = img_path.stem + '.jpg'
    if img_file_name not in _file_name_to_img_id:
        return {"boxes": [], "labels": []}

    # .get() (not indexing) so that looking up an image without annotations
    # does not insert an empty entry into the defaultdict.
    anns = img_to_anns.get(_file_name_to_img_id[img_file_name], [])

    return {
        "boxes": [ann['bbox'] for ann in anns],
        "labels": [ann['category_id'] for ann in anns],
    }

In [None]:

import glob
from pathlib import Path
# Image folders of the train and test splits
target_dir_train = Path(f'{root_datapath}/train2017')
target_dir_test = Path(f'{root_datapath}/test2017')

# Sorted lists of the .jpg files found directly inside each folder
# (sorted() already returns a list).
train_dir_path, test_dir_path = (
    sorted(split_dir.glob('*.jpg'))
    for split_dir in (target_dir_train, target_dir_test)
)

# train_dir_path[:5] ,train_dir_path[1].stem

In [None]:
from torchvision import tv_tensors
import torch


def change_box(boxes, labels, img_tvtensor):
    """Turn ``[x, y, w, h]`` boxes and their labels into detection targets.

    Args:
        boxes: Sequence of ``[x, y, width, height]`` boxes; may be ``None``
            or empty.
        labels: Category ids, one per box. Ignored when there are no boxes.
        img_tvtensor: Image tensor; its last two dimensions are used as the
            canvas size of the returned boxes.

    Returns:
        A tuple ``(bounding_boxes, labels)``: a ``tv_tensors.BoundingBoxes``
        in XYXY format holding float32 data, and an int64 label tensor.
        When there are no boxes, the box data has shape ``(0, 4)`` and the
        label tensor has shape ``(0,)``.
    """
    # Imported locally so the function works regardless of whether the cell
    # that imports box_convert at notebook level has already been executed.
    from torchvision.ops import box_convert

    if boxes is None or len(boxes) == 0:
        box_xyxy = torch.zeros((0, 4), dtype=torch.float32)
        label_tensor = torch.zeros((0,), dtype=torch.int64)
    else:
        box_xywh = torch.tensor(boxes, dtype=torch.float32)
        box_xyxy = box_convert(box_xywh, in_fmt='xywh', out_fmt='xyxy')
        label_tensor = torch.tensor(labels, dtype=torch.int64)

    bounding_boxes = tv_tensors.BoundingBoxes(
        data=box_xyxy,
        format="XYXY",
        canvas_size=img_tvtensor.shape[-2:]
    )
    return bounding_boxes, label_tensor



from torchvision.transforms import v2


# Inference-time preprocessing: wrap the input as an image tensor, then cast it
# to float32 with value scaling enabled.
_test_transform_steps = [
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
]
test_transform = v2.Compose(_test_transform_steps)

In [None]:
from torchvision.ops import box_convert
from torch.utils.data import Dataset
from torchvision import tv_tensors
from PIL import Image
import torch


class detectionDatasetCustom(Dataset):
    """Dataset yielding ``(image, target)`` pairs for object detection.

    Args:
        directory_path: Sequence of image paths. Each path is passed to
            ``get_annotations`` and later opened with PIL.
        transform: Optional callable invoked as ``transform(image, target)``
            that returns the transformed ``(image, target)`` pair.
    """

    def __init__(self , directory_path , transform = None):
        self.paths = directory_path
        self.transform = transform
        # Annotations are resolved once, up front, and kept in memory. This is
        # meant for small datasets (a few thousand images); for larger ones the
        # lookup should be done per item in __getitem__ to limit RAM usage.
        self.annotations = [get_annotations(p) for p in self.paths]

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.paths)

    def load_img(self ,img_path):
        """Open the image at ``img_path`` and return it as an RGB PIL image."""
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made, even if decoding raises.
        with Image.open(img_path) as img_file:
            return img_file.convert('RGB')

    def __getitem__ (self , idx):
        """Return the image and its target dict (``'boxes'``, ``'labels'``) at ``idx``."""
        img_path = self.paths[idx]
        ann = self.annotations[idx]

        img_tvtensor = tv_tensors.Image(self.load_img(img_path))

        # Convert the raw annotation lists into a BoundingBoxes object and a
        # label tensor sized to this image.
        box, labels = change_box(ann['boxes'], ann['labels'], img_tvtensor)
        target = {'boxes': box, 'labels': labels}

        if self.transform:
            img_tvtensor , target = self.transform(img_tvtensor , target)

        return img_tvtensor , target

In [None]:

# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
from torchvision.utils import draw_bounding_boxes

def prdicion_with_real(predictions, img_tensor , target = None, threshold=0.5):
    """Plot a detector's predictions for one image, optionally beside the ground truth.

    Args:
        predictions: Sequence of per-image prediction dicts with ``'boxes'``,
            ``'labels'`` and ``'scores'`` tensors. Only the first entry is
            drawn; an empty sequence is treated as "no detections".
        img_tensor: Image tensor handed to ``draw_bounding_boxes``; the result
            is permuted with ``(1, 2, 0)`` for plotting.
        target: Optional dict with ``'boxes'`` and ``'labels'`` tensors. When
            given, a ground-truth panel (lime boxes) is drawn first.
        threshold: Minimum score a prediction needs in order to be drawn.

    Predictions whose label id is missing from ``coco_id_map`` are skipped.
    The list of drawn prediction captions is also printed.
    """
    # Imported locally so the function does not rely on another cell having
    # imported pyplot under the name `plt`.
    import matplotlib.pyplot as plt

    # NOTE(review): img is forwarded to draw_bounding_boxes unchanged; confirm
    # the installed torchvision accepts the dtype produced by the transform.
    img = img_tensor

    # One panel for predictions, plus one for the ground truth when available,
    # so no empty half-figure is left when target is None.
    n_panels = 1 if target is None else 2
    _, axes = plt.subplots(1, n_panels, figsize=(16, 10), squeeze=False)
    panels = iter(axes[0])

    if target is not None:
        # Ground truth boxes & labels
        real_boxes = target['boxes'].to('cpu') if 'boxes' in target else torch.zeros((0, 4))
        real_labels = target['labels'].to('cpu') if 'labels' in target else torch.zeros((0,), dtype=torch.int64)
        # Unknown ids fall back to the raw id instead of raising KeyError.
        real_labels_names = [
            coco_id_map.get(label_id, str(label_id)) for label_id in real_labels.tolist()
        ]

        img_with_boxes_actual = draw_bounding_boxes(
            image=img,
            boxes=real_boxes,
            labels=real_labels_names,
            colors='lime',
            width=3,
        )

        ax = next(panels)
        ax.imshow(img_with_boxes_actual.permute(1, 2, 0))
        ax.axis('off')
        ax.set_title('Ground Truth (lime)')

    # Predictions: filter the first image's detections in one vectorised step
    # (a single device-to-host transfer per tensor instead of one per element).
    if len(predictions) > 0:
        first_pred = predictions[0]
        all_boxes = first_pred['boxes'].detach().to('cpu')
        all_labels = first_pred['labels'].detach().to('cpu')
        all_scores = first_pred['scores'].detach().to('cpu')

        known_label = torch.tensor(
            [coco_id_map.get(label_id) is not None for label_id in all_labels.tolist()],
            dtype=torch.bool,
        )
        keep = (all_scores >= threshold) & known_label

        pred_boxes = all_boxes[keep]
        kept_labels = all_labels[keep].tolist()
        kept_scores = all_scores[keep].tolist()
    else:
        pred_boxes = torch.zeros((0, 4), dtype=torch.float)
        kept_labels, kept_scores = [], []

    pred_labels_name = [
        f"{coco_id_map[label_id]} {score*100:.0f}%"
        for label_id, score in zip(kept_labels, kept_scores)
    ]

    print(pred_labels_name)

    img_with_boxes_pred = draw_bounding_boxes(
        image=img,
        boxes=pred_boxes,
        labels=pred_labels_name,
        colors='red',
        width=3,
    )

    ax = next(panels)
    ax.imshow(img_with_boxes_pred.permute(1, 2, 0))
    ax.axis('off')
    ax.set_title('Predictions (red, with scores)')

    plt.show()

In [None]:
import torchvision
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

# Faster R-CNN (ResNet-50 FPN backbone) initialised with the COCO_V1 weights,
# moved to the selected device.
detector_weights = FasterRCNN_ResNet50_FPN_Weights.COCO_V1
model = fasterrcnn_resnet50_fpn(weights=detector_weights)
model.to(device)

In [None]:
import matplotlib.pyplot as plt
import random


# Sanity check of the pretrained detector: run it on a few training images and
# show the ground truth next to the predictions.
PREVIEW_COUNT = 5
PREVIEW_SEED = 42  # fixed seed so the same images are shown on every run

# A dedicated Random instance keeps the global random state untouched; the
# sample size is capped so a short path list does not raise ValueError.
preview_paths = random.Random(PREVIEW_SEED).sample(
    train_dir_path, min(PREVIEW_COUNT, len(train_dir_path))
)
train_dataset_plot = detectionDatasetCustom(preview_paths, test_transform)

# Evaluation mode only needs to be set once, not once per image.
model.eval()
for img_tensor, target in train_dataset_plot:
    with torch.inference_mode():
        # The model takes a batch, so a leading batch dimension is added.
        predictions = model(img_tensor.unsqueeze(dim = 0).to(device))
    prdicion_with_real( predictions , img_tensor, target)

## Detecting objects in a randomly downloaded image

In [None]:
import requests
from pathlib import Path

# Download one sample image into ./Img/my_img.jpg
IMAGE_URL = 'https://images.squarespace-cdn.com/content/v1/574512d92eeb81676262d877/1652159560047-7ECWQB6E0GRW2WXVKLKZ/LosAngeles2022-88.jpg?format=2500w'
DOWNLOAD_TIMEOUT_S = 30  # fail instead of hanging the notebook forever

downloaded_img = Path('Img')
downloaded_img.mkdir(parents = True, exist_ok= True)

img_path = downloaded_img /'my_img.jpg'

# Fetch first and check the HTTP status, so a failed request neither leaves an
# empty file behind nor saves an error page under a .jpg name.
response = requests.get(IMAGE_URL, timeout=DOWNLOAD_TIMEOUT_S)
response.raise_for_status()
img_path.write_bytes(response.content)

In [None]:
from PIL import Image
# Open the downloaded file, convert it to RGB and show it as the cell output.
downloaded_image = Image.open(img_path)
img = downloaded_image.convert('RGB')
img


In [None]:

# Preprocess the downloaded image the same way as the dataset samples.
img_transformed = test_transform(img)

# Run the detector on a batch of one and plot the predictions (no ground truth here).
model.eval()
with torch.inference_mode():
    image_batch = torch.unsqueeze(img_transformed, 0).to(device)
    pred = model(image_batch)
prdicion_with_real(pred , img_transformed)
