# **1. DATASET DOWNLOAD**

In [8]:
# @title **1.1 Dataset download and extraction**

import os
import gdown

%cd /content

dataset_dir = "/content/datasets/linemod"  # absolute path

# Check if dataset is already present
if os.path.exists(dataset_dir):
    print("Dataset ready to use")
else:
    # Create dataset directory if not exists
    !mkdir -p datasets/linemod/
    %cd datasets/linemod/

    # Download dataset zip from Google Drive
    !gdown --fuzzy https://drive.google.com/file/d/1qQ8ZjUI6QauzFsiF8EpaaI2nKFWna_kQ/view?usp=drive_link -O Linemod_preprocessed.zip
    !unzip Linemod_preprocessed.zip

    print("Dataset downloaded and extracted")


/content
Dataset ready to use


# **2. CUSTOM DATASET**

In [16]:
# @title **2.1 Custom dataset #1 code - Single object loader**

import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import yaml
import os

class LineModDetectionDataset(Dataset):
    """Bounding-box dataset for a single LineMOD object folder.

    Each sample pairs one RGB image from ``img_dir`` with one annotation taken
    from the ``gt.yml`` file at ``gt_path``.
    """

    def __init__(self, img_dir, gt_path, transform=None, obj_id=None):
        """
        Args:
            img_dir (string): Path to the folder containing images (e.g. '.../data/01/rgb')
            gt_path (string): Full path to the gt.yml file (e.g. '.../data/01/gt.yml')
            transform (callable, optional): Optional transform to be applied on the image.
            obj_id (int, optional): When given, the annotation whose 'obj_id' equals this
                value is used for every image, and images without such an annotation are
                left out. When None (default), the first annotation of each image is used.

        Raises:
            FileNotFoundError: If ``gt_path`` does not exist.
        """
        self.img_dir = img_dir
        self.transform = transform
        self.obj_id = None if obj_id is None else int(obj_id)

        # 1. Load the Ground Truth
        if not os.path.exists(gt_path):
            raise FileNotFoundError(f"gt.yml not found at {gt_path}")

        with open(gt_path, 'r') as f:
            # safe_load returns None for an empty file; treat that as "no annotations"
            raw_gt = yaml.safe_load(f) or {}

        # Force keys to be integers so they can be zero-padded into filenames
        self.gt_data = {int(k): v for k, v in raw_gt.items()}

        # 2. Create a sorted list of valid Image IDs. Images with no usable
        # annotation are excluded here so that __getitem__ cannot fail on them.
        self.image_ids = sorted(
            img_id
            for img_id, annotations in self.gt_data.items()
            if self._select_annotation(annotations) is not None
        )

    def _select_annotation(self, annotations):
        """Return the annotation to use for one image, or None if there is none."""
        if not annotations:
            return None
        if self.obj_id is None:
            return annotations[0]
        for annotation in annotations:
            if int(annotation['obj_id']) == self.obj_id:
                return annotation
        return None

    def __len__(self):
        """Return the total number of samples."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Load the ``idx``-th sample.

        Returns:
            dict with keys 'image_id', 'image', 'bbox' and 'obj_id'.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # A. Determine Image ID
        img_id = self.image_ids[idx]

        # B. Load Image. Filenames are like '0000.png', '0001.png':
        # ':04d' zero-pads the image ID to 4 digits.
        img_path = os.path.join(self.img_dir, f"{img_id:04d}.png")

        # cv2.imread signals failure by returning None instead of raising
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"Image not found: {img_path}")

        # OpenCV loads BGR; convert to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # C. Get Bounding Box Label as NumPy Array
        # bbox -> [x_top_left, y_top_left, width, height]
        obj_data = self._select_annotation(self.gt_data[img_id])
        bbox = np.array(obj_data['obj_bb'], dtype=np.float32)

        # D. Apply Transforms
        if self.transform:
            # Any tensor conversion is expected to happen inside the transform chain
            image = self.transform(image)
        else:
            # Default conversion: HWC uint8 -> CHW float in [0, 1]
            image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

        # E. Return the Python Dictionary of the sample
        return {
            'image_id': img_id,                  # Integer
            'image': image,                      # Without a transform: float tensor [3, Height, Width]
            'bbox': bbox,                        # NumPy Array -> [x_top_left, y_top_left, width, height]
            'obj_id': int(obj_data['obj_id'])    # Integer
        }

In [17]:
# @title **2.2 Dataset instance**

# Locations of the RGB frames and of the annotation file for object folder 01
path_to_images = "/content/datasets/linemod/Linemod_preprocessed/data/01/rgb"
path_to_gt = "/content/datasets/linemod/Linemod_preprocessed/data/01/gt.yml"

# Build the single-object dataset; no transform is passed, so the class uses
# its built-in image-to-tensor conversion
train_dataset = LineModDetectionDataset(path_to_images, path_to_gt)

# Wrap it in a loader that yields shuffled mini-batches of four samples
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=4)

print(f"Successfully loaded {len(train_dataset)} images.")

Successfully loaded 1236 images.


In [18]:
# @title **2.3 Dataset test**

# Fetch the first sample and print each of its fields on its own line
sample = train_dataset[0]
print("\n".join([
    f"ImageID:\t {sample['image_id']}",
    f"ObjectID:\t {sample['obj_id']}",
    f"Image Shape:\t {sample['image'].shape}",
    f"Bounding Box:\t {sample['bbox']}",
]))

ImageID:	 0
ObjectID:	 1
Image Shape:	 torch.Size([3, 480, 640])
Bounding Box:	 [244. 150.  44.  58.]


In [22]:
# @title **2.4 Custom dataset #2 code - Multi objects loader**

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import cv2
import numpy as np
import yaml
import os

class MultiObjectLineModDataset(Dataset):
    """Bounding-box dataset spanning every object folder found under ``root_dir``.

    The constructor scans the folders '01' .. '15', reads each folder's
    ``gt.yml`` and records one sample per annotated image. Images are only
    read from disk in ``__getitem__``.
    """

    def __init__(self, root_dir, transform=None):
        """
        Args:
            root_dir (string): Path to the main 'data' folder containing subfolders '01', '02', etc.
            transform (callable, optional): Optional transform applied to the image.
        """
        self.root_dir = root_dir
        self.transform = transform

        # This list will store metadata for EVERY image found in ALL folders
        # Format: [ {'path': '/.../01/rgb/0000.png', 'bbox': [...], 'obj_id': 1, 'original_img_id': 0}, ... ]
        self.all_samples = []

        print("Scanning dataset folders...")

        # 1. Loop through possible object folders (1 to 15)
        for folder_obj_id in range(1, 16):
            folder_name = f"{folder_obj_id:02d}"  # Converts 1 -> "01", 2 -> "02"
            folder_path = os.path.join(root_dir, folder_name)

            # Folders that do not exist are skipped silently
            if not os.path.exists(folder_path):
                continue

            print(f"Loading Object {folder_name}...")

            # 2. Load the GT file for THIS specific folder
            gt_path = os.path.join(folder_path, "gt.yml")
            if not os.path.exists(gt_path):
                print(f"Warning: gt.yml missing in {folder_name}")
                continue

            with open(gt_path, 'r') as f:
                # safe_load returns None for an empty file; treat that as "no annotations"
                folder_gt = yaml.safe_load(f) or {}

            # 3. Process all images in this folder
            # The keys of the GT file correspond to image IDs
            skipped = 0
            for raw_img_id, objects in folder_gt.items():
                # Force integer IDs so the ':04d' filename format below always works
                img_id = int(raw_img_id)

                # The annotation is a list and may hold more than one object:
                # pick the entry that belongs to this folder's object.
                obj_data = self._pick_annotation(objects, folder_obj_id)
                if obj_data is None:
                    skipped += 1
                    continue

                # Construct full image path
                img_filename = f"{img_id:04d}.png"
                img_full_path = os.path.join(folder_path, "rgb", img_filename)

                # Store everything needed to load this sample later
                self.all_samples.append({
                    'path': img_full_path,
                    'bbox': np.array(obj_data['obj_bb'], dtype=np.float32),
                    'obj_id': int(obj_data['obj_id']),
                    'original_img_id': img_id  # Useful for debugging
                })

            if skipped:
                print(f"Warning: {skipped} image(s) without annotations skipped in {folder_name}")

    @staticmethod
    def _pick_annotation(objects, folder_obj_id):
        """Return the annotation matching ``folder_obj_id``, else the first one, else None."""
        if not objects:
            return None
        for candidate in objects:
            if int(candidate['obj_id']) == folder_obj_id:
                return candidate
        # No entry matches the folder number: keep the previous behaviour
        return objects[0]

    def __len__(self):
        """Return the number of samples collected from all folders."""
        return len(self.all_samples)

    def __getitem__(self, idx):
        """Load the image of the ``idx``-th sample and return it with its metadata.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # 1. Retrieve the metadata we stored in __init__
        sample_info = self.all_samples[idx]

        # 2. Load Image from the stored full path
        # (cv2.imread signals failure by returning None instead of raising)
        image = cv2.imread(sample_info['path'])
        if image is None:
            raise FileNotFoundError(f"Image read error: {sample_info['path']}")

        # OpenCV loads BGR; convert to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # 3. Apply Transforms
        if self.transform:
            image = self.transform(image)
        else:
            # Default conversion: HWC uint8 -> CHW float in [0, 1]
            image = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0

        # 4. Return
        return {
            'image': image,                             # Transform output, or float tensor [3, Height, Width]
            'bbox': sample_info['bbox'],                # NumPy Array -> [x_top_left, y_top_left, width, height]
            'obj_id': sample_info['obj_id'],            # The class id stored in the annotation
            'image_id': sample_info['original_img_id'], # The image id inside its object folder
            'image_path': sample_info['path']           # The file location
        }


In [21]:
# @title **2.5 Multiobject dataset instance**

dataset_root = "/content/datasets/linemod/Linemod_preprocessed/data"

# 1. Preprocessing pipeline, applied to every image in order:
#    - ToTensor():  NumPy (H,W,C) 0-255 -> Tensor (C,H,W) 0.0-1.0
#    - Normalize(): per-channel (x - mean) / std with the standard ImageNet values
data_transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# 2. Dataset covering every object folder under dataset_root
full_train_dataset = MultiObjectLineModDataset(dataset_root, transform=data_transform)

# 3. Loader: shuffled batches of four, fetched by two worker processes
train_loader = DataLoader(
    dataset=full_train_dataset,
    batch_size=4,
    shuffle=True,
    num_workers=2,
)

# 4. Report how many samples were indexed
print(f"Successfully loaded {len(full_train_dataset)} images.")

Scanning dataset folders...
Loading Object 01...
Loading Object 02...
Loading Object 04...
Loading Object 05...
Loading Object 06...
Loading Object 08...
Loading Object 09...
Loading Object 10...
Loading Object 11...
Loading Object 12...
Loading Object 13...
Loading Object 14...
Loading Object 15...
Total images loaded: 15800
Successfully loaded 15800 images.


In [40]:
# @title **2.6 Multiobject dataset test**

# Draw a random index. torch.randint returns a 1-element tensor; .item() turns
# it into a plain Python int so the dataset is indexed with a real integer
# instead of relying on the tensor's implicit integer conversion.
random_sample_id = torch.randint(low=0, high=len(full_train_dataset), size=(1,)).item()

# Get one sample to check
sample = full_train_dataset[random_sample_id]
print(f"ImageID:\t {sample['image_id']}")
print(f"ObjectID:\t {sample['obj_id']}")
print(f"Image Shape:\t {sample['image'].shape}")
print(f"Bounding Box:\t {sample['bbox']}")
print(f"Image path:\t {sample['image_path']}")

ImageID:	 227
ObjectID:	 15
Image Shape:	 torch.Size([3, 480, 640])
Bounding Box:	 [298. 156.  84. 112.]
Image path:	 /content/datasets/linemod/Linemod_preprocessed/data/15/rgb/0227.png
