# 6D Pose Estimation

## Set up the project

We will work with a portion of the LineMOD dataset, which you can find here: https://drive.google.com/drive/folders/19ivHpaKm9dOrr12fzC8IDFczWRPFxho7

Set a few flags that conditionally enable parts of the notebook. Before running it, download the project and change directory to ```6DPose_Estimation```.

In [None]:
# Set to True when the project lives on Google Drive (Colab); the flag gates
# the Drive mount below and the copy to /content in the dataset section.
MOUNT_DRIVE = False
# NOTE(review): COMET_ML is never read in the visible cells -- the Comet
# experiment in the pose-estimation cell is started unconditionally. Confirm
# whether this flag is meant to guard that code.
COMET_ML = False

In [None]:
if MOUNT_DRIVE:
    # Colab-only import, kept inside the guard so it is not attempted when the flag is off.
    from google.colab import drive
    # Mount Drive under /content/drive (remounting if it is already mounted).
    drive.mount('/content/drive', force_remount=True)
    # Continue from the project folder stored on Drive.
    %cd /content/drive/MyDrive/6DPose_Estimation/

Install PyTorch and the PyTorch Geometric extension packages

In [None]:
!pip install torch torchvision torchaudio

In [None]:
%%capture
# NOTE: %%capture must remain the first line of the cell; it captures the pip output.
import os
import torch

# Export the installed torch version as TORCH so that the wheel index URLs
# below point at builds matching it. Keep comments off the %env line itself:
# anything after the magic would become part of the value.
%env TORCH=$torch.__version__
!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-cluster -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-spline-conv -f https://data.pyg.org/whl/torch-${TORCH}.html

Install the remaining packages from ```requirements.txt```; you may need to restart the runtime before continuing

In [None]:
!pip install -r ./requirements.txt
print("Restart runtime")

In [None]:
import os
import yaml
import torch
import torchvision
import open3d as o3d
import itertools
import shutil
import ultralytics
from torch.utils.data import Dataset
from torch import nn, optim
import torch.nn.functional as F
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import matplotlib.patches as patches
import wandb
from scipy.spatial.transform import Rotation as R
from torchvision import models
import cv2
from torch.optim import Adam
import quaternion
from torch.optim.lr_scheduler import StepLR, ReduceLROnPlateau
from tqdm import tqdm
from torch.cuda.amp import GradScaler, autocast
from ultralytics import YOLO
from torchvision.transforms import v2
import trimesh

# import PyTorch Geometric (requires the installation above and a runtime restart)
import torch_geometric
from torch import Tensor
from torch_geometric.nn import knn_interpolate, MessagePassing
from torch_geometric.nn.pool import fps, radius

# import comet-ml
import comet_ml
from comet_ml import Experiment
from comet_ml.integration.pytorch import watch

from utils.data_exploration import load_image
from utils.installation_checker import check_torch_geometric

from data.CustomDatasetPose import IMG_WIDTH, IMG_HEIGHT

# check if everything works
check_torch_geometric()

Set seed

In [None]:
from utils.init import set_seed

set_seed(42)

Set device

In [None]:
from utils.init import set_device

device = set_device()

## Download dataset

In [None]:
# Step 1: Download the dataset (LineMOD)
# Download LineMOD dataset
# create directory structure without errors
!mkdir -p datasets/linemod/
%cd datasets/linemod/

In [None]:
!mkdir -p DenseFusion/
%cd DenseFusion/

In [None]:
# Download dataset (which includes a portion of the LineMOD dataset)
!gdown --folder "https://drive.google.com/drive/folders/19ivHpaKm9dOrr12fzC8IDFczWRPFxho7"

In [None]:
if MOUNT_DRIVE:
    !cp /content/drive/MyDrive/6DPose_Estimation /content/ # move to content for faster access to files
    %cd /content/6DPose_Estimation/datasets/linemod/DenseFusion

!unzip Linemod_preprocessed.zip
!rm Linemod_preprocessed.zip
%cd ../../../ # change directory to 6D_pose_estimation

Get working directory

In [None]:
# Absolute path of the current working directory (the project root at this
# point of the notebook). Later cells build dataset and checkpoint paths from
# it. os.getcwd() returns a plain str without spawning a shell.
import os

path = os.getcwd()

## Modify Dataset

Copy ground truth files to ```Linemod_preprocessed```

In [None]:
from utils.data_exploration import get_class_names
from utils.preprocessing import copy_gt_file, change_02gt, quaternion_gt

folder_names = get_class_names()
copy_gt_file(folder_names)

Change ```02_gt.yml``` to take only one object

In [None]:
change_02gt("./datasets/linemod/DenseFusion/Linemod_preprocessed/02_gt.yml")

Add quaternion

In [None]:
quaternion_gt("./datasets/linemod/DenseFusion/Linemod_preprocessed")

## Data Exploration

Load an image

In [None]:
load_image(label=1, object=0)

Check whether the camera intrinsics are the same for all images

In [None]:
root_path = "./datasets/linemod/DenseFusion/Linemod_preprocessed/data"

from utils.data_exploration import check_cam_K_equal

cam_K = check_cam_K_equal(root_path)

## Define CustomDataset

In [None]:
from data.CustomDatasetPose import CustomDatasetPose

dataset_root = "./datasets/linemod/DenseFusion/Linemod_preprocessed/"

# The training split is created first: the mean/std it returns are passed on
# to the validation and test splits.
train_dataset = CustomDatasetPose(dataset_root, split="train", device=device, cam_K=cam_K)
image_mean, image_std = train_dataset.get_image_mean_std()
print(f"Training samples: {len(train_dataset)}")

# Keyword arguments shared by the two held-out splits.
_heldout_kwargs = dict(device=device, cam_K=cam_K, img_mean=image_mean, img_std=image_std)

val_dataset = CustomDatasetPose(dataset_root, split="validation", **_heldout_kwargs)
print(f"Validation samples: {len(val_dataset)}")

test_dataset = CustomDatasetPose(dataset_root, split="test", **_heldout_kwargs)
print(f"Testing samples: {len(test_dataset)}")

## Data Preprocessing

Structure the data for YOLO as follows:
```
datasets/
├── data.yaml
│
├── train/
│   ├── images/
│   │
│   └── labels/
│  
├── val/
│
└── test/
```

In [None]:
# divide the dataset into training, validation and testing set
train_samples = train_dataset.get_samples_id()
validation_samples = val_dataset.get_samples_id()
test_samples = test_dataset.get_samples_id() # test folder is optional for training YOLO

Create a new folder containing everything YOLO needs: only the RGB image and a text file with the label and bounding box are required.
The ```Linemod_preprocessed``` folder is not removed, because it contains the translation and rotation information needed for pose estimation (though not for the object detection model).

The working directory is ```6DPose_Estimation```.

Create YOLO yaml

In [None]:
from utils.preprocessing import create_YOLO_yaml, create_dataset_YOLO

number_classes, class_names = create_YOLO_yaml(path, folder_names)

While creating the folder structure, each class id has to be replaced by the index of that class in the array written in ```data.yaml```

In [None]:
# create a dictionary to have easily access to the index
index_dict = dict()
for index, el in enumerate(class_names):
    index_dict[int(el)] = index

Create the folders. Note that each image may contain multiple objects. For instance in ```data/02/gt.yml``` for one image there are multiple objects, but just consider the object of that class

In [None]:
counter_df = create_dataset_YOLO(number_classes, train_samples, validation_samples, test_samples, index_dict, path, train_dataset)

Visualize dataset distribution

In [None]:
from utils.data_exploration import load_dataset_distribution

load_dataset_distribution(counter_df, index_dict, number_classes)

### Visualize data

Visualize depth image

In [None]:
from utils.data_exploration import load_depth_image

folder = "02"
object_name = "0101"
img = load_depth_image(f"./datasets/linemod/DenseFusion/Linemod_preprocessed/data/{folder}/depth/{object_name}.png")

Plot the patch of the first object in the image; it is read from the ground-truth file, which may also contain multiple objects for one image

In [None]:
from utils.data_exploration import load_depth_patch

load_depth_patch(path, folder, object_name, img)

Get data loader

In [None]:
# Loaders with a small batch size; only the training split is shuffled.
_small_batch = 4
train_loader = DataLoader(train_dataset, batch_size=_small_batch, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_small_batch, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=_small_batch, shuffle=False)

# Report the length of every loader, one per line.
print(
    f"Training loader: {len(train_loader)}",
    f"Validation loader: {len(val_loader)}",
    f"Test loader: {len(test_loader)}",
    sep="\n",
)

Plot one batch of data

In [None]:
from utils.data_exploration import plot_batch_data

plot_batch_data(train_loader, val_loader, test_loader)

## Training Object Detection model

In [None]:
from train_YOLO import train_YOLO

# Settings for the YOLO run; epochs, batch_size and IMG_SIZE are reused by the
# evaluation cell that follows.
epochs = 50
batch_size = 64
IMG_SIZE = 640

# NOTE(review): evaluate_YOLO in the next cell is called as
# (path, epochs, batch_size, IMG_SIZE, device), i.e. with device and IMG_SIZE
# in the opposite order to this call -- confirm both signatures.
train_YOLO(path, epochs, batch_size, device, IMG_SIZE) # train model and save it to checkpoints

Validate model on test set

In [None]:
from evaluate_YOLO import evaluate_YOLO

evaluate_YOLO(path, epochs, batch_size, IMG_SIZE, device)

## Pose Estimator Module

In [None]:
from models.PosePredictorModel import PosePredictorModel
from PoseEstimationTrainer import PoseEstimationTrainer
from models.ADDMetric import ADDMetric
from utils.pose_plot import plotPose

In [None]:
# Settings of this run. The same dict is logged to Comet and handed to the
# trainer and to the ADD metric further down in this cell.
config = dict(
    project_name="baseline_quaternion",
    experiment_name="mse_loss_step_optim",
    batch_size=32,
    num_epochs=25,
    learning_rate=1e-4,
    weight_decay=1e-5,
    backbone="resnet18",
    hidden_dim=512,
    img_size=224,
    alpha=1.0,
    beta=1.0,
    add_threshold=0.1,
    symmetric_objects=["10"],
    name_saved_file="mse_loss_step",
)

MODELS_DIR = "./datasets/linemod/DenseFusion/Linemod_preprocessed/models"
# NOTE(review): DEVICE is derived here independently of `device` (from
# set_device()). The model is moved to `device` while ADDMetric receives
# DEVICE -- confirm the two always agree.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Using device: {DEVICE}")
print(f"Configuration: {config}")

# Dataloader: all three splits use the configured batch size; only training is shuffled.
_pose_batch_size = config["batch_size"]
train_loader = DataLoader(train_dataset, batch_size=_pose_batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=_pose_batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=_pose_batch_size, shuffle=False)


# # --------------------------
# from torch.utils.data import DataLoader, Subset

# # Number of samples in subset
# subset_size = 10
# subset_indices = list(range(subset_size))

# # create subset of original dataset
# train_subset = Subset(train_loader.dataset, subset_indices)
# val_subset = Subset(val_loader.dataset, subset_indices)
# test_subset = Subset(test_loader.dataset, subset_indices)

# # create new DataLoader from subset
# train_loader = DataLoader(train_subset, batch_size=config["batch_size"], shuffle=True)
# val_loader = DataLoader(val_subset, batch_size=config["batch_size"], shuffle=False)
# test_loader = DataLoader(test_subset, batch_size=config["batch_size"], shuffle=False)
# # ----------------------


# Model: build it from the configured backbone and hidden size, then move it to the device.
model = PosePredictorModel(backbone=config["backbone"], hidden_dim=config["hidden_dim"]).to(device)

# Walk the parameters once, remembering which of them require gradients.
_param_sizes = [(param.numel(), param.requires_grad) for param in model.parameters()]
total_params = sum(size for size, _ in _param_sizes)
trainable_params = sum(size for size, needs_grad in _param_sizes if needs_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

# Start the Comet experiment. "<YOUR_API>" is a placeholder to be replaced
# with a real API key before running.
_experiment_config = comet_ml.ExperimentConfig(
    name=config["experiment_name"],
    parse_args=False,
)
experiment = comet_ml.start(
    api_key="<YOUR_API>",
    project_name=config['project_name'],
    experiment_config=_experiment_config,
)

experiment.log_parameters(config)

# Train for the configured number of epochs.
trainer = PoseEstimationTrainer(
    model,
    train_loader,
    val_loader,
    device=device,
    config=config,
    experiment=experiment,
)
trainer.train(num_epochs=config["num_epochs"])

# Reload the weights stored under checkpoints/baseline for this configuration
# and switch the model to evaluation mode.
_checkpoint_file = f"{path}/checkpoints/baseline/{config['name_saved_file']}_{config['backbone']}_bs{config['batch_size']}.pth"
checkpoint = torch.load(_checkpoint_file, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Evaluator for the ADD metric on the test loader.
_add_metric_kwargs = dict(
    model=model,
    class_names=class_names,
    test_loader=test_loader,
    models_3D_dir=MODELS_DIR,
    symmetric_objects=config["symmetric_objects"],
    device=DEVICE,
    experiment=experiment,
    config=config,
)
add_metric = ADDMetric(**_add_metric_kwargs)

print("Evaluating with ADD metric...")
add_score, accuracy, detailed_results = add_metric.evaluate_model_with_add()


print(f"\nFinal Results:\nADD Score: {add_score:.4f}\nAccuracy: {accuracy:.4f}")

# For every test batch, run the model and plot the first sample of the batch:
# ground-truth and predicted pose are passed to plotPose together with the
# Comet experiment and the camera intrinsics.
for batch in test_loader:
    images = batch['rgb'].to(device)
    if len(images) == 0:
        # Nothing to plot for an empty batch.
        continue

    gt_trans = batch['translation']
    gt_rot = batch['rotation']
    sample_id = batch["sample_id"]

    with torch.no_grad():
        pred_trans, pred_rot = model(images)

        # sample_id[0] holds (folder id, image id) of the first sample; they
        # are zero-padded to 2 and 4 digits to rebuild the RGB image path.
        img_path = f"{path}/datasets/linemod/DenseFusion/Linemod_preprocessed/data/{sample_id[0][0]:02d}/rgb/{sample_id[0][1]:04d}.png"

        plotPose(img_path, gt_trans[0], gt_rot[0], pred_trans[0], pred_rot[0], experiment, cam_K)
print(f"Plot saved on comet_ml in project: {config['project_name']}, experiment: {config['experiment_name']}")

experiment.end()

## Extension

Compare images in ```rgb``` and ```mask``` and analyze if there are images that are only in one of the folders

In [None]:
from utils.data_exploration import compare_rgb_mask_in_data

compare_rgb_mask_in_data("./datasets/linemod/DenseFusion/Linemod_preprocessed/data/")

Create dataset

In [None]:
from data.CustomDataset import CustomDataset

dataset_root = "./datasets/linemod/DenseFusion/Linemod_preprocessed/"

# Training split first; the mean/std it returns are forwarded to the other two splits.
train_dataset = CustomDataset(dataset_root, split='train', device=device, cam_K=cam_K)
image_mean, image_std = train_dataset.get_image_mean_std()
print(f'Training samples: {len(train_dataset)}')

# Arguments common to the validation and test splits.
_shared_split_kwargs = {
    "device": device,
    "cam_K": cam_K,
    "img_mean": image_mean,
    "img_std": image_std,
}

val_dataset = CustomDataset(dataset_root, split='validation', **_shared_split_kwargs)
print(f'Validation samples: {len(val_dataset)}')

test_dataset = CustomDataset(dataset_root, split='test', **_shared_split_kwargs)
print(f'Testing samples: {len(test_dataset)}')

Get dataloader

In [None]:
from data.CustomDataLoader import pointcloud_collate_fn

# Every loader uses batches of 16 and the point-cloud collate function;
# only the training loader is shuffled.
_loader_kwargs = dict(batch_size=16, collate_fn=pointcloud_collate_fn)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)