In [1]:
import torch

print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
    print("Torch version:", torch.__version__)


CUDA available: True
GPU: NVIDIA GeForce GTX 1080
Torch version: 2.1.2+cu118


In [2]:
import sys
import os

# Add the root folder to path (assuming /Notebooks/ is in /DL_Final_Project/)
project_root = os.path.abspath("..")
sys.path.append(project_root)

In [None]:
# # Cell 1: Visualize training losses
# import json
# import numpy as np
# import matplotlib.pyplot as plt

# print("Device", os.environ.get("CUDA_VISIBLE_DEVICES", "CPU"))

# extension = '4y'

# # Load both JSON files.
# with open(f"../trans_loss_{extension}", "r") as f:
#     trans_losses = json.load(f)

# with open(f"../rot_loss_{extension}", "r") as f:
#     rot_losses = json.load(f)

# episodes = np.arange(1, len(trans_losses) + 1)

# plt.figure(figsize=(10, 4))
# plt.plot(rot_losses, label="Rotation Loss", color="orange")
# plt.plot(trans_losses, label="Translation Loss", color="blue")
# plt.xlabel("Iteration")
# plt.ylabel("Loss")
# plt.title("Training Loss Curves")
# plt.legend()
# plt.grid(True)
# plt.show()


Device CPU


FileNotFoundError: [Errno 2] No such file or directory: '../trans_loss_4y'

In [4]:
# --- Imports ---

import json
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
from torch.autograd import Variable
from tqdm import tqdm
import os
import yaml
import sys


In [56]:
 # --- Config ---

from Classes.Dataset.IPDDataset_render import IPDDatasetMounted
from Models.PoseEstimator.PoseEstimation import PoseEstimator


config_path = "../Config/config_fusenet.yaml"

with open(config_path, "r") as f:
    config = yaml.safe_load(f)

remote_base_url = f"../{config['dataset']['remote_base_url']}"
cam_ids = config["dataset"]["cam_ids"]
modalities = config["dataset"].get("modality", ["rgb", "depth"])

# For seen CAD models, use the training split and same allowed objects as in training.
train_scene_ids = {f"{i:06d}" for i in range(0, 25)}
train_obj_ids = {0, 8, 18, 19, 20}
batch_size = 1
num_workers = 4

dataset = IPDDatasetMounted(
    remote_base_url,
    cam_ids,
    modalities,
    split=config["dataset"].get("val_split", "train"),  # using training split
    # allowed_scene_ids=train_scene_ids,
    # allowed_obj_ids=train_obj_ids
)

sensory_channels = {mod: 1 for mod in modalities}
encoder_type = config["training"].get("encoder", "fusenet")
fusion_type = config["training"].get("fusion", "concat")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
renderer_config = config.get("renderer", {"width": 640, "height": 480, "device": device})

model = PoseEstimator(
    sensory_channels, renderer_config,
    encoder_type=encoder_type,
    fusion_type=fusion_type,
    n_views=len(cam_ids)
).to(device)

model_path = f"../weights/model_{encoder_type}_{fusion_type}.pt"

In [54]:
def move_sample_to_device(sample, device):
    X, Y = sample["X"], sample["Y"]
    # Move each view data to the device
    for view in X["views"]:
        for k in view:
            view[k] = view[k].to(device)
    X["K"] = X["K"].to(device)
    # Move CAD model tensors
    for item in X["available_cads"]:
        item["verts"] = item["verts"].to(device)
        item["faces"] = item["faces"].to(device)
    # Move ground-truth pose tensors
    for pose in Y["gt_poses"]:
        pose["R"] = pose["R"].to(device)
        pose["t"] = pose["t"].to(device)
    return sample

In [55]:
import os
import cv2
import torch
import numpy as np
from torchvision.transforms import ToPILImage
import matplotlib.pyplot as plt

from Models.helpers import quaternion_to_matrix
from Models.PoseEstimator.PoseEstimation import PoseEstimator
from Metrics.visualization import draw_pose_axes, draw_bbox_from_pose
from Models.KaolinRenderer import KaolinRenderer

# Load model weights (using your predefined path)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Loading model weights from {model_path}...")
if os.path.exists(model_path):
    print("Loading model weights...")
    # model.load_state_dict(torch.load(model_path, map_location=device))
model.eval()

# --- Load a sample from dataset ---
# Assuming dataset is defined somewhere (e.g., `dataset = IPDDatasetMounted(...)`)
sample = dataset[4]  # Choose a sample index
sample = move_sample_to_device(sample, device)
X, Y = sample["X"], sample["Y"]

# Count the number of ground truth poses per sample
gt_poses = Y["gt_poses"]

# Build a lookup table for available CAD models (seen during training)
cad_model_lookup = {
    item["obj_id"]: {"verts": item["verts"], "faces": item["faces"]}
    for item in X["available_cads"]
}

# Perform inference to get detections and computed poses
# with torch.no_grad():
    # detections, _, _ = model(
    #     x_dict_views=X["views"],
    #     K_list=X["K"],
    #     cad_model_lookup=cad_model_lookup
    # )

# --- Visualize prediction vs ground truth ---
# Choose the render mode: "axes" or "bbox"
render_mode = "bbox"

# Assume the camera intrinsics (K) is stored in X["K"][0]
K = X["K"][0].cpu().numpy()

# Get the number of views available in X["views"]
num_views = len(X["views"])

# Create subplots to display all views side by side
fig, axes = plt.subplots(1, num_views, figsize=(5 * num_views, 5))
# If there is only one view, wrap axes in a list
if num_views == 1:
    axes = [axes]

# Loop through each view and render predictions on the image
for idx, view in enumerate(X["views"]):
    # Convert the image tensor to a PIL image, then to a NumPy array in RGB
    tensor = view["rgb"][0].cpu()
    img_pil = ToPILImage()(tensor)
    img_rgb = np.array(img_pil.convert("RGB"))
    
    # Copy image to draw on it
    vis = img_rgb.copy()

    # Draw predicted poses on each view
    for i, det in enumerate(detections):
        # Get the predicted pose (convert quaternion to rotation matrix)
        R_pred = quaternion_to_matrix(det["quat"].unsqueeze(0))[0].cpu().numpy()
        t_pred = det["trans"].cpu().numpy()
        
        if render_mode == "axes":
            vis = draw_pose_axes(vis, R_pred, t_pred, K, label=f"Pred_{i}", color=(0, 255, 0))
        elif render_mode == "bbox":
            vis = draw_bbox_from_pose(vis, R_pred, t_pred, K, label=f"Pred_{i}", color=(0, 255, 0))
    
    # Optionally, add ground truth information here if needed

    axes[idx].imshow(vis)
    axes[idx].set_title(f"View {idx}")
    axes[idx].axis("off")

plt.tight_layout()
plt.show()


Loading model weights from ../weights/model_fusenet_concat.pt...
Loading model weights...


TypeError: 'NoneType' object is not subscriptable