In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to the local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

## Imports

In [None]:
import os
# Expose only GPU index 1 to CUDA.
# NOTE(review): presumably this has to run before any CUDA-using library is
# imported by the cells below — confirm.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
from run_attachment import *

In [None]:
from evaluate import *

In [None]:
# Select the EGL platform for PyOpenGL; set here, before pyrender is imported
# in the next cell.
# NOTE(review): presumably needed for headless off-screen rendering — confirm.
os.environ['PYOPENGL_PLATFORM'] = 'egl'

In [None]:
# Import pyrender, retrying once if the first attempt fails.
# NOTE(review): the retry implies the first import can fail in this environment
# and succeed the second time — confirm whether that workaround is still needed.
try:
    import pyrender
except Exception as exc:  # not a bare except: KeyboardInterrupt/SystemExit must propagate
    # Report why the first attempt failed instead of hiding it.
    print(f'running it again ({type(exc).__name__}: {exc})')
    import pyrender

In [None]:
# OpenCV camera frame -> OpenGL camera frame as a homogeneous 4x4 matrix:
# x is kept, y and z are negated, i.e. diag(1, -1, -1, 1).
cvcam_in_glcam = np.diag([1, -1, -1, 1])

def render_mesh_at_pose(mesh, pose, K, H, W, zfar=100):
    """Render ``mesh`` at ``pose`` with an off-screen pyrender camera.

    The input mesh is not modified; a copy is transformed and rendered.

    Args:
        mesh: Mesh exposing ``copy()`` / ``apply_transform()`` and accepted by
            ``pyrender.Mesh.from_trimesh``.
        pose: 4x4 transform applied to the mesh before the OpenCV->OpenGL flip.
            NOTE(review): presumably object-in-camera in the OpenCV convention — confirm.
        K: Intrinsics matrix; fx, fy, cx, cy are read from K[0,0], K[1,1],
            K[0,2] and K[1,2].
        H: Output image height in pixels.
        W: Output image width in pixels.
        zfar: Far clipping distance of the camera (the near plane is fixed at 0.1).

    Returns:
        The ``(color, depth)`` pair produced by ``pyrender.OffscreenRenderer.render``.
    """
    # Move a copy of the mesh into the OpenGL camera frame.
    mesh_transformed = mesh.copy()
    mesh_transformed.apply_transform(cvcam_in_glcam @ pose)

    # Full white ambient light on a black background.
    scene = pyrender.Scene(ambient_light=[1., 1., 1.], bg_color=[0, 0, 0])

    # The camera sits at the origin; the pose is already baked into the mesh.
    camera = pyrender.IntrinsicsCamera(
        fx=K[0, 0], fy=K[1, 1],
        cx=K[0, 2], cy=K[1, 2],
        znear=0.1, zfar=zfar
    )
    scene.add(camera, pose=np.eye(4))

    pyrender_mesh = pyrender.Mesh.from_trimesh(mesh_transformed, smooth=False)
    scene.add(pyrender_mesh, pose=np.eye(4))

    renderer = pyrender.OffscreenRenderer(W, H)
    try:
        color, depth = renderer.render(scene)
    finally:
        # Always release the off-screen context, even when rendering fails.
        renderer.delete()

    return color, depth

def visualize_comparison(mesh, pose, scene_dir, i:int=0, root_dir:str='/Experiments/simonep01/ho3d'):
    """Show frame ``i`` of a scene next to a rendering of ``mesh`` at ``pose``.

    Draws three panels: the RGB frame, the rendered mesh, and a 60/40 blend of
    the rendering over the frame wherever the rendering is not pure black.

    Args:
        mesh: A mesh object, or a path string that is loaded with ``trimesh.load``.
        pose: Transform forwarded unchanged to ``render_mesh_at_pose``.
        scene_dir: Passed to ``Ho3dReader`` as ``video_dir``.
        i: Index of the frame to read from the reader.
        root_dir: Passed to ``Ho3dReader`` as ``root_dir``. Defaults to the
            dataset root that used to be hard-coded in this function.
    """
    reader = Ho3dReader(video_dir=scene_dir, root_dir=root_dir)
    rgb = reader.get_color(i)

    # Accept either a file path or an already loaded mesh.
    if isinstance(mesh, str):
        mesh = trimesh.load(mesh)

    H, W = rgb.shape[:2]
    # Render the mesh at the given pose (the depth output is not needed here).
    rendered, _ = render_mesh_at_pose(mesh, pose, reader.K, H, W)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    # Panel 1: the original frame.
    axes[0].imshow(rgb)
    axes[0].set_title('Original RGB Image')
    axes[0].axis('off')

    # Panel 2: the rendering alone.
    axes[1].imshow(rendered)
    axes[1].set_title('Rendered Mesh at GT Pose')
    axes[1].axis('off')

    # Panel 3: blend the rendering into the frame. Pixels whose rendered colour
    # sums to zero (pure black) are treated as background and left untouched.
    overlay = rgb.copy()
    rendered_mask = rendered.sum(axis=2) > 0
    overlay[rendered_mask] = rendered[rendered_mask] * 0.6 + rgb[rendered_mask] * 0.4
    axes[2].imshow(overlay)
    axes[2].set_title('Overlay (60% mesh, 40% RGB)')
    axes[2].axis('off')

    plt.tight_layout()
    plt.show()

In [None]:
def visualize_confidence_pointcloud(CMesh):
    """Build a plotly 3D scatter of ``CMesh.observed_positions`` coloured by confidence.

    Confidence bands and their colours:
        == 0           red
        (0, 0.25)      orange
        [0.25, 0.5)    yellow
        [0.5, 1)       blue
        >= 1           green
    A value that falls in none of the bands keeps the initial black.

    Returns:
        The plotly figure; its title lists how many points fall in each band.
    """
    import plotly.graph_objects as go

    conf = CMesh.confidence
    xyz = CMesh.observed_positions

    is_zero = conf == 0
    is_low = (conf > 0) & (conf < 0.25)
    is_mid = (conf >= 0.25) & (conf < 0.5)
    is_high = (conf >= 0.5) & (conf < 1)
    is_one = conf >= 1.0

    # One RGB row per point; the bands are mutually exclusive, so the painting
    # order does not affect the result.
    palette = np.zeros((len(conf), 3), dtype=np.uint8)
    for band, rgb in (
        (is_zero, (255, 0, 0)),      # red
        (is_low, (255, 125, 0)),     # orange
        (is_mid, (255, 255, 0)),     # yellow
        (is_high, (0, 0, 255)),      # blue
        (is_one, (0, 255, 0)),       # green
    ):
        palette[band] = rgb

    css_colors = ['rgb({},{},{})'.format(r, g, b) for r, g, b in palette]

    cloud = go.Scatter3d(
        x=xyz[:, 0],
        y=xyz[:, 1],
        z=xyz[:, 2],
        mode='markers',
        marker=dict(size=2, color=css_colors),
    )
    fig = go.Figure(data=[cloud])

    n_zero, n_low, n_mid = is_zero.sum(), is_low.sum(), is_mid.sum()
    n_high, n_one = is_high.sum(), is_one.sum()
    fig.update_layout(
        scene=dict(aspectmode='data'),
        title=f'Confidence: {n_zero} red (0), {n_low} orange (0-0.25), '
              f'{n_mid} yellow (0.25-0.5), {n_high} blue (0.5-1), {n_one} green (1)'
    )

    return fig

## First Frame

In [None]:
import shutil

# --- Run configuration ---
video_id = 'AP14'
mesh_file= f'/Experiments/simonep01/ho3d/first_frame_instantmeshes/{video_id}/mesh.obj'
test_scene_dir= f'/Experiments/simonep01/ho3d/evaluation/{video_id}'
est_refine_iter= 5          # iterations passed to est.register on the first frame
track_refine_iter= 2        # iterations passed to est.track_one on later frames
n_frames = 100              # only used here to name debug_dir; reassigned before the summary
debug= 0
debug_dir= f'debug/{video_id}_{n_frames}'
attach_every_n_frames= 5    # attach (and evaluate) on every n-th frame; <= 0 disables both

set_logging_format()
set_seed(0)

# Start from an empty debug directory. This is done in Python rather than via
# os.system so that a failure raises instead of being silently ignored.
if os.path.isdir(debug_dir):
    shutil.rmtree(debug_dir)
os.makedirs(f'{debug_dir}/track_vis', exist_ok=True)
os.makedirs(f'{debug_dir}/ob_in_cam', exist_ok=True)

# Mirror INFO-level log records into <debug_dir>/log.txt.
log_path = os.path.join(debug_dir, 'log.txt')
file_handler = logging.FileHandler(log_path, mode='w')
file_handler.setLevel(logging.INFO)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
logging.getLogger().addHandler(file_handler)

mesh = trimesh.load(mesh_file)

reader = Ho3dReader(video_dir=test_scene_dir)

# Only the first return value (the mesh) is used; the second is discarded.
mesh, _ = estimate_and_scale_mesh(mesh,reader)

CMesh = MeshWithConfidence(mesh)

# Oriented bounding box of the mesh, expressed as the two opposite corners
# at -extents/2 and +extents/2.
to_origin, extents = trimesh.bounds.oriented_bounds(CMesh.mesh)
bbox = np.stack([-extents/2, extents/2], axis=0).reshape(2,3)

scorer = ScorePredictor()
refiner = PoseRefinePredictor()
glctx = dr.RasterizeCudaContext()
est = FoundationPose(model_pts=CMesh.mesh.vertices, model_normals=CMesh.mesh.vertex_normals, mesh=CMesh.mesh, scorer=scorer, refiner=refiner, debug_dir=debug_dir, debug=debug, glctx=glctx)
logging.info("estimator initialization done")

In [None]:
# Ground-truth mesh vertices as float32; evaluate_frame reads pts_gt_orig as a global.
gt_mesh = reader.get_gt_mesh()
pts_gt_orig = np.array(gt_mesh.vertices, dtype=np.float32)

# Running sums per metric (turned into means at the end of the notebook) ...
metrics = dict.fromkeys(('ADD', 'ADI', '3D_IOU', 'Chamfer'), 0.0)

# ... and the individual per-evaluation values, in the same key order.
per_frame_metrics = {name: [] for name in metrics}

In [None]:
from pose_metrics import add, adi_est, chamfer_distance
def evaluate_frame(est_mesh, pose, gt_pose):
    """Compute the ADD, 3D_IOU, ADI and Chamfer metrics for a single frame.

    Args:
        est_mesh: Estimated mesh; its ``vertices`` are used as float32 points.
        pose: Estimated pose, split into rotation/translation by ``pose_to_Rt``.
        gt_pose: Ground-truth pose, split the same way.

    Returns:
        Dict with the keys 'ADD', '3D_IOU', 'ADI' and 'Chamfer'.

    Note:
        Reads the module-level ``pts_gt_orig`` (ground-truth vertices). ADD is
        computed on those ground-truth points only; the other metrics receive
        both the estimated and the ground-truth points.
    """
    est_vertices = np.array(est_mesh.vertices, dtype=np.float32)
    R_est, t_est = pose_to_Rt(pose)
    R_gt, t_gt = pose_to_Rt(gt_pose)

    add_value = add(R_est=R_est, t_est=t_est, R_gt=R_gt, t_gt=t_gt, pts=pts_gt_orig)
    # adi_est returns a pair that is stored as (3D_IOU, ADI), in that order.
    iou_value, adi_value = adi_est(R_est, t_est, est_vertices, R_gt, t_gt, pts_gt_orig)
    chamfer_value = chamfer_distance(R_est, t_est, est_vertices, R_gt, t_gt, pts_gt_orig)

    return {
        'ADD': add_value,
        '3D_IOU': iou_value,
        'ADI': adi_value,
        'Chamfer': chamfer_value,
    }

In [None]:
# Frame 0: register the object from scratch using the frame's object mask.
i = 0
color = reader.get_color(i)
depth = reader.get_depth(i)
mask = reader.get_mask(i).astype(bool)
pose = est.register(K=reader.K, rgb=color, depth=depth, ob_mask=mask, iteration=est_refine_iter)

# Snapshot of the mesh before attachment; this is what gets evaluated and visualised.
tmp_mesh = CMesh.mesh.copy()

if attach_every_n_frames > 0:
    CMesh = perform_attachment(est, CMesh, pose, reader, i)
    frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
    for key in metrics:
        metrics[key] += frame_metrics[key]
        per_frame_metrics[key].append(frame_metrics[key])

# Persist the estimated pose as a 4x4 text matrix.
os.makedirs(f'{debug_dir}/ob_in_cam', exist_ok=True)
np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

In [None]:
# Display the mesh currently held by CMesh (after the frame-0 attachment, if enabled).
CMesh.mesh.show()

In [None]:
# Render the pre-attachment mesh snapshot at the estimated pose and compare it with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Plot CMesh's observed positions coloured by their confidence band.
fig = visualize_confidence_pointcloud(CMesh)
fig.show()

## Following Frames

In [None]:
# Track frames i+1 .. 5 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 6):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

#fig = visualize_confidence_pointcloud(CMesh)
#fig.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Display the mesh currently held by CMesh.
CMesh.mesh.show()

In [None]:
# Track frames i+1 .. 10 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 11):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

#fig = visualize_confidence_pointcloud(CMesh)
#fig.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 24 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 25):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

#fig = visualize_confidence_pointcloud(CMesh)
#fig.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 49 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 50):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

#fig = visualize_confidence_pointcloud(CMesh)
#fig.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 99 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 100):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

#fig = visualize_confidence_pointcloud(CMesh)
#fig.show()

In [None]:
# Display the mesh currently held by CMesh.
CMesh.mesh.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 199 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 200):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

# Confidence overview of the mesh after this batch of frames.
fig = visualize_confidence_pointcloud(CMesh)
fig.show()

In [None]:
# Display the mesh currently held by CMesh.
CMesh.mesh.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 399 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 400):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

# Confidence overview of the mesh after this batch of frames.
fig = visualize_confidence_pointcloud(CMesh)
fig.show()

In [None]:
# Display the mesh currently held by CMesh.
CMesh.mesh.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Track frames i+1 .. 599 (inclusive), continuing from the last processed frame.
for i in range(i + 1, 600):
    color = reader.get_color(i)
    depth = reader.get_depth(i)
    pose = est.track_one(rgb=color, depth=depth, K=reader.K, iteration=track_refine_iter)

    # Keep the pre-attachment mesh: it is what gets evaluated and visualised.
    tmp_mesh = CMesh.mesh.copy()
    if attach_every_n_frames > 0:
        if i % attach_every_n_frames == 0:
            CMesh = perform_attachment(est, CMesh, pose, reader, i)
            frame_metrics = evaluate_frame(tmp_mesh, pose, reader.get_gt_pose(i))
            for key in metrics:
                metrics[key] += frame_metrics[key]
                per_frame_metrics[key].append(frame_metrics[key])

    # Persist the pose of every tracked frame as a 4x4 text matrix.
    np.savetxt(f'{debug_dir}/ob_in_cam/{reader.id_strs[i]}.txt', pose.reshape(4,4))

# Confidence overview of the mesh after this batch of frames.
fig = visualize_confidence_pointcloud(CMesh)
fig.show()

In [None]:
# Display the mesh currently held by CMesh.
CMesh.mesh.show()

In [None]:
# Compare the last pre-attachment mesh snapshot, rendered at the latest pose, with frame i.
visualize_comparison(tmp_mesh, pose, test_scene_dir, i=i)

In [None]:
# Number of frames processed so far (frame indices 0..i).
n_frames = i+1
eval_dir = f"{debug_dir}/evaluation_results"
os.makedirs(eval_dir, exist_ok=True)

# Metrics are only accumulated on frames where evaluate_frame ran (the attachment
# frames), so the mean must be taken over the recorded samples rather than over
# n_frames. Deriving it from per_frame_metrics also makes this cell idempotent:
# re-running it no longer divides the stored value a second time.
for key in metrics:
    if per_frame_metrics[key]:
        metrics[key] = float(np.mean(per_frame_metrics[key]))
    else:
        # No evaluation ran for this metric; the mean is undefined.
        metrics[key] = float('nan')

In [None]:
# Save summary as JSON
summary_data = {
    'num_frames': n_frames,
    'metrics': {}
}

# A metric has no samples when evaluation never ran (e.g. attach_every_n_frames <= 0).
# np.min / np.max raise on empty input, so the statistics are stored as null then.
# 'num_samples' records how many evaluations each statistic is based on, which
# can be smaller than 'num_frames'.
for key in metrics.keys():
    summary_data['metrics'][key] = {
        'mean': float(metrics[key]) if per_frame_metrics[key] else None,
        'min': float(np.min(per_frame_metrics[key])) if per_frame_metrics[key] else None,
        'max': float(np.max(per_frame_metrics[key])) if per_frame_metrics[key] else None,
        'num_samples': len(per_frame_metrics[key]),
    }

summary_file = os.path.join(eval_dir, 'summary.json')
with open(summary_file, 'w') as f:
    json.dump(summary_data, f, indent=2)
print(f"Saved summary to: {summary_file}")


# Save per-frame metrics as JSON, one file per metric
for key in metrics.keys():
    per_frame_data = {
        'metric': key,
        'num_frames': n_frames,
        'values': [float(v) for v in per_frame_metrics[key]]
    }
    output_file = os.path.join(eval_dir, f'{key}_per_frame.json')
    with open(output_file, 'w') as f:
        json.dump(per_frame_data, f, indent=2)
print(f"Saved per-frame results to: {eval_dir}")

# Console report.
# NOTE(review): the units printed below (mm, %) are not established anywhere in
# this notebook — confirm them against pose_metrics.
print(f"\n{'='*60}")
print(f"Evaluation Results ({n_frames} frames)")
print(f"{'='*60}")
print(f"ADI (Average Distance):        {metrics['ADI']:.4f} mm")
print(f"3D IOU:                        {metrics['3D_IOU']:.3f} %")
print(f"Chamfer Distance:              {metrics['Chamfer']:.4f} mm")

In [None]:
# Plot every metric's recorded values in a 2x2 grid and save the figure.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
fig.suptitle('Per-Frame Metrics', fontsize=16, fontweight='bold')

axes = axes.flatten()

# NOTE(review): the x axis is the index of the recorded evaluation, not the
# dataset frame index, because metrics are only recorded on attachment frames.
for idx, (metric_name, values) in enumerate(per_frame_metrics.items()):
    axes[idx].plot(range(len(values)), values, marker='o', markersize=4, linewidth=2)
    axes[idx].set_xlabel('Frame Number')
    axes[idx].set_ylabel(metric_name)
    axes[idx].set_title(f'{metric_name} over Frames')
    axes[idx].grid(True, alpha=0.3)

plt.tight_layout()
# Join with a path separator so the image is written inside eval_dir
# (the previous f-string produced "<eval_dir>metrics_plot.png" next to it).
plt.savefig(os.path.join(eval_dir, 'metrics_plot.png'), dpi=150, bbox_inches='tight')
plt.show()