In [10]:
import torch
import cv2
from torchvision import transforms
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

# ==== Class names (must match the label order used at training time) ====
# The list index is the label id; index 0 is the background entry.
MY_CLASSES = ['__background__', 'person', 'table', 'chair', 'suitcase']

# ==== Paths ====
image_path = "./images_new/frame_00000.jpg"
model_path = "./best_model_resnet101.pth"

# ==== Load the image ====
# cv2.imread reports failure by returning None instead of raising, so check
# here; otherwise the problem only surfaces as an opaque cvtColor error.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# The image is converted from BGR to RGB before being turned into a tensor.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
transform = transforms.Compose([transforms.ToTensor()])
input_tensor = transform(image_rgb).unsqueeze(0)  # add a leading batch dimension

# ==== Build the detector and restore the trained weights ====
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

backbone = resnet_fpn_backbone('resnet101', pretrained=False)
model = FasterRCNN(backbone, num_classes=len(MY_CLASSES))

# NOTE(review): torch.load unpickles the checkpoint file; only load
# checkpoints that come from a trusted source.
checkpoint = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint)
del checkpoint  # the weights now live in the model; drop the extra copy

model.eval()
model.to(device)

# ==== Inference ====
# The model is fed a batch of one image; keep only that image's predictions.
with torch.no_grad():
    input_tensor = input_tensor.to(device)
    predictions = model(input_tensor)
    outputs = predictions[0]

# ==== Draw detection boxes ====
# Pull the predictions off the device once rather than once per detection.
boxes = outputs['boxes'].cpu().numpy().astype(int)
scores = outputs['scores'].cpu().tolist()
label_ids = outputs['labels'].cpu().tolist()

font = cv2.FONT_HERSHEY_SIMPLEX
for box, score, label_id in zip(boxes, scores, label_ids):
    if score < 0.6:  # skip low-confidence detections
        continue
    x1, y1, x2, y2 = (int(v) for v in box)
    label = MY_CLASSES[label_id]
    caption = f"{label} {score:.2f}"
    cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # The caption normally sits just above the box. For boxes touching the top
    # edge that position is outside the image, so clamp it to stay visible.
    (_, text_h), _ = cv2.getTextSize(caption, font, 0.6, 2)
    text_y = max(y1 - 5, text_h + 2)
    cv2.putText(image, caption, (x1, text_y), font, 0.6, (0, 255, 0), 2)

# ==== Save and display ====
# Save before opening a window so the result is on disk even if the GUI call
# fails. cv2.imwrite returns False on failure instead of raising.
if not cv2.imwrite("predicted_result.jpg", image):
    raise OSError("Failed to write predicted_result.jpg")
cv2.imshow("Detection", image)
cv2.waitKey(0)
cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import torch
from torchvision import transforms
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from collections import deque

# ========== Paths ==========
image_path = "./images_new/frame_00000.jpg"
depth_path = "./depth_new/frame_00000.npy"
model_path = "./best_model_resnet101.pth"

# ========== Classes ==========
# The list index is the label id; index 0 is the background entry.
MY_CLASSES = ['__background__', 'person', 'table', 'chair', 'suitcase']

# ========== Model ==========
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
backbone = resnet_fpn_backbone('resnet101', pretrained=False)
model = FasterRCNN(backbone, num_classes=len(MY_CLASSES))
# NOTE(review): torch.load unpickles the checkpoint file; only load
# checkpoints that come from a trusted source.
model.load_state_dict(torch.load(model_path, map_location=device))
model.eval().to(device)

# ========== Data ==========
# cv2.imread reports failure by returning None instead of raising, so check
# here; otherwise the problem only surfaces as an opaque cvtColor error.
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")

# NOTE(review): the depth map is later indexed with box coordinates predicted
# on `image`, so it is assumed to share the image's pixel grid -- confirm.
depth = np.load(depth_path)

# The image is converted from BGR to RGB before being turned into a tensor.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
transform = transforms.Compose([transforms.ToTensor()])
input_tensor = transform(image_rgb).unsqueeze(0).to(device)  # batch of one

# ========== Inference ==========
# The model is fed a batch of one image; keep only that image's predictions.
with torch.no_grad():
    outputs = model(input_tensor)[0]

# ========== Region growing restricted to a single detection crop ==========
def region_grow(depth_crop, start_y, start_x, threshold=0.06):
    """Grow a depth-consistent region outward from a seed pixel.

    Starting at ``(start_y, start_x)``, collects every pixel that is
    8-connected to the seed through accepted pixels. A pixel is accepted when
    its depth is finite, strictly positive, and within ``threshold`` of the
    *seed's* depth (the comparison is always against the seed, never against
    the neighbouring pixel).

    Args:
        depth_crop: 2-D numeric array of depth values.
        start_y: Row index of the seed inside ``depth_crop``.
        start_x: Column index of the seed inside ``depth_crop``.
        threshold: Maximum allowed absolute difference from the seed depth,
            in the same units as ``depth_crop``.

    Returns:
        ``np.uint8`` array with the same shape as ``depth_crop``: 1 for pixels
        in the grown region, 0 elsewhere. All zeros when the seed lies outside
        the crop or when its depth is non-finite or not positive.
    """
    h, w = depth_crop.shape
    region_mask = np.zeros_like(depth_crop, dtype=np.uint8)

    # An out-of-range seed (including any seed on an empty crop) has no
    # region. Negative indices are rejected as well, because NumPy would
    # otherwise silently wrap them around to the far edge.
    if not (0 <= start_y < h and 0 <= start_x < w):
        return region_mask

    ref_val = float(depth_crop[start_y, start_x])
    if not np.isfinite(ref_val) or ref_val <= 0:
        return region_mask  # seed has no usable depth

    # Decide acceptance for the whole crop in one vectorised pass. The values
    # are widened to float64 so the comparison matches plain Python-float
    # arithmetic. Non-positive depths are excluded here too, consistent with
    # the seed check, so invalid zero pixels cannot join a shallow region.
    depth64 = np.asarray(depth_crop, dtype=np.float64)
    with np.errstate(invalid="ignore"):
        acceptable = (
            np.isfinite(depth64)
            & (depth64 > 0)
            & (np.abs(depth64 - ref_val) <= threshold)
        )
    if not acceptable[start_y, start_x]:
        return region_mask  # e.g. a negative threshold rejects even the seed

    # Breadth-first flood fill over the acceptance map. Pixels are marked when
    # they are enqueued, so each pixel enters the queue at most once.
    region_mask[start_y, start_x] = 1
    queue = deque([(start_y, start_x)])
    while queue:
        y, x = queue.popleft()
        for ny in range(max(y - 1, 0), min(y + 2, h)):
            for nx in range(max(x - 1, 0), min(x + 2, w)):
                if acceptable[ny, nx] and not region_mask[ny, nx]:
                    region_mask[ny, nx] = 1
                    queue.append((ny, nx))
    return region_mask

# ========== Process every detection ==========
result_img = image.copy()
font = cv2.FONT_HERSHEY_SIMPLEX

# Pull the predictions off the device once rather than once per detection.
boxes = outputs["boxes"].cpu().numpy().astype(int)
scores = outputs["scores"].cpu().tolist()
label_ids = outputs["labels"].cpu().tolist()

for box, score, label_id in zip(boxes, scores, label_ids):
    if score < 0.6:  # skip low-confidence detections
        continue

    x1, y1, x2, y2 = (int(v) for v in box)
    label_name = MY_CLASSES[label_id]
    cx = (x1 + x2) // 2
    cy = (y1 + y2) // 2

    # Crop the depth map to the box. The seed is the box centre expressed in
    # crop coordinates; skip the detection if the centre is not inside the
    # crop (for example when the crop is empty).
    depth_crop = depth[y1:y2, x1:x2]
    rel_cy, rel_cx = cy - y1, cx - x1
    if not (0 <= rel_cx < depth_crop.shape[1] and 0 <= rel_cy < depth_crop.shape[0]):
        continue

    # Region growing, restricted to the box.
    region_mask = region_grow(depth_crop, rel_cy, rel_cx, threshold=0.06)

    # Representative distance: median of the valid depths inside the region.
    # NOTE(review): the "m" suffix assumes the depth map is in metres -- confirm.
    masked_depth = depth_crop[region_mask == 1]
    masked_depth = masked_depth[(masked_depth > 0) & np.isfinite(masked_depth)]
    if masked_depth.size > 0:
        median_depth = float(np.median(masked_depth))
        label_text = f"{label_name} | {median_depth:.2f}m"
    else:
        label_text = f"{label_name} | N/A"

    # Box and caption. The caption normally sits just above the box. For boxes
    # touching the top edge that position is outside the image, so clamp it.
    cv2.rectangle(result_img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    (_, text_h), _ = cv2.getTextSize(label_text, font, 0.6, 2)
    text_y = max(y1 - 5, text_h + 2)
    cv2.putText(result_img, label_text, (x1, text_y), font, 0.6, (0, 255, 0), 2)

    # Outline of the grown region, overlaid on the full image. `offset` shifts
    # the contour points from crop coordinates to image coordinates. Indexing
    # with [-2] picks the contour list whether findContours returns
    # (contours, hierarchy) or (image, contours, hierarchy).
    contours = cv2.findContours(region_mask, cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE, offset=(x1, y1))[-2]
    if len(contours) > 0:
        cv2.drawContours(result_img, contours, -1, (0, 0, 255), 2)

# ========== Save and display ==========
# Save before opening a window so the result is on disk even if the GUI call
# fails. cv2.imwrite returns False on failure instead of raising.
if not cv2.imwrite("segment_distance_result.jpg", result_img):
    raise OSError("Failed to write segment_distance_result.jpg")
cv2.imshow("Segment + Distance", result_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
