In [None]:
from scipy.optimize import minimize
import numpy as np
from imgutils.data import load_image
from imgutils.pose import dwpose_estimate
from unity_controller import UnityController

def KeyPoint(keypoints):
    return [
        keypoints[0].body[2],
        keypoints[0].body[5],
        keypoints[0].body[7],
        keypoints[0].body[8]
    ]

# 初始化控制器和参考数据
controller = UnityController()
image_estimate = load_image(r"C:\Users\31878\Desktop\diffusion_pic\02977-2464155392.png")
keypoints_estimate = dwpose_estimate(image_estimate)
target_points = np.array(KeyPoint(keypoints_estimate)).flatten()  # 目标关键点

# 初始相机参数
initial_params = np.array([
    0.3, 1, 2,    # 位置 (x, y, z)
    0, 180, 0,    # 旋转 (x_r, y_r, z_r)
    60            # FOV
])

def get_keypoints(params):
    """ 根据给定参数获取渲染后的关键点 """
    try:
        img = controller.Control_render(
            x=params[0], y=params[1], z=params[2],
            x_r=params[3], y_r=params[4], z_r=params[5],
            fov=params[6],
            width=1024, height=1024
        )
        kps = dwpose_estimate(img)
        return np.array(KeyPoint(kps)).flatten()
    except:
        return np.full_like(target_points, 1e6)  # 返回极大值表示异常

# 在loss_function中添加迭代信息打印
def loss_function(params):
    current_points = get_keypoints(params)
    loss = np.sum((current_points - target_points) ** 2)
    print(f"Iteration {loss_function.counter}: Loss = {loss:.2f}")
    loss_function.counter += 1
    return loss

loss_function.counter = 0  # 初始化计数器

# 参数边界约束
bounds = [
    (None, None), (None, None), (None, None),  # 位置
    (None, None), (0, 360), (None, None),     # 旋转角度
    (30, 90)                                  # FOV
]

# 使用优化算法进行参数调整
result = minimize(
    loss_function,
    initial_params,
    method='Powell',  # 适合非光滑优化的方法
    options={'maxiter': 50, 'disp': True}
)

# 输出最优参数
optimized_params = result.x
print(f"Optimized Parameters: {optimized_params}")

# 使用最优参数获取最终结果
final_image = controller.Control_render(
    x=optimized_params[0], 
    y=optimized_params[1],
    z=optimized_params[2],
    x_r=optimized_params[3],
    y_r=optimized_params[4],
    z_r=optimized_params[5],
    fov=optimized_params[6],
    width=1024,
    height=1024
)

Iteration 0: Loss = 226425.96
Iteration 1: Loss = 226425.96
Iteration 2: Loss = 1238008.98
Iteration 3: Loss = 1270644.90
Iteration 4: Loss = 214742.84
Iteration 5: Loss = 523988.74
Iteration 6: Loss = 143715.76
Iteration 7: Loss = 143730.08
Iteration 8: Loss = 141397.65
Iteration 9: Loss = 154469.32
Iteration 10: Loss = 145449.54
Iteration 11: Loss = 142821.03
Iteration 12: Loss = 144405.43
Iteration 13: Loss = 140507.50
Iteration 14: Loss = 140507.50
Iteration 15: Loss = 500700.79
Iteration 16: Loss = 1558722.28
Iteration 17: Loss = 692293.46
Iteration 18: Loss = 110511.92
Iteration 19: Loss = 95196.18
Iteration 20: Loss = 94980.88
Iteration 21: Loss = 95124.88
Iteration 22: Loss = 94462.62
Iteration 23: Loss = 95375.19
Iteration 24: Loss = 94072.38
Iteration 25: Loss = 94936.43
Iteration 26: Loss = 94072.38
Iteration 27: Loss = 143420.39
Iteration 28: Loss = 575621.86
Iteration 29: Loss = 61428.92
Iteration 30: Loss = 47592.42
Iteration 31: Loss = 57668.52
Iteration 32: Loss = 48935

In [3]:
import numpy as np
from scipy.optimize import basinhopping, minimize
import matplotlib.pyplot as plt
from imgutils.data import load_image
from imgutils.pose import dwpose_estimate
from unity_controller import UnityController
from functools import lru_cache
from concurrent.futures import ThreadPoolExecutor

# ---------------------------
# 核心优化类
# ---------------------------
def KeyPoint(keypoints):
    return [
        keypoints[0].body[2],
        keypoints[0].body[5],
        keypoints[0].body[7],
        keypoints[0].body[8]
    ]
class CameraPoseOptimizer:
    def __init__(self, controller, target_kps, init_position=(0,1,-2), init_rotation=(0,180,0)):
        self.controller = controller
        self.target = np.array(target_kps).flatten()
        
        # 参数初始化（强制背面视角）
        self.params = np.array([
            *init_position,  # x,y,z
            *init_rotation,  # x_r,y_r,z_r
            60.0             # FOV
        ])
        
        # 优化配置
        self.param_weights = np.array([0.1, 0.1, 0.3, 1.0, 2.0, 0.5, 0.2])
        self.best_loss = float('inf')
        self.best_params = None
        self.history = []

        # 并行计算准备
        self.executor = ThreadPoolExecutor(max_workers=4)

    # ---------------------------
    # 核心优化方法
    # ---------------------------
    def optimize(self, global_steps=15, local_steps=30):
        """执行混合优化流程"""
        print("=== 开始全局优化 ===")
        global_result = basinhopping(
            func=self._total_loss,
            x0=self.params,
            niter=global_steps,
            T=1.5,
            stepsize=0.8,
            minimizer_kwargs={
                "method": "L-BFGS-B",
                "bounds": self._get_bounds('global'),
                "options": {"eps": 0.01}
            },
            take_step=self._global_step
        )
        
        print("\n=== 开始局部优化 ===")
        local_result = minimize(
            self._total_loss,
            global_result.x,
            method='Nelder-Mead',
            options={
                'initial_simplex': self._create_simplex(global_result.x),
                'xatol': 0.01,
                'fatol': 10.0,
                'adaptive': True,
                'maxiter': local_steps
            }
        )
        
        self._save_result(local_result.x)
        return self.best_params

    # ---------------------------
    # 优化组件
    # ---------------------------
    def _get_bounds(self, mode):
        """获取参数边界约束"""
        if mode == 'global':
            return [
                (-2, 2), (0, 3), (-5, -0.5),  # 位置
                (-30, 30), (90, 270), (-30, 30),  # 旋转
                (45, 75)  # FOV
            ]
        return [
            (-1, 1), (0.5, 1.5), (-3, -1),  # 位置
            (-15, 15), (160, 200), (-15, 15),  # 旋转
            (50, 70)  # FOV
        ]

    def _global_step(self, x):
        """全局优化步长策略"""
        delta = np.abs(x - self.best_params) if self.best_params is not None else np.ones_like(x)
        step = np.array([0.05, 0.05, 0.2, 0.5, 1.5, 0.3, 0.2])
        scale = np.where(delta > 3*step, 3.0, 1.0)
        return x + np.random.normal(0, step*scale)

    def _create_simplex(self, center):
        """创建带权重的初始单纯形"""
        simplex = [center.copy()]
        for i in range(len(center)):
            delta = 0.02 * self.param_weights[i]
            point = center.copy()
            point[i] += delta
            simplex.append(point)
        return np.array(simplex)

    # ---------------------------
    # 损失函数计算（带缓存）
    # ---------------------------
    @lru_cache(maxsize=100)
    def _cached_render(self, params_tuple):
        """带缓存的渲染方法"""
        try:
            return self.controller.Control_render(
                x=params_tuple[0],
                y=params_tuple[1],
                z=params_tuple[2],
                x_r=params_tuple[3],
                y_r=params_tuple[4],
                z_r=params_tuple[5],
                fov=params_tuple[6],
                width=512,
                height=512
            )
        except:
            return None

    def _compute_kps(self, params):
        """并行计算关键点"""
        params_tuple = tuple(params.tolist())
        img = self._cached_render(params_tuple)
        if img is None:
            return None
        return KeyPoint(dwpose_estimate(img))

    def _direction_penalty(self, params):
        """物理约束惩罚项"""
        # 位置约束（z必须为负）
        pos_penalty = 50.0 if params[2] > 0 else 0.0
        
        # 旋转约束（y轴在160-200之间）
        y_rot = params[4] % 360
        rot_penalty = 20 * abs(y_rot - 180)/180 if not (160 < y_rot < 200) else 0.0
        
        return pos_penalty + rot_penalty

    def _total_loss(self, params):
        """带约束的总损失函数"""
        # 参数范围约束
        params = np.clip(params, 
                        [b[0] for b in self._get_bounds('global')],
                        [b[1] for b in self._get_bounds('global')])
        
        # 异步获取关键点
        future = self.executor.submit(self._compute_kps, params)
        current_kps = future.result()
        
        if current_kps is None:
            return float('inf')
            
        # 计算基础损失
        diff = np.array(current_kps).flatten() - self.target
        base_loss = np.sum(diff ** 2)
        
        # 总损失 = 基础损失 + 方向约束
        total_loss = base_loss + self._direction_penalty(params)
        
        # 记录最佳参数
        if total_loss < self.best_loss:
            self.best_loss = total_loss
            self.best_params = params.copy()
            self._visualize_progress(params, total_loss)
        
        self.history.append(total_loss)
        return total_loss

    # ---------------------------
    # 可视化与工具方法
    # ---------------------------
    def _visualize_progress(self, params, loss):
        """实时可视化优化进度"""
        plt.figure(figsize=(12, 5))
        
        # 显示当前渲染画面
        plt.subplot(1, 2, 1)
        img = self._cached_render(tuple(params.tolist()))
        if img is not None:
            plt.imshow(img)
            plt.title(f"Loss: {loss:.1f}\nParams: {params.round(2)}")
            plt.axis('off')
        
        # 显示损失曲线
        plt.subplot(1, 2, 2)
        plt.plot(self.history)
        plt.yscale('log')
        plt.title("Optimization Progress")
        plt.xlabel("Iteration")
        plt.ylabel("Loss")
        
        plt.tight_layout()
        plt.pause(0.1)
        plt.close()

    def _save_result(self, params):
        """保存最终结果"""
        final_img = self.controller.Control_render(
            x=params[0], y=params[1], z=params[2],
            x_r=params[3], y_r=params[4], z_r=params[5],
            fov=params[6],
            width=1024,
            height=1024
        )
        plt.imsave("optimized_view.png", final_img)
        np.save("optimized_params.npy", params)

# ---------------------------
# 使用示例
# ---------------------------
if __name__ == "__main__":
    # 初始化Unity控制器
    controller = UnityController()
    
    # 加载参考图像
    ref_image = load_image(r"C:\Users\31878\Desktop\diffusion_pic\02977-2464155392.png")
    ref_kps = KeyPoint(dwpose_estimate(ref_image))
    
    # 创建优化器并执行优化
    optimizer = CameraPoseOptimizer(
        controller=controller,
        target_kps=ref_kps,
        init_position=(0, 1, -2),    # 初始z坐标为负（背面）
        init_rotation=(0, 180, 0)    # 初始面向背面
    )
    
    optimized_params = optimizer.optimize(
        global_steps=20,   # 全局搜索迭代次数
        local_steps=30     # 局部优化迭代次数
    )
    
    print(f"优化完成！最佳参数：{optimized_params}")

=== 开始全局优化 ===


IndexError: list index out of range

In [None]:
from imgutils.data import load_image
# from matplotlib import pyplot as plt
from PIL import Image
image_path = r"C:\Users\31878\Desktop\diffusion_pic\10__.png"
image = Image.open(image_path)
image


In [None]:
import torch
import requests
import numpy as np

from PIL import Image

from transformers import AutoProcessor, RTDetrForObjectDetection, VitPoseForPoseEstimation

device = "cuda" if torch.cuda.is_available() else "cpu"

# url = "http://images.cocodataset.org/val2017/000000000139.jpg"
# image = Image.open(requests.get(url, stream=True).raw)
image_path = r"C:\Users\31878\Desktop\diffusion_pic\10__.png"
image = Image.open(image_path)

# ------------------------------------------------------------------------
# Stage 1. Detect humans on the image
# ------------------------------------------------------------------------

# You can choose any detector of your choice
person_image_processor = AutoProcessor.from_pretrained("PekingU/rtdetr_r50vd_coco_o365")
person_model = RTDetrForObjectDetection.from_pretrained("PekingU/rtdetr_r50vd_coco_o365", device_map=device)

inputs = person_image_processor(images=image, return_tensors="pt").to(device)

with torch.no_grad():
    outputs = person_model(**inputs)

results = person_image_processor.post_process_object_detection(
    outputs, target_sizes=torch.tensor([(image.height, image.width)]), threshold=0.3
)
result = results[0]  # take first image results

# Human label refers 0 index in COCO dataset
person_boxes = result["boxes"][result["labels"] == 0]
person_boxes = person_boxes.cpu().numpy()

# Convert boxes from VOC (x1, y1, x2, y2) to COCO (x1, y1, w, h) format
person_boxes[:, 2] = person_boxes[:, 2] - person_boxes[:, 0]
person_boxes[:, 3] = person_boxes[:, 3] - person_boxes[:, 1]

# ------------------------------------------------------------------------
# Stage 2. Detect keypoints for each person found
# ------------------------------------------------------------------------

image_processor = AutoProcessor.from_pretrained("usyd-community/vitpose-base-simple")
model = VitPoseForPoseEstimation.from_pretrained("usyd-community/vitpose-base-simple", device_map=device)

inputs = image_processor(image, boxes=[person_boxes], return_tensors="pt").to(device)

with torch.no_grad():
    outputs = model(**inputs)

pose_results = image_processor.post_process_pose_estimation(outputs, boxes=[person_boxes])
image_pose_result = pose_results[0]  # results for first image

In [None]:
import supervision as sv

xy = torch.stack([pose_result['keypoints'] for pose_result in image_pose_result]).cpu().numpy()
scores = torch.stack([pose_result['scores'] for pose_result in image_pose_result]).cpu().numpy()

key_points = sv.KeyPoints(
    xy=xy, confidence=scores
)

edge_annotator = sv.EdgeAnnotator(
    color=sv.Color.GREEN,
    thickness=1
)
vertex_annotator = sv.VertexAnnotator(
    color=sv.Color.RED,
    radius=2
)
annotated_frame = edge_annotator.annotate(
    scene=image.copy(),
    key_points=key_points
)
annotated_frame = vertex_annotator.annotate(
    scene=annotated_frame,
    key_points=key_points
)

In [None]:
annotated_frame

---------------------------------------------------------------render----------------------------------------------------

In [None]:
from imgutils.data import load_image
from imgutils.pose import dwpose_estimate, op18_visualize
image = load_image(r"C:\Users\31878\Desktop\diffusion_pic\02977-2464155392.png")
keypoints = dwpose_estimate(image)
from matplotlib import pyplot as plt
plt.imshow(op18_visualize(image, keypoints))
plt.show()