In [1]:
import cv2
import numpy as np
from pathlib import Path
import time
import psutil
import os
import gc
import math
from dataclasses import dataclass
from typing import List, Tuple, Optional


In [2]:
@dataclass
class FrameInfo:
    """存储每一帧的信息"""
    frame_id: int  # 帧的唯一标识
    position: Tuple[float, float]  # 在全景图中的坐标 (x, y)
    keypoints: List  # SIFT特征点
    descriptors: np.ndarray  # SIFT描述子
    timestamp: int  # 添加时的序号
    
class FrameTracker:
    """帧追踪器"""
    def __init__(self, max_matches=5, coordinate_threshold=50.0):
        self.frames = {}  # Dict[int, FrameInfo]
        self.current_id = 0
        self.timestamp = 0
        self.max_matches = max_matches
        self.coordinate_threshold = coordinate_threshold  # 坐标相似性阈值
        self.matcher = cv2.BFMatcher()
        
    def add_frame(self, position: Tuple[float, float], 
                 keypoints, descriptors) -> int:
        """添加新帧到追踪器"""
        frame_info = FrameInfo(
            frame_id=self.current_id,
            position=position,
            keypoints=keypoints,
            descriptors=descriptors,
            timestamp=self.timestamp
        )
        self.frames[self.current_id] = frame_info
        self.current_id += 1
        self.timestamp += 1
        return self.current_id - 1

    def get_frames_by_coordinate(self, target_pos: Tuple[float, float], 
                               is_horizontal: bool) -> List[FrameInfo]:
        """根据移动方向获取合适的参考帧
        
        Args:
            target_pos: 目标位置 (x, y)
            is_horizontal: 是否为水平移动
            
        Returns:
            符合条件的参考帧列表
        """
        candidates = []
        if is_horizontal:
            # 水平移动时，寻找y坐标相近的帧
            target_y = target_pos[1]
            for frame_info in self.frames.values():
                if abs(frame_info.position[1] - target_y) < self.coordinate_threshold:
                    candidates.append((
                        abs(frame_info.position[1] - target_y),  # y坐标差异作为距离
                        frame_info
                    ))
        else:
            # 垂直移动时，寻找x坐标相近的帧
            target_x = target_pos[0]
            for frame_info in self.frames.values():
                if abs(frame_info.position[0] - target_x) < self.coordinate_threshold:
                    candidates.append((
                        abs(frame_info.position[0] - target_x),  # x坐标差异作为距离
                        frame_info
                    ))
        
        # 按坐标差异排序并返回最近的n帧
        candidates.sort(key=lambda x: x[0])
        return [frame_info for _, frame_info in candidates[:self.max_matches]]

    def compute_weighted_position(self, 
                                current_kps, 
                                current_descs,
                                predicted_pos: Tuple[float, float],
                                movement_angle: float
                                ) -> Optional[Tuple[float, float]]:
        """计算加权后的位置
        
        Args:
            current_kps: 当前帧特征点
            current_descs: 当前帧描述子
            predicted_pos: 预测位置
            movement_angle: 运动角度（用于判断主要运动方向）
        """
        # 根据运动角度判断主要运动方向
        dx = math.cos(math.radians(movement_angle))
        dy = math.sin(math.radians(movement_angle))
        is_horizontal = abs(dx) > abs(dy)
        
        # 获取合适的参考帧
        reference_frames = self.get_frames_by_coordinate(predicted_pos, is_horizontal)
        
        if not reference_frames:
            return None
            
        all_positions = []
        all_weights = []
        
        # 对每个参考帧计算位置和权重
        for frame_info in reference_frames:
            pos, weight = self._compute_position_from_matches(
                current_kps, current_descs,
                frame_info,
                predicted_pos,
                is_horizontal
            )
            if pos is not None:
                all_positions.append(pos)
                all_weights.append(weight)
                
        if not all_positions:
            return None
            
        # 计算加权平均位置
        all_weights = np.array(all_weights)
        all_weights = all_weights / np.sum(all_weights)
        
        # 根据移动方向选择性地更新坐标
        if is_horizontal:
            # 水平移动时只更新x坐标
            weighted_x = sum(pos[0] * w for pos, w in zip(all_positions, all_weights))
            return (weighted_x, predicted_pos[1])
        else:
            # 垂直移动时只更新y坐标
            weighted_y = sum(pos[1] * w for pos, w in zip(all_positions, all_weights))
            return (predicted_pos[0], weighted_y)
        
    def _compute_position_from_matches(self,
                                     current_kps,
                                     current_descs,
                                     frame_info: FrameInfo,
                                     predicted_pos: Tuple[float, float],
                                     is_horizontal: bool
                                     ) -> Tuple[Optional[Tuple[float, float]], float]:
        """从特征匹配计算位置和权重"""
        # 特征匹配
        matches = []
        try:
            matches_pairs = self.matcher.knnMatch(current_descs, frame_info.descriptors, k=2)
            for m, n in matches_pairs:
                if m.distance < 0.75 * n.distance:
                    matches.append(m)
        except:
            return None, 0
            
        if len(matches) < 10:
            return None, 0
            
        # 计算相对位移
        movements = []
        for match in matches:
            pt1 = np.array(current_kps[match.queryIdx].pt)
            pt2 = np.array(frame_info.keypoints[match.trainIdx].pt)
            movement = pt2 - pt1
            movements.append(movement)
            
        mean_movement = np.mean(movements, axis=0)
        dx, dy = mean_movement
        
        # 计算在全景图坐标系中的位置
        abs_x = frame_info.position[0] + dx
        abs_y = frame_info.position[1] + dy
        
        # 根据移动方向计算相关距离
        if is_horizontal:
            relevant_dist = abs(abs_y - predicted_pos[1])  # 水平移动关注y方向差异
        else:
            relevant_dist = abs(abs_x - predicted_pos[0])  # 垂直移动关注x方向差异
        
        # 计算权重
        match_quality = len(matches) / 100  # 归一化匹配质量
        coordinate_weight = 1 / (1 + relevant_dist/self.coordinate_threshold)  # 坐标相似度权重
        
        final_weight = match_quality * coordinate_weight
        
        return (abs_x, abs_y), final_weight

In [3]:
class Panorama:
    def __init__(self, initial_frame):
        """初始化全景图系统"""
        self.frame_h, self.frame_w = initial_frame.shape[:2]
        
        # 创建初始画布（给一些边距以便扩展）
        margin = 100
        self.canvas = np.zeros((self.frame_h + 2*margin, self.frame_w + 2*margin, 3), dtype=np.uint8)
        
        # 使用浮点数存储当前位置（从中心开始）
        self.current_x = float(margin)
        self.current_y = float(margin)
        
        # 记录已使用区域的边界（仍然使用整数，因为这是实际的像素边界）
        self.min_x = margin
        self.max_x = margin + self.frame_w
        self.min_y = margin
        self.max_y = margin + self.frame_h
        
        # 初始化帧追踪器和特征检测器
        self.frame_tracker = FrameTracker()
        self.sift = cv2.SIFT_create()
        
        # 放置第一帧
        self.canvas[margin:margin+self.frame_h, margin:margin+self.frame_w] = initial_frame
        
        # 添加初始帧到追踪器
        initial_kps, initial_descs = self.sift.detectAndCompute(initial_frame, None)
        self.frame_tracker.add_frame(
            (self.current_x, self.current_y),
            initial_kps,
            initial_descs
        )

    def expand_canvas_if_needed(self, new_x_float, new_y_float):
        """在需要时扩展画布，接受浮点数坐标"""
        # 转换为整数进行边界检查
        new_x = int(np.floor(new_x_float))
        new_y = int(np.floor(new_y_float))
        
        need_expand = False
        pad_left = pad_right = pad_top = pad_bottom = 0
        
        if new_x < 0:
            pad_left = abs(new_x)
            need_expand = True
        if new_x + self.frame_w > self.canvas.shape[1]:
            pad_right = new_x + self.frame_w - self.canvas.shape[1]
            need_expand = True
        if new_y < 0:
            pad_top = abs(new_y)
            need_expand = True
        if new_y + self.frame_h > self.canvas.shape[0]:
            pad_bottom = new_y + self.frame_h - self.canvas.shape[0]
            need_expand = True
            
        if need_expand:
            # 创建新画布
            new_h = self.canvas.shape[0] + pad_top + pad_bottom
            new_w = self.canvas.shape[1] + pad_left + pad_right
            new_canvas = np.zeros((new_h, new_w, 3), dtype=np.uint8)
            
            # 复制原画布内容到新位置
            y_start = pad_top
            x_start = pad_left
            new_canvas[y_start:y_start+self.canvas.shape[0], 
                      x_start:x_start+self.canvas.shape[1]] = self.canvas
            
            # 更新坐标（保持浮点数精度）
            self.current_x += float(pad_left)
            self.current_y += float(pad_top)
            self.min_x += pad_left
            self.max_x += pad_left
            self.min_y += pad_top
            self.max_y += pad_top
            
            self.canvas = new_canvas
            return float(pad_left), float(pad_top)
            
        return 0.0, 0.0

    def add_frame(self, frame, angle, magnitude):
        """添加新帧到全景图"""
        # 获取当前帧的特征
        curr_kps, curr_descs = self.sift.detectAndCompute(frame, None)
        
        # 计算预测位置（使用运动估计）
        dx = magnitude * math.cos(math.radians(angle + 90))
        dy = magnitude * math.sin(math.radians(angle - 90))
        predicted_x = self.current_x + dx
        predicted_y = self.current_y + dy
        
        # 使用帧追踪器计算加权位置，这里传入angle参数
        weighted_pos = self.frame_tracker.compute_weighted_position(
            curr_kps,
            curr_descs,
            (predicted_x, predicted_y),
            angle
        )
        
        # 如果无法计算加权位置，使用预测位置
        if weighted_pos is None:
            new_x_float = predicted_x
            new_y_float = predicted_y
        else:
            new_x_float, new_y_float = weighted_pos
            print(f"\n预测位置: ({predicted_x:.2f}, {predicted_y:.2f})")
            print(f"加权位置: ({new_x_float:.2f}, {new_y_float:.2f})")
        
        # 扩展画布（使用浮点数坐标）
        offset_x, offset_y = self.expand_canvas_if_needed(new_x_float, new_y_float)
        new_x_float += offset_x
        new_y_float += offset_y
        
        # 在访问像素时才转换为整数
        new_x = int(np.floor(new_x_float))
        new_y = int(np.floor(new_y_float))
        
        # 判断是否为向下移动
        is_moving_down = dy > 0 and abs(dy) > abs(dx)
        
        if is_moving_down:
            # 向下移动时，新帧在下方
            self.canvas[new_y:new_y+self.frame_h, new_x:new_x+self.frame_w] = frame
        else:
            # 其他情况，新帧在上方
            # 先保存当前区域
            current_region = self.canvas[new_y:new_y+self.frame_h, new_x:new_x+self.frame_w].copy()
            # 放置新帧
            self.canvas[new_y:new_y+self.frame_h, new_x:new_x+self.frame_w] = frame
            # 将非零区域的原内容覆盖回来
            mask = (current_region != 0).any(axis=2)
            self.canvas[new_y:new_y+self.frame_h, new_x:new_x+self.frame_w][mask] = current_region[mask]
        
        # 更新位置信息（保持浮点数精度）
        self.current_x = new_x_float
        self.current_y = new_y_float
        self.min_x = min(self.min_x, new_x)
        self.max_x = max(self.max_x, new_x + self.frame_w)
        self.min_y = min(self.min_y, new_y)
        self.max_y = max(self.max_y, new_y + self.frame_h)
        
        # 添加当前帧到追踪器
        self.frame_tracker.add_frame(
            (new_x_float, new_y_float),
            curr_kps,
            curr_descs
        )

    def get_result(self):
        """获取最终结果"""
        return self.canvas[self.min_y:self.max_y, self.min_x:self.max_x]
    


In [4]:
def calculate_movement(img1, img2):
    """计算两帧之间的运动方向和幅度"""
    h, w = img1.shape[:2]
    
    sift = cv2.SIFT_create()
    keypoints1, descriptors1 = sift.detectAndCompute(img1, None)
    keypoints2, descriptors2 = sift.detectAndCompute(img2, None)
    
    print(f"\n== 调试信息 ==")
    print(f"图像尺寸: {w}x{h}")
    print(f"检测到的特征点数量: 帧1={len(keypoints1)}, 帧2={len(keypoints2)}")
    
    if descriptors1 is None or descriptors2 is None:
        print("未检测到特征点")
        return None, None
    
    # 特征匹配
    bf = cv2.BFMatcher()
    matches = bf.knnMatch(descriptors1, descriptors2, k=2)
    
    # 收集特征点位置信息用于分析分布
    frame1_points = []
    frame2_points = []
    
    good_matches = []
    for m, n in matches:
        if m.distance < 0.75 * n.distance:
            good_matches.append(m)
            pt1 = keypoints1[m.queryIdx].pt
            pt2 = keypoints2[m.trainIdx].pt
            frame1_points.append(pt1)
            frame2_points.append(pt2)
    
    print(f"良好匹配点数量: {len(good_matches)}")
            
    if len(good_matches) < 10:
        print("匹配点数量不足")
        return None, None
        
    # 计算平均移动向量
    movements = []
    for match in good_matches:
        pt1 = np.array(keypoints1[match.queryIdx].pt)
        pt2 = np.array(keypoints2[match.trainIdx].pt)
        movement = pt2 - pt1
        movements.append(movement)
    
    mean_movement = np.mean(movements, axis=0)
    dx, dy = mean_movement
    
    # 计算角度（0度为正上方，顺时针旋转）
    angle = math.degrees(math.atan2(-dy, dx))  # 使用-dy是因为图像坐标系y轴向下
    angle = (angle + 90) % 360
    
    # 计算移动幅度
    magnitude = math.sqrt(dx*dx + dy*dy)
    
    # 判断主要移动方向
    move_direction = "横向" if abs(dx) > abs(dy) else "纵向"
    print(f"主要移动方向: {move_direction}")
    print(f"移动角度: {angle:.2f}°")
    print(f"移动幅度: {magnitude:.2f}")
    print("================\n")
    
    return angle, magnitude


In [5]:
@dataclass
class PerformanceStats:
    """性能统计数据类"""
    total_time: float
    avg_frame_time: float
    max_memory: float
    avg_memory: float
    total_frames: int
    memory_readings: List[float]

class PerformanceMonitor:
    """性能监控类"""
    def __init__(self):
        self.start_time = time.time()
        self.process_end_time = None
        self.max_memory = 0
        self.memory_readings = []
        self.frame_count = 0

    def stop_timer(self):
        """停止计时"""
        self.process_end_time = time.time()

    def update(self) -> None:
        """更新性能监控数据"""
        current_memory = self._get_memory_usage()
        self.memory_readings.append(current_memory)
        self.max_memory = max(self.max_memory, current_memory)
        self.frame_count += 1

    def get_stats(self) -> PerformanceStats:
        """获取性能统计数据"""
        if self.process_end_time is None:
            self.stop_timer()
        total_time = self.process_end_time - self.start_time
        avg_memory = sum(self.memory_readings) / len(self.memory_readings) if self.memory_readings else 0
        
        return PerformanceStats(
            total_time=total_time,
            avg_frame_time=total_time/self.frame_count if self.frame_count else 0,
            max_memory=self.max_memory,
            avg_memory=avg_memory,
            total_frames=self.frame_count,
            memory_readings=self.memory_readings
        )

    @staticmethod
    def _get_memory_usage() -> float:
        """获取当前进程的内存使用量（MB）"""
        process = psutil.Process(os.getpid())
        return process.memory_info().rss / 1024 / 1024

    @staticmethod
    def print_stats(stats: PerformanceStats) -> None:
        """打印性能统计信息"""
        print(f"\n=== 性能统计 ===")
        print(f"总处理时间: {stats.total_time:.2f} 秒")
        print(f"平均每帧处理时间: {stats.avg_frame_time:.2f} 秒")
        print(f"最大内存使用: {stats.max_memory:.2f} MB")
        print(f"平均内存使用: {stats.avg_memory:.2f} MB")
        print(f"总处理帧数: {stats.total_frames}")
        print("================")


In [6]:
class DisplayManager:
    """显示管理类"""
    def __init__(self, max_width: int = 1920, max_height: int = 1080):
        self.MAX_DISPLAY_WIDTH = max_width
        self.MAX_DISPLAY_HEIGHT = max_height
        self.windows = {}

    def create_window(self, name: str) -> None:
        """创建命名窗口"""
        if name not in self.windows:
            cv2.namedWindow(name, cv2.WINDOW_NORMAL)
            self.windows[name] = True

    def resize_to_screen(self, image: np.ndarray) -> Tuple[np.ndarray, int, int]:
        """调整图像大小以适应屏幕"""
        height, width = image.shape[:2]
        width_ratio = self.MAX_DISPLAY_WIDTH / width
        height_ratio = self.MAX_DISPLAY_HEIGHT / height
        scale = min(width_ratio, height_ratio, 1.0)

        if scale < 1.0:
            new_width = int(width * scale)
            new_height = int(height * scale)
            resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
            return resized, new_width, new_height
        return image, width, height

    def show_progress(self, image: np.ndarray) -> None:
        """显示处理进度"""
        window_name = 'Panorama Progress'
        self.create_window(window_name)
        display_result, win_width, win_height = self.resize_to_screen(image)
        try:
            cv2.resizeWindow(window_name, win_width, win_height)
            cv2.imshow(window_name, display_result)
        except cv2.error as e:
            print(f"Warning: Failed to resize/show window '{window_name}': {str(e)}")

    def show_final_result(self, image: np.ndarray) -> None:
        """显示最终结果"""
        window_name = 'Final Panorama'
        self.create_window(window_name)
        display_result, win_width, win_height = self.resize_to_screen(image)
        try:
            cv2.resizeWindow(window_name, win_width, win_height)
            cv2.imshow(window_name, display_result)
        except cv2.error as e:
            print(f"Warning: Failed to resize/show window '{window_name}': {str(e)}")

    def cleanup(self) -> None:
        """清理显示资源"""
        # 逐个关闭窗口
        for window_name in self.windows:
            try:
                cv2.destroyWindow(window_name)
            except cv2.error:
                pass
        self.windows.clear()
        
        # 最后调用destroyAllWindows确保清理
        cv2.destroyAllWindows()
        # 给系统一些时间来处理窗口销毁
        cv2.waitKey(1)

class VideoProcessor:
    """视频处理类"""
    def __init__(self, video_path: str):
        self.video_path = video_path
        self.cap = cv2.VideoCapture(video_path)
        if not self.cap.isOpened():
            raise ValueError("无法打开视频文件")

    def set_start_frame(self, start_frame: int) -> None:
        """设置起始帧"""
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

    def read_frame(self, skip_frames: int = 0) -> Tuple[bool, Optional[np.ndarray]]:
        """读取帧"""
        # 跳过指定数量的帧
        for _ in range(skip_frames):
            ret = self.cap.grab()
            if not ret:
                return False, None

        # 读取当前帧
        ret, frame = self.cap.read()
        return ret, frame if ret else None

    def cleanup(self) -> None:
        """清理视频资源"""
        if self.cap is not None:
            self.cap.release()
            self.cap = None


In [7]:
def main(video_path: str, frame_interval: int, start_frame: int = 1):
    """主函数
    Args:
        video_path: 视频文件路径
        frame_interval: 处理帧间隔
        start_frame: 起始帧位置（默认为1）
    """
    OUTPUT_DIR = "output"

    # 创建输出目录
    Path(OUTPUT_DIR).mkdir(exist_ok=True)

    # 初始化各个模块
    perf_monitor = PerformanceMonitor()
    display_manager = DisplayManager()
    video_processor = VideoProcessor(video_path)

    try:
        # 设置起始帧
        video_processor.set_start_frame(start_frame)

        # 读取第一帧
        ret, prev_frame = video_processor.read_frame()
        if not ret:
            raise ValueError("无法读取第一帧")

        # 初始化全景图
        panorama = Panorama(prev_frame)

        # 显示初始状态
        display_manager.show_progress(panorama.get_result())
        display_manager.create_window('Final Panorama')  # 预创建最终结果窗口

        while True:
            # 检查是否按下 'q' 键退出
            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("\n用户终止处理")
                break

            # 读取当前帧
            ret, curr_frame = video_processor.read_frame(frame_interval - 1)
            if not ret:
                break

            # 计算运动
            angle, magnitude = calculate_movement(prev_frame, curr_frame)
            if angle is None or magnitude is None:
                print("\n无法计算帧间运动，跳过当前帧")
                continue

            # 添加到全景图
            try:
                panorama.add_frame(curr_frame, angle, magnitude)
                print(f"\r处理第 {perf_monitor.frame_count} 帧 - 方向: {angle:.1f}°, 幅度: {magnitude:.1f}", end="")
                display_manager.show_progress(panorama.get_result())
            except Exception as e:
                print(f"\n处理帧时出错: {str(e)}")
                break

            # 更新性能监控
            perf_monitor.update()

            # 更新前一帧
            prev_frame = curr_frame.copy()

            # 定期清理内存
            if perf_monitor.frame_count % 100 == 0:
                gc.collect()

        # 停止性能计时并保存结果
        perf_monitor.stop_timer()
        result = panorama.get_result()
        cv2.imwrite(f'{OUTPUT_DIR}/panorama.jpg', result)
        print(f"\n处理完成，共处理 {perf_monitor.frame_count} 帧")

        # 显示最终结果和性能统计
        display_manager.show_final_result(result)
        print("\n按任意键关闭窗口...")
        cv2.waitKey(0)

        # 打印性能统计
        stats = perf_monitor.get_stats()
        PerformanceMonitor.print_stats(stats)

    finally:
        # 清理资源
        video_processor.cleanup()
        display_manager.cleanup()
        gc.collect()



In [8]:
if __name__ == "__main__":
    video_path = "video/4.mp4"  # 替换为实际的视频路径
    frame_interval = 10
    main(video_path, frame_interval)


== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=11571, 帧2=12216
良好匹配点数量: 4434
主要移动方向: 纵向
移动角度: 1.95°
移动幅度: 73.95

处理第 0 帧 - 方向: 1.9°, 幅度: 74.0
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=12216, 帧2=12893
良好匹配点数量: 4643
主要移动方向: 纵向
移动角度: 358.85°
移动幅度: 72.98

处理第 1 帧 - 方向: 358.9°, 幅度: 73.0
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=12893, 帧2=13225
良好匹配点数量: 4996
主要移动方向: 纵向
移动角度: 359.58°
移动幅度: 75.07

处理第 2 帧 - 方向: 359.6°, 幅度: 75.1
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=13225, 帧2=14018
良好匹配点数量: 5150
主要移动方向: 纵向
移动角度: 0.51°
移动幅度: 74.67

处理第 3 帧 - 方向: 0.5°, 幅度: 74.7
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=14018, 帧2=14468
良好匹配点数量: 5319
主要移动方向: 纵向
移动角度: 0.04°
移动幅度: 73.69

处理第 4 帧 - 方向: 0.0°, 幅度: 73.7
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=14468, 帧2=14776
良好匹配点数量: 5072
主要移动方向: 纵向
移动角度: 1.49°
移动幅度: 72.50

处理第 5 帧 - 方向: 1.5°, 幅度: 72.5
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧1=14776, 帧2=14898
良好匹配点数量: 5293
主要移动方向: 纵向
移动角度: 0.85°
移动幅度: 73.35

处理第 6 帧 - 方向: 0.9°, 幅度: 73.4
== 调试信息 ==
图像尺寸: 1920x1080
检测到的特征点数量: 帧