# Colab：Deep Online Fused Video Stabilization 推理与指标

说明：本 notebook 直接在 Colab GPU 上运行，完成仓库克隆、依赖安装、示例数据下载、推理、以及原视频与稳定视频的简单稳定性指标对比并导出 CSV。每个代码块按顺序执行即可。


## 环境要求
- 在 Colab 菜单选择：Runtime → Change runtime type → Hardware accelerator 选 GPU。
- 每个代码块都是独立单元，按顺序运行。


In [None]:
# Clone the repository (shallow clone, depth 1) and change into its dvs directory
!git clone --depth 1 https://github.com/googleinterns/deep-stabilization.git
%cd deep-stabilization/dvs

# Show the current working directory and its contents as a sanity check
!pwd
!ls


Cloning into 'deep-stabilization'...
remote: Enumerating objects: 78, done.[K
remote: Counting objects: 100% (78/78), done.[K
remote: Compressing objects: 100% (68/68), done.[K
remote: Total 78 (delta 8), reused 47 (delta 6), pack-reused 0 (from 0)[K
Receiving objects: 100% (78/78), 39.71 MiB | 25.69 MiB/s, done.
Resolving deltas: 100% (8/8), done.
/content/deep-stabilization/dvs
/content/deep-stabilization/dvs
checkpoint  flownet2		       loss.py	   requirements.txt
conf	    gyro		       metrics.py  train.py
data	    inference.py	       model.py    util.py
dataset.py  load_frame_sensor_data.py  printer.py  warp


In [None]:
# Install dependencies (avoiding compatibility problems with old pinned versions)
!pip -q install --upgrade pip
!pip -q install colorama ffmpeg imageio matplotlib pytz PyYAML tensorboardX tqdm pillow pandas
!pip -q install opencv-python-headless scikit-image
# scipy 1.10.1 has no wheel for Python 3.12, so use the compatible 1.11.x series
!pip -q install "scipy==1.11.4"

import torch, torchvision, sys, os
# Report the installed PyTorch version and whether a CUDA device is usable
print('PyTorch:', torch.__version__, 'CUDA available:', torch.cuda.is_available())


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/1.8 MB[0m [31m18.2 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m30.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for ffmpeg (pyproject.toml) ... [?25l[?25hdone
[31mERROR: Ignored the following yanked versions: 1.11.0, 1.14.0rc1[0m[31m
[0m[31mERROR: Ignored the following versions that require a different python version: 1.10.0 Requires-Python >=3.8,<3.12; 1.10.0rc1 Requires-Python >=3.8,<3.12; 1.10.0rc2 Requires-Python >=3.8,<3.12; 1.10.1 Requires-Python >=3.8,<3.12; 1.6.2 Requires-Python >=3.7,<3.10; 1.6.3 Requires-Python >=3.7,<3.10; 1.7.0 Re

In [None]:
# Download the sample data (the sample video provided by the authors)
!pip -q install gdown

# Remove any previous download so the cell can be re-run from a clean state
!rm -rf video sample_video.zip
!gdown --fuzzy -O sample_video.zip "https://drive.google.com/file/d/1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG/view?usp=sharing"
!unzip -q sample_video.zip

# List what was extracted into ./video
!ls video


Downloading...
From (original): https://drive.google.com/uc?id=1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG
From (redirected): https://drive.google.com/uc?id=1PpF3-6BbQKy9fldjIfwa5AlbtQflx3sG&confirm=t&uuid=07b1c4d0-3dcd-4f0a-a66b-f0cd7dba2407
To: /content/deep-stabilization/dvs/sample_video.zip
100% 914M/914M [00:13<00:00, 68.4MB/s]
s_114_outdoor_running_trail_daytime


In [None]:
# Check / prepare the 5 sequences that inference will run on
import os, shutil

DESIRED_VIDEO_COUNT = 5

# Every sub-directory of ./video counts as one sequence (sorted for a stable order)
seq_dirs = []
for entry in sorted(os.listdir("./video")):
    if os.path.isdir(os.path.join("video", entry)):
        seq_dirs.append(entry)
print(f"当前检测到 {len(seq_dirs)} 个序列:", seq_dirs)

if DESIRED_VIDEO_COUNT > len(seq_dirs):
    print(f"⚠️ 还差 {DESIRED_VIDEO_COUNT - len(seq_dirs)} 个序列。请将包含原视频及传感器/光流文件的文件夹解压到 ./video 下（结构与示例序列一致）。")
    print("如需要，可启用下方的重复样本开关，仅用于验证流程：")

# Set to True to pad the list with copies of the first sequence (pipeline check only)
ALLOW_DUPLICATE_SAMPLE = False
if ALLOW_DUPLICATE_SAMPLE and seq_dirs and len(seq_dirs) < DESIRED_VIDEO_COUNT:
    sample_src = os.path.join("video", seq_dirs[0])
    while len(seq_dirs) < DESIRED_VIDEO_COUNT:
        dup_name = f"{seq_dirs[0]}_dup{len(seq_dirs)}"
        dup_dst = os.path.join("video", dup_name)
        # A copy left over from an earlier run is reused instead of being copied again
        if not os.path.exists(dup_dst):
            shutil.copytree(sample_src, dup_dst)
            print("已复制示例序列到", dup_dst)
        seq_dirs.append(dup_name)
print("最终序列列表:", seq_dirs)


In [None]:
# （可选）数据可视化/预处理示例：生成传感器曲线等
# 如果不需要可跳过此单元
!python load_frame_sensor_data.py --config ./conf/stabilzation.yaml --dir_path ./video


Traceback (most recent call last):
  File "/content/deep-stabilization/dvs/load_frame_sensor_data.py", line 118, in <module>
    main(args = args)
  File "/content/deep-stabilization/dvs/load_frame_sensor_data.py", line 100, in main
    cf = yaml.load(open(config_file, 'r'))
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: load() missing 1 required positional argument: 'Loader'


In [None]:
# 运行推理（使用仓库自带的预训练模型）
# 默认会读取 ./checkpoint/stabilzation/stabilzation_last.checkpoint
!python inference.py --config ./conf/stabilzation.yaml --dir_path ./video

# 查看输出文件（稳定后视频和曲线）
!ls test/stabilzation


Traceback (most recent call last):
  File "/content/deep-stabilization/dvs/inference.py", line 217, in <module>
    main(args = args)
  File "/content/deep-stabilization/dvs/inference.py", line 190, in main
    cf = yaml.load(open(config_file, 'r'))
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: load() missing 1 required positional argument: 'Loader'
ls: cannot access 'test/stabilzation': No such file or directory


## 计算稳定性指标（原视频 vs 稳定视频）
新增更通用、可比较的指标，并对多视频做统计：
- `mean_flow_*`/`std_flow_*`：对每对相邻帧计算 Farneback 光流模长的中位数，再取这些逐帧中位数在整段视频上的均值/标准差。
- `flow_p95_*`/`flow_iqr_*`：上述逐帧光流中位数序列的 95 分位与四分位距，衡量抖动尾部与稳定性。
- `flow_jitter_std_*`：逐帧光流中位数的一阶差分标准差，反映抖动突变。
- `temporal_mse_*`：相邻帧像素 MSE，越小越平滑。
- `temporal_ssim_*`：相邻帧 SSIM，越大越平滑。
- `stability_gain`：`(orig_mean_flow - stab_mean_flow) / orig_mean_flow`。
结果会写到 `test/stabilzation/stabilization_metrics.csv`，并生成跨 5 段视频的统计 `stabilization_metrics_summary.csv`。


In [None]:
import os, cv2, numpy as np, pandas as pd
from typing import List, Tuple
from skimage.metrics import structural_similarity as ssim


def load_video(path: str, max_frames: int = None, resize: float = 1.0) -> Tuple[List[np.ndarray], float, Tuple[int, int]]:
    """Decode a video file into a list of in-memory frames.

    Args:
        path: Video file opened with ``cv2.VideoCapture``.
        max_frames: Stop after this many frames. ``None`` (or 0) reads until
            the capture stops returning frames.
        resize: Scale factor applied to both axes of every frame; ``1.0``
            leaves frames untouched.

    Returns:
        ``(frames, fps, (width, height))``. ``fps`` is whatever the capture
        reports for ``CAP_PROP_FPS``. The size is taken from the first
        returned frame (i.e. after resizing) and is ``(0, 0)`` when no frame
        could be read.
    """
    cap = cv2.VideoCapture(path)
    frames = []
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if resize != 1.0:
                frame = cv2.resize(frame, (0, 0), fx=resize, fy=resize, interpolation=cv2.INTER_LINEAR)
            frames.append(frame)
            if max_frames and len(frames) >= max_frames:
                break
    finally:
        # Release the capture even if decoding or resizing raised.
        cap.release()
    size = (frames[0].shape[1], frames[0].shape[0]) if frames else (0, 0)
    return frames, fps, size


def flow_stats(frames: List[np.ndarray]):
    """Summarise frame-to-frame motion with dense Farneback optical flow.

    For each pair of consecutive frames the median flow magnitude is computed;
    the returned statistics describe that sequence of per-pair medians.

    Args:
        frames: Frames in BGR channel order (they are converted with
            ``cv2.COLOR_BGR2GRAY``).

    Returns:
        A dict with float values under the keys ``mean``, ``std``, ``p95``,
        ``iqr`` (75th minus 25th percentile) and ``jitter_std`` (standard
        deviation of the first difference of the medians). Every value is
        ``0.0`` when fewer than two frames are given.
    """
    if len(frames) < 2:
        return {
            "mean": 0.0,
            "std": 0.0,
            "p95": 0.0,
            "iqr": 0.0,
            "jitter_std": 0.0,
        }

    med_per_frame = []
    # Carry the previous grayscale frame forward so each frame is converted once.
    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
    for frame in frames[1:]:
        next_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        # Positional arguments after the flow placeholder: pyr_scale=0.5, levels=3,
        # winsize=15, iterations=3, poly_n=5, poly_sigma=1.2, flags=0.
        flow = cv2.calcOpticalFlowFarneback(prev_gray, next_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        mag, _ = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        med_per_frame.append(np.median(mag))
        prev_gray = next_gray

    arr = np.array(med_per_frame)
    diffs = np.diff(arr)
    return {
        "mean": float(np.mean(arr)),
        "std": float(np.std(arr)),
        "p95": float(np.percentile(arr, 95)),
        "iqr": float(np.percentile(arr, 75) - np.percentile(arr, 25)),
        # With exactly two frames there is a single median and therefore no difference.
        "jitter_std": float(np.std(diffs)) if len(diffs) > 0 else 0.0,
    }


def temporal_mse(frames: List[np.ndarray]) -> float:
    """Return the mean squared pixel difference between consecutive frames.

    Each adjacent pair is compared in float32 and the per-pair means are
    averaged. Returns ``0.0`` when there is no adjacent pair.
    """
    per_pair = [
        np.mean((current.astype(np.float32) - following.astype(np.float32)) ** 2)
        for current, following in zip(frames, frames[1:])
    ]
    if not per_pair:
        return 0.0
    return float(np.mean(per_pair))


def temporal_ssim(frames: List[np.ndarray]) -> float:
    """Return the average SSIM between consecutive frames.

    Frames are converted with ``cv2.COLOR_BGR2GRAY`` and each adjacent pair is
    scored with ``ssim(..., data_range=255)``.

    Args:
        frames: Frames in BGR channel order.

    Returns:
        The mean of the per-pair scores, or ``0.0`` when fewer than two
        frames are given.
    """
    if len(frames) < 2:
        return 0.0

    vals = []
    # Carry the previous grayscale frame forward so each frame is converted once.
    prev_gray = cv2.cvtColor(frames[0], cv2.COLOR_BGR2GRAY)
    for frame in frames[1:]:
        next_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        vals.append(ssim(prev_gray, next_gray, data_range=255))
        prev_gray = next_gray
    return float(np.mean(vals))


def compare_one(orig_path: str, stab_path: str, max_frames: int = 600, resize: float = 0.5):
    """Compute stability metrics for one original / stabilized video pair.

    Both videos are loaded with the same ``max_frames`` and ``resize``
    settings and cut to the shorter of the two frame counts, so every metric
    is computed over the same number of frames on both sides.

    Returns:
        A flat dict with the two paths, the frame count used, both FPS values,
        the ``*_orig`` / ``*_stab`` metric pairs, and ``stability_gain``
        (relative reduction of the mean flow; ``0.0`` when the original mean
        flow is zero).

    Raises:
        ValueError: If fewer than two frames are available in either video.
    """
    source_frames, source_fps, _ = load_video(orig_path, max_frames=max_frames, resize=resize)
    output_frames, output_fps, _ = load_video(stab_path, max_frames=max_frames, resize=resize)

    shared = min(len(source_frames), len(output_frames))
    if shared < 2:
        raise ValueError(f"帧数不足，orig={len(source_frames)}, stab={len(output_frames)}")
    source_frames, output_frames = source_frames[:shared], output_frames[:shared]

    source_flow = flow_stats(source_frames)
    output_flow = flow_stats(output_frames)
    source_mse = temporal_mse(source_frames)
    output_mse = temporal_mse(output_frames)
    source_ssim = temporal_ssim(source_frames)
    output_ssim = temporal_ssim(output_frames)

    baseline = source_flow["mean"]
    if baseline == 0:
        gain = 0.0
    else:
        gain = (baseline - output_flow["mean"]) / baseline

    # Insertion order determines the column order of the exported CSV.
    record = {
        "orig_video": orig_path,
        "stab_video": stab_path,
        "num_frames_used": shared,
        "fps_orig": source_fps,
        "fps_stab": output_fps,
    }
    record.update({
        "mean_flow_orig": source_flow["mean"],
        "mean_flow_stab": output_flow["mean"],
        "std_flow_orig": source_flow["std"],
        "std_flow_stab": output_flow["std"],
        "flow_p95_orig": source_flow["p95"],
        "flow_p95_stab": output_flow["p95"],
        "flow_iqr_orig": source_flow["iqr"],
        "flow_iqr_stab": output_flow["iqr"],
        "flow_jitter_std_orig": source_flow["jitter_std"],
        "flow_jitter_std_stab": output_flow["jitter_std"],
    })
    record.update({
        "temporal_mse_orig": source_mse,
        "temporal_mse_stab": output_mse,
        "temporal_ssim_orig": source_ssim,
        "temporal_ssim_stab": output_ssim,
    })
    record["stability_gain"] = gain
    return record


def run_all(video_root: str = "./video", stab_root: str = "./test/stabilzation", desired_count: int = 5):
    """Compute metrics for every sequence and export per-video and summary CSVs.

    Each sub-directory of ``video_root`` is treated as one sequence. Its
    original video is the first (alphabetically) ``.mp4`` whose name does not
    contain "gimbal"; the stabilized counterpart is expected at
    ``<stab_root>/<seq>_stab.mp4``. Sequences lacking either file, or whose
    comparison raises, are reported and skipped.

    Args:
        video_root: Directory holding one sub-directory per sequence.
        stab_root: Directory holding the stabilized videos; the CSV files are
            written here as well.
        desired_count: Maximum number of sequences to process (a warning is
            printed when fewer exist). ``None`` processes all of them.

    Returns:
        ``(df, summary_df)``: the per-video metrics and their
        mean/std/min/max across videos, or ``(None, None)`` when no sequence
        produced metrics.
    """
    records = []
    seq_list = [d for d in sorted(os.listdir(video_root)) if os.path.isdir(os.path.join(video_root, d))]
    if desired_count is not None:
        if len(seq_list) < desired_count:
            print(f"⚠️ 当前只有 {len(seq_list)} 个序列，少于期望的 {desired_count} 个，请补齐后重跑以避免偶然性。")
        seq_list = seq_list[:desired_count]
    for seq in seq_list:
        seq_dir = os.path.join(video_root, seq)
        # os.listdir order is arbitrary; sort so the same file is picked on every run.
        mp4s = sorted(f for f in os.listdir(seq_dir) if f.lower().endswith(".mp4") and "gimbal" not in f.lower())
        if not mp4s:
            print(f"跳过 {seq}，未找到 mp4")
            continue
        orig_path = os.path.join(seq_dir, mp4s[0])
        stab_path = os.path.join(stab_root, f"{seq}_stab.mp4")
        if not os.path.exists(stab_path):
            print(f"跳过，未找到稳定视频: {stab_path}")
            continue
        # Best effort per sequence: one failing video must not abort the whole batch.
        try:
            rec = compare_one(orig_path, stab_path)
            rec["seq"] = seq
            records.append(rec)
            print(f"done: {seq}")
        except Exception as e:
            print(f"失败 {seq}: {e}")
    if not records:
        print("未生成任何指标")
        return None, None
    df = pd.DataFrame(records)
    out_path = os.path.join(stab_root, "stabilization_metrics.csv")
    df.to_csv(out_path, index=False)
    print("保存:", out_path)

    # Aggregate only the numeric metric columns; identifiers and paths are excluded.
    metric_cols = [c for c in df.columns if c not in {"seq", "orig_video", "stab_video"}]
    summary_df = df[metric_cols].agg(["mean", "std", "min", "max"])
    summary_path = os.path.join(stab_root, "stabilization_metrics_summary.csv")
    summary_df.to_csv(summary_path)
    print("保存统计:", summary_path)
    return df, summary_df


# Run the metric computation with the default directories
metrics_df, metrics_summary = run_all()
# Bare expression as the last line so the notebook renders the per-video table
metrics_df


跳过，未找到稳定视频: ./test/stabilzation/s_114_outdoor_running_trail_daytime_stab.mp4
未生成任何指标


In [None]:
# Dependency fix: replace yaml.load with yaml.safe_load (avoids the missing-Loader error)
!sed -i "s/yaml.load(open(config_file, 'r'))/yaml.safe_load(open(config_file, 'r'))/g" inference.py load_frame_sensor_data.py

# Create the log directory (fixes a FileNotFoundError)
!mkdir -p log

# Re-run inference (so that the test/stabilzation directory is generated)
!python inference.py --config ./conf/stabilzation.yaml --dir_path ./video

# Recompute the metrics (only if run_all has already been defined)
import os
try:
    if 'run_all' in globals():
        print("正在重新计算指标...")
        metrics_df, metrics_summary = run_all(desired_count=5)
        # run_all returns (None, None) when no sequence produced metrics
        if metrics_df is not None:
            display(metrics_df)
        if metrics_summary is not None:
            print("跨视频统计：")
            display(metrics_summary)
    else:
        print("run_all 函数未定义，跳过指标计算。请确保已运行上一个代码块。")
except Exception as e:
    # Report the failure with a traceback but let the cell continue to the packaging step
    print(f"指标计算出错: {e}")
    import traceback
    traceback.print_exc()

# Package the inference results and metrics and offer them for download
!zip -qr results_stabilzation.zip test/stabilzation
from google.colab import files
if os.path.exists('results_stabilzation.zip'):
    files.download('results_stabilzation.zip')
else:
    print("错误：无法找到 results_stabilzation.zip，请检查推理步骤是否成功。")


Running Inference: 1/1
------Load Pretrined Model--------
./checkpoint/stabilzation/stabilzation_last.checkpoint
-----------Load Dataset----------
./video/s_114_outdoor_running_trail_daytime
frame: (511, 7)    gyro: (3746, 5)    ois: (3916, 3)    flo_path: 470    flo_shape: (270, 480, 2)    
Fininsh Load data
  grid_t = torch.nn.functional.upsample_bilinear(grid_t, size = (h, w)) # [B,C(xy),H,W]
  grid_t_1 = torch.nn.functional.upsample_bilinear(grid_t_1, size = (h, w)) # [B,C(xy),H,W]
Step: 100/470
Step: 200/470
Step: 300/470
Step: 400/470

Loss: follow, angle, smooth, c2_smooth, undefine, optical
[8.4722955e-03 1.9884116e-03 1.2372581e-05 1.1924115e-05 1.0951313e-03
 8.4513842e-05] 

(471, 5)
Time_used: 0.8719 minutes
------Start Warping Video--------
------Start Visual Result--------
(1080, 1920, 3)
Video length:  470
Frame: 0/470
Frame: 100/470
Frame: 200/470
Frame: 300/470
Frame: 400/470
正在重新计算指标...
done: s_114_outdoor_running_trail_daytime
保存: ./test/stabilzation/stabilization_me

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

从 notebook 的推理日志可知示例视频（s_114_outdoor_running_trail_daytime）的主要规格：
分辨率：1920 x 1080（日志里的 (1080, 1920, 3)）。
帧数：约 470 帧（日志显示 Video length: 470）。
光流帧尺寸：270 x 480 x 2（用于模型的 flo 输入）。
gyro/ois/帧对齐后数据行数：frame: (511, 7) gyro: (3746, 5) ois: (3916, 3)（输入元数据）。


In [None]:
# 查看视频 FPS / 帧数 / 分辨率
import os, cv2, pandas as pd

def list_video_fps(video_root: str = "./video"):
    """Display FPS, frame count and resolution for every mp4 under ``video_root``.

    Each sub-directory of ``video_root`` is scanned for files ending in
    ``.mp4`` (case-insensitive). The values are read from the container via
    ``cv2.VideoCapture`` properties, without decoding any frame. A message is
    printed instead when no mp4 is found.
    """
    table = []
    for seq in sorted(os.listdir(video_root)):
        folder = os.path.join(video_root, seq)
        if os.path.isdir(folder):
            for name in os.listdir(folder):
                if not name.lower().endswith(".mp4"):
                    continue
                capture = cv2.VideoCapture(os.path.join(folder, name))
                info = {
                    "seq": seq,
                    "file": name,
                    "fps": capture.get(cv2.CAP_PROP_FPS),
                    "frames": capture.get(cv2.CAP_PROP_FRAME_COUNT),
                    "width": capture.get(cv2.CAP_PROP_FRAME_WIDTH),
                    "height": capture.get(cv2.CAP_PROP_FRAME_HEIGHT),
                }
                capture.release()
                table.append(info)
    if not table:
        print("未找到 mp4 文件，请先解压/放置到 ./video")
        return
    display(pd.DataFrame(table))

# Show the table for the default ./video directory
list_video_fps()


In [None]:
import os
import pandas as pd

# Files that run_all writes with its default stab_root: per-video metrics and cross-video summary
csv_path = './test/stabilzation/stabilization_metrics.csv'
summary_path = './test/stabilzation/stabilization_metrics_summary.csv'

if not os.path.exists(csv_path):
    print("未找到指标文件，请确认上一步是否执行成功。")
else:
    # Widen pandas' display limits so every metric column is shown
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)
    df_metrics = pd.read_csv(csv_path)
    display(df_metrics)

if not os.path.exists(summary_path):
    print("未找到统计文件，请确认 run_all 已运行。")
else:
    # The first CSV column holds the statistic names (mean/std/min/max) and becomes the index
    df_summary = pd.read_csv(summary_path, index_col=0)
    print("跨视频统计：")
    display(df_summary)

Unnamed: 0,orig_video,stab_video,num_frames_used,fps_orig,fps_stab,mean_flow_orig,mean_flow_stab,std_flow_orig,std_flow_stab,temporal_mse_orig,temporal_mse_stab,stability_gain,seq
0,./video/s_114_outdoor_running_trail_daytime/Co...,./test/stabilzation/s_114_outdoor_running_trai...,470,30.020508,30.02,9.107697,3.28857,7.322388,1.550897,1093.572266,1028.535034,0.638924,s_114_outdoor_running_trail_daytime
