# VideoLingo Colab (批处理 / 无 WebUI)

本 Notebook 用于在 Colab 的 T4 GPU 上按步骤运行 VideoLingo 的批处理模式（不使用 Streamlit WebUI）。
输出结果会保存在 `batch/output/`。

准备：LLM/ASR/TTS 的 API Key（若使用云端服务）。


## 0. 选择 T4 GPU
在 Colab 菜单：`Runtime -> Change runtime type -> Hardware accelerator` 选择 **T4 GPU**。


In [None]:
# Print the GPU/driver status so the selected runtime accelerator can be checked.
!nvidia-smi


## 1. 克隆仓库


In [None]:
# If /content/VideoLingo already exists, update it with `git pull`; otherwise clone the repository there.
!bash -lc 'if [ -d /content/VideoLingo ]; then cd /content/VideoLingo && git pull; else git clone https://github.com/MAE5blog/VideoLingo.git /content/VideoLingo; fi'
# Switch the kernel's working directory to the checkout; later cells use paths relative to it.
%cd /content/VideoLingo


## 2. 安装依赖（约 5-10 分钟）
包括系统依赖（ffmpeg/字体）与 Python 依赖，并安装 GGUF 翻译模型所需的 `llama_cpp`。


In [None]:
!apt-get update -y
!apt-get install -y ffmpeg fonts-noto-cjk libsndfile1 build-essential cmake ninja-build

!python -m pip install -U pip
!grep -vE '^torch' requirements.txt > /tmp/requirements_no_torch.txt
!python -m pip install -r /tmp/requirements_no_torch.txt

# 兜底安装：requirements 里可能因网络失败导致缺包
!python -m pip install json-repair

# 兜底安装：WhisperX 依赖（ctranslate2/faster-whisper）
!python -m pip install ctranslate2==4.4.0 tokenizers>=0.14.0
# 如果上面的 ctranslate2 安装失败，可启用下面这行预编译包
# !python -m pip install https://oplist.mae5.com/d/gdrive_lz26xg/share/ctranslate2-4.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl?sign=s08ZZHeakHFTTevv8Vja5I-6HPXyT4ojOHMesEXpZUQ=:0

# 兜底安装：WhisperX（本地 ASR），避免 PyAV 编译失败（py3.12）
!python -m pip install av==12.0.0
!python -m pip install faster-whisper==1.0.0 --no-deps
!python -m pip install git+https://github.com/m-bain/whisperx.git@7307306a9d8dd0d261e588cc933322454f853853 --no-deps
!python -m pip install pyannote.audio==3.1.1

# 兜底安装：字幕时长估计依赖
!python -m pip install syllables pypinyin g2p-en

# 兜底安装：字幕对齐/纠错依赖
!python -m pip install autocorrect-py

# 兜底安装：Demucs（人声分离）
!python -m pip install -U "demucs>=4.0.1"

# torchaudio 新版本保存音频可能需要 torchcodec
!python -m pip install torchcodec

# Work around torchaudio builds that no longer provide set_audio_backend (Python 3.12).
#
# The `patch` source below is written to sitecustomize.py in the first
# site-packages directory, replacing any file already there. Python imports
# sitecustomize automatically at interpreter startup, so the shims apply to
# every new Python process (including the batch run started from a shell).
#
# What the patch source does:
#   * adds no-op / constant replacements for torchaudio.set_audio_backend,
#     get_audio_backend and list_audio_backends when they are missing;
#   * registers stub `torchaudio.backend` and `torchaudio.backend.common`
#     modules exposing the same three functions;
#   * aliases numpy's `NaN` to `nan` when `np.NaN` is missing.
# Both parts are wrapped in try/except so a failure never breaks interpreter startup.
import site
from pathlib import Path
patch = """try:
    import torchaudio
    import sys
    import types
    if not hasattr(torchaudio, 'set_audio_backend'):
        def _noop_backend(_backend):
            return None
        torchaudio.set_audio_backend = _noop_backend
    if not hasattr(torchaudio, 'get_audio_backend'):
        torchaudio.get_audio_backend = lambda: 'soundfile'
    if not hasattr(torchaudio, 'list_audio_backends'):
        torchaudio.list_audio_backends = lambda: ['soundfile']
    backend_mod = sys.modules.get('torchaudio.backend')
    if backend_mod is None:
        backend_mod = types.ModuleType('torchaudio.backend')
        sys.modules['torchaudio.backend'] = backend_mod
    if not hasattr(backend_mod, '__path__'):
        backend_mod.__path__ = []
    backend_mod.set_audio_backend = torchaudio.set_audio_backend
    backend_mod.get_audio_backend = torchaudio.get_audio_backend
    backend_mod.list_audio_backends = torchaudio.list_audio_backends
    if 'torchaudio.backend.common' not in sys.modules:
        common_mod = types.ModuleType('torchaudio.backend.common')
        common_mod.set_audio_backend = torchaudio.set_audio_backend
        common_mod.get_audio_backend = torchaudio.get_audio_backend
        common_mod.list_audio_backends = torchaudio.list_audio_backends
        sys.modules['torchaudio.backend.common'] = common_mod
except Exception:
    pass

try:
    import numpy as np
    if not hasattr(np, 'NaN'):
        np.NaN = np.nan
except Exception:
    pass"""
# Target: sitecustomize.py inside the first directory reported by site.getsitepackages().
dst = Path(site.getsitepackages()[0]) / 'sitecustomize.py'
# write_text overwrites the destination file with the patch source.
dst.write_text(patch)
print('sitecustomize patched:', dst)

# Apply the same compatibility shims to the already-running kernel, so cells
# executed before a runtime restart do not fail on the missing attributes.
# Both steps are best effort: a failure is reported but never aborts the cell.
try:
    import torchaudio
    import sys
    import types
    # Recreate the removed backend-selection API as harmless stand-ins.
    if not hasattr(torchaudio, 'set_audio_backend'):
        torchaudio.set_audio_backend = lambda *_args, **_kwargs: None
    if not hasattr(torchaudio, 'get_audio_backend'):
        torchaudio.get_audio_backend = lambda: 'soundfile'
    if not hasattr(torchaudio, 'list_audio_backends'):
        torchaudio.list_audio_backends = lambda: ['soundfile']
    # Provide a `torchaudio.backend` module for code that still imports it.
    backend_mod = sys.modules.get('torchaudio.backend')
    if backend_mod is None:
        backend_mod = types.ModuleType('torchaudio.backend')
        sys.modules['torchaudio.backend'] = backend_mod
    # An (empty) __path__ makes the stub look like a package, so importing
    # its `common` submodule resolves through sys.modules.
    if not hasattr(backend_mod, '__path__'):
        backend_mod.__path__ = []
    backend_mod.set_audio_backend = torchaudio.set_audio_backend
    backend_mod.get_audio_backend = torchaudio.get_audio_backend
    backend_mod.list_audio_backends = torchaudio.list_audio_backends
    if 'torchaudio.backend.common' not in sys.modules:
        common_mod = types.ModuleType('torchaudio.backend.common')
        common_mod.set_audio_backend = torchaudio.set_audio_backend
        common_mod.get_audio_backend = torchaudio.get_audio_backend
        common_mod.list_audio_backends = torchaudio.list_audio_backends
        sys.modules['torchaudio.backend.common'] = common_mod
except Exception as exc:
    # Report instead of silently swallowing, so a broken torchaudio install
    # is visible here rather than surfacing later as an unrelated error.
    print('torchaudio compatibility patch skipped:', repr(exc))
try:
    import numpy as np
    # Restore the `np.NaN` alias for libraries that still reference it.
    if not hasattr(np, 'NaN'):
        np.NaN = np.nan
except Exception as exc:
    print('numpy NaN alias patch skipped:', repr(exc))

# llama_cpp server 依赖
!python -m pip install fastapi uvicorn sse-starlette pydantic-settings requests-toolbelt

# 预编译 llama-cpp-python wheel（优先下载，失败则源码编译）
from pathlib import Path
import urllib.request

LLAMA_WHEEL_URL = "https://oplist.mae5.com/d/gdrive_lz26xg/share/llama_cpp_python-0.3.16-cp312-cp312-linux_x86_64.whl?sign=s08ZZHeakHFTTevv8Vja5I-6HPXyT4ojOHMesEXpZUQ=:0"
LLAMA_WHEEL_DIR = Path("/content/llama_wheels")
LLAMA_WHEEL_DIR.mkdir(parents=True, exist_ok=True)
LLAMA_WHEEL_PATH = LLAMA_WHEEL_DIR / "llama_cpp_python-0.3.16-cp312-cp312-linux_x86_64.whl"
if not LLAMA_WHEEL_PATH.exists():
    try:
        print("Downloading prebuilt llama-cpp-python wheel ...")
        urllib.request.urlretrieve(LLAMA_WHEEL_URL, LLAMA_WHEEL_PATH)
    except Exception as e:
        print("Prebuilt wheel download failed, will build from source:", e)

!bash -lc 'LLAMA_WHEEL_DIR=/content/llama_wheels; mkdir -p "$LLAMA_WHEEL_DIR"; if ls "$LLAMA_WHEEL_DIR"/llama_cpp_python-*.whl >/dev/null 2>&1; then python -m pip install "$LLAMA_WHEEL_DIR"/llama_cpp_python-*.whl; else MAX_JOBS=2 CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=75" FORCE_CMAKE=1 python -m pip wheel llama-cpp-python -w "$LLAMA_WHEEL_DIR" -v; python -m pip install "$LLAMA_WHEEL_DIR"/llama_cpp_python-*.whl; fi'


## 3. 配置 `config.yaml`
只需要修改下面代码块里的变量。

默认已配置为：**日语 → 简体中文**、**本地 Kotoba-Whisper + Sakura GGUF**、**不配音**。

- 翻译服务会在需要时自动启动（每一步完成后卸载模型并清理显存）。
- GGUF 模型会在首次翻译步骤自动下载到 `_model_cache/llm`。
- 如果只做字幕，把任务表里的 `Dubbing` 设为 `0`。


In [None]:
from pathlib import Path
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    # ruamel.yaml is not installed yet: install it with the kernel's own
    # interpreter, then retry the import.
    import subprocess
    import sys
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'ruamel.yaml'])
    from ruamel.yaml import YAML

# ====== Settings you need to fill in ======
LLM_API_KEY = "local"
LLM_BASE_URL = "http://127.0.0.1:8000"
LLM_MODEL = "SakuraLLM/Sakura-7B-Qwen2.5-v1.0-GGUF@sakura-7b-qwen2.5-v1.0-iq4xs.gguf"
LLM_SUPPORT_JSON = False
MAX_WORKERS = 1
SUMMARY_LENGTH = 2000

TARGET_LANGUAGE = "简体中文"
SOURCE_LANGUAGE = "ja"
WHISPER_RUNTIME = "local"  # "local" or "cloud"
WHISPER_MODEL = "kotoba-tech/kotoba-whisper-v2.2"
WHISPER_302_API_KEY = "YOUR_302_API_KEY"

USE_DEMUCS = True
BURN_SUBTITLES = True
YTB_RESOLUTION = "1080"  # 360/1080/best

# Local translation service (llama_cpp + GGUF)
LOCAL_LLM_ENABLED = True
LOCAL_LLM_MANAGE = True
LOCAL_LLM_MODEL_REPO = "SakuraLLM/Sakura-7B-Qwen2.5-v1.0-GGUF"
LOCAL_LLM_MODEL_FILE = "sakura-7b-qwen2.5-v1.0-iq4xs.gguf"
LOCAL_LLM_MODEL_DIR = "./_model_cache/llm"
LOCAL_LLM_MODEL_PATH = ""
LOCAL_LLM_MODEL_ALIAS = LLM_MODEL
LOCAL_LLM_SERVER_HOST = "127.0.0.1"
LOCAL_LLM_SERVER_PORT = 8000
LOCAL_LLM_API_KEY = "local"
LOCAL_LLM_N_GPU_LAYERS = -1
LOCAL_LLM_N_CTX = 4096
LOCAL_LLM_N_THREADS = 8
LOCAL_LLM_N_BATCH = 512
LOCAL_LLM_CHAT_FORMAT = ""
LOCAL_LLM_LOG_PATH = "output/log/local_llm_server.log"

# Dubbing (optional)
TTS_METHOD = "edge_tts"  # azure_tts/openai_tts/fish_tts/gpt_sovits, etc.
EDGE_VOICE = "zh-CN-XiaoxiaoNeural"
# ==========================================


def _assign_all(section, values):
    """Set every key of `values` on `section`, one item at a time, in order."""
    for option, value in values.items():
        section[option] = value


# Load the existing config.yaml from the working directory.
yaml = YAML()
cfg_path = Path("config.yaml")
with cfg_path.open("r", encoding="utf-8") as src:
    cfg = yaml.load(src)

# LLM API settings.
_assign_all(cfg["api"], {
    "key": LLM_API_KEY,
    "base_url": LLM_BASE_URL,
    "model": LLM_MODEL,
    "llm_support_json": LLM_SUPPORT_JSON,
})
_assign_all(cfg, {
    "max_workers": MAX_WORKERS,
    "summary_length": SUMMARY_LENGTH,
    "target_language": TARGET_LANGUAGE,
})

# Speech recognition settings; both language fields receive the source language.
_assign_all(cfg["whisper"], {
    "language": SOURCE_LANGUAGE,
    "detected_language": SOURCE_LANGUAGE,
    "runtime": WHISPER_RUNTIME,
    "model": WHISPER_MODEL,
    "whisperX_302_api_key": WHISPER_302_API_KEY,
})

_assign_all(cfg, {
    "demucs": USE_DEMUCS,
    "burn_subtitles": BURN_SUBTITLES,
    "ytb_resolution": YTB_RESOLUTION,
})

# Create the local_llm section when the file does not have one yet.
cfg.setdefault("local_llm", {})
_assign_all(cfg["local_llm"], {
    "enabled": LOCAL_LLM_ENABLED,
    "manage_server": LOCAL_LLM_MANAGE,
    "model_repo": LOCAL_LLM_MODEL_REPO,
    "model_file": LOCAL_LLM_MODEL_FILE,
    "model_dir": LOCAL_LLM_MODEL_DIR,
    "model_path": LOCAL_LLM_MODEL_PATH,
    "model_alias": LOCAL_LLM_MODEL_ALIAS,
    "server_host": LOCAL_LLM_SERVER_HOST,
    "server_port": LOCAL_LLM_SERVER_PORT,
    "api_key": LOCAL_LLM_API_KEY,
    "n_gpu_layers": LOCAL_LLM_N_GPU_LAYERS,
    "n_ctx": LOCAL_LLM_N_CTX,
    "n_threads": LOCAL_LLM_N_THREADS,
    "n_batch": LOCAL_LLM_N_BATCH,
    "chat_format": LOCAL_LLM_CHAT_FORMAT,
    "log_path": LOCAL_LLM_LOG_PATH,
})

# Dubbing settings.
cfg["tts_method"] = TTS_METHOD
cfg["edge_tts"]["voice"] = EDGE_VOICE

# Write the updated configuration back to the same file.
with cfg_path.open("w", encoding="utf-8") as sink:
    yaml.dump(cfg, sink)

print("config.yaml 已更新")


## 4. 准备任务

默认通过**外链下载**视频（自动按链接中的文件名保存到 `batch/input/`）。也可改为本地上传或 Google Drive。

- 外链：在下方设置 `VIDEO_URL`
- 本地视频：上传到 `batch/input/`，任务表里填写文件名（不含路径）
- YouTube：任务表里直接填写链接

大文件建议先挂载 Google Drive，再复制到 `batch/input/`。


In [None]:
# Direct-download link used by default (leave empty to skip the download).
VIDEO_URL = "https://oplist.mae5.com/d/本地存储/1.mp4?sign=b58L3c5JYGAMNwLcO09asS9CV8aHTlGBXiO3Yi8Pe0Y=:0"
OUT_PATH = ""  # Optional: custom file name to save the download under

import os
import urllib.parse
from pathlib import Path

import requests

def guess_filename(url: str) -> str:
    """Derive a file name from the path component of *url*.

    The query string and fragment are ignored and percent-escapes in the
    path are decoded. Returns "video.mp4" when the path has no final
    component.
    """
    decoded_path = urllib.parse.unquote(urllib.parse.urlsplit(url).path)
    basename = Path(decoded_path).name
    if basename:
        return basename
    return "video.mp4"

# Name of the file made available in batch/input/ by this cell ("" when skipped).
DOWNLOADED_FILE = ""
if VIDEO_URL:
    os.makedirs("batch/input", exist_ok=True)
    filename = OUT_PATH or guess_filename(VIDEO_URL)
    out_path = Path("batch/input") / filename
    if out_path.exists():
        # Nothing was transferred, so do not report a download.
        print("Already exists, skipping download:", out_path)
    else:
        # Stream into a ".part" file and rename only after the transfer has
        # finished. Writing straight to out_path would leave a truncated file
        # behind on an error or interrupt, and the exists() check above would
        # then treat it as a complete video on the next run.
        tmp_path = out_path.with_name(out_path.name + ".part")
        try:
            with requests.get(VIDEO_URL, stream=True, timeout=60) as r:
                r.raise_for_status()
                with open(tmp_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
            os.replace(tmp_path, out_path)
        finally:
            # No-op after a successful rename; removes leftovers after a failure.
            tmp_path.unlink(missing_ok=True)
        print("Downloaded:", out_path)
    DOWNLOADED_FILE = out_path.name
else:
    print("VIDEO_URL 为空，跳过外链下载")


In [None]:
from google.colab import files
import os
import shutil

# Open the Colab upload dialog and move every received file into batch/input/.
os.makedirs("batch/input", exist_ok=True)
uploaded = files.upload()
uploaded_names = list(uploaded.keys())
for uploaded_name in uploaded_names:
    shutil.move(uploaded_name, f"batch/input/{uploaded_name}")

# Remember the first uploaded file as the video to process.
if uploaded_names:
    DOWNLOADED_FILE = uploaded_names[0]
print("Uploaded:", uploaded_names)


如果使用 Google Drive，可运行下面代码挂载，然后手动复制文件到 `batch/input/`：


In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount("/content/drive")


创建/更新任务表 `batch/tasks_setting.xlsx`：


In [None]:
import pandas as pd

# Use the file provided by an earlier download/upload cell when there is one;
# otherwise fall back to a placeholder name to edit by hand.
VIDEO_FILE = globals().get("DOWNLOADED_FILE") or "your_video.mp4"  # A YouTube link also works here

# Reuse the languages chosen in the config cell when it has been run, so the
# task table cannot disagree with config.yaml after those settings are edited.
# The fallbacks are the notebook defaults.
# NOTE(review): presumably the batch processor applies these columns per task — confirm.
TASK_SOURCE_LANGUAGE = globals().get("SOURCE_LANGUAGE") or "ja"
TASK_TARGET_LANGUAGE = globals().get("TARGET_LANGUAGE") or "简体中文"

TASK_COLUMNS = ["Video File", "Source Language", "Target Language", "Dubbing", "Status"]

tasks = [
    {
        "Video File": VIDEO_FILE,
        "Source Language": TASK_SOURCE_LANGUAGE,
        "Target Language": TASK_TARGET_LANGUAGE,
        "Dubbing": 0,  # 0 = no dubbing, 1 = dubbing
        "Status": "",
    }
]

# Write the task sheet, then display it as the cell output.
df = pd.DataFrame(tasks, columns=TASK_COLUMNS)
df.to_excel("batch/tasks_setting.xlsx", index=False)
df


## 5. 运行批处理


In [None]:
# Run the batch processor with the current directory added to Python's import path
# (presumably so the script can import the project's own packages — confirm).
!PYTHONPATH=. python batch/utils/batch_processor.py


## 6. 导出结果
结果会在 `batch/output/`，可以打包下载。


In [None]:
# List the output directory, then pack it recursively into videolingo_output.zip.
!ls -la batch/output
!zip -r videolingo_output.zip batch/output

# Send the archive to the browser as a download.
from google.colab import files
files.download("videolingo_output.zip")
