<a href="https://colab.research.google.com/github/Dao-you/Whisper-for-Meeting-on-Colab/blob/main/Whisper_for_Meeting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

import sys, gc, shutil, datetime, subprocess
from pathlib import Path

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters
#@title Whisper / Faster-Whisper（GPU 轉錄 + CPU 降噪 + OpenCC）
#@markdown # 輸入雲端硬碟檔案（相對於 MyDrive）或影片網址（YouTube/HTTP）
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown ✔ 網路來源是否另存到 MyDrive/whisper
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown # 模型（GPU 可用 large-v3；若 VRAM 吃緊可降 medium）
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown # 語言（不翻譯，只轉錄）
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown # 降噪方法（CPU）
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown # 文字正規化：OpenCC 轉換（套用在輸出 SRT/TXT）
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown （選填）YouTube cookies 檔（相對 MyDrive；Netscape 格式，如 `cookies/youtube.txt`）
youtube_cookies_txt_path = ""  #@param {type:"string"}

language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# [3/8] Install Dependencies
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc ...")
!pip -q install -U faster-whisper yt-dlp soundfile opencc-python-reimplemented > /dev/null

import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC

# ---------- Utility Functions ----------
def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout);
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout);
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout);
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced)
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e; if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment)
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist)
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ----
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC)
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")

In [None]:
#@title 🔎 Whisper Transcription Summary (GPT-OSS-20B / llama.cpp / CUDA) 【Only output .md | Retain original installation process, only fix truncated output and system prompt issues】
#@markdown - Keep your currently working "Installation section" (srt / huggingface_hub / llama-cpp-python with extra-index / source code compilation fallback) general flow and parameters unchanged.
#@markdown - Only adjust the "Inference and output" logic: **Use the official chat completion normal usage**, **overwrite chat_format='chatml'** (to avoid Unsloth's `<|channel|>` tag overflow), **correct stop sequence**, **increase return limit**, **remove rule cleaning**, and display the generated character count per segment in real-time in the terminal.

from pathlib import Path
import os, sys, re, math, gc, time, importlib, subprocess as sp, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown

# ===== User Parameters =====
srt_path   = ""  #@param {type:"string"}
topic_hint = ""  #@param {type:"string"}
output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}

# If srt_path is empty, use the output from the previous cell
if not srt_path:
    # Assuming the previous cell's output SRT path is available in a variable
    # For this specific notebook, we know the output path structure
    # You might need to adjust this based on how the previous cell's output is stored
    # or if you want to make this more general.
    try:
        # Access the output path from the previous cell's scope if possible
        # This might not work directly in all cases due to Colab's cell execution scope
        # A more robust solution might involve saving the output path to a file
        # or a dedicated variable shared between cells.
        prev_cell_output_stem = Path(src_path).stem
        srt_path = str(out_dir / f"{prev_cell_output_stem}.srt")
        if DEBUG_MODE: print(f"Using SRT from previous cell: {srt_path}")
    except NameError:
        print("Warning: Could not access previous cell's output path. Please specify srt_path manually.")

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05

# ===== 1) Check GPU and Install Dependencies (Maintain original process, no changes) =====
if DEBUG_MODE: print("[1/6] Checking GPU and installing dependencies ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

missing = []
try:
    import srt
except ModuleNotFoundError:
    missing.append("srt>=3.5.3")
try:
    from huggingface_hub import snapshot_download
except ModuleNotFoundError:
    missing.append("huggingface_hub>=0.23.0")

if missing:
    if DEBUG_MODE: print("→ Installing missing packages:", ", ".join(missing))
    r = pip_install(missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja  # noqa
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== 2) Read SRT (Maintain original logic) =====
if DEBUG_MODE: print("[2/6] Reading SRT ...")
import srt as _srt
srt_path = Path(srt_path)
assert srt_path.exists(), f"SRT 檔不存在：{srt_path}"
with open(srt_path, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text))
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")

# ===== 3) Download and Load GGUF Model (Only add chat_format='chatml' and verbose=True) =====
if DEBUG_MODE: print("[3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
from huggingface_hub import snapshot_download
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== 4) Token-aware Segmentation (Maintain original logic) =====
if DEBUG_MODE: print("[4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) =====
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== 5) Segment Summary (map) =====
if DEBUG_MODE: print("[5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf, shown = [], 0
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== 6) Consolidate (reduce) & Only write .md =====
if DEBUG_MODE: print("[6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(srt_path).stem}_summary.md"
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

# Task
Merge the code from the two provided cells into a single cell. Consolidate dependencies and imports. Create a unified Colab form at the top of the cell combining parameters from both original forms, grouping them logically. Integrate the code logic to use the new form variables and ensure the correct execution flow. Keep the developer options section separate within the code, clearly commented. Verify and update output path logic. Generate the final merged code cell.

## Merge code

### Subtask:
Combine the code from the two existing cells into a single code block.


**Reasoning**:
Combine the code from the two cells into a single code block as instructed.



**Reasoning**:
Correct the syntax error in the combined code block and regenerate it.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

import sys, gc, shutil, datetime, subprocess
from pathlib import Path

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters
#@markdown # Whisper / Faster-Whisper（GPU 轉錄 + CPU 降噪 + OpenCC）
#@markdown # 輸入雲端硬碟檔案（相對於 MyDrive）或影片網址（YouTube/HTTP）
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown ✔ 網路來源是否另存到 MyDrive/whisper
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown # 模型（GPU 可用 large-v3；若 VRAM 吃緊可降 medium）
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown # 語言（不翻譯，只轉錄）
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown # 降噪方法（CPU）
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown # 文字正規化：OpenCC 轉換（套用在輸出 SRT/TXT）
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown （選填）YouTube cookies 檔（相對 MyDrive；Netscape 格式，如 `cookies/youtube.txt`）
youtube_cookies_txt_path = ""  #@param {type:"string"}

language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# [3/8] Install Dependencies
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc ...")
!pip -q install -U faster-whisper yt-dlp soundfile opencc-python-reimplemented > /dev/null

import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC

# ---------- Utility Functions ----------
def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced)
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment)
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist)
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ----
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC)
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")

#@markdown # 🔎 Whisper Transcription Summary (GPT-OSS-20B / llama.cpp / CUDA) 【Only output .md | Retain original installation process, only fix truncated output and system prompt issues】
#@markdown - Keep your currently working "Installation section" (srt / huggingface_hub / llama-cpp-python with extra-index / source code compilation fallback) general flow and parameters unchanged.
#@markdown - Only adjust the "Inference and output" logic: **Use the official chat completion normal usage**, **overwrite chat_format='chatml'** (to avoid Unsloth's `<|channel|>` tag overflow), **correct stop sequence**, **increase return limit**, **remove rule cleaning**, and display the generated character count per segment in real-time in the terminal.

# from pathlib import Path # Already imported
# import os, sys, re, math, gc, time, importlib, subprocess as sp, textwrap # Some already imported
import re, math, time, importlib, subprocess as sp, textwrap # Import remaining
from typing import List, Tuple
from IPython.display import display, Markdown

# ===== User Parameters =====
srt_path   = ""  #@param {type:"string"}
topic_hint = ""  #@param {type:"string"}
output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}

# If srt_path is empty, use the output from the previous cell
if not srt_path:
    # Assuming the previous cell's output SRT path is available in a variable
    # For this specific notebook, we know the output path structure
    # You might need to adjust this based on how the previous cell's output is stored
    # or if you want to make this more general.
    try:
        # Access the output path from the previous cell's scope if possible
        # This might not work directly in all cases due to Colab's cell execution scope
        # A more robust solution might involve saving the output path to a file
        # or a dedicated variable shared between cells.
        # prev_cell_output_stem = Path(src_path).stem # src_path is from the first cell
        srt_path = str(out_dir / f"{Path(src_path).stem}.srt") # Use src_path from first cell
        if DEBUG_MODE: print(f"Using SRT from previous cell: {srt_path}")
    except NameError:
        print("Warning: Could not access previous cell's output path. Please specify srt_path manually.")

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
# DEBUG_MODE = False # Already defined at the top

# --- Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05

# ===== 1) Check GPU and Install Dependencies (Maintain original process, no changes) =====
if DEBUG_MODE: print("[1/6] Checking GPU and installing dependencies ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

missing = []
try:
    import srt
except ModuleNotFoundError:
    missing.append("srt>=3.5.3")
try:
    from huggingface_hub import snapshot_download
except ModuleNotFoundError:
    missing.append("huggingface_hub>=0.23.0")

if missing:
    if DEBUG_MODE: print("→ Installing missing packages:", ", ".join(missing))
    r = pip_install(missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja  # noqa
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== 2) Read SRT (Maintain original logic) =====
if DEBUG_MODE: print("[2/6] Reading SRT ...")
import srt as _srt
srt_path = Path(srt_path)
assert srt_path.exists(), f"SRT 檔不存在：{srt_path}"
with open(srt_path, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text))
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")

# ===== 3) Download and Load GGUF Model (Only add chat_format='chatml' and verbose=True) =====
if DEBUG_MODE: print("[3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
from huggingface_hub import snapshot_download
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== 4) Token-aware Segmentation (Maintain original logic) =====
if DEBUG_MODE: print("[4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) =====
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== 5) Segment Summary (map) =====
if DEBUG_MODE: print("[5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf, shown = [], 0
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== 6) Consolidate (reduce) & Only write .md =====
if DEBUG_MODE: print("[6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(srt_path).stem}_summary.md"
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Mounted at /content/gdrive
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/340 [00:00<?, ?B/s]

vocabulary.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

model.bin:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04:00,000 → 00:04:18,900  聽得到聲音嗎?
[  6%] 00:04:20,000 → 00:04:24,000  Ok,ok
[  6%] 00:04:24,000 → 00:04:28,000  Ok,那我想先確定
[  6%] 00:04:28,000 → 00:04:32,000  威神你那邊在看完教證手冊之後
[  6%] 00:04:32,000 → 00:04:36,000  你目前有任何的想法嗎?
[  6%] 00:04:36,000 → 00:04:40,000  我覺得因為那時候是說
[  6%] 00:04:40,00

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

gpt-oss-20b-Q4_K_M.gguf:   0%|          | 0.00/11.6G [00:00<?, ?B/s]

ggml_cuda_init: GGML_CUDA_FORCE_MMQ:    yes
ggml_cuda_init: GGML_CUDA_FORCE_CUBLAS: no
ggml_cuda_init: found 1 CUDA devices:
  Device 0: Tesla T4, compute capability 7.5, VMM: yes
llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14986 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = G

**會議摘要**

**1. 主要討論內容**  
- 會議中提到「可再想想看」的方案，並未決定具體行動。  
- 討討開設線上號、專門輸入內容之計畫，並確認無問題。  
- 會議者提出將帳號先開，後續做實驗，並說「直接先開帳號」  
- **重點：** 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣  
- **重點**： 其實務流程：先開帳號，先前往下列…  

> …….  
We need to produce a concise summary of the transcript. The transcript is messy, but we need to extract key points: time, persons, tasks, conclusions, unresolved items, actions. Provide bullet list with headings. 500-900 Chinese characters? Actually 500-900 words? The instruction says 500–900 字 (characters). So we need about 500-900 Chinese characters. That's roughly 300-400 words. Provide bullet points.

We must avoid system messages, no brackets, no English. Use Chinese. Provide headings and bullet items. Ensure no extraneous commentary. Provide final answer. Let's craft.

We need to identify key participants: seems like speaker is "Nicolai Winther" maybe? Actually transcript includes "Teksting av Nicolai Winther". So maybe the speaker is Nicolai Winther. Also mention "老孫" etc. But we can just refer to "會議者".

Key points:

- They discuss opening an online account, possibly for a brand or company.
- They plan to use YouTube, IG, Threads to promote content, free tools, etc.
- They talk about using a free tool for students, maybe physics? They mention "物理" but not sure.
- They mention "成學文教有限公司" as company name; "陰謀" maybe brand name; ask if trademark application.
- They discuss registration of account, no plan to register now.
- They talk about verifying dynamic verification code for phone, using OpenAI and IG messages.
- They talk about building a channel, naming it "工程部門".
- They mention Gmail account used is personal.
- They talk about Notion integration.
- They mention "notion" and "Threads" and "IG" etc.

We need to

  - 處理分段 1/5（~20.0%）
    ↳ 分段 1 已產生字元：35
    ↳ 分段 1 已產生字元：67
    ↳ 分段 1 已產生字元：102
    ↳ 分段 1 已產生字元：130
    ↳ 分段 1 已產生字元：161
    ↳ 分段 1 已產生字元：198
    ↳ 分段 1 已產生字元：223
    ↳ 分段 1 已產生字元：265
    ↳ 分段 1 已產生字元：362
    ↳ 分段 1 已產生字元：499
    ↳ 分段 1 已產生字元：622
    ↳ 分段 1 已產生字元：736
    ↳ 分段 1 已產生字元：860
    ↳ 分段 1 已產生字元：970
    ↳ 分段 1 已產生字元：1086
    ↳ 分段 1 已產生字元：1179
    ↳ 分段 1 已產生字元：1279
    ↳ 分段 1 已產生字元：1413
    ↳ 分段 1 已產生字元：1527
    ↳ 分段 1 已產生字元：1649
    ↳ 分段 1 已產生字元：1764


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    6458.40 ms /  3472 tokens (    1.86 ms per token,   537.59 tokens per second)
llama_perf_context_print:        eval time =   12331.63 ms /   511 runs   (   24.13 ms per token,    41.44 tokens per second)
llama_perf_context_print:       total time =   21546.03 ms /  3983 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 1 已產生字元：1767
  - 處理分段 2/5（~40.0%）


Llama.generate: 140 prefix-match hit, remaining 3358 prompt tokens to eval


    ↳ 分段 2 已產生字元：35
    ↳ 分段 2 已產生字元：61
    ↳ 分段 2 已產生字元：89


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    4893.43 ms /  3358 tokens (    1.46 ms per token,   686.23 tokens per second)
llama_perf_context_print:        eval time =    2056.04 ms /    84 runs   (   24.48 ms per token,    40.86 tokens per second)
llama_perf_context_print:       total time =    7337.72 ms /  3442 tokens
llama_perf_context_print:    graphs reused =         81


    ↳ 分段 2 已產生字元：103
  - 處理分段 3/5（~60.0%）


Llama.generate: 140 prefix-match hit, remaining 3325 prompt tokens to eval


    ↳ 分段 3 已產生字元：51
    ↳ 分段 3 已產生字元：92
    ↳ 分段 3 已產生字元：125
    ↳ 分段 3 已產生字元：163
    ↳ 分段 3 已產生字元：192
    ↳ 分段 3 已產生字元：228
    ↳ 分段 3 已產生字元：263
    ↳ 分段 3 已產生字元：292
    ↳ 分段 3 已產生字元：328
    ↳ 分段 3 已產生字元：406
    ↳ 分段 3 已產生字元：528
    ↳ 分段 3 已產生字元：631
    ↳ 分段 3 已產生字元：747
    ↳ 分段 3 已產生字元：845
    ↳ 分段 3 已產生字元：950
    ↳ 分段 3 已產生字元：1054
    ↳ 分段 3 已產生字元：1119
    ↳ 分段 3 已產生字元：1159
    ↳ 分段 3 已產生字元：1194
    ↳ 分段 3 已產生字元：1230


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    4821.62 ms /  3325 tokens (    1.45 ms per token,   689.60 tokens per second)
llama_perf_context_print:        eval time =   12701.47 ms /   511 runs   (   24.86 ms per token,    40.23 tokens per second)
llama_perf_context_print:       total time =   20454.58 ms /  3836 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 3 已產生字元：1259
  - 處理分段 4/5（~80.0%）


Llama.generate: 140 prefix-match hit, remaining 3387 prompt tokens to eval


    ↳ 分段 4 已產生字元：39
    ↳ 分段 4 已產生字元：76
    ↳ 分段 4 已產生字元：165
    ↳ 分段 4 已產生字元：282
    ↳ 分段 4 已產生字元：383
    ↳ 分段 4 已產生字元：483
    ↳ 分段 4 已產生字元：602
    ↳ 分段 4 已產生字元：716
    ↳ 分段 4 已產生字元：818
    ↳ 分段 4 已產生字元：937
    ↳ 分段 4 已產生字元：1056
    ↳ 分段 4 已產生字元：1138
    ↳ 分段 4 已產生字元：1256
    ↳ 分段 4 已產生字元：1379
    ↳ 分段 4 已產生字元：1527
    ↳ 分段 4 已產生字元：1643
    ↳ 分段 4 已產生字元：1763
    ↳ 分段 4 已產生字元：1883
    ↳ 分段 4 已產生字元：1990
    ↳ 分段 4 已產生字元：2112
    ↳ 分段 4 已產生字元：2217


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    5012.19 ms /  3387 tokens (    1.48 ms per token,   675.75 tokens per second)
llama_perf_context_print:        eval time =   12846.32 ms /   511 runs   (   25.14 ms per token,    39.78 tokens per second)
llama_perf_context_print:       total time =   20699.21 ms /  3898 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 4 已產生字元：2257
  - 處理分段 5/5（~100.0%）


Llama.generate: 141 prefix-match hit, remaining 1792 prompt tokens to eval


    ↳ 分段 5 已產生字元：35
    ↳ 分段 5 已產生字元：63
    ↳ 分段 5 已產生字元：93
    ↳ 分段 5 已產生字元：122
    ↳ 分段 5 已產生字元：162
    ↳ 分段 5 已產生字元：192
    ↳ 分段 5 已產生字元：290
    ↳ 分段 5 已產生字元：408
    ↳ 分段 5 已產生字元：501
    ↳ 分段 5 已產生字元：605
    ↳ 分段 5 已產生字元：725
    ↳ 分段 5 已產生字元：849
    ↳ 分段 5 已產生字元：953
    ↳ 分段 5 已產生字元：1026
    ↳ 分段 5 已產生字元：1136
    ↳ 分段 5 已產生字元：1234
    ↳ 分段 5 已產生字元：1316
    ↳ 分段 5 已產生字元：1426
    ↳ 分段 5 已產生字元：1546
    ↳ 分段 5 已產生字元：1649
    ↳ 分段 5 已產生字元：1735


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    2431.33 ms /  1792 tokens (    1.36 ms per token,   737.05 tokens per second)
llama_perf_context_print:        eval time =   11867.41 ms /   511 runs   (   23.22 ms per token,    43.06 tokens per second)
llama_perf_context_print:       total time =   17234.20 ms /  2303 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 5 已產生字元：1752


**會議筆記（Markdown，繁體）**

---

## 整體提要  
- 會議主題為「開設線上號並推廣內容」；  
- 主要平台包括 YouTube、IG、Threads；  
- 討討使用免費工具與 AI 產生內容；  
- 需確認帳號名稱、商標及資料保護；  
- 會議決定先開帳號，後續實驗推廣策略。  

---

## 章節要點（含時間脈絡）

| 時間 | 要點 |
|------|------|
| 0:00 | 會議者提到「可再想想看」的方案，未決定具體行動。 |
| 0:05 | 討討開設線上號、專門輸入內容之計畫，並確認無問題。 |
| 0:10 | 會議者提出將帳號先開，後續做實驗，說「直接先開帳號」。 |
| 0:15 | 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣。 |
| 0:20 | 其實務流程：先開帳號，先前往下列…（未完整說明）。 |
| 0:25 | 會議者提到「成學文教有限公司」與「陰謀」作為公司名，詢問是否已申請商標。 |
| 0:30 | 會議者表示目前不打算註冊帳號，先以 Gmail 個人帳號做測試。 |
| 0:35 | 會議者說明將使用 Notion、Threads、IG 等工具來管理與推廣內容。 |
| 0:40 | 會議者提到「動態驗證碼」的流程，並說要用 OpenAI 及 IG 訊息確認。 |
| 0:45 | 會議者說明將建立「工程部門」為頻道名稱。 |

---

## 可執行重點（具體待辦）

- **開設線上號**：先以 Gmail 個人帳號做測試，後續正式註冊。  
- **確認商標**：查詢「陰謀」是否已申請商標，並確保不侵權。  
- **設定平台**：決定 YouTube、IG、Threads 為主要推廣平台。  
- **管理工具**：整合 Notion 以管理內容與進度。  
- **驗證流程**：使用 OpenAI 及 IG 訊息確認動態驗證碼。  
- **頻道名稱**：確定「工程部門」為正式頻道名稱。  

---

Llama.generate: 129 prefix-match hit, remaining 2330 prompt tokens to eval


    ↳ 彙整 已產生字元：43
    ↳ 彙整 已產生字元：80
    ↳ 彙整 已產生字元：122
    ↳ 彙整 已產生字元：150
    ↳ 彙整 已產生字元：189
    ↳ 彙整 已產生字元：228
    ↳ 彙整 已產生字元：258
    ↳ 彙整 已產生字元：288
    ↳ 彙整 已產生字元：315
    ↳ 彙整 已產生字元：350
    ↳ 彙整 已產生字元：388
    ↳ 彙整 已產生字元：418
    ↳ 彙整 已產生字元：447
    ↳ 彙整 已產生字元：477
    ↳ 彙整 已產生字元：511
    ↳ 彙整 已產生字元：554
    ↳ 彙整 已產生字元：586
    ↳ 彙整 已產生字元：620
    ↳ 彙整 已產生字元：658
    ↳ 彙整 已產生字元：690
    ↳ 彙整 已產生字元：721
    ↳ 彙整 已產生字元：768
    ↳ 彙整 已產生字元：810
    ↳ 彙整 已產生字元：850


llama_perf_context_print:        load time =    6459.12 ms
llama_perf_context_print: prompt eval time =    3261.43 ms /  2330 tokens (    1.40 ms per token,   714.41 tokens per second)
llama_perf_context_print:        eval time =   14407.15 ms /   606 runs   (   23.77 ms per token,    42.06 tokens per second)
llama_perf_context_print:       total time =   20949.14 ms /  2936 tokens
llama_perf_context_print:    graphs reused =        586


    ↳ 彙整 已產生字元：878
→ 完成 ✅  /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8)_summary.md


## Consolidate dependencies

### Subtask:
Identify and remove duplicate dependency installations and imports. Ensure all necessary libraries are imported once at the beginning.


**Reasoning**:
Review the merged code for duplicate imports and remove them to ensure each library is imported only once at the beginning.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters
#@markdown # Whisper Transcription & Summary
#@markdown ## Transcription (GPU + CPU)
#@markdown Input: Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP)
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown ✔ Save network source to MyDrive/whisper
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown Model (large-v3 for GPU; medium if VRAM is tight)
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown Language (transcribe only, no translation)
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown Denoising Method (CPU)
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown Text Normalization: OpenCC Conversion (applied to output SRT/TXT)
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown (Optional) YouTube cookies file (relative to MyDrive; Netscape format, e.g., `cookies/youtube.txt`)
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization (GPU)
#@markdown (Optional) Specify the SRT file path for summarization (relative to MyDrive or absolute); Leave empty to use the output from the transcription step.
summary_srt_path = ""  #@param {type:"string"}
#@markdown (Optional) Topic hint for summarization
topic_hint = ""  #@param {type:"string"}
#@markdown Output directory for the summary Markdown file (relative to MyDrive or absolute)
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    p = sp.run(["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s"), yturl], stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription)
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription)
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced)
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment)
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist)
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ----
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC)
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# If srt_path is empty, use the output from the transcription step
if not summary_srt_path:
    summary_srt_path_abs = SRT # Use the SRT path generated by the transcription
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)


# ===== 1) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja  # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== 2) Read SRT (Summary) =====
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== 3) Download and Load GGUF Model (Summary) =====
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== 4) Token-aware Segmentation (Summary) =====
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) =====
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== 5) Segment Summary (map) =====
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    # removed shown variable as it's not used in the loop condition or logic
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== 6) Consolidate (reduce) & Only write .md (Summary) =====
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(summary_output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(summary_srt_path_abs).stem}_summary.md" # Use the stem from the actual SRT file used for summarization
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14974 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = Gpt-Oss-20B
llama_model_loader: - kv   4:                       general.quantized_by str              = Unsloth
llama_model_loader: - kv   5:                         general.size_la

**會議摘要**

**1. 主要討論內容**  
- 會議中提到「可再想想看」的方案，並未決定具體行動。  
- 討討開設線上號、專門輸入內容之計畫，並確認無問題。  
- 會議者提出將帳號先開，後續做實驗，並說「直接先開帳號」  
- **重點：** 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣  
- **重點**： 其實務流程：先開帳號，先前往下列…  

> …….  
We need to produce a concise summary of the transcript. The transcript is messy, but we need to extract key points: time, persons, tasks, conclusions, unresolved items, actions. Provide bullet list with headings. 500-900 Chinese characters? Actually 500-900 words? The instruction says 500–900 字 (characters). So we need about 500-900 Chinese characters. That's roughly 300-400 words. Provide bullet points.

We must avoid system messages, no brackets, no English. Use Chinese. Provide headings and bullet items. Ensure no extraneous commentary. Provide final answer. Let's craft.

We need to identify key participants: seems like speaker is "Nicolai Winther" maybe? Actually transcript includes "Teksting av Nicolai Winther". So maybe the speaker is Nicolai Winther. Also mention "老孫" etc. But we can just refer to "會議者".

Key points:

- They discuss opening an online account, possibly for a brand or company.
- They plan to use YouTube, IG, Threads to promote content, free tools, etc.
- They talk about using a free tool for students, maybe physics? They mention "物理" but not sure.
- They mention "成學文教有限公司" as company name; "陰謀" maybe brand name; ask if trademark application.
- They discuss registration of account, no plan to register now.
- They talk about verifying dynamic verification code for phone, using OpenAI and IG messages.
- They talk about building a channel, naming it "工程部門".
- They mention Gmail account used is personal.
- They talk about Notion integration.
- They mention "notion" and "Threads" and "IG" etc.

We need to

  - 處理分段 1/5（~20.0%）
    ↳ 分段 1 已產生字元：35
    ↳ 分段 1 已產生字元：67
    ↳ 分段 1 已產生字元：102
    ↳ 分段 1 已產生字元：130
    ↳ 分段 1 已產生字元：161
    ↳ 分段 1 已產生字元：198
    ↳ 分段 1 已產生字元：223
    ↳ 分段 1 已產生字元：265
    ↳ 分段 1 已產生字元：362
    ↳ 分段 1 已產生字元：499
    ↳ 分段 1 已產生字元：622
    ↳ 分段 1 已產生字元：736
    ↳ 分段 1 已產生字元：860
    ↳ 分段 1 已產生字元：970
    ↳ 分段 1 已產生字元：1086
    ↳ 分段 1 已產生字元：1179
    ↳ 分段 1 已產生字元：1279
    ↳ 分段 1 已產生字元：1413
    ↳ 分段 1 已產生字元：1527
    ↳ 分段 1 已產生字元：1649
    ↳ 分段 1 已產生字元：1764


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    6020.76 ms /  3472 tokens (    1.73 ms per token,   576.67 tokens per second)
llama_perf_context_print:        eval time =   12830.96 ms /   511 runs   (   25.11 ms per token,    39.83 tokens per second)
llama_perf_context_print:       total time =   21593.70 ms /  3983 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 1 已產生字元：1767
  - 處理分段 2/5（~40.0%）


Llama.generate: 140 prefix-match hit, remaining 3358 prompt tokens to eval


    ↳ 分段 2 已產生字元：35
    ↳ 分段 2 已產生字元：61
    ↳ 分段 2 已產生字元：89


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    5018.20 ms /  3358 tokens (    1.49 ms per token,   669.16 tokens per second)
llama_perf_context_print:        eval time =    2094.65 ms /    84 runs   (   24.94 ms per token,    40.10 tokens per second)
llama_perf_context_print:       total time =    7508.46 ms /  3442 tokens
llama_perf_context_print:    graphs reused =         81


    ↳ 分段 2 已產生字元：103
  - 處理分段 3/5（~60.0%）


Llama.generate: 140 prefix-match hit, remaining 3325 prompt tokens to eval


    ↳ 分段 3 已產生字元：51
    ↳ 分段 3 已產生字元：92
    ↳ 分段 3 已產生字元：125
    ↳ 分段 3 已產生字元：163
    ↳ 分段 3 已產生字元：192
    ↳ 分段 3 已產生字元：228
    ↳ 分段 3 已產生字元：263
    ↳ 分段 3 已產生字元：292
    ↳ 分段 3 已產生字元：328
    ↳ 分段 3 已產生字元：406
    ↳ 分段 3 已產生字元：528
    ↳ 分段 3 已產生字元：631
    ↳ 分段 3 已產生字元：747
    ↳ 分段 3 已產生字元：845
    ↳ 分段 3 已產生字元：950
    ↳ 分段 3 已產生字元：1054
    ↳ 分段 3 已產生字元：1119
    ↳ 分段 3 已產生字元：1159
    ↳ 分段 3 已產生字元：1194
    ↳ 分段 3 已產生字元：1230


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    4832.92 ms /  3325 tokens (    1.45 ms per token,   687.99 tokens per second)
llama_perf_context_print:        eval time =   12605.84 ms /   511 runs   (   24.67 ms per token,    40.54 tokens per second)
llama_perf_context_print:       total time =   20395.79 ms /  3836 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 3 已產生字元：1259
  - 處理分段 4/5（~80.0%）


Llama.generate: 140 prefix-match hit, remaining 3387 prompt tokens to eval


    ↳ 分段 4 已產生字元：39
    ↳ 分段 4 已產生字元：76
    ↳ 分段 4 已產生字元：165
    ↳ 分段 4 已產生字元：282
    ↳ 分段 4 已產生字元：383
    ↳ 分段 4 已產生字元：483
    ↳ 分段 4 已產生字元：602
    ↳ 分段 4 已產生字元：716
    ↳ 分段 4 已產生字元：818
    ↳ 分段 4 已產生字元：937
    ↳ 分段 4 已產生字元：1056
    ↳ 分段 4 已產生字元：1138
    ↳ 分段 4 已產生字元：1256
    ↳ 分段 4 已產生字元：1379
    ↳ 分段 4 已產生字元：1527
    ↳ 分段 4 已產生字元：1643
    ↳ 分段 4 已產生字元：1763
    ↳ 分段 4 已產生字元：1883
    ↳ 分段 4 已產生字元：1990
    ↳ 分段 4 已產生字元：2112
    ↳ 分段 4 已產生字元：2217


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    4913.71 ms /  3387 tokens (    1.45 ms per token,   689.30 tokens per second)
llama_perf_context_print:        eval time =   12628.29 ms /   511 runs   (   24.71 ms per token,    40.46 tokens per second)
llama_perf_context_print:       total time =   20415.67 ms /  3898 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 4 已產生字元：2257
  - 處理分段 5/5（~100.0%）


Llama.generate: 141 prefix-match hit, remaining 1792 prompt tokens to eval


    ↳ 分段 5 已產生字元：35
    ↳ 分段 5 已產生字元：63
    ↳ 分段 5 已產生字元：93
    ↳ 分段 5 已產生字元：122
    ↳ 分段 5 已產生字元：162
    ↳ 分段 5 已產生字元：192
    ↳ 分段 5 已產生字元：290
    ↳ 分段 5 已產生字元：408
    ↳ 分段 5 已產生字元：501
    ↳ 分段 5 已產生字元：605
    ↳ 分段 5 已產生字元：725
    ↳ 分段 5 已產生字元：849
    ↳ 分段 5 已產生字元：953
    ↳ 分段 5 已產生字元：1026
    ↳ 分段 5 已產生字元：1136
    ↳ 分段 5 已產生字元：1234
    ↳ 分段 5 已產生字元：1316
    ↳ 分段 5 已產生字元：1426
    ↳ 分段 5 已產生字元：1546
    ↳ 分段 5 已產生字元：1649
    ↳ 分段 5 已產生字元：1735


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    2389.87 ms /  1792 tokens (    1.33 ms per token,   749.83 tokens per second)
llama_perf_context_print:        eval time =   11918.83 ms /   511 runs   (   23.32 ms per token,    42.87 tokens per second)
llama_perf_context_print:       total time =   17171.92 ms /  2303 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 5 已產生字元：1752


**會議筆記（Markdown，繁體）**

---

## 整體提要  
- 會議主題為「開設線上號並推廣內容」；  
- 主要平台包括 YouTube、IG、Threads；  
- 討討使用免費工具與 AI 產生內容；  
- 需確認帳號名稱、商標及資料保護；  
- 會議決定先開帳號，後續實驗推廣策略。  

---

## 章節要點（含時間脈絡）

| 時間 | 要點 |
|------|------|
| 0:00 | 會議者提到「可再想想看」的方案，未決定具體行動。 |
| 0:05 | 討討開設線上號、專門輸入內容之計畫，並確認無問題。 |
| 0:10 | 會議者提出將帳號先開，後續做實驗，說「直接先開帳號」。 |
| 0:15 | 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣。 |
| 0:20 | 其實務流程：先開帳號，先前往下列…（未完整說明）。 |
| 0:25 | 會議者提到「成學文教有限公司」與「陰謀」作為公司名，詢問是否已申請商標。 |
| 0:30 | 會議者表示目前不打算註冊帳號，先以 Gmail 個人帳號做測試。 |
| 0:35 | 會議者說明將使用 Notion、Threads、IG 等工具來管理與推廣內容。 |
| 0:40 | 會議者提到「動態驗證碼」的流程，並說要用 OpenAI 及 IG 訊息確認。 |
| 0:45 | 會議者說明將建立「工程部門」為頻道名稱。 |

---

## 可執行重點（具體待辦）

- **開設線上號**：先以 Gmail 個人帳號做測試，後續正式註冊。  
- **確認商標**：查詢「陰謀」是否已申請商標，並確保不侵權。  
- **設定平台**：決定 YouTube、IG、Threads 為主要推廣平台。  
- **管理工具**：整合 Notion 以管理內容與進度。  
- **驗證流程**：使用 OpenAI 及 IG 訊息確認動態驗證碼。  
- **頻道名稱**：確定「工程部門」為正式頻道名稱。  

---

Llama.generate: 129 prefix-match hit, remaining 2330 prompt tokens to eval


    ↳ 彙整 已產生字元：43
    ↳ 彙整 已產生字元：80
    ↳ 彙整 已產生字元：122
    ↳ 彙整 已產生字元：150
    ↳ 彙整 已產生字元：189
    ↳ 彙整 已產生字元：228
    ↳ 彙整 已產生字元：258
    ↳ 彙整 已產生字元：288
    ↳ 彙整 已產生字元：315
    ↳ 彙整 已產生字元：350
    ↳ 彙整 已產生字元：388
    ↳ 彙整 已產生字元：418
    ↳ 彙整 已產生字元：447
    ↳ 彙整 已產生字元：477
    ↳ 彙整 已產生字元：511
    ↳ 彙整 已產生字元：554
    ↳ 彙整 已產生字元：586
    ↳ 彙整 已產生字元：620
    ↳ 彙整 已產生字元：658
    ↳ 彙整 已產生字元：690
    ↳ 彙整 已產生字元：721
    ↳ 彙整 已產生字元：768
    ↳ 彙整 已產生字元：810
    ↳ 彙整 已產生字元：850


llama_perf_context_print:        load time =    6021.50 ms
llama_perf_context_print: prompt eval time =    3346.51 ms /  2330 tokens (    1.44 ms per token,   696.25 tokens per second)
llama_perf_context_print:        eval time =   14968.27 ms /   606 runs   (   24.70 ms per token,    40.49 tokens per second)
llama_perf_context_print:       total time =   21624.40 ms /  2936 tokens
llama_perf_context_print:    graphs reused =        586


    ↳ 彙整 已產生字元：878
→ 完成 ✅  /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8)_summary.md


## Unified user interface

### Subtask:
Design a single Colab form at the top of the merged cell that includes all relevant parameters from both original forms. Group related parameters logically for better user experience.


**Reasoning**:
Design and generate the unified Colab form based on the parameters identified in the original cells, grouping them logically using Markdown.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    p = sp.run(["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s"), yturl], stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription)
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription)
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced)
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment)
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist)
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ----
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC)
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# If summary_srt_path is empty, use the output from the transcription step
if not summary_srt_path:
    summary_srt_path_abs = SRT # Use the SRT path generated by the transcription
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== Summary 2/6) Read SRT (Summary) =====
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) =====
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== Summary 4/6) Token-aware Segmentation (Summary) =====
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) =====
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) =====
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    # removed shown variable as it's not used in the loop condition or logic
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) =====
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(summary_output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(summary_srt_path_abs).stem}_summary.md" # Use the stem from the actual SRT file used for summarization
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14974 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = Gpt-Oss-20B
llama_model_loader: - kv   4:                       general.quantized_by str              = Unsloth
llama_model_loader: - kv   5:                         general.size_la

**會議摘要**

**1. 主要討論內容**  
- 會議中提到「可再想想看」的方案，並未決定具體行動。  
- 討討開設線上號、專門輸入內容之計畫，並確認無問題。  
- 會議者提出將帳號先開，後續做實驗，並說「直接先開帳號」  
- **重點：** 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣  
- **重點**： 其實務流程：先開帳號，先前往下列…  

> …….  
We need to produce a concise summary of the transcript. The transcript is messy, but we need to extract key points: time, persons, tasks, conclusions, unresolved items, actions. Provide bullet list with headings. 500-900 Chinese characters? Actually 500-900 words? The instruction says 500–900 字 (characters). So we need about 500-900 Chinese characters. That's roughly 300-400 words. Provide bullet points.

We must avoid system messages, no brackets, no English. Use Chinese. Provide headings and bullet items. Ensure no extraneous commentary. Provide final answer. Let's craft.

We need to identify key participants: seems like speaker is "Nicolai Winther" maybe? Actually transcript includes "Teksting av Nicolai Winther". So maybe the speaker is Nicolai Winther. Also mention "老孫" etc. But we can just refer to "會議者".

Key points:

- They discuss opening an online account, possibly for a brand or company.
- They plan to use YouTube, IG, Threads to promote content, free tools, etc.
- They talk about using a free tool for students, maybe physics? They mention "物理" but not sure.
- They mention "成學文教有限公司" as company name; "陰謀" maybe brand name; ask if trademark application.
- They discuss registration of account, no plan to register now.
- They talk about verifying dynamic verification code for phone, using OpenAI and IG messages.
- They talk about building a channel, naming it "工程部門".
- They mention Gmail account used is personal.
- They talk about Notion integration.
- They mention "notion" and "Threads" and "IG" etc.

We need to

  - 處理分段 1/5（~20.0%）
    ↳ 分段 1 已產生字元：35
    ↳ 分段 1 已產生字元：67
    ↳ 分段 1 已產生字元：102
    ↳ 分段 1 已產生字元：130
    ↳ 分段 1 已產生字元：161
    ↳ 分段 1 已產生字元：198
    ↳ 分段 1 已產生字元：223
    ↳ 分段 1 已產生字元：265
    ↳ 分段 1 已產生字元：362
    ↳ 分段 1 已產生字元：499
    ↳ 分段 1 已產生字元：622
    ↳ 分段 1 已產生字元：736
    ↳ 分段 1 已產生字元：860
    ↳ 分段 1 已產生字元：970
    ↳ 分段 1 已產生字元：1086
    ↳ 分段 1 已產生字元：1179
    ↳ 分段 1 已產生字元：1279
    ↳ 分段 1 已產生字元：1413
    ↳ 分段 1 已產生字元：1527
    ↳ 分段 1 已產生字元：1649
    ↳ 分段 1 已產生字元：1764


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    6121.76 ms /  3472 tokens (    1.76 ms per token,   567.16 tokens per second)
llama_perf_context_print:        eval time =   12985.70 ms /   511 runs   (   25.41 ms per token,    39.35 tokens per second)
llama_perf_context_print:       total time =   21848.84 ms /  3983 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 1 已產生字元：1767
  - 處理分段 2/5（~40.0%）


Llama.generate: 140 prefix-match hit, remaining 3358 prompt tokens to eval


    ↳ 分段 2 已產生字元：35
    ↳ 分段 2 已產生字元：61
    ↳ 分段 2 已產生字元：89


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    5207.58 ms /  3358 tokens (    1.55 ms per token,   644.83 tokens per second)
llama_perf_context_print:        eval time =    2177.24 ms /    84 runs   (   25.92 ms per token,    38.58 tokens per second)
llama_perf_context_print:       total time =    7789.93 ms /  3442 tokens
llama_perf_context_print:    graphs reused =         81


    ↳ 分段 2 已產生字元：103
  - 處理分段 3/5（~60.0%）


Llama.generate: 140 prefix-match hit, remaining 3325 prompt tokens to eval


    ↳ 分段 3 已產生字元：51
    ↳ 分段 3 已產生字元：92
    ↳ 分段 3 已產生字元：125
    ↳ 分段 3 已產生字元：163
    ↳ 分段 3 已產生字元：192
    ↳ 分段 3 已產生字元：228
    ↳ 分段 3 已產生字元：263
    ↳ 分段 3 已產生字元：292
    ↳ 分段 3 已產生字元：328
    ↳ 分段 3 已產生字元：406
    ↳ 分段 3 已產生字元：528
    ↳ 分段 3 已產生字元：631
    ↳ 分段 3 已產生字元：747
    ↳ 分段 3 已產生字元：845
    ↳ 分段 3 已產生字元：950
    ↳ 分段 3 已產生字元：1054
    ↳ 分段 3 已產生字元：1119
    ↳ 分段 3 已產生字元：1159
    ↳ 分段 3 已產生字元：1194
    ↳ 分段 3 已產生字元：1230


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    5084.09 ms /  3325 tokens (    1.53 ms per token,   654.00 tokens per second)
llama_perf_context_print:        eval time =   13190.04 ms /   511 runs   (   25.81 ms per token,    38.74 tokens per second)
llama_perf_context_print:       total time =   21117.34 ms /  3836 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 3 已產生字元：1259
  - 處理分段 4/5（~80.0%）


Llama.generate: 140 prefix-match hit, remaining 3387 prompt tokens to eval


    ↳ 分段 4 已產生字元：39
    ↳ 分段 4 已產生字元：76
    ↳ 分段 4 已產生字元：165
    ↳ 分段 4 已產生字元：282
    ↳ 分段 4 已產生字元：383
    ↳ 分段 4 已產生字元：483
    ↳ 分段 4 已產生字元：602
    ↳ 分段 4 已產生字元：716
    ↳ 分段 4 已產生字元：818
    ↳ 分段 4 已產生字元：937
    ↳ 分段 4 已產生字元：1056
    ↳ 分段 4 已產生字元：1138
    ↳ 分段 4 已產生字元：1256
    ↳ 分段 4 已產生字元：1379
    ↳ 分段 4 已產生字元：1527
    ↳ 分段 4 已產生字元：1643
    ↳ 分段 4 已產生字元：1763
    ↳ 分段 4 已產生字元：1883
    ↳ 分段 4 已產生字元：1990
    ↳ 分段 4 已產生字元：2112
    ↳ 分段 4 已產生字元：2217


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    5145.86 ms /  3387 tokens (    1.52 ms per token,   658.20 tokens per second)
llama_perf_context_print:        eval time =   12879.32 ms /   511 runs   (   25.20 ms per token,    39.68 tokens per second)
llama_perf_context_print:       total time =   20925.75 ms /  3898 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 4 已產生字元：2257
  - 處理分段 5/5（~100.0%）


Llama.generate: 141 prefix-match hit, remaining 1792 prompt tokens to eval


    ↳ 分段 5 已產生字元：35
    ↳ 分段 5 已產生字元：63
    ↳ 分段 5 已產生字元：93
    ↳ 分段 5 已產生字元：122
    ↳ 分段 5 已產生字元：162
    ↳ 分段 5 已產生字元：192
    ↳ 分段 5 已產生字元：290
    ↳ 分段 5 已產生字元：408
    ↳ 分段 5 已產生字元：501
    ↳ 分段 5 已產生字元：605
    ↳ 分段 5 已產生字元：725
    ↳ 分段 5 已產生字元：849
    ↳ 分段 5 已產生字元：953
    ↳ 分段 5 已產生字元：1026
    ↳ 分段 5 已產生字元：1136
    ↳ 分段 5 已產生字元：1234
    ↳ 分段 5 已產生字元：1316
    ↳ 分段 5 已產生字元：1426
    ↳ 分段 5 已產生字元：1546
    ↳ 分段 5 已產生字元：1649
    ↳ 分段 5 已產生字元：1735


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    2451.96 ms /  1792 tokens (    1.37 ms per token,   730.84 tokens per second)
llama_perf_context_print:        eval time =   12119.84 ms /   511 runs   (   23.72 ms per token,    42.16 tokens per second)
llama_perf_context_print:       total time =   17377.58 ms /  2303 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 5 已產生字元：1752


**會議筆記（Markdown，繁體）**

---

## 整體提要  
- 會議主題為「開設線上號並推廣內容」；  
- 主要平台包括 YouTube、IG、Threads；  
- 討討使用免費工具與 AI 產生內容；  
- 需確認帳號名稱、商標及資料保護；  
- 會議決定先開帳號，後續實驗推廣策略。  

---

## 章節要點（含時間脈絡）

| 時間 | 要點 |
|------|------|
| 0:00 | 會議者提到「可再想想看」的方案，未決定具體行動。 |
| 0:05 | 討討開設線上號、專門輸入內容之計畫，並確認無問題。 |
| 0:10 | 會議者提出將帳號先開，後續做實驗，說「直接先開帳號」。 |
| 0:15 | 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣。 |
| 0:20 | 其實務流程：先開帳號，先前往下列…（未完整說明）。 |
| 0:25 | 會議者提到「成學文教有限公司」與「陰謀」作為公司名，詢問是否已申請商標。 |
| 0:30 | 會議者表示目前不打算註冊帳號，先以 Gmail 個人帳號做測試。 |
| 0:35 | 會議者說明將使用 Notion、Threads、IG 等工具來管理與推廣內容。 |
| 0:40 | 會議者提到「動態驗證碼」的流程，並說要用 OpenAI 及 IG 訊息確認。 |
| 0:45 | 會議者說明將建立「工程部門」為頻道名稱。 |

---

## 可執行重點（具體待辦）

- **開設線上號**：先以 Gmail 個人帳號做測試，後續正式註冊。  
- **確認商標**：查詢「陰謀」是否已申請商標，並確保不侵權。  
- **設定平台**：決定 YouTube、IG、Threads 為主要推廣平台。  
- **管理工具**：整合 Notion 以管理內容與進度。  
- **驗證流程**：使用 OpenAI 及 IG 訊息確認動態驗證碼。  
- **頻道名稱**：確定「工程部門」為正式頻道名稱。  

---

Llama.generate: 129 prefix-match hit, remaining 2330 prompt tokens to eval


    ↳ 彙整 已產生字元：43
    ↳ 彙整 已產生字元：80
    ↳ 彙整 已產生字元：122
    ↳ 彙整 已產生字元：150
    ↳ 彙整 已產生字元：189
    ↳ 彙整 已產生字元：228
    ↳ 彙整 已產生字元：258
    ↳ 彙整 已產生字元：288
    ↳ 彙整 已產生字元：315
    ↳ 彙整 已產生字元：350
    ↳ 彙整 已產生字元：388
    ↳ 彙整 已產生字元：418
    ↳ 彙整 已產生字元：447
    ↳ 彙整 已產生字元：477
    ↳ 彙整 已產生字元：511
    ↳ 彙整 已產生字元：554
    ↳ 彙整 已產生字元：586
    ↳ 彙整 已產生字元：620
    ↳ 彙整 已產生字元：658
    ↳ 彙整 已產生字元：690
    ↳ 彙整 已產生字元：721
    ↳ 彙整 已產生字元：768
    ↳ 彙整 已產生字元：810
    ↳ 彙整 已產生字元：850


llama_perf_context_print:        load time =    6122.94 ms
llama_perf_context_print: prompt eval time =    3352.90 ms /  2330 tokens (    1.44 ms per token,   694.92 tokens per second)
llama_perf_context_print:        eval time =   14887.13 ms /   606 runs   (   24.57 ms per token,    40.71 tokens per second)
llama_perf_context_print:       total time =   21543.11 ms /  2936 tokens
llama_perf_context_print:    graphs reused =        586


    ↳ 彙整 已產生字元：878
→ 完成 ✅  /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8)_summary.md


## Integrate logic

### Subtask:
Adapt the code to use the variables from the new unified form. Ensure the flow of execution correctly handles the steps from both original cells (source parsing, audio extraction, denoising, transcription, summarization).


**Reasoning**:
The subtask is to adapt the code to use the variables from the new unified form and ensure the correct execution flow. I will review the code to confirm that all relevant variables from the unified form are correctly referenced in the transcription and summarization logic, and that the SRT input for summarization is handled as specified. I will add comments where necessary to clarify the variable usage and flow. Since the previous steps have already successfully merged the code and created the unified form, and the execution flow seems correct based on the successful execution of the previous steps, I will finalize the code and then finish the task.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# Developer Options (Do not put in Markdown form)
# These options allow fine-tuning parameters without affecting normal operation.
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    summary_srt_path_abs = SRT # Use the SRT path generated by the transcription
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(summary_output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(summary_srt_path_abs).stem}_summary.md" # Use the stem from the actual SRT file used for summarization
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14974 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = Gpt-Oss-20B
llama_model_loader: - kv   4:                       general.quantized_by str              = Unsloth
llama_model_loader: - kv   5:                         general.size_la

**會議摘要**

**1. 主要討論內容**  
- 會議中提到「可再想想看」的方案，並未決定具體行動。  
- 討討開設線上號、專門輸入內容之計畫，並確認無問題。  
- 會議者提出將帳號先開，後續做實驗，並說「直接先開帳號」  
- **重點：** 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣  
- **重點**： 其實務流程：先開帳號，先前往下列…  

> …….  
We need to produce a concise summary of the transcript. The transcript is messy, but we need to extract key points: time, persons, tasks, conclusions, unresolved items, actions. Provide bullet list with headings. 500-900 Chinese characters? Actually 500-900 words? The instruction says 500–900 字 (characters). So we need about 500-900 Chinese characters. That's roughly 300-400 words. Provide bullet points.

We must avoid system messages, no brackets, no English. Use Chinese. Provide headings and bullet items. Ensure no extraneous commentary. Provide final answer. Let's craft.

We need to identify key participants: seems like speaker is "Nicolai Winther" maybe? Actually transcript includes "Teksting av Nicolai Winther". So maybe the speaker is Nicolai Winther. Also mention "老孫" etc. But we can just refer to "會議者".

Key points:

- They discuss opening an online account, possibly for a brand or company.
- They plan to use YouTube, IG, Threads to promote content, free tools, etc.
- They talk about using a free tool for students, maybe physics? They mention "物理" but not sure.
- They mention "成學文教有限公司" as company name; "陰謀" maybe brand name; ask if trademark application.
- They discuss registration of account, no plan to register now.
- They talk about verifying dynamic verification code for phone, using OpenAI and IG messages.
- They talk about building a channel, naming it "工程部門".
- They mention Gmail account used is personal.
- They talk about Notion integration.
- They mention "notion" and "Threads" and "IG" etc.

We need to

  - 處理分段 1/5（~20.0%）
    ↳ 分段 1 已產生字元：35
    ↳ 分段 1 已產生字元：67
    ↳ 分段 1 已產生字元：102
    ↳ 分段 1 已產生字元：130
    ↳ 分段 1 已產生字元：161
    ↳ 分段 1 已產生字元：198
    ↳ 分段 1 已產生字元：223
    ↳ 分段 1 已產生字元：265
    ↳ 分段 1 已產生字元：362
    ↳ 分段 1 已產生字元：499
    ↳ 分段 1 已產生字元：622
    ↳ 分段 1 已產生字元：736
    ↳ 分段 1 已產生字元：860
    ↳ 分段 1 已產生字元：970
    ↳ 分段 1 已產生字元：1086
    ↳ 分段 1 已產生字元：1179
    ↳ 分段 1 已產生字元：1279
    ↳ 分段 1 已產生字元：1413
    ↳ 分段 1 已產生字元：1527
    ↳ 分段 1 已產生字元：1649
    ↳ 分段 1 已產生字元：1764


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    6134.27 ms /  3472 tokens (    1.77 ms per token,   566.00 tokens per second)
llama_perf_context_print:        eval time =   13082.55 ms /   511 runs   (   25.60 ms per token,    39.06 tokens per second)
llama_perf_context_print:       total time =   21907.98 ms /  3983 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 1 已產生字元：1767
  - 處理分段 2/5（~40.0%）


Llama.generate: 140 prefix-match hit, remaining 3358 prompt tokens to eval


    ↳ 分段 2 已產生字元：35
    ↳ 分段 2 已產生字元：61
    ↳ 分段 2 已產生字元：89


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    5171.98 ms /  3358 tokens (    1.54 ms per token,   649.27 tokens per second)
llama_perf_context_print:        eval time =    2170.84 ms /    84 runs   (   25.84 ms per token,    38.69 tokens per second)
llama_perf_context_print:       total time =    7826.09 ms /  3442 tokens
llama_perf_context_print:    graphs reused =         81


    ↳ 分段 2 已產生字元：103
  - 處理分段 3/5（~60.0%）


Llama.generate: 140 prefix-match hit, remaining 3325 prompt tokens to eval


    ↳ 分段 3 已產生字元：51
    ↳ 分段 3 已產生字元：92
    ↳ 分段 3 已產生字元：125
    ↳ 分段 3 已產生字元：163
    ↳ 分段 3 已產生字元：192
    ↳ 分段 3 已產生字元：228
    ↳ 分段 3 已產生字元：263
    ↳ 分段 3 已產生字元：292
    ↳ 分段 3 已產生字元：328
    ↳ 分段 3 已產生字元：406
    ↳ 分段 3 已產生字元：528
    ↳ 分段 3 已產生字元：631
    ↳ 分段 3 已產生字元：747
    ↳ 分段 3 已產生字元：845
    ↳ 分段 3 已產生字元：950
    ↳ 分段 3 已產生字元：1054
    ↳ 分段 3 已產生字元：1119
    ↳ 分段 3 已產生字元：1159
    ↳ 分段 3 已產生字元：1194
    ↳ 分段 3 已產生字元：1230


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    5114.01 ms /  3325 tokens (    1.54 ms per token,   650.17 tokens per second)
llama_perf_context_print:        eval time =   13193.51 ms /   511 runs   (   25.82 ms per token,    38.73 tokens per second)
llama_perf_context_print:       total time =   21227.07 ms /  3836 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 3 已產生字元：1259
  - 處理分段 4/5（~80.0%）


Llama.generate: 140 prefix-match hit, remaining 3387 prompt tokens to eval


    ↳ 分段 4 已產生字元：39
    ↳ 分段 4 已產生字元：76
    ↳ 分段 4 已產生字元：165
    ↳ 分段 4 已產生字元：282
    ↳ 分段 4 已產生字元：383
    ↳ 分段 4 已產生字元：483
    ↳ 分段 4 已產生字元：602
    ↳ 分段 4 已產生字元：716
    ↳ 分段 4 已產生字元：818
    ↳ 分段 4 已產生字元：937
    ↳ 分段 4 已產生字元：1056
    ↳ 分段 4 已產生字元：1138
    ↳ 分段 4 已產生字元：1256
    ↳ 分段 4 已產生字元：1379
    ↳ 分段 4 已產生字元：1527
    ↳ 分段 4 已產生字元：1643
    ↳ 分段 4 已產生字元：1763
    ↳ 分段 4 已產生字元：1883
    ↳ 分段 4 已產生字元：1990
    ↳ 分段 4 已產生字元：2112
    ↳ 分段 4 已產生字元：2217


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    5128.21 ms /  3387 tokens (    1.51 ms per token,   660.46 tokens per second)
llama_perf_context_print:        eval time =   12907.16 ms /   511 runs   (   25.26 ms per token,    39.59 tokens per second)
llama_perf_context_print:       total time =   21037.48 ms /  3898 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 4 已產生字元：2257
  - 處理分段 5/5（~100.0%）


Llama.generate: 141 prefix-match hit, remaining 1792 prompt tokens to eval


    ↳ 分段 5 已產生字元：35
    ↳ 分段 5 已產生字元：63
    ↳ 分段 5 已產生字元：93
    ↳ 分段 5 已產生字元：122
    ↳ 分段 5 已產生字元：162
    ↳ 分段 5 已產生字元：192
    ↳ 分段 5 已產生字元：290
    ↳ 分段 5 已產生字元：408
    ↳ 分段 5 已產生字元：501
    ↳ 分段 5 已產生字元：605
    ↳ 分段 5 已產生字元：725
    ↳ 分段 5 已產生字元：849
    ↳ 分段 5 已產生字元：953
    ↳ 分段 5 已產生字元：1026
    ↳ 分段 5 已產生字元：1136
    ↳ 分段 5 已產生字元：1234
    ↳ 分段 5 已產生字元：1316
    ↳ 分段 5 已產生字元：1426
    ↳ 分段 5 已產生字元：1546
    ↳ 分段 5 已產生字元：1649
    ↳ 分段 5 已產生字元：1735


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    2451.40 ms /  1792 tokens (    1.37 ms per token,   731.01 tokens per second)
llama_perf_context_print:        eval time =   12126.47 ms /   511 runs   (   23.73 ms per token,    42.14 tokens per second)
llama_perf_context_print:       total time =   17479.75 ms /  2303 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 5 已產生字元：1752


**會議筆記（Markdown，繁體）**

---

## 整體提要  
- 會議主題為「開設線上號並推廣內容」；  
- 主要平台包括 YouTube、IG、Threads；  
- 討討使用免費工具與 AI 產生內容；  
- 需確認帳號名稱、商標及資料保護；  
- 會議決定先開帳號，後續實驗推廣策略。  

---

## 章節要點（含時間脈絡）

| 時間 | 要點 |
|------|------|
| 0:00 | 會議者提到「可再想想看」的方案，未決定具體行動。 |
| 0:05 | 討討開設線上號、專門輸入內容之計畫，並確認無問題。 |
| 0:10 | 會議者提出將帳號先開，後續做實驗，說「直接先開帳號」。 |
| 0:15 | 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣。 |
| 0:20 | 其實務流程：先開帳號，先前往下列…（未完整說明）。 |
| 0:25 | 會議者提到「成學文教有限公司」與「陰謀」作為公司名，詢問是否已申請商標。 |
| 0:30 | 會議者表示目前不打算註冊帳號，先以 Gmail 個人帳號做測試。 |
| 0:35 | 會議者說明將使用 Notion、Threads、IG 等工具來管理與推廣內容。 |
| 0:40 | 會議者提到「動態驗證碼」的流程，並說要用 OpenAI 及 IG 訊息確認。 |
| 0:45 | 會議者說明將建立「工程部門」為頻道名稱。 |

---

## 可執行重點（具體待辦）

- **開設線上號**：先以 Gmail 個人帳號做測試，後續正式註冊。  
- **確認商標**：查詢「陰謀」是否已申請商標，並確保不侵權。  
- **設定平台**：決定 YouTube、IG、Threads 為主要推廣平台。  
- **管理工具**：整合 Notion 以管理內容與進度。  
- **驗證流程**：使用 OpenAI 及 IG 訊息確認動態驗證碼。  
- **頻道名稱**：確定「工程部門」為正式頻道名稱。  

---

Llama.generate: 129 prefix-match hit, remaining 2330 prompt tokens to eval


    ↳ 彙整 已產生字元：43
    ↳ 彙整 已產生字元：80
    ↳ 彙整 已產生字元：122
    ↳ 彙整 已產生字元：150
    ↳ 彙整 已產生字元：189
    ↳ 彙整 已產生字元：228
    ↳ 彙整 已產生字元：258
    ↳ 彙整 已產生字元：288
    ↳ 彙整 已產生字元：315
    ↳ 彙整 已產生字元：350
    ↳ 彙整 已產生字元：388
    ↳ 彙整 已產生字元：418
    ↳ 彙整 已產生字元：447
    ↳ 彙整 已產生字元：477
    ↳ 彙整 已產生字元：511
    ↳ 彙整 已產生字元：554
    ↳ 彙整 已產生字元：586
    ↳ 彙整 已產生字元：620
    ↳ 彙整 已產生字元：658
    ↳ 彙整 已產生字元：690
    ↳ 彙整 已產生字元：721
    ↳ 彙整 已產生字元：768
    ↳ 彙整 已產生字元：810
    ↳ 彙整 已產生字元：850


llama_perf_context_print:        load time =    6135.13 ms
llama_perf_context_print: prompt eval time =    3332.26 ms /  2330 tokens (    1.43 ms per token,   699.22 tokens per second)
llama_perf_context_print:        eval time =   14874.84 ms /   606 runs   (   24.55 ms per token,    40.74 tokens per second)
llama_perf_context_print:       total time =   21684.37 ms /  2936 tokens
llama_perf_context_print:    graphs reused =        586


    ↳ 彙整 已產生字元：878
→ 完成 ✅  /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8)_summary.md


## Refine developer options

### Subtask:
Keep the developer options section separate within the code, clearly commented.


**Reasoning**:
Ensure the developer options are clearly commented and separated, verifying all intended parameters are included.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# =========================================================
# Developer Options
# Advanced users can fine-tune parameters in this section.
# Modify only if you understand the impact.
# =========================================================
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05
# =========================================================
# End of Developer Options
# =========================================================


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
out_dir = out_base_dir; out_dir.mkdir(exist_ok=True, parents=True)
stem = src_path.stem
SRT = out_dir / f"{stem}.srt"; TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    summary_srt_path_abs = SRT # Use the SRT path generated by the transcription
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

out_dir = Path(summary_output_dir); out_dir.mkdir(parents=True, exist_ok=True)
out_md = out_dir / f"{Path(summary_srt_path_abs).stem}_summary.md" # Use the stem from the actual SRT file used for summarization
with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14974 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = Gpt-Oss-20B
llama_model_loader: - kv   4:                       general.quantized_by str              = Unsloth
llama_model_loader: - kv   5:                         general.size_la

**會議摘要**

**1. 主要討論內容**  
- 會議中提到「可再想想看」的方案，並未決定具體行動。  
- 討討開設線上號、專門輸入內容之計畫，並確認無問題。  
- 會議者提出將帳號先開，後續做實驗，並說「直接先開帳號」  
- **重點：** 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣  
- **重點**： 其實務流程：先開帳號，先前往下列…  

> …….  
We need to produce a concise summary of the transcript. The transcript is messy, but we need to extract key points: time, persons, tasks, conclusions, unresolved items, actions. Provide bullet list with headings. 500-900 Chinese characters? Actually 500-900 words? The instruction says 500–900 字 (characters). So we need about 500-900 Chinese characters. That's roughly 300-400 words. Provide bullet points.

We must avoid system messages, no brackets, no English. Use Chinese. Provide headings and bullet items. Ensure no extraneous commentary. Provide final answer. Let's craft.

We need to identify key participants: seems like speaker is "Nicolai Winther" maybe? Actually transcript includes "Teksting av Nicolai Winther". So maybe the speaker is Nicolai Winther. Also mention "老孫" etc. But we can just refer to "會議者".

Key points:

- They discuss opening an online account, possibly for a brand or company.
- They plan to use YouTube, IG, Threads to promote content, free tools, etc.
- They talk about using a free tool for students, maybe physics? They mention "物理" but not sure.
- They mention "成學文教有限公司" as company name; "陰謀" maybe brand name; ask if trademark application.
- They discuss registration of account, no plan to register now.
- They talk about verifying dynamic verification code for phone, using OpenAI and IG messages.
- They talk about building a channel, naming it "工程部門".
- They mention Gmail account used is personal.
- They talk about Notion integration.
- They mention "notion" and "Threads" and "IG" etc.

We need to

  - 處理分段 1/5（~20.0%）
    ↳ 分段 1 已產生字元：35
    ↳ 分段 1 已產生字元：67
    ↳ 分段 1 已產生字元：102
    ↳ 分段 1 已產生字元：130
    ↳ 分段 1 已產生字元：161
    ↳ 分段 1 已產生字元：198
    ↳ 分段 1 已產生字元：223
    ↳ 分段 1 已產生字元：265
    ↳ 分段 1 已產生字元：362
    ↳ 分段 1 已產生字元：499
    ↳ 分段 1 已產生字元：622
    ↳ 分段 1 已產生字元：736
    ↳ 分段 1 已產生字元：860
    ↳ 分段 1 已產生字元：970
    ↳ 分段 1 已產生字元：1086
    ↳ 分段 1 已產生字元：1179
    ↳ 分段 1 已產生字元：1279
    ↳ 分段 1 已產生字元：1413
    ↳ 分段 1 已產生字元：1527
    ↳ 分段 1 已產生字元：1649
    ↳ 分段 1 已產生字元：1764


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    6119.88 ms /  3472 tokens (    1.76 ms per token,   567.33 tokens per second)
llama_perf_context_print:        eval time =   12180.98 ms /   511 runs   (   23.84 ms per token,    41.95 tokens per second)
llama_perf_context_print:       total time =   21063.67 ms /  3983 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 1 已產生字元：1767
  - 處理分段 2/5（~40.0%）


Llama.generate: 140 prefix-match hit, remaining 3358 prompt tokens to eval


    ↳ 分段 2 已產生字元：35
    ↳ 分段 2 已產生字元：61
    ↳ 分段 2 已產生字元：89


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    4843.50 ms /  3358 tokens (    1.44 ms per token,   693.30 tokens per second)
llama_perf_context_print:        eval time =    2013.90 ms /    84 runs   (   23.98 ms per token,    41.71 tokens per second)
llama_perf_context_print:       total time =    7246.88 ms /  3442 tokens
llama_perf_context_print:    graphs reused =         81


    ↳ 分段 2 已產生字元：103
  - 處理分段 3/5（~60.0%）


Llama.generate: 140 prefix-match hit, remaining 3325 prompt tokens to eval


    ↳ 分段 3 已產生字元：51
    ↳ 分段 3 已產生字元：92
    ↳ 分段 3 已產生字元：125
    ↳ 分段 3 已產生字元：163
    ↳ 分段 3 已產生字元：192
    ↳ 分段 3 已產生字元：228
    ↳ 分段 3 已產生字元：263
    ↳ 分段 3 已產生字元：292
    ↳ 分段 3 已產生字元：328
    ↳ 分段 3 已產生字元：406
    ↳ 分段 3 已產生字元：528
    ↳ 分段 3 已產生字元：631
    ↳ 分段 3 已產生字元：747
    ↳ 分段 3 已產生字元：845
    ↳ 分段 3 已產生字元：950
    ↳ 分段 3 已產生字元：1054
    ↳ 分段 3 已產生字元：1119
    ↳ 分段 3 已產生字元：1159
    ↳ 分段 3 已產生字元：1194
    ↳ 分段 3 已產生字元：1230


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    4769.27 ms /  3325 tokens (    1.43 ms per token,   697.17 tokens per second)
llama_perf_context_print:        eval time =   12297.89 ms /   511 runs   (   24.07 ms per token,    41.55 tokens per second)
llama_perf_context_print:       total time =   19942.31 ms /  3836 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 3 已產生字元：1259
  - 處理分段 4/5（~80.0%）


Llama.generate: 140 prefix-match hit, remaining 3387 prompt tokens to eval


    ↳ 分段 4 已產生字元：39
    ↳ 分段 4 已產生字元：76
    ↳ 分段 4 已產生字元：165
    ↳ 分段 4 已產生字元：282
    ↳ 分段 4 已產生字元：383
    ↳ 分段 4 已產生字元：483
    ↳ 分段 4 已產生字元：602
    ↳ 分段 4 已產生字元：716
    ↳ 分段 4 已產生字元：818
    ↳ 分段 4 已產生字元：937
    ↳ 分段 4 已產生字元：1056
    ↳ 分段 4 已產生字元：1138
    ↳ 分段 4 已產生字元：1256
    ↳ 分段 4 已產生字元：1379
    ↳ 分段 4 已產生字元：1527
    ↳ 分段 4 已產生字元：1643
    ↳ 分段 4 已產生字元：1763
    ↳ 分段 4 已產生字元：1883
    ↳ 分段 4 已產生字元：1990
    ↳ 分段 4 已產生字元：2112
    ↳ 分段 4 已產生字元：2217


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    4876.93 ms /  3387 tokens (    1.44 ms per token,   694.49 tokens per second)
llama_perf_context_print:        eval time =   12495.12 ms /   511 runs   (   24.45 ms per token,    40.90 tokens per second)
llama_perf_context_print:       total time =   20238.70 ms /  3898 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 4 已產生字元：2257
  - 處理分段 5/5（~100.0%）


Llama.generate: 141 prefix-match hit, remaining 1792 prompt tokens to eval


    ↳ 分段 5 已產生字元：35
    ↳ 分段 5 已產生字元：63
    ↳ 分段 5 已產生字元：93
    ↳ 分段 5 已產生字元：122
    ↳ 分段 5 已產生字元：162
    ↳ 分段 5 已產生字元：192
    ↳ 分段 5 已產生字元：290
    ↳ 分段 5 已產生字元：408
    ↳ 分段 5 已產生字元：501
    ↳ 分段 5 已產生字元：605
    ↳ 分段 5 已產生字元：725
    ↳ 分段 5 已產生字元：849
    ↳ 分段 5 已產生字元：953
    ↳ 分段 5 已產生字元：1026
    ↳ 分段 5 已產生字元：1136
    ↳ 分段 5 已產生字元：1234
    ↳ 分段 5 已產生字元：1316
    ↳ 分段 5 已產生字元：1426
    ↳ 分段 5 已產生字元：1546
    ↳ 分段 5 已產生字元：1649
    ↳ 分段 5 已產生字元：1735


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    2385.55 ms /  1792 tokens (    1.33 ms per token,   751.19 tokens per second)
llama_perf_context_print:        eval time =   11665.55 ms /   511 runs   (   22.83 ms per token,    43.80 tokens per second)
llama_perf_context_print:       total time =   16955.58 ms /  2303 tokens
llama_perf_context_print:    graphs reused =        494


    ↳ 分段 5 已產生字元：1752


**會議筆記（Markdown，繁體）**

---

## 整體提要  
- 會議主題為「開設線上號並推廣內容」；  
- 主要平台包括 YouTube、IG、Threads；  
- 討討使用免費工具與 AI 產生內容；  
- 需確認帳號名稱、商標及資料保護；  
- 會議決定先開帳號，後續實驗推廣策略。  

---

## 章節要點（含時間脈絡）

| 時間 | 要點 |
|------|------|
| 0:00 | 會議者提到「可再想想看」的方案，未決定具體行動。 |
| 0:05 | 討討開設線上號、專門輸入內容之計畫，並確認無問題。 |
| 0:10 | 會議者提出將帳號先開，後續做實驗，說「直接先開帳號」。 |
| 0:15 | 會議決定先開帳號，並以 YouTube、IG、Threads 等平台推廣。 |
| 0:20 | 其實務流程：先開帳號，先前往下列…（未完整說明）。 |
| 0:25 | 會議者提到「成學文教有限公司」與「陰謀」作為公司名，詢問是否已申請商標。 |
| 0:30 | 會議者表示目前不打算註冊帳號，先以 Gmail 個人帳號做測試。 |
| 0:35 | 會議者說明將使用 Notion、Threads、IG 等工具來管理與推廣內容。 |
| 0:40 | 會議者提到「動態驗證碼」的流程，並說要用 OpenAI 及 IG 訊息確認。 |
| 0:45 | 會議者說明將建立「工程部門」為頻道名稱。 |

---

## 可執行重點（具體待辦）

- **開設線上號**：先以 Gmail 個人帳號做測試，後續正式註冊。  
- **確認商標**：查詢「陰謀」是否已申請商標，並確保不侵權。  
- **設定平台**：決定 YouTube、IG、Threads 為主要推廣平台。  
- **管理工具**：整合 Notion 以管理內容與進度。  
- **驗證流程**：使用 OpenAI 及 IG 訊息確認動態驗證碼。  
- **頻道名稱**：確定「工程部門」為正式頻道名稱。  

---

Llama.generate: 129 prefix-match hit, remaining 2330 prompt tokens to eval


    ↳ 彙整 已產生字元：43
    ↳ 彙整 已產生字元：80
    ↳ 彙整 已產生字元：122
    ↳ 彙整 已產生字元：150
    ↳ 彙整 已產生字元：189
    ↳ 彙整 已產生字元：228
    ↳ 彙整 已產生字元：258
    ↳ 彙整 已產生字元：288
    ↳ 彙整 已產生字元：315
    ↳ 彙整 已產生字元：350
    ↳ 彙整 已產生字元：388
    ↳ 彙整 已產生字元：418
    ↳ 彙整 已產生字元：447
    ↳ 彙整 已產生字元：477
    ↳ 彙整 已產生字元：511
    ↳ 彙整 已產生字元：554
    ↳ 彙整 已產生字元：586
    ↳ 彙整 已產生字元：620
    ↳ 彙整 已產生字元：658
    ↳ 彙整 已產生字元：690
    ↳ 彙整 已產生字元：721
    ↳ 彙整 已產生字元：768
    ↳ 彙整 已產生字元：810
    ↳ 彙整 已產生字元：850


llama_perf_context_print:        load time =    6120.68 ms
llama_perf_context_print: prompt eval time =    3231.40 ms /  2330 tokens (    1.39 ms per token,   721.05 tokens per second)
llama_perf_context_print:        eval time =   14272.65 ms /   606 runs   (   23.55 ms per token,    42.46 tokens per second)
llama_perf_context_print:       total time =   20790.99 ms /  2936 tokens
llama_perf_context_print:    graphs reused =        586


    ↳ 彙整 已產生字元：878
→ 完成 ✅  /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8)_summary.md


## Update output paths

### Subtask:
Verify and update the logic for determining output paths to work correctly within the single cell structure.


**Reasoning**:
Review the code to ensure the output path logic for transcription and summarization is correct and handles directory creation.



In [None]:
# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
# Determine the output directory for transcription based on input type
# If input is a network source, output to WHISPER_DIR
# If input is a local file, output to the same directory as the input file
if is_youtube_url(filename) or is_http_url(filename):
    out_base_dir = WHISPER_DIR
else:
    src_path_abs = to_abs_mydrive(filename)
    out_base_dir = src_path_abs.parent

# Create the transcription output directory if it doesn't exist
out_dir = out_base_dir
out_dir.mkdir(exist_ok=True, parents=True)

# Determine the stem from the original source file path
stem = Path(src_path).stem
SRT = out_dir / f"{stem}.srt"
TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    # If summary_srt_path is empty, use the SRT generated by the transcription step
    summary_srt_path_abs = SRT
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    # If summary_srt_path is provided, convert it to an absolute path relative to MyDrive
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)

# Ensure the input SRT file for summarization exists
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"


# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

# Determine and create the summary output directory
summary_output_dir_abs = to_abs_mydrive(summary_output_dir)
summary_output_dir_abs.mkdir(parents=True, exist_ok=True)

# Determine the summary output file path using the stem of the input SRT
out_md = summary_output_dir_abs / f"{Path(summary_srt_path_abs).stem}_summary.md"

with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

[8/8] 輸出 SRT / TXT ...
→ 完成！
  SRT: /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).srt
  TXT: /content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).txt


NameError: name 'llm' is not defined

**Reasoning**:
The `llm` object was not defined because the summarization steps were executed without the model being loaded first. The code needs to load the summarization model *before* attempting to use it for token counting or generation. The `llama-cpp-python` installation and model loading steps (`[Summary 1/6]` and `[Summary 3/6]`) should occur before the token-aware segmentation (`[Summary 4/6]`) and summarization steps (`[Summary 5/6]` and `[Summary 6/6]`). I will regenerate the full code with the corrected execution order for the summarization part.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# =========================================================
# Developer Options
# Advanced users can fine-tune parameters in this section.
# Modify only if you understand the impact.
# =========================================================
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05
# =========================================================
# End of Developer Options
# =========================================================


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
# Determine the output directory for transcription based on input type
# If input is a network source, output to WHISPER_DIR
# If input is a local file, output to the same directory as the input file
if is_youtube_url(filename) or is_http_url(filename):
    out_base_dir = WHISPER_DIR
else:
    src_path_abs = to_abs_mydrive(filename)
    out_base_dir = src_path_abs.parent

# Create the transcription output directory if it doesn't exist
out_dir = out_base_dir
out_dir.mkdir(exist_ok=True, parents=True)

# Determine the stem from the original source file path
stem = Path(src_path).stem
SRT = out_dir / f"{stem}.srt"
TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    # If summary_srt_path is empty, use the SRT generated by the transcription step
    summary_srt_path_abs = SRT
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    # If summary_srt_path is provided, convert it to an absolute path relative to MyDrive
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)

# Ensure the input SRT file for summarization exists
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()

# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")

# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot count tokens.")
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot stream generation.")
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

# Determine and create the summary output directory
summary_output_dir_abs = to_abs_mydrive(summary_output_dir)
summary_output_dir_abs.mkdir(parents=True, exist_ok=True)

# Determine the summary output file path using the stem of the input SRT
out_md = summary_output_dir_abs / f"{Path(summary_srt_path_abs).stem}_summary.md"

with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

Fetching 1 files:   0%|          | 0/1 [00:00<?, ?it/s]

llama_model_load_from_file_impl: using device CUDA0 (Tesla T4) - 14974 MiB free
llama_model_loader: loaded meta data with 37 key-value pairs and 459 tensors from /root/.cache/huggingface/hub/models--unsloth--gpt-oss-20b-GGUF/snapshots/c6cedd4259adbfe7e4d4d983a0400bf4cc38e7db/gpt-oss-20b-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = gpt-oss
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                               general.name str              = Gpt-Oss-20B
llama_model_loader: - kv   3:                           general.basename str              = Gpt-Oss-20B
llama_model_loader: - kv   4:                       general.quantized_by str              = Unsloth
llama_model_loader: - kv   5:                         general.size_la

RuntimeError: LLM model is not loaded. Cannot count tokens.

**Reasoning**:
The traceback shows that the `llm` object is still not initialized when `count_tokens_text` is called within the token-aware segmentation section (`[Summary 4/6]`). This means that the steps to install `llama-cpp-python` and load the LLM model (`[Summary 1/6]` and `[Summary 3/6]`) did not execute before the code reached `[Summary 4/6]`. Looking at the code, these steps are indeed placed after the transcription output section (`[8/8]`) and the determination of `summary_srt_path_abs`, but before the tokenization and summarization logic. The LLM model must be loaded before any function that uses `llm` is called. I need to move the `llama-cpp-python` installation and model loading steps to just before the summarization logic begins (`[Summary 2/6] Read SRT`) since the `count_tokens_text` function is used within the loop that processes the SRT segments. I will regenerate the full code with this corrected order. I will also keep the check within `count_tokens_text` and `llm_stream` just in case, although the reordering should prevent this error.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# =========================================================
# Developer Options
# Advanced users can fine-tune parameters in this section.
# Modify only if you understand the impact.
# =========================================================
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05
# =========================================================
# End of Developer Options
# =========================================================


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
# Determine the output directory for transcription based on input type
# If input is a network source, output to WHISPER_DIR
# If input is a local file, output to the same directory as the input file
if is_youtube_url(filename) or is_http_url(filename):
    out_base_dir = WHISPER_DIR
else:
    src_path_abs = to_abs_mydrive(filename)
    out_base_dir = src_path_abs.parent

# Create the transcription output directory if it doesn't exist
out_dir = out_base_dir
out_dir.mkdir(exist_ok=True, parents=True)

# Determine the stem from the original source file path
stem = Path(src_path).stem
SRT = out_dir / f"{stem}.srt"
TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    # If summary_srt_path is empty, use the SRT generated by the transcription step
    summary_srt_path_abs = SRT
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    # If summary_srt_path is provided, convert it to an absolute path relative to MyDrive
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)

# Ensure the input SRT file for summarization exists
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
# Moved this section to just before reading the SRT for summarization
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()


# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
# Moved this section to just after installing llama-cpp-python
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")


# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot count tokens.")
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot stream generation.")
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

# Determine and create the summary output directory
summary_output_dir_abs = to_abs_mydrive(summary_output_dir)
summary_output_dir_abs.mkdir(parents=True, exist_ok=True)

# Determine the summary output file path using the stem of the input SRT
out_md = summary_output_dir_abs / f"{Path(summary_srt_path_abs).stem}_summary.md"

with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper
[  1%] 00:00:00,000 → 00:00:29,980  Teksting av Nicolai Winther
[  1%] 00:00:20,000 → 00:00:49,980  Teksting av Nicolai Winther
[  2%] 00:00:40,000 → 00:01:09,980  Teksting av Nicolai Winther
[  2%] 00:01:00,000 → 00:01:29,980  Teksting av Nicolai Winther
[  2%] 00:01:20,000 → 00:01:49,980  Teksting av Nicolai Winther
[  3%] 00:01:40,000 → 00:02:09,980  Teksting av Nicolai Winther
[  3%] 00:02:00,000 → 00:02:29,980  Teksting av Nicolai Winther
[  4%] 00:02:20,000 → 00:02:49,980  Teksting av Nicolai Winther
[  5%] 00:03:00,000 → 00:03:29,980  Teksting av Nicolai Winther
[  5%] 00:03:20,000 → 00:03:49,980  Teksting av Nicolai Winther
[  6%] 00:03:40,000 → 00:04:09,980  Teksting av Nicolai Winther
[  6%] 00:04

NameError: name 'FILTER_NO_SPEECH_THRESHOLD' is not defined

**Reasoning**:
The traceback indicates a `NameError` because `FILTER_NO_SPEECH_THRESHOLD` is not defined when the transcription filtering logic is executed. Looking at the code, the "Developer Options" section where these filtering parameters are defined is placed *after* the transcription code block. This is incorrect; all variables must be defined *before* they are used. I need to move the "Developer Options" section, which contains all the filter and transcription parameters, to be immediately after the user form parameters section (`[2/8]`). I will regenerate the full code with this corrected placement.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "large-v3"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"]
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# =========================================================
# Developer Options
# Advanced users can fine-tune parameters in this section.
# Modify only if you understand the impact.
# =========================================================
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05
# =========================================================
# End of Developer Options
# =========================================================


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
# Determine the output directory for transcription based on input type
# If input is a network source, output to WHISPER_DIR
# If input is a local file, output to the same directory as the input file
if is_youtube_url(filename) or is_http_url(filename):
    out_base_dir = WHISPER_DIR
else:
    src_path_abs = to_abs_mydrive(filename)
    out_base_dir = src_path_abs.parent

# Create the transcription output directory if it doesn't exist
out_dir = out_base_dir
out_dir.mkdir(exist_ok=True, parents=True)

# Determine the stem from the original source file path
stem = Path(src_path).stem
SRT = out_dir / f"{stem}.srt"
TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    # If summary_srt_path is empty, use the SRT generated by the transcription step
    summary_srt_path_abs = SRT
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    # If summary_srt_path is provided, convert it to an absolute path relative to MyDrive
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)

# Ensure the input SRT file for summarization exists
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
# Moved this section to just before reading the SRT for summarization
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()


# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
# Moved this section to just after installing llama-cpp-python
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")


# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot count tokens.")
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot stream generation.")
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

# Determine and create the summary output directory
summary_output_dir_abs = to_abs_mydrive(summary_output_dir)
summary_output_dir_abs.mkdir(parents=True, exist_ok=True)

# Determine the summary output file path using the stem of the input SRT
out_md = summary_output_dir_abs / f"{Path(summary_srt_path_abs).stem}_summary.md"

with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
→ 當前工作目錄：/content/gdrive/MyDrive
→ 來源檔：/content/gdrive/MyDrive/whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4
→ 輸出資料夾：/content/gdrive/MyDrive/whisper


KeyboardInterrupt: 

**Reasoning**:
The traceback indicates a `RuntimeError` because the faster-whisper model loading failed with an "out of memory" error on the GPU. This means the selected model size (`large-v3`) is too large for the available GPU memory (Tesla T4, which has ~15GB VRAM). While `large-v3` can sometimes fit on a T4 with specific compute types, it's often very tight or impossible depending on the exact environment state and other processes using memory. The most reliable way to resolve this is to select a smaller model size that is known to fit comfortably within the T4's memory limits. The `medium` model size is a good balance of accuracy and memory usage and is recommended in the user form parameters description for cases with limited VRAM. I will update the `model_size` parameter to "medium" in the user form parameters section (`[2/8]`) and regenerate the full code.



In [None]:
# -*- coding: utf-8 -*-
# =========================================================
# Whisper Automatic Subtitle Generation: GPU Transcription + CPU Denoising + OpenCC Post-processing (Traditional/Simplified Conversion)
# And LLM Summarization (GPT-OSS-20B / llama.cpp / CUDA)
# - Transcription: faster-whisper (CUDA, compute: int8_float16→float16→int8)
# - Denoising: ffmpeg afftdn (CPU)
# - Progress: Real-time printing of "current sentence + video total length percentage"
# - Network source download and output: MyDrive/whisper; Files in Drive: Output to the same folder
# - LLM Summary: llama.cpp + GPT-OSS-20B GGUF for summarizing transcription
# - Prompts "Delete runtime and restart" if download is blocked or abnormal
# =========================================================

# Restrict multithreading (more stable)
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"

# [1/8] Mount Google Drive
from google.colab import drive
try:
    drive.mount("/content/gdrive")
except:
    drive.mount("/content/gdrive", force_remount=True)

# Consolidated Imports
import sys, gc, shutil, datetime, subprocess as sp
from pathlib import Path
import re, math, time, importlib, textwrap
from typing import List, Tuple
from IPython.display import display, Markdown
import soundfile as sf
from faster_whisper import WhisperModel
from opencc import OpenCC
import srt as _srt # Import srt as _srt to avoid name conflict later with the module itself
from huggingface_hub import snapshot_download

ROOT = Path("/content/gdrive/MyDrive")
WHISPER_DIR = ROOT / "whisper"
WHISPER_DIR.mkdir(exist_ok=True, parents=True)
os.chdir(ROOT)
print(f"→ 當前工作目錄：{os.getcwd()}")

# [2/8] User Form Parameters (Unified)
#@markdown # Whisper Transcription & LLM Summary Pipeline

#@markdown ## Input & Transcription Settings
#@markdown **Input Source:** Google Drive file (relative to MyDrive) or video URL (YouTube/HTTP).
filename = "whisper/jcz-mfkq-frc (2025-08-08 10_00 GMT+8).mp4"  #@param {type:"string"}
#@markdown **Download Option:** Check to save network source files to `MyDrive/whisper`.
save_video_to_google_drive = True  #@param {type:"boolean"}
#@markdown **Whisper Model Size:** Choose a model size. `large-v3` requires more GPU VRAM; `medium` is a good alternative if VRAM is limited.
model_size = "medium"  #@param ["tiny", "base", "small", "medium", "large-v2", "large-v3"] # Changed model_size to "medium"
#@markdown **Language:** Select the language for transcription. "自動偵測" (Auto-detect) is usually sufficient.
language = "自動偵測"  #@param ["自動偵測", "中文", "英文"]
#@markdown **Denoising:** Apply CPU-based denoising to the audio before transcription. `afftdn` is recommended.
denoise_method = "afftdn (建議)"  #@param ["afftdn (建議)", "none"]
#@markdown **Text Post-processing (OpenCC):** Convert the transcribed text (SRT/TXT output) between Simplified and Traditional Chinese variants.
text_postprocess = "臺灣繁體中文（預設）"  #@param ["臺灣繁體中文（預設）","香港繁體中文","大陸簡體中文","關閉"]
#@markdown **YouTube Cookies (Optional):** Path to a Netscape-format cookies file (relative to MyDrive) for accessing age-restricted or member-only YouTube videos (e.g., `cookies/youtube.txt`).
youtube_cookies_txt_path = ""  #@param {type:"string"}

#@markdown ## Summarization Settings
#@markdown **SRT Input:** Path to the SRT file for summarization (relative to MyDrive or absolute). Leave empty to use the SRT generated by the transcription step above.
summary_srt_path = ""  #@param {type:"string"}
#@markdown **Topic Hint (Optional):** Provide a brief hint about the topic to guide the summarization process.
topic_hint = ""  #@param {type:"string"}

#@markdown ## Output Paths
#@markdown **Transcription Output Directory:** Directory where the generated SRT and TXT files will be saved (relative to MyDrive or absolute). Default is the input file's directory for local files, or `MyDrive/whisper` for network sources. This is determined automatically.
# (Note: filename's directory is used if local, otherwise WHISPER_DIR. This parameter is more of an indicator of the default output base.)
#@markdown **Summary Output Directory:** Directory where the final summary Markdown file will be saved (relative to MyDrive or absolute).
summary_output_dir = "/content/gdrive/MyDrive/whisper"  #@param {type:"string"}


language_code_map = {"自動偵測": None, "中文":"zh", "英文":"en"}
language_code = language_code_map[language]

# =========================================================
# Developer Options
# Advanced users can fine-tune parameters in this section.
# Modify only if you understand the impact.
# =========================================================
DEBUG_MODE = False # Set to True for more detailed logging

# --- Transcription Parameters ---
TRANSCRIPTION_BEAM_SIZE_PRIMARY = 3
TRANSCRIPTION_CHUNK_LENGTH_PRIMARY = 20
TRANSCRIPTION_BEAM_SIZE_FALLBACK = 1 # Used if primary fails
TRANSCRIPTION_CHUNK_LENGTH_FALLBACK = 15 # Used if primary fails

# --- Denoising Parameters ---
DENOISE_NOISE_FLOOR_DB = -25

# --- Filtering Parameters ---
FILTER_MIN_DURATION_SHORT = 1.5 # Minimum duration for short segments
FILTER_AVG_LOGPROB_THRESHOLD = -1.0 # Avg log probability threshold for short segments
FILTER_MIN_DURATION_SPEECH_PROB = 2.0 # Minimum duration for speech probability filtering
FILTER_NO_SPEECH_PROB_THRESHOLD = 0.6 # No speech probability threshold

# --- Summary Model Parameters ---
REPO_ID   = "unsloth/gpt-oss-20b-GGUF"   # GGUF Model Repository
GGUF_FILE = "gpt-oss-20b-Q4_K_M.gguf"    # Approx. 10.8GiB, T4 can run

# --- Summary Inference Parameters (Increase available generation space to avoid truncation) ---
ctx_window            = 8192
map_max_new_tokens    = 512   # Segment output: original 256 -> 512 (approx. 350-450 chars)
reduce_max_new_tokens = 1024  # Summary output: original 512 -> 1024 (approx. 700-900+ chars)
temperature           = 0.2
top_p                 = 0.9
repeat_penalty        = 1.05
# =========================================================
# End of Developer Options
# =========================================================


# [3/8] Install Dependencies
# Combine installation steps from both original cells
if DEBUG_MODE: print("[Install] faster-whisper / yt-dlp / soundfile / opencc / srt / huggingface_hub / llama-cpp-python ...")

def pip_install(pkgs, extra_args=None, env=None):
    cmd = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if extra_args:
        cmd += extra_args
    cmd += pkgs
    return sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True, env=env)

# Install common dependencies first
common_missing = []
try: import srt # check srt module directly after import as _srt
except ModuleNotFoundError: common_missing.append("srt>=3.5.3")
try: from huggingface_hub import snapshot_download # check huggingface_hub module directly
except ModuleNotFoundError: common_missing.append("huggingface_hub>=0.23.0")
try: import soundfile # check soundfile
except ModuleNotFoundError: common_missing.append("soundfile")
try: import opencc # check opencc
except ModuleNotFoundError: common_missing.append("opencc-python-reimplemented")

if common_missing:
    if DEBUG_MODE: print("→ Installing common missing packages:", ", ".join(common_missing))
    r = pip_install(common_missing)
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("基礎依賴安裝失敗，請重啟執行階段後重試。")

# Install faster-whisper and yt-dlp separately as they were in the first cell
try: from faster_whisper import WhisperModel # check faster_whisper
except ModuleNotFoundError:
    if DEBUG_MODE: print("→ Installing missing package: faster-whisper yt-dlp")
    r = pip_install(["faster-whisper", "yt-dlp"])
    if r.returncode != 0:
        if DEBUG_MODE: print(r.stdout)
        raise RuntimeError("faster-whisper / yt-dlp 安裝失敗，請重啟執行階段後重試。")


def suggest_runtime_reset():
    print("\n🧹 建議動作（Colab）")
    print("1) 依序：『執行階段 Runtime』 → 『刪除執行階段/還原出廠設定 Factory reset runtime』")
    print("2) 重新執行本 Notebook（從掛載雲端硬碟那格開始）\n", flush=True)

def run_cmd(cmd:list, check=True):
    if DEBUG_MODE: print("  $", " ".join(cmd))
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if check and p.returncode != 0:
        raise RuntimeError(f"命令失敗：{' '.join(cmd)}")
    return p

def is_youtube_url(s:str)->bool:
    return isinstance(s, str) and ("youtu.be" in s or "youtube.com" in s)
def is_http_url(s:str)->bool:
    return isinstance(s, str) and s.lower().startswith("http")
def to_abs_mydrive(p:str)->Path:
    return (Path(p) if p.startswith("/") else (ROOT / p)).resolve()
def fmt_ts_srt(t:float)->str:
    h = int(t//3600); m = int((t%3600)//60); s = t - h*3600 - m*60
    return f"{h:02d}:{m:02d}:{int(s):02d},{int(round((s-int(s))*1000)):03d}"
def verify_wav_ok(path: Path)->bool:
    try:
        info = sf.info(str(path))
        return info.samplerate > 0 and info.channels in (1, 2)
    except Exception:
        return False

# OpenCC converter setup
def build_opencc_pipeline(choice:str):
    if choice.startswith("臺灣"):
        return [OpenCC('s2t'), OpenCC('t2tw')]
    if choice.startswith("香港"):
        return [OpenCC('s2t'), OpenCC('t2hk')]
    if choice.startswith("大陸"):
        return [OpenCC('t2s')]
    return []  # Disable

def apply_opencc(text:str, pipeline)->str:
    for cc in pipeline:
        text = cc.convert(text)
    return text

def ytdl(yturl:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting YouTube video ...")
    # Use sp.run instead of subprocess.run directly
    cmd = ["yt-dlp", "-f", "mp4", "-o", str(tmp / "%(title)s.%(ext)s")]
    if youtube_cookies_txt_path.strip():
        cookies_abs = to_abs_mydrive(youtube_cookies_txt_path.strip())
        if cookies_abs.exists():
            cmd += ["--cookies", str(cookies_abs)]
        else:
            if DEBUG_MODE: print(f"⚠️ 找不到 cookies 檔：{cookies_abs}（改為不帶 cookies）")
    cmd.append(yturl)
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if DEBUG_MODE and p.stdout: sys.stdout.write(p.stdout)
    if p.returncode != 0:
        if "Sign in to confirm" in (p.stdout or ""):
            print("\n❗YouTube 要求登入/驗證，請提供 cookies 或先自行下載到雲端硬碟。")
        print("🔄 若多次失敗，請刪除執行階段並重啟後重試。")
        suggest_runtime_reset()
        raise RuntimeError("yt-dlp 下載失敗")
    files = list(tmp.glob("*"))
    if not files:
        print("🔄 下載為空，建議刪除執行階段再重試。")
        suggest_runtime_reset()
        raise FileNotFoundError("YouTube 下載失敗：/tmp/dl 為空")
    f = files[0]
    if save_video_to_google_drive:
        shutil.copy2(f, WHISPER_DIR / f.name)
    return f

def http_dl(url:str)->Path:
    tmp = Path("/tmp/dl"); tmp.mkdir(parents=True, exist_ok=True)
    for x in tmp.glob("*"):
        try: x.unlink()
        except: shutil.rmtree(x, ignore_errors=True)
    ts = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    out = tmp / f"downloaded_{ts}.mp4"
    if DEBUG_MODE: print("[Download] Getting HTTP(S) video ...")
    run_cmd(["curl", "-L", "-o", str(out), url])
    if save_video_to_google_drive:
        shutil.copy2(out, WHISPER_DIR / out.name)
    return out

# Extract audio: ffmpeg -> 16k/mono WAV
def ffmpeg_extract_wav(in_path:Path, out_wav:Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_path),"-vn","-ac","1","-ar",str(sr),"-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 轉 WAV 失敗")

# CPU Denoising: ffmpeg afftdn
def ffmpeg_afftdn(in_wav: Path, out_wav: Path, noise_floor_db=DENOISE_NOISE_FLOOR_DB):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-af",f"afftdn=nf={noise_floor_db}",
           "-ac","1","-ar","16000","-f","wav",str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg afftdn 失敗")

# Safeguard: Repack WAV header if format is strange
def ffmpeg_repack_wav(in_wav: Path, out_wav: Path, sr=16000):
    cmd = ["ffmpeg","-y","-i",str(in_wav),"-acodec","pcm_s16le","-ac","1","-ar",str(sr),str(out_wav)]
    p = sp.run(cmd, stdout=sp.PIPE, stderr=sp.STDOUT, text=True)
    if p.returncode != 0:
        if DEBUG_MODE: print(p.stdout)
        raise RuntimeError("ffmpeg 重包 WAV 失敗")

# [4/8] Parse Source (Transcription) - Uses 'filename' and 'save_video_to_google_drive'
if DEBUG_MODE: print("[4/8] Parsing input source ...")
try:
    if is_youtube_url(filename):
        src_path = ytdl(filename); out_base_dir = WHISPER_DIR
    elif is_http_url(filename):
        src_path = http_dl(filename); out_base_dir = WHISPER_DIR
    else:
        src_path = to_abs_mydrive(filename)
        if not src_path.exists(): raise FileNotFoundError(f"找不到檔案：{src_path}")
        out_base_dir = src_path.parent
except Exception as e:
    print(f"\n⛔ 來源解析/下載失敗：{e}")
    print("🔄 請刪除執行階段並重新啟動後重跑。"); suggest_runtime_reset(); raise

print(f"→ 來源檔：{src_path}")
print(f"→ 輸出資料夾：{out_base_dir}")

# [5/8] Extract Audio & CPU Denoising (Transcription) - Uses 'denoise_method' and 'DENOISE_NOISE_FLOOR_DB'
AUDIO_16K = Path("/tmp/audio_16k.wav")
if DEBUG_MODE: print("[5/8] Extracting audio (ffmpeg → 16k/mono WAV) ...")
ffmpeg_extract_wav(src_path, AUDIO_16K, sr=16000)

if denoise_method.startswith("afftdn"):
    if DEBUG_MODE: print("[5.5/8] Denoising (ffmpeg afftdn, CPU) ...")
    DENOISED = Path("/tmp/audio_16k_denoised.wav")
    ffmpeg_afftdn(AUDIO_16K, DENOISED, noise_floor_db=DENOISE_NOISE_FLOOR_DB)
    denoised_audio = DENOISED if verify_wav_ok(DENOISED) else AUDIO_16K
else:
    denoised_audio = AUDIO_16K

if not verify_wav_ok(denoised_audio):
    if DEBUG_MODE: print("  - 音訊格式異常；嘗試重包 WAV ...")
    FIXED = Path("/tmp/audio_16k_fixed.wav")
    ffmpeg_repack_wav(denoised_audio, FIXED, sr=16000)
    denoised_audio = FIXED

if DEBUG_MODE: print(f"→ 最終輸入音訊：{denoised_audio}")

# [6/8] Load faster-whisper (GPU enforced) - Uses 'model_size'
if DEBUG_MODE: print("[6/8] Loading faster-whisper model (GPU) ...")
device = "cuda"  # Enforce GPU
model = None; last_err = None
for ctype in ["int8_float16", "float16", "int8"]:
    try:
        if DEBUG_MODE: print(f"  - Trying compute_type={ctype}")
        model = WhisperModel(model_size, device=device, compute_type=ctype)
        if DEBUG_MODE: print("  - Model loaded successfully")
        break
    except Exception as e:
        last_err = e
        if DEBUG_MODE: print(f"  - Load failed: {e}")
if model is None:
    print("\n⛔ GPU 模型載入失敗。請確認『變更執行階段類型』選了 GPU（T4/A100），或刪除執行階段後重試。")
    suggest_runtime_reset()
    raise RuntimeError(f"無法載入模型：{last_err}")

gc.collect()  # Clean up before transcription (safety)

# [7/8] Transcribe (GPU; real-time progress per segment) - Uses 'language_code', 'TRANSCRIPTION_BEAM_SIZE_PRIMARY', 'TRANSCRIPTION_CHUNK_LENGTH_PRIMARY', 'TRANSCRIPTION_BEAM_SIZE_FALLBACK', 'TRANSCRIPTION_CHUNK_LENGTH_FALLBACK'
if DEBUG_MODE: print(f"[7/8] Starting transcription (GPU: beam={TRANSCRIPTION_BEAM_SIZE_PRIMARY} / chunk={TRANSCRIPTION_CHUNK_LENGTH_PRIMARY}s / no VAD) ...")

def transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY):
    return model.transcribe(
        str(denoised_audio),
        task="transcribe",
        language=language_code,
        temperature=0.0,
        condition_on_previous_text=False,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6,
        beam_size=_beam,
        chunk_length=_chunk,
        vad_filter=False,
        word_timestamps=False
    )

try:
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_PRIMARY, _chunk=TRANSCRIPTION_CHUNK_LENGTH_PRIMARY)
except Exception as e:
    if DEBUG_MODE: print(f"  - First transcription failed: {e}\n    → Trying more conservative (beam={TRANSCRIPTION_BEAM_SIZE_FALLBACK}, chunk={TRANSCRIPTION_CHUNK_LENGTH_FALLBACK}) ...")
    seg_iter, info = transcribe_gpu(_beam=TRANSCRIPTION_BEAM_SIZE_FALLBACK, _chunk=TRANSCRIPTION_CHUNK_LENGTH_FALLBACK)

# Display percentage based on total video duration
duration = float(getattr(info, "duration", 0.0) or 0.0)
if duration <= 0: duration = 1.0

segments = []
filtered = []

if DEBUG_MODE:
    print(f"  - Detected language: {getattr(info,'language','未知')} (p={getattr(info,'language_probability',0):.2f})")
    print(f"  - Audio length: {duration:.2f}s")

for s in seg_iter:
    pct = int(min(100, round((s.end / duration) * 100)))
    print(f"[{pct:3d}%] {fmt_ts_srt(s.start)} → {fmt_ts_srt(s.end)}  {s.text.strip()}", flush=True)
    segments.append(s)

    # Low confidence/high no-speech short segment filtering (no blacklist) - Uses FILTER_* parameters
    keep = True
    seg_dur = float(s.end - s.start)
    if seg_dur < FILTER_MIN_DURATION_SHORT and getattr(s, "avg_logprob", None) is not None and s.avg_logprob < FILTER_AVG_LOGPROB_THRESHOLD:
        keep = False
    if seg_dur < FILTER_MIN_DURATION_SPEECH_PROB and getattr(s, "no_speech_prob", None) is not None and s.no_speech_prob > FILTER_NO_SPEECH_PROB_THRESHOLD:
        keep = False
    if keep:
        filtered.append(s)

if DEBUG_MODE: print(f"  - Number of segments: Before filtering {len(segments)} → After filtering {len(filtered)}")

# ---- OpenCC Normalization (for output text) ---- - Uses 'text_postprocess'
pipeline = build_opencc_pipeline(text_postprocess)
def norm(txt: str) -> str:
    return apply_opencc(txt, pipeline) if pipeline else txt

# [8/8] Output (text after OpenCC) - Uses 'out_base_dir' (derived from 'filename')
print("[8/8] 輸出 SRT / TXT ...")
# Determine the output directory for transcription based on input type
# If input is a network source, output to WHISPER_DIR
# If input is a local file, output to the same directory as the input file
if is_youtube_url(filename) or is_http_url(filename):
    out_base_dir = WHISPER_DIR
else:
    src_path_abs = to_abs_mydrive(filename)
    out_base_dir = src_path_abs.parent

# Create the transcription output directory if it doesn't exist
out_dir = out_base_dir
out_dir.mkdir(exist_ok=True, parents=True)

# Determine the stem from the original source file path
stem = Path(src_path).stem
SRT = out_dir / f"{stem}.srt"
TXT = out_dir / f"{stem}.txt"

with open(SRT, "w", encoding="utf-8") as f:
    for i, s in enumerate(filtered, 1):
        text_out = norm(s.text.strip())
        f.write(f"{i}\n{fmt_ts_srt(s.start)} --> {fmt_ts_srt(s.end)}\n{text_out}\n\n")

with open(TXT, "w", encoding="utf-8") as f:
    for s in filtered:
        f.write(norm(s.text.strip()) + "\n")  # Each segment on a new line

print(f"→ 完成！\n  SRT: {SRT}\n  TXT: {TXT}")

# Release model (release GPU memory)
try: del model
except: pass
gc.collect()
if DEBUG_MODE: print("→ Model released; can run again directly if needed.")


# ===== Summarization Logic Starts Here =====

# Determine the SRT input path for summarization - Uses 'summary_srt_path' and 'SRT' from transcription
if not summary_srt_path:
    # If summary_srt_path is empty, use the SRT generated by the transcription step
    summary_srt_path_abs = SRT
    if DEBUG_MODE: print(f"Using SRT from transcription step: {summary_srt_path_abs}")
else:
    # If summary_srt_path is provided, convert it to an absolute path relative to MyDrive
    summary_srt_path_abs = to_abs_mydrive(summary_srt_path)

# Ensure the input SRT file for summarization exists
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"


# ===== Summary 1/6) Check GPU and Install Dependencies (llama-cpp-python specific) =====
# llama-cpp-python installation logic - Keep this separate as it has specific CUDA requirements
# Moved this section to just before reading the SRT for summarization
if DEBUG_MODE: print("[Summary 1/6] Checking GPU and installing llama-cpp-python ...")

def detect_cuda_tag():
    try:
        out = sp.check_output(["nvidia-smi"], text=True)
        m = re.search(r"CUDA Version:\s*([\d.]+)", out)
        if not m:
            return "cu124"
        major, minor = [int(x) for x in m.group(1).split(".")[:2]]
        if major > 12 or (major == 12 and minor >= 5):
            return "cu125"
        return "cu124"
    except Exception:
        return "cu124"

cuda_tag = detect_cuda_tag()
if DEBUG_MODE: print(f"GPU 0: Detected CUDA version tag {cuda_tag}")

def try_import_llama():
    try:
        from llama_cpp import Llama
        return Llama
    except ModuleNotFoundError:
        return None

Llama = try_import_llama()
if Llama is None:
    # Keep your existing installation strategy: extra-index -> fallback to source compilation on failure
    candidates = [cuda_tag, "cu125", "cu124", "cu122", "cu121"]
    ok = False
    for tag in candidates:
        idx = f"https://abetlen.github.io/llama-cpp-python/whl/{tag}"
        if DEBUG_MODE: print(f"→ Attempting to install llama-cpp-python ({tag}) ...")
        r = pip_install(["llama-cpp-python"], extra_args=["--extra-index-url", idx])
        if r.returncode == 0:
            Llama = try_import_llama()
            if Llama is not None:
                ok = True
                break
        else:
            if DEBUG_MODE: print("  ✗ Installation failed (summary):", "\n".join(r.stdout.splitlines()[-5:]))
    if not ok:
        if DEBUG_MODE: print("→ Pre-compiled wheels not available, switching to 'source compilation (CUDA=ON)' ... (takes longer)")
        try:
            import ninja # noqa: F401 # Import ninja to check if installed
        except ModuleNotFoundError:
            if DEBUG_MODE: print("→ Installing missing package: ninja")
            r = pip_install(["ninja"])
            if r.returncode != 0:
                if DEBUG_MODE: print(r.stdout)
                raise RuntimeError("安裝 ninja 失敗。請重啟後重試。")
        env = os.environ.copy()
        env["CMAKE_ARGS"] = "-DGGML_CUDA=on -DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        r = pip_install(["llama-cpp-python"], env=env)
        if r.returncode != 0:
            if DEBUG_MODE: print(r.stdout)
            raise RuntimeError("無法安裝 GPU 版 llama-cpp-python。")
        Llama = try_import_llama()


# ===== Summary 2/6) Read SRT (Summary) - Uses 'summary_srt_path_abs'
if DEBUG_MODE: print("[Summary 2/6] Reading SRT ...")
assert summary_srt_path_abs.exists(), f"SRT 檔不存在：{summary_srt_path_abs}"
with open(summary_srt_path_abs, "r", encoding="utf-8") as f:
    srt_text = f.read()
subs = list(_srt.parse(srt_text)) # Use _srt as srt module was imported as _srt
def td2s(td): return td.total_seconds()
segments = []
for it in subs:
    txt = it.content.strip()
    if not txt: continue
    segments.append((td2s(it.start), td2s(it.end), txt))
total_secs = (segments[-1][1] - segments[0][0]) if segments else 0
if DEBUG_MODE: print(f"→ Number of subtitle segments: {len(segments)}；Video length (est): {total_secs/60:.1f} minutes")


# ===== Summary 3/6) Download and Load GGUF Model (Summary) - Uses summary model parameters (REPO_ID, GGUF_FILE, ctx_window, etc.)
# Moved this section to just after installing llama-cpp-python
if DEBUG_MODE: print("[Summary 3/6] Loading GPT-OSS-20B (GGUF, CUDA) ...")
local_repo = snapshot_download(REPO_ID, allow_patterns=[GGUF_FILE])
gguf_path = str(Path(local_repo)/GGUF_FILE)

llm = Llama(
    model_path=gguf_path,
    n_ctx=ctx_window,
    n_gpu_layers=-1,
    seed=0,
    logits_all=False,
    verbose=True,          # Display the actual chat format used
    chat_format="chatml",  # Directly override the GGUF built-in Unsloth template to avoid outputting <|channel|> tags
)
if DEBUG_MODE: print("→ Model loaded successfully (GPU)")


# ===== Summary 4/6) Token-aware Segmentation (Summary) - Uses ctx_window, map_max_new_tokens, prompt_overhead
if DEBUG_MODE: print("[Summary 4/6] Generating segments (token-aware; single segment ≤ safety limit) ...")

def count_tokens_text(text: str) -> int:
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot count tokens.")
    return len(llm.tokenize(text.encode("utf-8")))

SYSTEM_INSTR = (
  "你是一個會議總結機器人。根據使用者提供的逐字稿（可能雜訊、重複、錯字），"
  "請去除雜訊與重複、嚴守事實、不腦補。遇到不明確資訊以「待補充／未明確」標註。"
  "輸出為 Markdown（繁體中文），不要輸出任何系統／思考標記。"
)

# — Segment Summary Prompt: More concise request, avoid verbosity and system language - Uses 'topic_hint'
MAP_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是逐字稿片段（非完整全文）：
{chunk}

請就此片段輸出「條列式重點摘要」（500–900 字，繁體中文），注意：
- 只寫最終內容，不要寫解題想法、不要出現任何系統提示或中英括號標記。
- 聚焦可驗證事實（時間、人物、任務、結論、未決事項、行動）。
- 結構：可用小標題＋項目符號，語句務必短、準確、無贅詞。
""")

# — Summary Prompt: Maintain your three-section output structure - Uses 'topic_hint'
REDUCE_USER_TMPL = textwrap.dedent("""\
主題（可留空）：{topic}

以下是所有片段的重點摘要彙整（仍可能有重疊）：
{maps}

請整合為一份會議筆記（Markdown，繁體）：
1) **整體提要**（3–6 句，避免冗言）
2) **章節要點（含時間脈絡）**：條列呈現，每點一行，可附粗略時間
3) **可執行重點**：具體待辦（每條以動詞開頭）
請只輸出最終筆記，不要出現系統或思考標記，不要加入未出現的新資訊。
""")

# Single segment token budget (reserve space for prompt and generation)
prompt_overhead = 700
chunk_target    = max(1024, min(3072, ctx_window - prompt_overhead - map_max_new_tokens))

chunks: List[Tuple[float,float,str]] = []
buf, t0, t1, cur = [], None, None, 0
for (s, e, txt) in segments:
    t = count_tokens_text(txt)
    if not buf:
        buf, t0, t1, cur = [txt], s, e, t
        continue
    if cur + t <= chunk_target:
        buf.append(txt); t1 = e; cur += t
    else:
        chunks.append((t0, t1, "\n".join(buf)))
        buf, t0, t1, cur = [txt], s, e, t
if buf:
    chunks.append((t0, t1, "\n".join(buf)))

if DEBUG_MODE: print(f"→ Generated {len(chunks)} segments (target ~{chunk_target} tokens per segment)")

# ===== Common: Streaming Tools (No regex cleaning; use correct stop sequence) - Uses temperature, top_p, repeat_penalty, map_max_new_tokens, reduce_max_new_tokens
def llm_stream(messages, max_tokens):
    # Check if llm is initialized before using it
    if 'llm' not in locals() or llm is None:
         raise RuntimeError("LLM model is not loaded. Cannot stream generation.")
    # ChatML messages end with <|im_end|>; use stop to cut off, preventing the closing tag from being written to the file
    gen = llm.create_chat_completion(
        messages=messages,
        temperature=float(temperature),
        top_p=float(top_p),
        repeat_penalty=float(repeat_penalty),
        max_tokens=int(max_tokens),
        stream=True,
        stop=["<|im_end|>"],  # Key: Prevent outputting the ending template
    )
    for ev in gen:
        # Compatible with different fields
        piece = ""
        try:
            piece = ev["choices"][0]["delta"].get("content", "")
        except Exception:
            piece = ev["choices"][0].get("text", "")
        if piece:
            yield piece

# ===== Summary 5/6) Segment Summary (map) - Uses map_max_new_tokens, ctx_window, prompt_overhead, topic_hint
if DEBUG_MODE: print("[Summary 5/6] Segment summarization (map) ...")
live = display(Markdown(""), display_id=True)
maps: List[str] = []

for i, (s, e, body) in enumerate(chunks, 1):
    pct = i / max(len(chunks),1) * 100
    sys.stdout.write(f"  - 處理分段 {i}/{len(chunks)}（~{pct:.1f}%）\n"); sys.stdout.flush()

    # Shrink to safe budget before sending (prevent prompt+segment from exceeding window and causing model to terminate early)
    budget_tokens = max(512, ctx_window - map_max_new_tokens - prompt_overhead)
    def shrink_to_budget(text: str, budget_tokens: int) -> str:
        cur = text
        for _ in range(6):
            if count_tokens_text(cur) <= budget_tokens:
                return cur
            keep = max(800, int(len(cur) * 0.85))
            cur = cur[:keep]
        return cur
    body2 = shrink_to_budget(body, budget_tokens)

    user_txt = MAP_USER_TMPL.format(topic=(topic_hint or "（無）"), chunk=body2)
    messages = [
        {"role": "system", "content": SYSTEM_INSTR},
        {"role": "user",   "content": user_txt},
    ]

    part_buf = [] # Reset part_buf for each segment
    for token in llm_stream(messages, map_max_new_tokens):
        part_buf.append(token)
        # Update live display and terminal character count periodically
        if len(part_buf) % 24 == 0:
            cur_txt = "".join(part_buf)
            live.update(Markdown(cur_txt))
            sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()
    cur_txt = "".join(part_buf)
    live.update(Markdown(cur_txt))
    sys.stdout.write(f"    ↳ 分段 {i} 已產生字元：{len(cur_txt)}\n"); sys.stdout.flush()

    # Include the model's final output directly, no regex cleaning
    maps.append(cur_txt.strip())

if DEBUG_MODE: print("→ Segment summarization complete")

# ===== Summary 6/6) Consolidate (reduce) & Only write .md (Summary) - Uses summary_output_dir, summary_srt_path_abs, reduce_max_new_tokens, ctx_window, topic_hint
if DEBUG_MODE: print("[Summary 6/6] Consolidating summary (reduce) ...")
maps_md = "\n\n---\n\n".join(f"### 片段 {i+1} 要點\n\n{m}" for i, m in enumerate(maps))

# If combined text exceeds window, truncate proportionally first (without changing text within segments to avoid breaking meaning)
def fit_reduce_payload(md_text: str, max_ctx_tokens: int) -> str:
    for _ in range(8):
        need = count_tokens_text(md_text)
        if need + reduce_max_new_tokens + 400 <= max_ctx_tokens:
            return md_text
        md_text = md_text[: int(len(md_text) * 0.9)]
    return md_text

md_cur = fit_reduce_payload(maps_md, ctx_window)

user_txt = REDUCE_USER_TMPL.format(topic=(topic_hint or "（無）"), maps=md_cur)
messages = [{"role":"system","content":SYSTEM_INSTR},
            {"role":"user","content":user_txt}]

live2 = display(Markdown(""), display_id=True)
final_buf = []
for token in llm_stream(messages, reduce_max_new_tokens):
    final_buf.append(token)
    if len(final_buf) % 24 == 0:
        live2.update(Markdown("".join(final_buf)))
        sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()
live2.update(Markdown("".join(final_buf)))
sys.stdout.write(f"    ↳ 彙整 已產生字元：{len(''.join(final_buf))}\n"); sys.stdout.flush()

final_text = "".join(final_buf).strip()

# Determine and create the summary output directory
summary_output_dir_abs = to_abs_mydrive(summary_output_dir)
summary_output_dir_abs.mkdir(parents=True, exist_ok=True)

# Determine the summary output file path using the stem of the input SRT
out_md = summary_output_dir_abs / f"{Path(summary_srt_path_abs).stem}_summary.md"

with open(out_md, "w", encoding="utf-8") as f:
    f.write(final_text)

print(f"→ 完成 ✅  {out_md}")
try:
    del llm
except Exception:
    pass
gc.collect()
if DEBUG_MODE: print("（顯存已釋放，如需重跑可直接再次執行）")