In [None]:
# Initial model load

import whisper
import os
import argparse
import torch
from pathlib import Path

# Name of the Whisper checkpoint handed to whisper.load_model below.
whisper_size = "large-v2"

# Release cached, unused CUDA memory before the model is loaded.
torch.cuda.empty_cache()
print('加载模型 Loading model...')
# `model` stays at module level; the transcription cell passes it to process_files.
model = whisper.load_model(whisper_size)
print('加载完成')

In [5]:
# Parameter settings

# Language passed to the Whisper transcribe call.
language = "japanese"

# If True, the model's previous output is used as the prompt for the next window.
# Disabling it may make the text inconsistent across windows, but the model becomes
# less prone to failure loops, such as repetition loops or timestamps drifting out of sync.
condition_on_previous_text = True


In [None]:
# Run Whisper: transcribe the audio files and write SRT subtitles

import os
import time
from pathlib import Path
import torch

def save_srt(segments, output_path):
    """Write transcription segments to an SRT file and echo them to stdout.

    Args:
        segments: Iterable of mappings, each providing 'start' and 'end'
            (times in seconds, as numbers) and 'text'.
        output_path: Path of the subtitle file to create. It is opened in
            write mode with UTF-8 encoding, so an existing file is replaced.

    Each segment is written as a 1-based index line, a
    ``HH:MM:SS,mmm --> HH:MM:SS,mmm`` line, the text, and a blank line.
    The timestamp line and the text are also printed.
    """
    def _format_timestamp(seconds):
        # Convert to whole milliseconds ONCE, with rounding. Truncating the
        # fractional part separately (int((t - int(t)) * 1000)) loses a
        # millisecond to float error, e.g. 2.57 s would be emitted as ",569".
        # Working from a single integer also makes carries correct
        # (59.9996 s -> 00:01:00,000 rather than 00:00:59,999).
        total_ms = int(round(float(seconds) * 1000))
        hours, remainder = divmod(total_ms, 3600000)
        minutes, remainder = divmod(remainder, 60000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

    with open(output_path, 'w', encoding='utf-8') as f:
        for index, segment in enumerate(segments, start=1):
            text = segment['text']
            time_range = (
                f"{_format_timestamp(segment['start'])} --> "
                f"{_format_timestamp(segment['end'])}"
            )

            # Write the cue to the file
            f.write(f"{index}\n")
            f.write(f"{time_range}\n")
            f.write(f"{text}\n\n")

            # Mirror the cue on stdout so progress is visible in the notebook
            print(time_range)
            print(text)


def process_files(upload_path, output_dir, model, language, condition_on_previous_text,
                  extensions=(".wav",)):
    """Transcribe every matching audio file under a directory tree to SRT files.

    The tree rooted at ``upload_path`` is walked recursively. For each file
    whose name ends with one of ``extensions`` the model's ``transcribe``
    method is called, and the resulting ``"segments"`` are written by
    ``save_srt`` to ``output_dir`` under the same relative path with the
    extension replaced by ``.srt``. Missing output sub-directories are created.

    Args:
        upload_path: Root directory that is searched for audio files.
        output_dir: Root directory that receives the generated .srt files.
        model: Object exposing ``transcribe(audio=..., language=...,
            condition_on_previous_text=...)`` and returning a mapping with a
            ``"segments"`` entry.
        language: Forwarded to ``model.transcribe`` as ``language``.
        condition_on_previous_text: Forwarded to ``model.transcribe`` unchanged.
        extensions: File-name suffix (or iterable of suffixes) to process.
            Matching is case-insensitive, so ``.WAV`` is picked up as well.
            Defaults to ``(".wav",)``.
    """
    # Accept a bare string as well as an iterable of suffixes.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, dirs, files in os.walk(upload_path):
        # Sorting in place makes os.walk descend in a stable, reproducible order.
        dirs.sort()
        for file_name in sorted(files):
            # Compare lower-cased names so upper/mixed-case extensions are not skipped.
            if not file_name.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file_name)
            # Mirror the input's sub-directory layout below output_dir.
            rel_path = os.path.relpath(file_path, upload_path)
            srt_path = os.path.join(output_dir, os.path.splitext(rel_path)[0] + ".srt")
            os.makedirs(os.path.dirname(srt_path), exist_ok=True)

            print(f'{file_path} 识别中...')
            # perf_counter is monotonic, so the elapsed time cannot be skewed
            # by system clock adjustments during a long transcription.
            start_time = time.perf_counter()
            result = model.transcribe(
                audio=file_path,
                language=language,
                condition_on_previous_text=condition_on_previous_text
            )
            processing_time = time.perf_counter() - start_time
            print(f"{processing_time:.2f} seconds")

            save_srt(result["segments"], srt_path)
            print(f'{srt_path} 已保存 Saved')


# Input directory that process_files walks for audio, and output directory for the .srt files.
upload_path = "./raw_audio/"
output_dir = "./srt/"

os.makedirs(output_dir, exist_ok=True)
# Process the files. `model`, `language` and `condition_on_previous_text` are
# defined in the earlier cells, so those cells must have been run first.
process_files(upload_path, output_dir, model, language, condition_on_previous_text)
# Release cached, unused CUDA memory once transcription has finished.
torch.cuda.empty_cache()
print('字幕生成完毕')



