In [13]:
import whisper

# Pick a checkpoint from:
# https://github.com/openai/whisper#available-models-and-languages
model = whisper.load_model("large-v3")  # large: 2.88G
# Move the loaded model onto the GPU.
model = model.to("cuda")

In [None]:
import os

# Folder and file name of the audio to transcribe.
file_dir = "F:/Voice//01_WAV版"
file_name = ".wav"
file_path = os.path.join(file_dir, file_name)

# To list every file in the folder, uncomment:
# print(f'資料夾內所有檔案: {os.listdir(file_dir)}')

# Report the file size in MB, or say so when the path is missing.
if not os.path.exists(file_path):
    print(f"File {file_path} does not exist.")
else:
    file_size = os.path.getsize(file_path)
    file_size_MB = file_size / 1024 / 1024
    print(f"{file_path} with length {file_size_MB:.2f} MB")

In [None]:
# Options forwarded to model.transcribe() as keyword arguments.
options_setting = dict(
    language="japanese",
    verbose=True,
    logprob_threshold=-1.0,
    hallucination_silence_threshold=5.0,
    condition_on_previous_text=False,
    word_timestamps=True,
)

# Run the transcription; `model` is expected to be the Whisper model
# loaded in an earlier cell.
result = model.transcribe(file_path, **options_setting)

In [None]:
from whisper.utils import get_writer

# Name the .srt after the audio file and place it in the same directory.
srt_file_name = os.path.splitext(file_name)[0] + ".srt"
output_srt_path = os.path.join(file_dir, srt_file_name)

try:
    # Build whisper's writer for the 'srt' format, targeting file_dir.
    writer = get_writer(output_format="srt", output_dir=file_dir)
    print(writer.output_dir)

    # Write the transcription result to the .srt file.
    writer(result, output_srt_path)
    print(f"轉譯完成並儲存為: {output_srt_path}")
except Exception as err:
    # Any failure is reported by printing the exception message only.
    print(err)

In [None]:
# Convert a duration in seconds to a clock-style timestamp string.
def format_time(seconds):
    """Format a number of seconds as ``MM:SS.mmm`` or ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds *before* it is split into
    hours, minutes and seconds, so rounding carries into the larger units
    (59.9996 -> "01:00.000") instead of producing an invalid "00:60.000".

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        ``"HH:MM:SS.mmm"`` when the rounded value is at least one hour,
        otherwise ``"MM:SS.mmm"``.
    """
    # int() keeps the arithmetic in plain Python ints even if the caller
    # passes a float subclass whose round() does not return an int.
    total_ms = int(round(seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    whole_seconds, millis = divmod(remainder_ms, 1000)

    if hours > 0:
        return f"{hours:02}:{minutes:02}:{whole_seconds:02}.{millis:03}"
    return f"{minutes:02}:{whole_seconds:02}.{millis:03}"

segments = result["segments"]

# Render every segment as "[start --> end] text".
timestamps_and_text = [
    f"[{format_time(seg['start'])} --> {format_time(seg['end'])}] {seg['text']}"
    for seg in segments
]

# Show the formatted lines in the cell output.
for entry in timestamps_and_text:
    print(entry)

# Name the .txt after the audio file and place it in the same directory.
txt_file_name = os.path.splitext(file_name)[0] + ".txt"
output_txt_path = os.path.join(file_dir, txt_file_name)

# Write one formatted line per segment, UTF-8 encoded.
with open(output_txt_path, "w", encoding="utf-8") as txt_file:
    txt_file.writelines(entry + "\n" for entry in timestamps_and_text)

print(f"檔案已成功輸出為 {txt_file_name}")

In [None]:
import torch

# Print the PyTorch version and the CUDA support it reports.
for label, value in (
    ("PyTorch version:", torch.__version__),
    ("CUDA available:", torch.cuda.is_available()),
    ("CUDA version:", torch.version.cuda),
    ("Number of GPUs:", torch.cuda.device_count()),
):
    print(label, value)

In [None]:
import torch
import gc

# Release the Whisper model and return its GPU memory.
# (To load it again: model = whisper.load_model("large-v3").to("cuda"))

# Drop the notebook's reference to the model. Tolerate re-running this cell
# after the name has already been deleted.
try:
    del model
except NameError:
    pass

# Collect garbage first, so objects kept alive only by reference cycles are
# freed before the cache is flushed.
gc.collect()

# Only then release the cached GPU memory that is no longer in use.
torch.cuda.empty_cache()

In [None]:
import json

# Serialize the segments as JSON, pretty-printed with indent=4.
# ensure_ascii=False keeps non-ASCII text (the Japanese transcript) readable
# instead of escaping every character as \uXXXX.
json_str = json.dumps(result["segments"], indent=4, ensure_ascii=False)

# Print the JSON string.
print(json_str)

# Unformatted alternative:
# print(result["segments"])