In [None]:
pip install openai-whisper

In [None]:
import whisper
import os
import re
from pydub import AudioSegment

# Load the Whisper "large-v3" checkpoint and move it onto the CUDA device.
model = whisper.load_model("large-v3").to("cuda")

# Directory holding the per-scene WAV clips, and directory receiving the
# transcript text files.
audio_folder = "/content/drive/MyDrive/TAVE 16th 심화플젝/data/split_scenes/clips_2_audio_wav"
output_folder = "/content/drive/MyDrive/TAVE 16th 심화플젝/data/STT_AUDIO"

# Create the output directory if it does not exist yet.
os.makedirs(output_folder, exist_ok=True)

# All WAV file names in the input directory, in sorted (lexicographic) order.
audio_files = sorted(
    name for name in os.listdir(audio_folder) if name.endswith(".wav")
)

def extract_scene_number(filename):
    """Return the scene number embedded in *filename* as an int.

    The name is searched (case-insensitively) for the word "Scene",
    optionally followed by a single "-", "_" or space, and then one or
    more digits. Returns None when no such pattern is present.
    """
    found = re.search(r"Scene[-_ ]?(\d+)", filename, re.IGNORECASE)
    if found is None:
        return None
    return int(found.group(1))

def extract_scene_id_str(filename):
    """Return a zero-padded scene identifier such as "Scene-007".

    The scene number is taken from *filename* via extract_scene_number();
    when the file name carries no scene number, "UNKNOWN" is returned.
    """
    scene_no = extract_scene_number(filename)
    if scene_no is None:
        return "UNKNOWN"
    return f"Scene-{scene_no:03d}"

# Map scene number -> file name. Files are visited in audio_files order, so
# when two names yield the same number the later one wins; names without a
# scene number end up under the key None.
scene_dict = dict(zip(map(extract_scene_number, audio_files), audio_files))

def merge_audio(scene_num):
    """Concatenate the clips of scenes scene_num-2 .. scene_num+2.

    Neighbouring scene numbers that have no entry in scene_dict are
    skipped. The result starts with a 10 ms silent segment, followed by
    the available clips in ascending offset order. It is exported as WAV
    to a fixed temporary path (so each call overwrites the previous
    export), and that path is returned.
    """
    # Resolve the WAV paths of every neighbour that actually exists.
    neighbours = (scene_num + delta for delta in (-2, -1, 0, 1, 2))
    wav_paths = [
        os.path.join(audio_folder, scene_dict[number])
        for number in neighbours
        if number in scene_dict
    ]

    # Seed with a short silent segment, then append each clip in order.
    combined = AudioSegment.silent(duration=10)
    for wav_path in wav_paths:
        combined += AudioSegment.from_wav(wav_path)

    out_path = "/content/temp_merge.wav"
    combined.export(out_path, format="wav")
    return out_path


# Advertising-style / unwanted phrases. The pattern is the same for every
# clip, so it is defined and compiled once here rather than on each iteration.
forbidden_pattern = r"(감사합니다|다음 영상|자막 제공|광고|인터넷 방송국|만나요)"
forbidden_re = re.compile(forbidden_pattern)

# Process every clip: merge it with its neighbours, transcribe, filter, save.
for idx, filename in enumerate(audio_files):
    scene_num = extract_scene_number(filename)
    if scene_num is None:
        # File name carries no scene number; nothing to key the output on.
        continue

    scene_id = extract_scene_id_str(filename)
    print(f"[{idx+1}/{len(audio_files)}] Processing {scene_id}")

    # Merge the audio of up to five scenes (this one plus two on each side).
    merged_path = merge_audio(scene_num)

    # Run Whisper speech-to-text on the merged audio.
    try:
        result = model.transcribe(
            merged_path,
            language="ko",
            task="transcribe",
            fp16=True
        )
        text = result["text"].strip()
    except Exception as e:
        # Best effort: report the failure and store an empty transcript so
        # the remaining clips are still processed.
        print("Whisper 오류:", e)
        text = ""

    # Discard the whole transcript if it contains any forbidden phrase.
    if forbidden_re.search(text):
        print("금지문구 포함 → 대사 없음 처리")
        text = ""

    # Write the final transcript (possibly empty) for this scene.
    save_path = os.path.join(output_folder, f"stt_{scene_id}.txt")
    with open(save_path, "w", encoding="utf-8") as f:
        f.write(text)

print("전체 STT 저장 완료:", output_folder)