# Generate Transcripts from a Folder of Audio/Video Files

This notebook processes every audio or video file in a specified folder: each file is converted to WAV, split into clips, and transcribed with Whisper.cpp. The resulting transcripts can then be cleaned and exported as SRT subtitles. All settings are configured through interactive widgets. Transcripts are named using the convention `foldername_filename.txt`.

## Step 1: Setup and Configuration

Configure the input folder, output directory, and other settings using interactive widgets.

In [None]:
import os
import ipywidgets as widgets
from IPython.display import display

# Default configuration values
DEFAULT_INPUT_FOLDER = "../data/demo"
DEFAULT_OUTPUT_DIR = "../data/output_clips"
DEFAULT_CLIP_DURATION_MIN = 1  # in minutes
DEFAULT_WHISPER_EXEC = "../whisper.cpp/build/bin/whisper-cli"
DEFAULT_WHISPER_MODEL = "../whisper.cpp/models/ggml-medium.bin"
DEFAULT_LANGUAGE = "zh"
DEFAULT_WORKERS = 3
DEFAULT_USE_THREADS = False
DEFAULT_REST_TIME = 180  # in seconds, default rest time between transcriptions

# Presentation options shared by every configuration widget.
# 'description_width': 'initial' lets the label take as much room as its text
# needs; with the default fixed width, longer labels such as
# 'Clip Duration (min):' are cut off.
WIDGET_OPTIONS = {
    'layout': {'width': '500px'},
    'style': {'description_width': 'initial'},
}

# Widgets for configuration
input_folder_widget = widgets.Text(
    value=DEFAULT_INPUT_FOLDER,
    placeholder='Enter input folder path',
    description='Input Folder:',
    **WIDGET_OPTIONS,
)

output_dir_widget = widgets.Text(
    value=DEFAULT_OUTPUT_DIR,
    placeholder='Enter output directory',
    description='Output Dir:',
    **WIDGET_OPTIONS,
)

# Slider value is in minutes; it is converted to seconds when read in Step 3.
clip_duration_widget = widgets.IntSlider(
    value=DEFAULT_CLIP_DURATION_MIN,
    min=1,
    max=30,
    step=1,
    description='Clip Duration (min):',
    **WIDGET_OPTIONS,
)

whisper_exec_widget = widgets.Text(
    value=DEFAULT_WHISPER_EXEC,
    placeholder='Enter Whisper.cpp executable path',
    description='Whisper Exec:',
    **WIDGET_OPTIONS,
)

whisper_model_widget = widgets.Text(
    value=DEFAULT_WHISPER_MODEL,
    placeholder='Enter Whisper model path',
    description='Whisper Model:',
    **WIDGET_OPTIONS,
)

language_widget = widgets.Dropdown(
    options=[('Chinese (zh)', 'zh'), ('English (en)', 'en')],
    value=DEFAULT_LANGUAGE,
    description='Language:',
    **WIDGET_OPTIONS,
)

workers_widget = widgets.IntSlider(
    value=DEFAULT_WORKERS,
    min=1,
    max=8,
    step=1,
    description='Workers:',
    **WIDGET_OPTIONS,
)

# The selected value is passed as the `use_threads` flag of transcribe_audio.
use_threads_widget = widgets.Dropdown(
    options=[('Multithreading', True), ('Multiprocessing', False)],
    value=DEFAULT_USE_THREADS,
    description='Parallel Method:',
    **WIDGET_OPTIONS,
)

# Pause (in seconds) inserted between files in Step 3; 0 disables the pause.
rest_time_widget = widgets.IntSlider(
    value=DEFAULT_REST_TIME,
    min=0,
    max=600,
    step=10,
    description='Rest Time (sec):',
    **WIDGET_OPTIONS,
)

# Render every configuration widget, top to bottom, in a single display call
# (display() shows each positional argument in turn).
display(
    input_folder_widget,
    output_dir_widget,
    clip_duration_widget,
    whisper_exec_widget,
    whisper_model_widget,
    language_widget,
    workers_widget,
    use_threads_widget,
    rest_time_widget,
)

## Step 2: Import Audio Processing Functions

Import the audio-processing functions from `voice2transcripts.py` and the transcript-cleaning and SRT-conversion functions from `time_stamp_cleaner.py`.

In [None]:
import os
import sys

# Add the scripts directory (resolved relative to the current working
# directory) to the import path so the project modules can be imported.
# The membership check keeps the cell idempotent: re-running it does not
# append the same entry to sys.path again.
scripts_dir = os.path.abspath(os.path.join(os.getcwd(), '../scripts'))
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)
from voice2transcripts import clear_output_folder, convert_to_wav, split_audio, transcribe_audio
from time_stamp_cleaner import clean_transcription, save_cleaned_transcription, convert_to_srt

## Step 3: Process Audio/Video Files and Generate Transcripts

Run the processing pipeline to convert, split, transcribe, and clean each audio/video file in the specified folder.

In [None]:
import time
from IPython.display import clear_output
import ipywidgets as widgets

# Convert, split and transcribe every supported media file in the input
# folder. Any failure aborts the run and is reported by the handler below.
try:
    # Get values from widgets
    input_folder = input_folder_widget.value
    output_dir = output_dir_widget.value
    clip_duration_sec = clip_duration_widget.value * 60  # Convert minutes to seconds
    whisper_exec = whisper_exec_widget.value
    whisper_model = whisper_model_widget.value
    language = language_widget.value
    workers = workers_widget.value
    use_threads = use_threads_widget.value
    rest_time = rest_time_widget.value

    # The input must be an existing directory (a path to a regular file
    # would otherwise only fail later, inside os.listdir).
    if not os.path.isdir(input_folder):
        raise FileNotFoundError(f"找不到輸入資料夾：{input_folder}")

    # Collect the audio/video files in the folder. Sorting makes the
    # processing order deterministic (os.listdir order is arbitrary), and
    # the isfile check skips directories that happen to end in a media suffix.
    supported_extensions = ('.mp3', '.mp4', '.m4a', '.wav', '.flac', '.ogg', '.webm', '.mkv')
    input_files = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith(supported_extensions)
        and os.path.isfile(os.path.join(input_folder, name))
    )

    if not input_files:
        raise FileNotFoundError(f"在 {input_folder} 中找不到任何音訊或影片檔案")

    total_files = len(input_files)
    print(f"📁 找到 {total_files} 個檔案進行處理")

    # Progress bar widget; it tracks the share of files fully processed,
    # because transcribe_audio is called once per file and exposes no
    # per-clip progress hook here.
    progress_bar = widgets.FloatProgress(
        value=0.0,
        min=0.0,
        max=100.0,
        description='Progress:',
        bar_style='info',
        orientation='horizontal',
        layout={'width': '500px'}
    )
    progress_label = widgets.Label(value="0.0%")
    display(progress_bar)
    display(progress_label)

    # abspath drops any trailing separator before taking the last path
    # component; a plain basename("folder/") would return '' and yield
    # transcripts named "_<file>.txt".
    folder_name = os.path.basename(os.path.abspath(input_folder))

    # Process each file
    for idx, input_file in enumerate(input_files, 1):
        input_file_path = os.path.join(input_folder, input_file)
        base_name = os.path.splitext(input_file)[0]
        transcript_filename = f"{folder_name}_{base_name}.txt"
        print(f"🚀 處理檔案 {idx}/{total_files}: {input_file}...")

        # Clear old files for this iteration
        clear_output_folder(output_dir)

        # Execute the processing pipeline: convert -> split -> transcribe
        wav_file = convert_to_wav(input_file_path, output_dir)
        clip_files = split_audio(wav_file, clip_duration_sec, output_dir)
        transcribe_audio(clip_files, output_dir, whisper_exec, whisper_model, language, transcript_filename, workers=workers, use_threads=use_threads)

        # Advance the progress bar by one completed file
        completed_pct = idx / total_files * 100.0
        progress_bar.value = completed_pct
        progress_label.value = f"{completed_pct:.1f}%"
        print(f"✅ 檔案 {idx}/{total_files} 處理完成！轉錄結果已儲存至 {os.path.join(output_dir, '../transcripts/' + transcript_filename)}")

        # Rest between transcriptions to avoid overheating, but not after the last file
        if idx < total_files and rest_time > 0:
            print(f"⏳ 休息 {rest_time} 秒以避免過熱...")
            time.sleep(rest_time)

    # Leave the bar full (instead of resetting it to 0%) once everything is done
    progress_bar.bar_style = 'success'
    print("🎉 所有檔案處理完成！")
except Exception as e:
    # Top-level boundary of the notebook cell: report the failure instead of
    # showing a traceback. The handler supplies the "❌" prefix, so the
    # messages raised above do not repeat it.
    print(f"❌ 處理過程中發生錯誤：{e}")

## Step 4: Clean Transcriptions

Clean the transcriptions by removing per-sentence timestamps and formatting the content for all generated transcript files.

### Step 4.1: Configure Cleaned Transcript Filenames

In [None]:
# Show one text box per raw transcript so the name of its cleaned copy can be
# customised before Step 4.2 runs.
try:
    # The transcripts folder sits next to the output directory. normpath
    # removes a trailing separator first; otherwise dirname("a/b/") would
    # return "a/b" and the folder would be looked up inside the output
    # directory instead of beside it.
    transcript_dir = os.path.join(os.path.dirname(os.path.normpath(output_dir)), 'transcripts')

    if not os.path.exists(transcript_dir):
        raise FileNotFoundError(f"找不到轉錄資料夾：{transcript_dir}")

    # Raw transcripts only: files prefixed with "clean_" are outputs of the
    # cleaning step. Sorted so the widgets appear in a stable order.
    transcript_files = sorted(
        name for name in os.listdir(transcript_dir)
        if name.endswith('.txt') and not name.startswith('clean_')
    )

    if not transcript_files:
        raise FileNotFoundError(f"在 {transcript_dir} 中找不到任何轉錄檔案")

    print(f"🧹 找到 {len(transcript_files)} 個轉錄檔案進行清理")

    # Maps each transcript filename to the widget holding its cleaned
    # filename; read again in Step 4.2.
    cleaned_filename_widgets = {}
    for transcript_file in transcript_files:
        default_cleaned_name = f"clean_{transcript_file}"
        widget = widgets.Text(
            value=default_cleaned_name,
            placeholder=f'Enter cleaned filename (default: {default_cleaned_name})',
            description=f'Cleaned {transcript_file}:',
            layout={'width': '500px'},
            # The description contains a whole filename; let it size itself
            # instead of being cut off at the default label width.
            style={'description_width': 'initial'}
        )
        cleaned_filename_widgets[transcript_file] = widget
        display(widget)
except FileNotFoundError as e:
    # The handler supplies the "❌" prefix, so the raised messages omit it.
    print(f"❌ 找不到檔案：{e}")
except Exception as e:
    print(f"❌ 配置清理檔案名稱時發生錯誤：{e}")

### Step 4.2: Clean and Save Transcriptions

In [None]:
# Clean every raw transcript, save the cleaned copy, and keep the cleaned
# segments in memory for the SRT conversion in Step 5.2.
try:
    # The transcripts folder sits next to the output directory. normpath
    # removes a trailing separator first so dirname() really returns the
    # parent directory.
    transcript_dir = os.path.join(os.path.dirname(os.path.normpath(output_dir)), 'transcripts')

    if not os.path.exists(transcript_dir):
        raise FileNotFoundError(f"找不到轉錄資料夾：{transcript_dir}")

    # Raw transcripts only ("clean_" files are outputs of this step), in a
    # stable order.
    transcript_files = sorted(
        name for name in os.listdir(transcript_dir)
        if name.endswith('.txt') and not name.startswith('clean_')
    )

    if not transcript_files:
        raise FileNotFoundError(f"在 {transcript_dir} 中找不到任何轉錄檔案")

    print(f"🧹 開始清理 {len(transcript_files)} 個轉錄檔案")

    # The filename widgets exist only if Step 4.1 ran in this kernel
    # session; without them every file falls back to its default name
    # rather than failing with a NameError.
    filename_widgets = globals().get('cleaned_filename_widgets', {})

    # Notebook-level store of cleaned segments, keyed by transcript filename
    # and read by Step 5.2.
    cleaned_segments_dict = {}

    for idx, transcript_file in enumerate(transcript_files, 1):
        transcript_path = os.path.join(transcript_dir, transcript_file)

        # Use the name from the widget; a missing widget or a blank entry
        # falls back to the default (a blank name would otherwise resolve
        # to the transcripts directory itself).
        default_cleaned_name = f"clean_{transcript_file}"
        widget = filename_widgets.get(transcript_file)
        chosen_name = widget.value.strip() if widget is not None else ''
        cleaned_filename = chosen_name or default_cleaned_name
        cleaned_transcript_path = os.path.join(transcript_dir, cleaned_filename)

        print(f"清理檔案 {idx}/{len(transcript_files)}: {transcript_file}...")
        if not os.path.exists(transcript_path):
            print(f"❌ 轉錄檔案不存在：{transcript_path}")
            continue

        with open(transcript_path, 'r', encoding='utf-8') as f:
            text = f.read()
        cleaned_segments = clean_transcription(text)
        cleaned_segments_dict[transcript_file] = cleaned_segments
        if cleaned_segments:
            save_cleaned_transcription(cleaned_segments, cleaned_transcript_path)
            print(f"✅ 清理完成！清理後的轉錄結果已儲存至 {cleaned_transcript_path}")
        else:
            print(f"⚠️ 沒有找到有效的轉錄內容，無法儲存清理後的檔案: {transcript_file}")

    print("🎉 所有轉錄檔案清理完成！")
except FileNotFoundError as e:
    # The handler supplies the "❌" prefix, so the raised messages omit it.
    print(f"❌ 找不到檔案：{e}")
except IOError as e:
    print(f"❌ 讀取檔案時發生錯誤：{e}")
except Exception as e:
    print(f"❌ 清理轉錄過程中發生未知錯誤：{e}")

## Step 5: Convert Transcriptions to SRT Format

Convert the cleaned transcriptions to SRT subtitle format for all generated transcript files.

### Step 5.1: Configure SRT Filenames

In [None]:
# Show one text box per raw transcript so the name of its SRT file can be
# customised before Step 5.2 runs.
try:
    # The transcripts folder sits next to the output directory. normpath
    # removes a trailing separator first; otherwise dirname("a/b/") would
    # return "a/b" and the folder would be looked up inside the output
    # directory instead of beside it.
    transcript_dir = os.path.join(os.path.dirname(os.path.normpath(output_dir)), 'transcripts')

    if not os.path.exists(transcript_dir):
        raise FileNotFoundError(f"找不到轉錄資料夾：{transcript_dir}")

    # Raw transcripts only (files prefixed with "clean_" are excluded),
    # sorted so the widgets appear in a stable order.
    transcript_files = sorted(
        name for name in os.listdir(transcript_dir)
        if name.endswith('.txt') and not name.startswith('clean_')
    )

    if not transcript_files:
        raise FileNotFoundError(f"在 {transcript_dir} 中找不到任何轉錄檔案")

    print(f"📝 找到 {len(transcript_files)} 個轉錄檔案進行 SRT 轉換")

    # Maps each transcript filename to the widget holding its SRT filename;
    # read again in Step 5.2.
    srt_filename_widgets = {}
    for transcript_file in transcript_files:
        default_srt_name = f"{os.path.splitext(transcript_file)[0]}.srt"
        widget = widgets.Text(
            value=default_srt_name,
            placeholder=f'Enter SRT filename (default: {default_srt_name})',
            description=f'SRT {transcript_file}:',
            layout={'width': '500px'},
            # The description contains a whole filename; let it size itself
            # instead of being cut off at the default label width.
            style={'description_width': 'initial'}
        )
        srt_filename_widgets[transcript_file] = widget
        display(widget)
except FileNotFoundError as e:
    # The handler supplies the "❌" prefix, so the raised messages omit it.
    print(f"❌ 找不到檔案：{e}")
except Exception as e:
    print(f"❌ 配置 SRT 檔案名稱時發生錯誤：{e}")

### Step 5.2: Generate SRT Files

In [None]:
# Write one SRT subtitle file per raw transcript from its cleaned segments.
try:
    # The transcripts folder sits next to the output directory. normpath
    # removes a trailing separator first so dirname() really returns the
    # parent directory.
    transcript_dir = os.path.join(os.path.dirname(os.path.normpath(output_dir)), 'transcripts')

    if not os.path.exists(transcript_dir):
        raise FileNotFoundError(f"找不到轉錄資料夾：{transcript_dir}")

    # Raw transcripts only (files prefixed with "clean_" are excluded), in a
    # stable order.
    transcript_files = sorted(
        name for name in os.listdir(transcript_dir)
        if name.endswith('.txt') and not name.startswith('clean_')
    )

    if not transcript_files:
        raise FileNotFoundError(f"在 {transcript_dir} 中找不到任何轉錄檔案")

    print(f"📝 開始轉換 {len(transcript_files)} 個檔案為 SRT 格式")

    # Both names are created by earlier cells (Step 5.1 and Step 4.2). Look
    # them up defensively so this cell still works after a kernel restart
    # instead of failing with a NameError.
    filename_widgets = globals().get('srt_filename_widgets', {})
    cached_segments = globals().get('cleaned_segments_dict', {})

    for idx, transcript_file in enumerate(transcript_files, 1):
        # Use the name from the widget; a missing widget or a blank entry
        # falls back to "<transcript stem>.srt".
        default_srt_name = f"{os.path.splitext(transcript_file)[0]}.srt"
        widget = filename_widgets.get(transcript_file)
        chosen_name = widget.value.strip() if widget is not None else ''
        srt_filename = chosen_name or default_srt_name
        srt_path = os.path.join(transcript_dir, srt_filename)

        print(f"轉換檔案 {idx}/{len(transcript_files)}: {transcript_file}...")

        # Reuse the segments cleaned in Step 4.2 when available; otherwise
        # clean the transcript from disk the same way Step 4.2 does.
        cleaned_segments = cached_segments.get(transcript_file)
        if cleaned_segments is None:
            transcript_path = os.path.join(transcript_dir, transcript_file)
            with open(transcript_path, 'r', encoding='utf-8') as f:
                cleaned_segments = clean_transcription(f.read())

        if cleaned_segments:
            convert_to_srt(cleaned_segments, srt_path)
            print(f"✅ 轉換完成！SRT 字幕檔已儲存至 {srt_path}")
        else:
            print(f"⚠️ 沒有找到有效的轉錄內容，無法轉換為 SRT 格式: {transcript_file}")

    print("🎉 所有轉錄檔案轉換為 SRT 格式完成！")
except FileNotFoundError as e:
    # The handler supplies the "❌" prefix, so the raised messages omit it.
    print(f"❌ 找不到檔案：{e}")
except IOError as e:
    print(f"❌ 讀取檔案時發生錯誤：{e}")
except Exception as e:
    print(f"❌ 轉換 SRT 格式過程中發生未知錯誤：{e}")