# Generate Transcript from Audio

This notebook allows you to process an audio file, split it into clips, and transcribe the content using Whisper.cpp. You can configure the settings using interactive widgets.

## Step 1: Setup and Configuration

Configure the input file, output directory, and other settings using interactive widgets.

In [2]:
import os
import ipywidgets as widgets
from IPython.display import display

# Default configuration values
DEFAULT_INPUT_FILE = "./data/audio/full_audio.m4a"
DEFAULT_OUTPUT_DIR = "./data/output_clips"
DEFAULT_CLIP_DURATION_MIN = 10  # in minutes
DEFAULT_WHISPER_EXEC = "./whisper.cpp/build/bin/whisper-cli"
DEFAULT_WHISPER_MODEL = "whisper.cpp/models/ggml-medium.bin"
DEFAULT_LANGUAGE = "zh"
DEFAULT_TRANSCRIPT_FILENAME = "transcription.txt"

# Widgets for configuration
input_file_widget = widgets.Text(
    value=DEFAULT_INPUT_FILE,
    placeholder='Enter input audio file path',
    description='Input File:',
    layout={'width': '500px'}
)

output_dir_widget = widgets.Text(
    value=DEFAULT_OUTPUT_DIR,
    placeholder='Enter output directory',
    description='Output Dir:',
    layout={'width': '500px'}
)

clip_duration_widget = widgets.IntSlider(
    value=DEFAULT_CLIP_DURATION_MIN,
    min=1,
    max=30,
    step=1,
    description='Clip Duration (min):',
    layout={'width': '500px'}
)

whisper_exec_widget = widgets.Text(
    value=DEFAULT_WHISPER_EXEC,
    placeholder='Enter Whisper.cpp executable path',
    description='Whisper Exec:',
    layout={'width': '500px'}
)

whisper_model_widget = widgets.Text(
    value=DEFAULT_WHISPER_MODEL,
    placeholder='Enter Whisper model path',
    description='Whisper Model:',
    layout={'width': '500px'}
)

language_widget = widgets.Dropdown(
    options=[('Chinese (zh)', 'zh'), ('English (en)', 'en')],
    value=DEFAULT_LANGUAGE,
    description='Language:',
    layout={'width': '500px'}
)

transcript_filename_widget = widgets.Text(
    value='',
    placeholder=f'Enter transcript filename (default: {DEFAULT_TRANSCRIPT_FILENAME})',
    description='Transcript File:',
    layout={'width': '500px'}
)

# Display widgets
display(input_file_widget)
display(output_dir_widget)
display(clip_duration_widget)
display(whisper_exec_widget)
display(whisper_model_widget)
display(language_widget)
display(transcript_filename_widget)

Text(value='./data/audio/full_audio.m4a', description='Input File:', layout=Layout(width='500px'), placeholder…

Text(value='./data/output_clips', description='Output Dir:', layout=Layout(width='500px'), placeholder='Enter …

IntSlider(value=10, description='Clip Duration (min):', layout=Layout(width='500px'), max=30, min=1)

Text(value='./whisper.cpp/build/bin/whisper-cli', description='Whisper Exec:', layout=Layout(width='500px'), p…

Text(value='whisper.cpp/models/ggml-medium.bin', description='Whisper Model:', layout=Layout(width='500px'), p…

Dropdown(description='Language:', layout=Layout(width='500px'), options=(('Chinese (zh)', 'zh'), ('English (en…

Text(value='', description='Transcript File:', layout=Layout(width='500px'), placeholder='Enter transcript fil…

## Step 2: Import Audio Processing Functions

Import the necessary functions for processing audio files from `voice2transcripts.py`.

In [3]:
import os
import sys

# Add the scripts directory to the path so we can import the functions
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'scripts')))
from voice2transcripts import clear_output_folder, convert_to_wav, split_audio, transcribe_audio

NameError: name '__file__' is not defined

## Step 3: Process Audio and Generate Transcript

Run the processing pipeline to convert, split, and transcribe the audio file.

In [None]:
try:
    # Get values from widgets
    input_file = input_file_widget.value
    output_dir = output_dir_widget.value
    clip_duration_sec = clip_duration_widget.value * 60  # Convert minutes to seconds
    whisper_exec = whisper_exec_widget.value
    whisper_model = whisper_model_widget.value
    language = language_widget.value
    transcript_filename = transcript_filename_widget.value if transcript_filename_widget.value.strip() else DEFAULT_TRANSCRIPT_FILENAME

    # Check if input file exists
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"❌ 找不到輸入音訊檔案：{input_file}")

    # Clear old files
    clear_output_folder(output_dir)

    # Execute the processing pipeline
    print("🚀 開始音訊處理與轉錄流程...")
    wav_file = convert_to_wav(input_file, output_dir)
    clip_files = split_audio(wav_file, clip_duration_sec, output_dir)
    transcribe_audio(clip_files, output_dir, whisper_exec, whisper_model, language, transcript_filename)
    print(f"🎉 全部處理完成！轉錄結果已儲存至 {os.path.join(output_dir, '../transcripts/' + transcript_filename)}")
except Exception as e:
    print(f"❌ 處理過程中發生錯誤：{e}")