# Generate Transcript from Audio

This notebook allows you to process an audio file, split it into clips, and transcribe the content using Whisper.cpp. You can configure the settings using interactive widgets.

## Step 1: Setup and Configuration

Configure the input file, output directory, and other settings using interactive widgets.

In [None]:
import os
import ipywidgets as widgets
from IPython.display import display

# Default configuration values. Each one is only the initial value of a widget
# below and can be overridden there before running the pipeline.
# The input default is the repository-relative demo clip rather than an absolute
# path on one particular machine, so the notebook runs on any checkout.
DEFAULT_INPUT_FILE = "../data/demo/demo.wav"
DEFAULT_OUTPUT_DIR = "../data/output_clips"
DEFAULT_CLIP_DURATION_MIN = 1  # in minutes
DEFAULT_WHISPER_EXEC = "../whisper.cpp/build/bin/whisper-cli"
DEFAULT_WHISPER_MODEL = "../whisper.cpp/models/ggml-medium.bin"
DEFAULT_LANGUAGE = "zh"  # must be one of the values offered by the language dropdown
DEFAULT_TRANSCRIPT_FILENAME = "transcription.txt"  # used when the filename field is left empty

# Widgets for configuration
# Presentation shared by every widget: a fixed overall width, and a description
# width of 'initial' so that long labels such as 'Clip Duration (min):' are
# shown in full instead of being cut off at the default label width.
_WIDGET_LAYOUT = {'width': '500px'}
_WIDGET_STYLE = {'description_width': 'initial'}

# Path of the audio/video file to transcribe
input_file_widget = widgets.Text(
    value=DEFAULT_INPUT_FILE,
    placeholder='Enter input audio file path',
    description='Input File:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Directory that receives the converted WAV file and the clips
output_dir_widget = widgets.Text(
    value=DEFAULT_OUTPUT_DIR,
    placeholder='Enter output directory',
    description='Output Dir:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Clip length in whole minutes (converted to seconds by the processing cell)
clip_duration_widget = widgets.IntSlider(
    value=DEFAULT_CLIP_DURATION_MIN,
    min=1,
    max=30,
    step=1,
    description='Clip Duration (min):',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Path of the whisper.cpp command-line executable
whisper_exec_widget = widgets.Text(
    value=DEFAULT_WHISPER_EXEC,
    placeholder='Enter Whisper.cpp executable path',
    description='Whisper Exec:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Path of the ggml model file passed to whisper.cpp
whisper_model_widget = widgets.Text(
    value=DEFAULT_WHISPER_MODEL,
    placeholder='Enter Whisper model path',
    description='Whisper Model:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Language code handed to the transcription step
language_widget = widgets.Dropdown(
    options=[('Chinese (zh)', 'zh'), ('English (en)', 'en')],
    value=DEFAULT_LANGUAGE,
    description='Language:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Left empty on purpose: an empty value means "use DEFAULT_TRANSCRIPT_FILENAME"
transcript_filename_widget = widgets.Text(
    value='',
    placeholder=f'Enter transcript filename (default: {DEFAULT_TRANSCRIPT_FILENAME})',
    description='Transcript File:',
    layout=_WIDGET_LAYOUT,
    style=_WIDGET_STYLE,
)

# Display widgets (display() renders each argument in order)
display(
    input_file_widget,
    output_dir_widget,
    clip_duration_widget,
    whisper_exec_widget,
    whisper_model_widget,
    language_widget,
    transcript_filename_widget,
)

## Step 2: Import Audio Processing Functions

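Import the audio-processing functions from `voice2transcripts.py` and the transcript-cleaning functions from `time_stamp_cleaner.py`. Both modules are loaded from the `../scripts` directory, which is added to the import path first.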

In [None]:
import os
import sys

# Add the scripts directory to the path so we can import the functions.
# The membership check keeps the cell idempotent: re-running it no longer
# appends the same directory to sys.path again and again.
_scripts_dir = os.path.abspath(os.path.join(os.getcwd(), '../scripts'))
if _scripts_dir not in sys.path:
    sys.path.append(_scripts_dir)
from voice2transcripts import clear_output_folder, convert_to_wav, split_audio, transcribe_audio
from time_stamp_cleaner import clean_transcription, save_cleaned_transcription

## Step 3: Process Audio and Generate Transcript

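Run the processing pipeline to convert, split, and transcribe the audio file. Old files in the output directory are cleared before processing starts; cleaning the transcript is a separate step (Step 4).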

In [None]:
try:
    # Get values from widgets. Text values are stripped because whitespace is
    # easily picked up when pasting a path, and would otherwise end up inside
    # file names and path lookups.
    input_file = input_file_widget.value.strip()
    output_dir = output_dir_widget.value.strip()
    clip_duration_sec = clip_duration_widget.value * 60  # Convert minutes to seconds
    whisper_exec = whisper_exec_widget.value.strip()
    whisper_model = whisper_model_widget.value.strip()
    language = language_widget.value
    # An empty (or whitespace-only) filename field falls back to the default name
    transcript_filename = transcript_filename_widget.value.strip() or DEFAULT_TRANSCRIPT_FILENAME

    # Check that the input is an existing regular file (a directory is rejected as well)
    if not os.path.isfile(input_file):
        raise FileNotFoundError(f"‚ùå Êâæ‰∏çÂà∞Ëº∏ÂÖ•Èü≥Ë®äÊ™îÊ°àÔºö{input_file}")

    # Clear old files
    clear_output_folder(output_dir)

    # Execute the processing pipeline: convert -> split -> transcribe
    print("üöÄ ÈñãÂßãÈü≥Ë®äËôïÁêÜËàáËΩâÈåÑÊµÅÁ®ã...")
    wav_file = convert_to_wav(input_file, output_dir)
    clip_files = split_audio(wav_file, clip_duration_sec, output_dir)
    transcribe_audio(clip_files, output_dir, whisper_exec, whisper_model, language, transcript_filename)

    # Report the same location as before (the 'transcripts' folder beside the
    # output directory), normalized so the message does not contain '/../'.
    # NOTE(review): assumes transcribe_audio writes there - confirm in voice2transcripts.py
    reported_path = os.path.normpath(os.path.join(output_dir, '..', 'transcripts', transcript_filename))
    print(f"üéâ ËΩâÈåÑËôïÁêÜÂÆåÊàêÔºÅËΩâÈåÑÁµêÊûúÂ∑≤ÂÑ≤Â≠òËá≥ {reported_path}")
except Exception as e:
    # Notebook boundary: show a readable message instead of a traceback
    print(f"‚ùå ËôïÁêÜÈÅéÁ®ã‰∏≠ÁôºÁîüÈåØË™§Ôºö{e}")

## Step 4: Clean Transcription

Clean the transcription by removing per-sentence timestamps and formatting the content.

### Step 4.1: Configure Cleaned Transcript Filename

In [None]:
import ipywidgets as widgets
from IPython.display import display

# Widget for cleaned transcript filename.
# Left empty on purpose: an empty value means "use clean_transcription.txt".
cleaned_transcript_filename_widget = widgets.Text(
    value='',
    placeholder='Enter cleaned transcript filename (default: clean_transcription.txt)',
    description='Cleaned Transcript:',
    layout={'width': '500px'},
    # 'initial' sizes the label to its text; at the default description width
    # the label 'Cleaned Transcript:' is cut off.
    style={'description_width': 'initial'},
)
display(cleaned_transcript_filename_widget)

### Step 4.2: Clean and Save Transcription

In [None]:
try:
    # Get the transcript file path based on the output directory structure:
    # a 'transcripts' folder next to the output directory.
    # NOTE(review): os.path.dirname() returns output_dir itself when it ends with
    # a path separator, which would move 'transcripts' inside it - confirm which
    # location transcribe_audio actually writes to.
    transcript_dir = os.path.join(os.path.dirname(output_dir), 'transcripts')
    transcript_path = os.path.join(transcript_dir, transcript_filename)

    # Get the cleaned transcript filename from the widget. The value is stripped
    # so stray whitespace never becomes part of the file name; an empty field
    # falls back to the default name.
    cleaned_transcript_filename = cleaned_transcript_filename_widget.value.strip() or "clean_transcription.txt"
    cleaned_transcript_path = os.path.join(transcript_dir, cleaned_transcript_filename)

    # Read, clean, and save the transcription
    print("üßπ ÈñãÂßãÊ∏ÖÁêÜËΩâÈåÑÁµêÊûú...")
    print(f"ÂòóË©¶ËÆÄÂèñËΩâÈåÑÊ™îÊ°àÔºö{transcript_path}")
    if os.path.exists(transcript_path):
        print("‚úÖ ÊâæÂà∞ËΩâÈåÑÊ™îÊ°àÔºåÈñãÂßãÊ∏ÖÁêÜ...")
        with open(transcript_path, 'r', encoding='utf-8') as f:
            text = f.read()
        cleaned_segments = clean_transcription(text)
        # Only write a file when cleaning produced something
        if cleaned_segments:
            save_cleaned_transcription(cleaned_segments, cleaned_transcript_path)
            print(f"üéâ Ê∏ÖÁêÜÂÆåÊàêÔºÅÊ∏ÖÁêÜÂæåÁöÑËΩâÈåÑÁµêÊûúÂ∑≤ÂÑ≤Â≠òËá≥ {cleaned_transcript_path}")
        else:
            print("‚ö†Ô∏è Ê≤íÊúâÊâæÂà∞ÊúâÊïàÁöÑËΩâÈåÑÂÖßÂÆπÔºåÁÑ°Ê≥ïÂÑ≤Â≠òÊ∏ÖÁêÜÂæåÁöÑÊ™îÊ°à„ÄÇ")
    else:
        print(f"‚ùå ËΩâÈåÑÊ™îÊ°à‰∏çÂ≠òÂú®Ôºö{transcript_path}")
except FileNotFoundError as e:
    # The file vanished between the existence check and open(), or the cleaned
    # transcript's folder is missing
    print(f"‚ùå Êâæ‰∏çÂà∞Ê™îÊ°àÔºö{e}")
except IOError as e:
    print(f"‚ùå ËÆÄÂèñÊ™îÊ°àÊôÇÁôºÁîüÈåØË™§Ôºö{e}")
except Exception as e:
    # Also reached when Step 3 has not been run yet (output_dir / transcript_filename undefined)
    print(f"‚ùå Ê∏ÖÁêÜËΩâÈåÑÈÅéÁ®ã‰∏≠ÁôºÁîüÊú™Áü•ÈåØË™§Ôºö{e}")

## Step 5: Convert Transcript to SRT Format

Convert the timestamped transcript produced in Step 3 into a single, continuously numbered SRT subtitle file (`subtitles.srt`), saved in the same folder as the transcript.

In [None]:
import re
import os

def time_to_srt_format(time_str):
    """Convert an ``MM:SS`` or ``HH:MM:SS`` time into SRT's ``HH:MM:SS,mmm`` form.

    Args:
        time_str: Time as ``MM:SS`` or ``HH:MM:SS``, optionally followed by a
            fractional part introduced by ``.`` or ``,`` (e.g. ``00:01:02.5``).
            Surrounding whitespace is ignored.

    Returns:
        The time with an hours field and a three-digit millisecond field
        separated by a comma. Times without a fraction get ``,000``.
    """
    # SRT uses a comma as the decimal separator; accept either on input
    clock, _, fraction = time_str.strip().replace(',', '.').partition('.')
    if clock.count(':') == 1:  # Format is MM:SS - add the missing hours field
        clock = '00:' + clock
    # Pad or truncate the fraction to exactly three digits (milliseconds)
    millis = (fraction + '000')[:3] if fraction.isdigit() else '000'
    return f"{clock},{millis}"

def convert_transcript_to_srt(transcript_path, srt_path):
    """Convert a transcript file with timestamp headers to SRT format.

    The transcript is expected to consist of segments, each introduced by a
    header line of the form ``[MM:SS - MM:SS]`` or ``[HH:MM:SS - HH:MM:SS]``
    and followed by the segment text. Every segment with non-empty text
    becomes one numbered SRT cue.

    Args:
        transcript_path: Path of the UTF-8 transcript to read.
        srt_path: Path of the SRT file to write (overwritten if it exists).

    Returns:
        None. The outcome is reported with ``print``; errors are caught and
        printed rather than raised. When the transcript contains no usable
        segments, no SRT file is written.
    """
    # Header line; trailing spaces/tabs (and a stray '\r') before the newline are tolerated
    header_pattern = r'\[(\d{2}:\d{2}(?::\d{2})?) - (\d{2}:\d{2}(?::\d{2})?)\][ \t]*\r?\n'
    try:
        with open(transcript_path, 'r', encoding='utf-8') as f:
            content = f.read()

        # With two capture groups, re.split yields
        # [preamble, start, end, body, start, end, body, ...]
        parts = re.split(header_pattern, content)
        cues = []
        for start, end, body in zip(parts[1::3], parts[2::3], parts[3::3]):
            # A blank line terminates an SRT cue, so drop blank lines inside the text
            text = '\n'.join(line for line in body.strip().splitlines() if line.strip())
            if not text:
                continue
            start_time = time_to_srt_format(start)
            end_time = time_to_srt_format(end)
            # Cue numbers are consecutive over the cues actually emitted
            cues.append(f"{len(cues) + 1}\n{start_time} --> {end_time}\n{text}\n")

        if not cues:
            # Do not leave an empty .srt behind and claim success
            print(f"Warning: no timestamped segments found in {transcript_path}; SRT file not written.")
            return

        with open(srt_path, 'w', encoding='utf-8') as f:
            # Each cue ends with '\n'; joining with '\n' gives the blank separator line
            f.write('\n'.join(cues))
        print(f"üé¨ SRT file created successfully at: {srt_path}")
    except Exception as e:
        # Notebook boundary: report the problem instead of raising
        print(f"‚ùå Error converting transcript to SRT: {e}")

# Locate the transcript written in Step 3 and place the SRT file beside it.
# Both live in a 'transcripts' folder next to the output directory.
# NOTE(review): this reads `transcript_filename` (the Step 3 transcript), not the
# cleaned transcript from Step 4 - confirm that is the intended input.
transcript_dir = os.path.join(os.path.dirname(output_dir), 'transcripts')
transcript_path, srt_path = (
    os.path.join(transcript_dir, file_name)
    for file_name in (transcript_filename, 'subtitles.srt')
)

# Run the conversion only when there is a transcript to read
if not os.path.exists(transcript_path):
    print(f"‚ùå Transcript file does not exist: {transcript_path}")
else:
    convert_transcript_to_srt(transcript_path, srt_path)