[Reference](https://blog.gopenai.com/%EF%B8%8F-breaking-docling-unlocks-asr-automatic-speech-recognition-power-029a0c78897e)

```bash
python3 -m venv venv
source venv/bin/activate

pip install --upgrade pip

pip install docling
pip install openai-whisper
```

```bash
brew install ffmpeg
```

In [1]:
from pathlib import Path

from docling_core.types.doc import DoclingDocument

from docling.datamodel import asr_model_specs
from docling.datamodel.base_models import ConversionStatus, InputFormat
from docling.datamodel.document import ConversionResult
from docling.datamodel.pipeline_options import AsrPipelineOptions
from docling.document_converter import AudioFormatOption, DocumentConverter
from docling.pipeline.asr_pipeline import AsrPipeline


def get_asr_converter():
    """Build a DocumentConverter that routes audio input through the ASR pipeline.

    The pipeline options are set to the ``WHISPER_TURBO`` model spec.

    Returns:
        A ``DocumentConverter`` with a single format option registered for
        ``InputFormat.AUDIO``.
    """
    asr_pipeline_options = AsrPipelineOptions()
    asr_pipeline_options.asr_options = asr_model_specs.WHISPER_TURBO

    # Audio is the only input format configured here.
    audio_format_option = AudioFormatOption(
        pipeline_cls=AsrPipeline,
        pipeline_options=asr_pipeline_options,
    )
    return DocumentConverter(
        format_options={InputFormat.AUDIO: audio_format_option},
    )


def _print_debug_summary(document) -> None:
    """Print the document object followed by its text segments joined by spaces."""
    print("\n--- DoclingDocument Object Content ---")
    print(document)
    print("\n--- DoclingDocument Plain Text (from .texts attribute) ---")
    texts = getattr(document, "texts", None)
    if not isinstance(texts, list):
        print("DoclingDocument does not have a 'texts' attribute or it's not a list.")
    elif texts:
        # Each entry is expected to expose its transcript fragment as ``.text``.
        print(" ".join(item.text for item in texts))
    else:
        print("[No text segments found]")
    print("------------------------------------\n")


def asr_pipeline_conversion(audio_path: Path) -> DoclingDocument:
    """Convert an audio file to a document using the whisper_turbo ASR converter.

    Args:
        audio_path: Path of the audio file to convert.

    Returns:
        The ``document`` attribute of the conversion result.

    Raises:
        FileNotFoundError: If ``audio_path`` does not exist.
        RuntimeError: If the conversion status is not
            ``ConversionStatus.SUCCESS``.
    """
    # Explicit raises instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would silently disable these checks.
    if not audio_path.exists():
        raise FileNotFoundError(f"Test audio file not found: {audio_path}")

    converter = get_asr_converter()
    result: ConversionResult = converter.convert(audio_path)

    if result.status != ConversionStatus.SUCCESS:
        raise RuntimeError(f"Conversion failed with status: {result.status}")

    # Debug output; safe to remove if the console dump is not needed.
    _print_debug_summary(result.document)

    return result.document


if __name__ == "__main__":
    # Input clip and the destination of its Markdown transcript.
    audio_path = Path("./input/sample_10s.mp3")
    output_markdown_file = Path("output_asr.md")

    print(f"Attempting ASR conversion for: {audio_path}")
    doc = asr_pipeline_conversion(audio_path=audio_path)

    # Render the converted document as Markdown.
    markdown_content = doc.export_to_markdown()

    # Persist the Markdown, then echo it to the console; a failed write is
    # reported instead of raised.
    try:
        output_markdown_file.write_text(markdown_content, encoding="utf-8")
        print(f"\nMarkdown content successfully written to: {output_markdown_file.absolute()}")
        print("\n--- Content of output_asr.md ---")
        print(markdown_content)
        print("--------------------------------")
    except OSError as write_error:
        print(f"\nError writing markdown to file {output_markdown_file}: {write_error}")
