## **Automatic Speech Recognition (ASR) with Distil-Whisper**

> A web interface for speech-to-text transcription using distil-whisper/distil-small.en

In [None]:
# Suppress warnings for cleaner output.
# `warnings` has to be imported in this cell: it runs before the main import
# cell, and without the import the call below raises NameError.
import warnings

warnings.filterwarnings("ignore")

> If you would like to run this code on your own machine, you can install the following:

In [None]:
# !pip install transformers
# !pip install gradio

### **Importing Libraries**

In [None]:
import os
import gradio as gr
from transformers import pipeline

### **Model Initialization**
> Creates an ASR pipeline with two key parameters:

> task="automatic-speech-recognition" - Specifies this is for converting speech to text

> model="distil-whisper/distil-small.en" - Uses a distilled version of OpenAI's Whisper model optimized for English

In [None]:
from transformers import pipeline

# Build the speech-to-text pipeline once at module level so every
# transcription request reuses the same loaded model.
asr = pipeline(
    "automatic-speech-recognition",          # task: convert speech to text
    model="distil-whisper/distil-small.en",  # distilled Whisper checkpoint (English)
)

### **Transcribing The Audio**
> Transcribe audio file to text using speech recognition model.

> Arguments: filepath: Path to audio file (e.g., .mp3, .wav)

> Returns: string: Transcribed text, or an empty string if no audio was provided

In [None]:
def transcribe_speech(filepath):
    """Transcribe an audio file to text with the module-level ``asr`` pipeline.

    Args:
        filepath: Path to the audio file to transcribe, or ``None`` when the
            UI submitted nothing.

    Returns:
        The transcribed text, or an empty string when ``filepath`` is ``None``
        (a Gradio warning is shown to the user in that case).
    """
    if filepath is not None:
        result = asr(
            filepath,
            chunk_length_s=30,   # split long recordings into 30-second chunks
            batch_size=8,        # run up to 8 chunks through the model at once
            max_new_tokens=256,  # cap on generated tokens per chunk
        )
        # The pipeline returns a dict; only the transcript text is needed.
        return result["text"]

    # Nothing was recorded or uploaded: notify the user and return no text.
    gr.Warning("No audio found, please retry.")
    return ""

### **Build a shareable app with Gradio**
> This code creates a Gradio web interface for transcribing speech from a live microphone recording or an uploaded audio file.

#### **Create Gradio interface**

In [None]:
demo = gr.Blocks(title="Transcribing Chatbot", theme="soft")

# Build the interface: a header followed by two tabs (microphone / file upload).
# Both tabs feed the same handler, `transcribe_speech`, which expects a path to
# an audio file on disk.

with demo:
    with gr.Row():
        with gr.Column():
            gr.Markdown("""
            <div style="text-align: center;">
                <h1 style="font-size: 2.5em; color: #4a6fa5; margin-bottom: 10px;">🎙️ UI Transcribing Chatbot</h1>
                <h3 style="color: #666; font-weight: normal;">Convert speech to text instantly</h3>
                <hr style="margin: 20px 0; border: 1px solid #e0e0e0;">
            </div>
            """)
    
    # Tabs for different input methods

    with gr.Tabs():
        # Tab 1: Microphone
        with gr.Tab("🎤 Microphone"):
            # type="filepath" is required: gr.Audio defaults to type="numpy",
            # which would hand the handler a (sample_rate, array) tuple instead
            # of the file path it expects.
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="Record",
            )
            transcribe_btn = gr.Button("Transcribe", variant="primary")
            output = gr.Textbox(label="Transcript", lines=5)
            
            transcribe_btn.click(
                fn=transcribe_speech,
                inputs=audio_input,
                outputs=output
            )
        
        # Tab 2: File Upload
        with gr.Tab("📁 File Upload"):
            # Explicit type="filepath" so both tabs pass the handler the same
            # kind of value (a path string).
            file_input = gr.File(
                label="Upload audio file",
                file_types=["audio"],
                type="filepath",
            )
            transcribe_file_btn = gr.Button("Transcribe", variant="primary")
            file_output = gr.Textbox(label="Transcript", lines=5)
            
            transcribe_file_btn.click(
                fn=transcribe_speech,
                inputs=file_input,
                outputs=file_output
            )

# Launch the app.
# The guard keeps the server from starting when this module is imported;
# the interface is launched only when the file runs as the main program.
if __name__ == "__main__":
    demo.launch(debug=True)