<a href="https://colab.research.google.com/github/ShreejayShakya28/ASR-LLM-Pipeline/blob/main/Gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nemo_toolkit['asr'] gradio soundfile librosa

In [None]:
# Mount Google Drive at /content/drive (Colab-only API). The model checkpoint
# path used later in this notebook (MODEL_PATH) lives under this mount point.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Imported in this cell so it works on a fresh kernel: no earlier cell imports torch.
import torch

# Report whether PyTorch can see a CUDA device.
cuda_available = torch.cuda.is_available()
print(cuda_available)

# get_device_name(0) raises when no CUDA device is present, so only query the
# device name on GPU runtimes and print a plain message otherwise.
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

In [None]:
import torch
import nemo.collections.asr as nemo_asr

# This notebook only runs inference, so turn autograd off globally.
torch.set_grad_enabled(False)

# Checkpoint location on the mounted Google Drive.
MODEL_PATH = "/content/drive/MyDrive/fastconformer.nemo"

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Restore the BPE-based CTC model from the .nemo archive onto the chosen device
# and switch it to evaluation mode.
asr_model = nemo_asr.models.EncDecCTCModelBPE.restore_from(restore_path=MODEL_PATH, map_location=device)
asr_model.eval()

# Quick sanity report about what was loaded.
print("Model loaded:", type(asr_model))
print("Encoder:", asr_model.cfg.encoder._target_)
print("Tokenizer vocab size:", asr_model.tokenizer.vocab_size)

In [None]:
import numpy as np
import soundfile as sf
import tempfile
import os
import torch

# Gradients are not needed for inference. This repeats the identical call made
# in the model-loading cell, so this cell can also be run on its own.
torch.set_grad_enabled(False)

@torch.inference_mode()
def transcribe(audio):
    """Transcribe one Gradio audio input with the module-level ``asr_model``.

    The samples are written to a temporary WAV file, passed to
    ``asr_model.transcribe`` as a one-element list of paths, and the temporary
    file is removed afterwards (also when writing or inference fails).

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` tuple. ``samples`` is
            a NumPy array; when it has more than one dimension the channels
            are taken to be on axis 1 and are averaged down to mono.

    Returns:
        str: The transcription text, or ``""`` when ``audio`` is ``None``,
        contains no samples, or the model returns an empty result.
    """
    if audio is None:
        return ""

    sample_rate, audio_np = audio
    audio_np = np.asarray(audio_np)

    # Nothing to transcribe: avoid writing an empty WAV and running the model.
    if audio_np.size == 0:
        return ""

    # Signed-integer PCM is rescaled to float32 in [-1, 1] before any mixing.
    # Averaging integer channels yields floats that are still in integer
    # range, and soundfile treats floating-point samples as normalised to
    # [-1, 1], so without this step the written audio would clip.
    if np.issubdtype(audio_np.dtype, np.signedinteger):
        full_scale = float(np.iinfo(audio_np.dtype).max)
        audio_np = audio_np.astype(np.float32) / full_scale

    # Down-mix multi-channel audio to mono.
    if audio_np.ndim > 1:
        audio_np = audio_np.mean(axis=1)

    # Reserve a temp path and close the handle first, so the file is not
    # written through a second handle while the first one is still open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        wav_path = f.name

    try:
        sf.write(wav_path, audio_np, sample_rate)
        # === ASR inference ===
        result = asr_model.transcribe([wav_path])
    finally:
        # Always clean up, even if writing or inference raised.
        os.remove(wav_path)

    if not result:
        return ""

    first = result[0]

    # Case 1: the API returned plain strings.
    if isinstance(first, str):
        return first

    # Case 2: the API returned Hypothesis-like objects carrying `.text`.
    return first.text

In [None]:
import gradio as gr

# Input widget: microphone recording or file upload. type="numpy" hands the
# audio to `transcribe` as the (sample_rate, samples) tuple it unpacks.
audio_input = gr.Audio(
    label="Speak or upload audio",
    type="numpy",
    sources=["microphone", "upload"],
)

# Output widget showing the recognized text.
transcript_output = gr.Textbox(label="Transcription")

iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs=transcript_output,
    title="Fast Conformer",
    description="Record or upload English speech.",
)

# share=True asks Gradio for a publicly reachable link in addition to the
# local one; anyone with that link can submit audio to this notebook.
iface.launch(share=True)