In [4]:
!pip install SpeechRecognition pydub


Collecting SpeechRecognition
  Downloading speechrecognition-3.14.2-py3-none-any.whl.metadata (30 kB)
Downloading speechrecognition-3.14.2-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.14.2


In [None]:
# ✅ Step 1: Install required libraries
!pip install SpeechRecognition pydub

import speech_recognition as sr
import IPython.display as display
from google.colab import output

# ✅ Step 2: JavaScript to Record Audio
# Browser-side script: records 5 seconds from the microphone, base64-encodes the
# result and hands it to the Python callback registered as 'notebook.upload_audio'.
# NOTE(review): the Blob is only *labelled* 'audio/wav'; MediaRecorder chooses its
# own container/codec, so the saved bytes may not be real WAV data — confirm before
# feeding the recording to a WAV-only reader.
record_js = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time));

async function recordAudio() {
    const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    const audioChunks = [];

    try {
        const mediaRecorder = new MediaRecorder(stream);

        mediaRecorder.ondataavailable = event => {
            audioChunks.push(event.data);
        };

        // Subscribe to 'stop' before stop() is called so the event cannot be missed.
        const stopped = new Promise(resolve => { mediaRecorder.onstop = resolve; });

        mediaRecorder.start();
        await sleep(5000);  // Record for 5 seconds
        mediaRecorder.stop();
        await stopped;
    } finally {
        // Always release the microphone, even if recording failed part-way.
        stream.getTracks().forEach(track => track.stop());
    }

    const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
    const reader = new FileReader();
    reader.onloadend = () => {
        // Strip the "data:...;base64," prefix; only the payload is sent to Python.
        const base64data = reader.result.split(',')[1];
        google.colab.kernel.invokeFunction('notebook.upload_audio', [base64data], {});
    };
    reader.readAsDataURL(audioBlob);
}

// Surface failures (e.g. microphone permission denied) instead of leaving an
// unhandled promise rejection.
recordAudio().catch(err => console.error('Audio recording failed:', err));
"""

# Run JavaScript for recording
display.display(display.Javascript(record_js))

# ✅ Step 3: Receive Recorded Audio and Save it
def upload_audio(data):
    """Decode a base64 audio payload and save it as ``recorded_audio.wav``.

    Args:
        data: Base64-encoded audio content (text or bytes), i.e. the part of the
            browser's data URL that follows the comma.

    Raises:
        binascii.Error: If ``data`` is not valid base64.

    Side effects:
        Writes ``recorded_audio.wav`` in the current working directory and prints
        a confirmation message.
    """
    import base64  # local import keeps the callback self-contained

    # Python 3 strings have no .decode('base64'); use the base64 module instead.
    # Decode first so an invalid payload does not truncate an existing recording.
    audio_bytes = base64.b64decode(data)

    # NOTE(review): the file is named .wav, but the bytes are whatever the browser
    # recorder produced — confirm the container format before treating it as WAV.
    with open("recorded_audio.wav", "wb") as file:
        file.write(audio_bytes)
    print("\n✅ Recording saved as recorded_audio.wav")

# Expose upload_audio to the browser under the name that the recording script
# passes to google.colab.kernel.invokeFunction ('notebook.upload_audio').
output.register_callback('notebook.upload_audio', upload_audio)




<IPython.core.display.Javascript object>

In [None]:
from google.colab import files

# Open the browser upload dialog; the result maps each uploaded filename to its content.
uploaded = files.upload()

# Fail with a clear message instead of an IndexError when nothing was uploaded
# (for example, if the dialog was dismissed).
if not uploaded:
    raise RuntimeError("❌ No file was uploaded. Re-run this cell and choose an audio file.")

audio_file = next(iter(uploaded))  # Get the (first) filename
print(f"✅ Uploaded file: {audio_file}")


Saving sample-1.wav to sample-1.wav
✅ Uploaded file: sample-1.wav


In [None]:
import speech_recognition as sr

# Recognizer instance that performs the transcription
recognizer = sr.Recognizer()

# Open the uploaded file and read its full contents into an in-memory audio object
with sr.AudioFile(audio_file) as source:
    print("🔄 Processing audio...")
    audio = recognizer.record(source)

# Send the audio to Google's recognizer; report the outcome either way
try:
    text = recognizer.recognize_google(audio)
except sr.UnknownValueError:
    # Raised when the speech could not be understood
    print("\n❌ Could not understand the audio")
except sr.RequestError:
    # Raised when the recognition request itself failed
    print("\n❌ Could not connect to Google Speech Recognition API")
else:
    print("\n🔹 Transcribed Text: ", text)



🔄 Processing audio...

🔹 Transcribed Text:  what


In [2]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.23.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
[0m  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.

In [5]:
import gradio as gr
import speech_recognition as sr

def transcribe_audio(audio_file):
    """Transcribe an audio file to text using Google Speech Recognition.

    Args:
        audio_file: Path to the audio file to transcribe, as supplied by the
            ``gr.Audio(type="filepath")`` input. An empty or missing value is
            reported back to the user instead of raising.

    Returns:
        The transcribed text on success; otherwise a user-facing message
        (prefixed with ❌) describing what went wrong.
    """
    # Nothing to transcribe (e.g. the form was submitted without audio).
    if not audio_file:
        return "❌ No audio provided."

    recognizer = sr.Recognizer()

    try:
        # Read the whole file into memory, then release the file handle before
        # making the network request.
        with sr.AudioFile(audio_file) as source:
            audio = recognizer.record(source)
        return recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        return "❌ Could not understand the audio."
    except sr.RequestError:
        return "❌ Could not connect to Google Speech Recognition API."
    except ValueError:
        # sr.AudioFile rejects files it cannot parse (unsupported format or corrupt data).
        return "❌ Could not read the audio file. Please provide a WAV, AIFF or FLAC file."

# Web UI wiring: a single audio input, handed to transcribe_audio as a file path,
# with the returned string rendered as plain text.
iface = gr.Interface(
    title="Speech-to-Text Transcription",
    description="Upload an audio file and get the transcribed text using Google Speech Recognition.",
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)

# Start the app server for the interface defined above.
iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://879e4fa53722e4327a.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


