<a href="https://colab.research.google.com/github/Balaji-731/AI_Internship/blob/main/Live_speech_to_text.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt-get -y install ffmpeg
!pip install SpeechRecognition pydub

from IPython.display import display, HTML
from google.colab import output, files
import speech_recognition as sr
from pydub import AudioSegment
import base64, io

# Most recent recording as a base64 string; stays None until the browser
# delivers one through save_audio().
recorded_audio = None  # will hold base64 audio from JS

# Callback from JavaScript
def save_audio(b64_audio: str) -> None:
    """Store the base64-encoded recording sent by the browser.

    Each call overwrites the module-level ``recorded_audio``, so only the
    latest recording is kept for ``process_audio_and_save()``.

    Args:
        b64_audio: Base64 payload of the recorded audio (the part of the
            data URL after the comma, as sent by the page script).
    """
    global recorded_audio
    recorded_audio = b64_audio

# Register under the exact name the page script passes to
# google.colab.kernel.invokeFunction.
output.register_callback('notebook.save_audio', save_audio)

# -------------------------------------------------------
# HTML + JavaScript Buttons (Start / Stop)
# -------------------------------------------------------
# Markup + script for the recorder widget.
# The script records microphone audio as WebM, base64-encodes it and sends it
# to the kernel callback registered as 'notebook.save_audio'. It also:
#   * releases the microphone once recording stops,
#   * reports start-up failures (denied permission, unsupported format),
#   * updates the status line when the upload succeeds or fails, so it does
#     not stay on "Processing..." forever.
html_code = """
<div>
  <button id="startBtn" style="padding:10px 20px; font-size:18px;">Start Recording</button>
  <button id="stopBtn" style="padding:10px 20px; font-size:18px; margin-left:10px;" disabled>Stop Recording</button>
  <p id="status" style="font-size:18px;">Status: Not Recording</p>
</div>

<script>
let mediaRecorder;
let audioChunks = [];

const startBtn = document.getElementById("startBtn");
const stopBtn = document.getElementById("stopBtn");
const statusEl = document.getElementById("status");

startBtn.onclick = async () => {
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    mediaRecorder = new MediaRecorder(stream, { mimeType: "audio/webm" });
  } catch (err) {
    // Permission denied, no input device, or unsupported mime type:
    // free the device if we got one and tell the user what happened.
    if (stream) {
      stream.getTracks().forEach(track => track.stop());
    }
    statusEl.innerText = "Status: Could not start recording (" + err.message + ")";
    return;
  }

  audioChunks = [];
  mediaRecorder.ondataavailable = e => audioChunks.push(e.data);

  mediaRecorder.onstop = () => {
    // Release the microphone so the browser's recording indicator turns off.
    stream.getTracks().forEach(track => track.stop());

    let blob = new Blob(audioChunks, { type: "audio/webm" });
    let reader = new FileReader();
    reader.onloadend = async () => {
      try {
        // Send only the base64 part of the data URL (after the comma).
        await google.colab.kernel.invokeFunction(
          'notebook.save_audio',
          [reader.result.split(',')[1]],
          {}
        );
        statusEl.innerText = "Status: Audio saved. Now run process_audio_and_save()";
      } catch (err) {
        statusEl.innerText = "Status: Failed to send audio to the notebook";
      }
    };
    reader.readAsDataURL(blob);
  };

  mediaRecorder.start();
  statusEl.innerText = "Status: Recording...";
  startBtn.disabled = true;
  stopBtn.disabled = false;
};

stopBtn.onclick = () => {
  mediaRecorder.stop();
  statusEl.innerText = "Status: Processing...";
  startBtn.disabled = false;
  stopBtn.disabled = true;
};
</script>
"""

# Render the recorder buttons in the cell output; the embedded <script>
# attaches their click handlers.
display(HTML(html_code))


# -------------------------------------------------------
# PROCESS → SAVE AUDIO + SAVE TEXT
# -------------------------------------------------------
def process_audio_and_save():
    """Transcribe the latest browser recording and save audio + transcript.

    Decodes the base64 WebM payload held in the module-level
    ``recorded_audio``, converts it to ``recorded_audio.wav``, runs Google
    speech recognition on it, writes the result to ``transcript.txt``,
    prints a summary and triggers a browser download of both files.

    If nothing has been recorded yet (``None`` or an empty payload), a hint
    is printed and the function returns without touching any file.

    Recognition failures do not raise: a bracketed error note is stored in
    place of the transcript so the WAV file is still saved and downloadable.

    Returns:
        None.
    """
    # An empty string can arrive if the browser produced an empty blob;
    # treat it like "nothing recorded" instead of failing in the decoder.
    if not recorded_audio:
        print("No audio recorded yet! Click Start, speak, then Stop.")
        return

    # 1) BASE64 → bytes (webm)
    audio_bytes = base64.b64decode(recorded_audio)
    webm_io = io.BytesIO(audio_bytes)

    # 2) Convert WebM → WAV using pydub (sr.AudioFile cannot read WebM)
    audio_seg = AudioSegment.from_file(webm_io, format="webm")
    wav_path = "recorded_audio.wav"
    audio_seg.export(wav_path, format="wav")

    # 3) SpeechRecognition on the WAV file
    recognizer = sr.Recognizer()
    with sr.AudioFile(wav_path) as source:
        audio_data = recognizer.record(source)

    try:
        text = recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # Raised with an empty message when no speech could be recognised,
        # so formatting the exception itself would print nothing useful.
        text = "[Speech recognition error: could not understand the audio]"
    except sr.RequestError as e:
        text = f"[Speech recognition error: request failed: {e}]"
    except Exception as e:
        # Best-effort fallback: keep going so the audio is still saved, but
        # include the exception type because str(e) may be empty.
        text = f"[Speech recognition error: {type(e).__name__}: {e}]"

    # 4) Save transcript to text file
    txt_path = "transcript.txt"
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(text)

    # 5) Show + download
    print("You said:")
    print(text)
    print("\n Saved files:")
    print(f"- {wav_path}")
    print(f"- {txt_path}")

    # Trigger downloads
    files.download(wav_path)
    files.download(txt_path)

# The recording reaches the kernel only after the user clicks Stop in the
# browser, so transcription is left as a separate, manually triggered step.
print("Click Start / Stop above, then run: process_audio_and_save()")


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 57 not upgraded.


Click Start / Stop above, then run: process_audio_and_save()


In [None]:
process_audio_and_save()

You said:
[Speech recognition error: ]

 Saved files:
- recorded_audio.wav
- transcript.txt


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>