In [5]:
pip install ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install sounddevice soundfile requests


Note: you may need to restart the kernel to use updated packages.


In [None]:
import os
import random
import string
import threading
import time

import ipywidgets as widgets
import requests
import sounddevice as sd
import soundfile as sf
from IPython.display import display, clear_output

# API info
API_URL = "https://api-inference.huggingface.co/models/openai/whisper-large-v3"

# The access token is a secret and must never be stored in the notebook itself
# (notebooks get shared and committed). It is read from the HF_API_TOKEN
# environment variable instead; set it before starting Jupyter.
# NOTE(review): a token that was previously committed in this cell should be
# treated as leaked and revoked in the Hugging Face account settings.
API_TOKEN = os.environ.get("HF_API_TOKEN", "").strip()
if not API_TOKEN:
    # Do not abort the cell: the UI can still be built, but requests will be
    # rejected by the API until a token is provided.
    print("⚠️ HF_API_TOKEN is not set; transcription requests will be rejected.")

# Shared stop signal: the manual recording loop keeps going while this is True
# and the Stop button callback resets it to False.
recording_flag = False

# HTTP headers sent with every Whisper request: bearer-token authentication
# and a raw WAV payload as the request body.
headers = {
    "Authorization": "Bearer {}".format(API_TOKEN),
    "Content-Type": "audio/wav",
}

# Panel that receives recording status messages and transcripts
output_area = widgets.Output()

# Recording length selector: 2 to 15 seconds in 1-second steps, 5 by default
duration_input = widgets.IntSlider(
    description='Duration (sec):',
    min=2,
    max=15,
    step=1,
    value=5,
)

# Control buttons; Stop starts out disabled because nothing is recording yet
record_btn = widgets.Button(
    button_style='primary',
    description="🎙️ Record Fixed Duration",
)
manual_btn = widgets.Button(
    button_style='info',
    description="🔁 Record with Stop Breaks",
)
stop_btn = widgets.Button(
    button_style='danger',
    description="⛔ Stop Recording",
    disabled=True,
)
# Helper functions
def random_filename():
    """Return a WAV file name of the form ``recording_<suffix>.wav``.

    The suffix is six characters drawn at random (with replacement) from the
    lowercase ASCII letters and the digits.
    """
    alphabet = string.ascii_lowercase + string.digits
    suffix = ''.join(random.choices(alphabet, k=6))
    return f"recording_{suffix}.wav"

def transcribe_audio_file(file_path, timeout=60):
    """Upload an audio file to the Whisper endpoint and return the transcript.

    Args:
        file_path: Path of the audio file whose raw bytes are posted to
            ``API_URL`` using the module-level ``headers``.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout a stalled connection would block the caller
            (the kernel or the recording thread) indefinitely.

    Returns:
        The value of the ``"text"`` field of the JSON response (an empty
        string if the field is missing) on HTTP 200. In every failure case
        (network error, non-200 status, unparsable or unexpected payload) a
        human-readable string starting with ``"[ERROR]"`` is returned so the
        UI can display it instead of crashing.

    Raises:
        OSError: If ``file_path`` cannot be opened or read.
    """
    with open(file_path, "rb") as f:
        audio_bytes = f.read()

    try:
        response = requests.post(
            API_URL, headers=headers, data=audio_bytes, timeout=timeout
        )
    except requests.RequestException as exc:
        # Connection errors, DNS failures, timeouts, ...
        return f"[ERROR] Request failed: {exc}"

    if response.status_code != 200:
        return f"[ERROR] {response.status_code}: {response.text}"

    try:
        payload = response.json()
    except ValueError:
        # The body of a 200 response was not valid JSON.
        return f"[ERROR] Unexpected non-JSON response: {response.text}"

    if not isinstance(payload, dict):
        # .get() below is only meaningful on a JSON object.
        return f"[ERROR] Unexpected response payload: {payload!r}"
    return payload.get("text", "")

def record_audio(duration):
    """Record ``duration`` seconds of audio and save it to a new WAV file.

    Audio is captured as a single channel of 16-bit integer samples at
    16 kHz. Progress messages are appended to ``output_area``.

    Args:
        duration: Recording length in seconds.

    Returns:
        The name of the file the recording was written to (generated by
        ``random_filename``).
    """
    sample_rate = 16000
    output_area.append_stdout(f"🎤 Recording for {duration} seconds...\n")

    frame_count = int(duration * sample_rate)
    samples = sd.rec(frame_count, samplerate=sample_rate, channels=1, dtype='int16')
    sd.wait()  # wait for the capture started by sd.rec to complete

    target = random_filename()
    sf.write(target, samples, sample_rate)
    output_area.append_stdout(f"✅ Recording saved as {target}\n")
    return target

# Button callbacks
def record_fixed_duration(btn=None):
    """Click handler: record once for the slider's duration and transcribe it.

    Clears the output panel, records for ``duration_input.value`` seconds,
    sends the saved file for transcription and appends the result to the
    output panel.

    Args:
        btn: Argument supplied by the button's ``on_click``; not used.
    """
    output_area.clear_output()
    saved_path = record_audio(duration_input.value)
    text = transcribe_audio_file(saved_path)
    output_area.append_stdout(f"📝 Transcription:\n{text}\n")

def start_manual_recording(btn=None):
    """Click handler: record and transcribe chunks repeatedly until stopped.

    Sets ``recording_flag``, disables the two record buttons, enables the
    Stop button and starts a background thread. The thread records a chunk of
    ``duration_input.value`` seconds, transcribes it, pauses, and repeats
    while ``recording_flag`` stays True.

    Whatever happens inside the thread (normal stop or an error raised by
    recording/transcription), the flag is cleared and the buttons are put back
    into their idle state, so the UI can never get stuck with every record
    button disabled.

    Args:
        btn: Argument supplied by the button's ``on_click``; not used.
    """
    global recording_flag
    recording_flag = True
    record_btn.disabled = True
    manual_btn.disabled = True
    stop_btn.disabled = False

    # Length of the break between two chunks. It is slept in 1-second slices
    # so that a Stop request is noticed within about a second.
    pause_seconds = 8

    def recording_loop():
        global recording_flag
        chunk = 1
        try:
            while recording_flag:
                output_area.append_stdout(f"\n🔁 Chunk {chunk}:\n")
                filename = record_audio(duration_input.value)
                transcript = transcribe_audio_file(filename)
                output_area.append_stdout(f"📝 Transcription:\n{transcript}\n")
                chunk += 1
                for _ in range(pause_seconds):
                    if not recording_flag:
                        break
                    time.sleep(1)
        except Exception as exc:
            # An uncaught exception would end the thread silently; report it
            # in the output panel instead.
            output_area.append_stdout(f"[ERROR] Recording aborted: {exc}\n")
        finally:
            # Always return to the idle state, even after a failure.
            recording_flag = False
            output_area.append_stdout("⛔ Recording stopped.\n")
            stop_btn.disabled = True
            manual_btn.disabled = False
            record_btn.disabled = False

    # Daemon thread: a recording still in progress must not keep the
    # interpreter alive when the kernel shuts down.
    threading.Thread(target=recording_loop, daemon=True).start()

def stop_manual_recording(btn=None) -> None:
    """Click handler: request the manual recording loop to stop.

    Only clears the module-level ``recording_flag``. The loop started by
    ``start_manual_recording`` checks the flag before each chunk and once per
    second during the pause between chunks, so a chunk that is already being
    recorded or transcribed finishes first.

    Args:
        btn: Argument supplied by the button's ``on_click``; not used.
    """
    global recording_flag
    recording_flag = False

# Wire each button to its click handler
for _button, _handler in (
    (record_btn, record_fixed_duration),
    (manual_btn, start_manual_recording),
    (stop_btn, stop_manual_recording),
):
    _button.on_click(_handler)

# Render the controls in order, followed by the output panel
display(duration_input, record_btn, manual_btn, stop_btn, output_area)


IntSlider(value=5, description='Duration (sec):', max=15, min=2)

Button(button_style='primary', description='🎙️ Record Fixed Duration', style=ButtonStyle())

Button(button_style='info', description='🔁 Record with Stop Breaks', style=ButtonStyle())

Button(button_style='danger', description='⛔ Stop Recording', disabled=True, style=ButtonStyle())

Output()