In [None]:
import whisper
import sounddevice as sd
import numpy as np
import re

# Configuration
# Spoken word that ends dictation; get_full_message() stops recording once a
# transcript contains it and strips it from the returned message.
TRIGGER_WORD = "quit"
# Sampling rate in Hz handed to record_chunk() for every recording.
SAMPLERATE = 16000
# TODO: check whether the transcript can be displayed while recording
# (buffered/windowed capture?) instead of only after each fixed-length chunk.
CHUNK_DURATION = 5  # seconds recorded per chunk

# Load the Whisper "base" model once at module level; transcribe_chunk() reuses it.
model = whisper.load_model("base")

def record_chunk(duration, samplerate=16000):
    """Record a single-channel audio chunk and return it as a flat float32 array.

    Args:
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz used for the capture.

    Returns:
        A one-dimensional ``np.float32`` array holding the recorded samples.
    """
    print(f"Listening for {duration} seconds...")  
    frame_count = int(duration * samplerate)
    recording = sd.rec(
        frame_count,
        samplerate=samplerate,
        channels=1,
        dtype='float32',
    )
    # Block until the capture started by sd.rec() has finished.
    sd.wait()
    # Collapse the (frames, 1) buffer into a flat vector of samples.
    mono = recording.flatten()
    return mono.astype(np.float32)

def transcribe_chunk(audio_chunk):
    """Transcribe one audio chunk with the module-level Whisper ``model``.

    Args:
        audio_chunk: Audio samples to transcribe (record_chunk() supplies a
            flat float32 array).

    Returns:
        The recognised text with leading and trailing whitespace removed.
    """
    result = model.transcribe(
        audio_chunk,
        fp16=False,       # decode in full precision rather than half precision
        language="en",    # pin the language instead of letting the model detect it
        verbose=True,     # let Whisper echo its progress/segments to the console
    )
    transcript = result["text"]
    return transcript.strip()

def contains_trigger(text, trigger):
    """Return True when *trigger* occurs in *text* as a standalone word or phrase.

    Matching is case-insensitive. The trigger only counts when it is not
    glued to other word characters, so a trigger of ``"quit"`` matches
    ``"Quit."`` or ``"please quit"`` but not ``"quite"`` or ``"mosquito"``.
    A plain substring test would end dictation on such look-alike words.

    Args:
        text: Transcribed text to inspect.
        trigger: Word or phrase to look for.

    Returns:
        ``True`` if the trigger is present as a whole word/phrase, else
        ``False``. An empty trigger always matches, mirroring the semantics
        of ``"" in text``.
    """
    if not trigger:
        # Keep the long-standing behaviour for a blank trigger: it matches anything.
        return True
    # Lookarounds instead of \b so triggers that start or end with a
    # non-word character (e.g. "stop!") are still bounded correctly.
    pattern = r"(?<!\w)" + re.escape(trigger) + r"(?!\w)"
    return re.search(pattern, text, re.IGNORECASE) is not None

def remove_trigger(text, trigger):
    """Strip every standalone occurrence of *trigger* from *text*.

    Only whole-word (or whole-phrase) occurrences are removed, matched
    case-insensitively, so words that merely contain the trigger (``"quite"``
    for a trigger of ``"quit"``) are left intact. Sentence punctuation
    attached directly after the trigger (``"Quit."``) is removed with it, and
    the whitespace left behind is collapsed so the result has no double
    spaces.

    Args:
        text: Text that may contain the trigger.
        trigger: Word or phrase to remove.

    Returns:
        The cleaned text with runs of whitespace collapsed to single spaces
        and no leading/trailing whitespace. With an empty trigger nothing is
        removed; the text is only whitespace-normalised.
    """
    if trigger:
        pattern = re.compile(
            r"(?<!\w)" + re.escape(trigger) + r"(?!\w)[.,!?;:]*",
            re.IGNORECASE,
        )
        text = pattern.sub("", text)
    # split()/join collapses the gaps left where the trigger used to be.
    return " ".join(text.split())

def get_full_message():
    """Record and transcribe chunks until the trigger word is heard.

    Each chunk is recorded with ``CHUNK_DURATION``/``SAMPLERATE`` and
    transcribed; non-empty transcripts are echoed and collected. Recording
    stops as soon as ``contains_trigger`` reports ``TRIGGER_WORD`` in the
    latest transcript.

    Returns:
        All collected transcripts joined with single spaces, with the trigger
        word removed via ``remove_trigger``.
    """
    collected = []
    triggered = False

    while not triggered:
        chunk = record_chunk(CHUNK_DURATION, SAMPLERATE)
        transcript = transcribe_chunk(chunk)

        # Silent chunks yield an empty transcript; skip them in the message.
        if transcript:
            print(f"Transcribed: {transcript}")
            collected.append(transcript)

        triggered = contains_trigger(transcript, TRIGGER_WORD)
        if triggered:
            print("Trigger word detected.")

    joined = " ".join(collected)
    # Collapse any run of whitespace so the message is a single clean line.
    normalized = re.sub(r"\s+", " ", joined).strip()
    return remove_trigger(normalized, TRIGGER_WORD)

def main():
    """Capture spoken commands until the user confirms one on the console.

    Each round dictates a full message, prints it, and asks for a y/n
    confirmation. Any answer other than ``y`` (case-insensitive, surrounding
    whitespace ignored) starts another round.
    """
    confirmed = False
    while not confirmed:
        message = get_full_message()
        print(f"\nFull command:\"{message}\"")

        answer = input("Confirm command? (y/n): ").strip().lower()
        confirmed = answer == "y"
        if not confirmed:
            print("Let's try again...\n")
            continue
        print("Command confirmed.")

# Start the interactive loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
