Skip to content

Commit

Permalink
some Bugfixes
Browse files Browse the repository at this point in the history
  • Loading branch information
KoljaB committed Nov 9, 2023
1 parent 910225e commit cee57e3
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 39 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ https://github.com/KoljaB/RealtimeSTT/assets/7604638/207cb9a2-4482-48e7-9d2b-072

### Updates

#### v0.1.7
- Bugfix for macOS installation (multiprocessing / `queue.qsize()`)
- KeyboardInterrupt handling (now abortable with CTRL+C)
- Bugfix for spinner handling (could lead to an exception in some cases)

#### v0.1.6
- Implements context manager protocol (recorder can be used in a `with` statement)
- Bugfix for resource management in shutdown method
Expand Down
92 changes: 56 additions & 36 deletions RealtimeSTT/audio_recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,13 +229,14 @@ def __init__(self,
logging.info(f"Starting RealTimeSTT")

# Start transcription process
self.interrupt_stop_event = Event()
self.main_transcription_ready_event = Event()
self.parent_transcription_pipe, child_transcription_pipe = Pipe()
self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event))
self.transcript_process = Process(target=AudioToTextRecorder._transcription_worker, args=(child_transcription_pipe, model, self.main_transcription_ready_event, self.shutdown_event, self.interrupt_stop_event))
self.transcript_process.start()

# Start audio data reading process
self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event))
self.reader_process = Process(target=AudioToTextRecorder._audio_data_worker, args=(self.audio_queue, self.sample_rate, self.buffer_size, self.shutdown_event, self.interrupt_stop_event))
self.reader_process.start()

# Initialize the realtime transcription model
Expand Down Expand Up @@ -326,7 +327,7 @@ def __init__(self,


@staticmethod
def _transcription_worker(conn, model_path, ready_event, shutdown_event):
def _transcription_worker(conn, model_path, ready_event, shutdown_event, interrupt_stop_event):
"""
Worker method that handles the continuous process of transcribing audio data.
Expand Down Expand Up @@ -363,25 +364,30 @@ def _transcription_worker(conn, model_path, ready_event, shutdown_event):
logging.debug('Faster_whisper main speech to text transcription model initialized successfully')

while not shutdown_event.is_set():
if conn.poll(0.5):
audio, language = conn.recv()
try:
segments = model.transcribe(audio, language=language if language else None)[0]
transcription = " ".join(seg.text for seg in segments).strip()
conn.send(('success', transcription))
except faster_whisper.WhisperError as e:
logging.error(f"Whisper transcription error: {e}")
conn.send(('error', str(e)))
except Exception as e:
logging.error(f"General transcription error: {e}")
conn.send(('error', str(e)))
else:
# If there's no data, sleep for a short while to prevent busy waiting
time.sleep(0.02)
try:
if conn.poll(0.5):
audio, language = conn.recv()
try:
segments = model.transcribe(audio, language=language if language else None)[0]
transcription = " ".join(seg.text for seg in segments).strip()
conn.send(('success', transcription))
except faster_whisper.WhisperError as e:
logging.error(f"Whisper transcription error: {e}")
conn.send(('error', str(e)))
except Exception as e:
logging.error(f"General transcription error: {e}")
conn.send(('error', str(e)))
else:
# If there's no data, sleep for a short while to prevent busy waiting
time.sleep(0.02)
except KeyboardInterrupt:
interrupt_stop_event.set()
logging.debug('Transcription worker process finished due to KeyboardInterrupt')
break


@staticmethod
def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event):
def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event, interrupt_stop_event):
"""
Worker method that handles the audio recording process.
Expand Down Expand Up @@ -434,7 +440,11 @@ def _audio_data_worker(audio_queue, sample_rate, buffer_size, shutdown_event):
print (f"Error: {e}")
continue

audio_queue.put(data)
audio_queue.put(data)

except KeyboardInterrupt:
interrupt_stop_event.set()
logging.debug('Audio data worker process finished due to KeyboardInterrupt')
finally:
stream.stop_stream()
stream.close()
Expand Down Expand Up @@ -464,14 +474,16 @@ def wait_audio(self):
self.start_recording_on_voice_activity = True

# Wait until recording starts
self.start_recording_event.wait()
while not self.interrupt_stop_event.is_set():
if (self.start_recording_event.wait(timeout=0.5)): break

# If recording is ongoing, wait for voice inactivity to finish recording.
if self.is_recording:
self.stop_recording_on_voice_deactivity = True

# Wait until recording stops
self.stop_recording_event.wait()
while not self.interrupt_stop_event.is_set():
if (self.stop_recording_event.wait(timeout=0.5)): break

# Convert recorded frames to the appropriate audio format.
audio_array = np.frombuffer(b''.join(self.frames), dtype=np.int16)
Expand Down Expand Up @@ -537,7 +549,7 @@ def text(self,

self.wait_audio()

if self.is_shut_down:
if self.is_shut_down or self.interrupt_stop_event.is_set():
return ""

if on_transcription_finished:
Expand Down Expand Up @@ -661,16 +673,23 @@ def _recording_worker(self):
# Continuously monitor audio for voice activity
while self.is_running:

data = self.audio_queue.get()
try:

# Handle queue overflow
queue_overflow_logged = False
while self.audio_queue.qsize() > self.allowed_latency_limit:
if not queue_overflow_logged:
logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
queue_overflow_logged = True
data = self.audio_queue.get()

# Handle queue overflow
queue_overflow_logged = False
while self.audio_queue.qsize() > self.allowed_latency_limit:
if not queue_overflow_logged:
logging.warning(f"Audio queue size exceeds latency limit. Current size: {self.audio_queue.qsize()}. Discarding old audio chunks.")
queue_overflow_logged = True
data = self.audio_queue.get()

except BrokenPipeError:
print ("BrokenPipeError _recording_worker")
self.is_running = False
break

if not self.is_recording:
# Handle not recording state

Expand Down Expand Up @@ -791,8 +810,9 @@ def _recording_worker(self):


except Exception as e:
logging.error(f"Unhandled exeption in _recording_worker: {e}")
raise
if not self.interrupt_stop_event.is_set():
logging.error(f"Unhandled exeption in _recording_worker: {e}")
raise


def _realtime_worker(self):
Expand Down Expand Up @@ -989,23 +1009,23 @@ def _set_state(self, new_state):
if self.on_vad_detect_start:
self.on_vad_detect_start()
self._set_spinner("speak now")
if self.spinner:
if self.spinner and self.halo:
self.halo._interval = 250
elif new_state == "wakeword":
if self.on_wakeword_detection_start:
self.on_wakeword_detection_start()
self._set_spinner(f"say {self.wake_words}")
if self.spinner:
if self.spinner and self.halo:
self.halo._interval = 500
elif new_state == "transcribing":
if self.on_transcription_start:
self.on_transcription_start()
self._set_spinner("transcribing")
if self.spinner:
if self.spinner and self.halo:
self.halo._interval = 50
elif new_state == "recording":
self._set_spinner("recording")
if self.spinner:
if self.spinner and self.halo:
self.halo._interval = 100
elif new_state == "inactive":
if self.spinner and self.halo:
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ faster-whisper>=0.7.1
pvporcupine==1.9.5
webrtcvad>=2.0.10
halo>=0.0.31
torch==2.0.1
torchaudio==2.0.2
torch>=2.0.1
torchaudio>=2.0.2
2 changes: 1 addition & 1 deletion tests/simple_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from RealtimeSTT import AudioToTextRecorder
if __name__ == '__main__':
recorder = AudioToTextRecorder(spinner=False)
recorder = AudioToTextRecorder(spinner=False, model="tiny.en", language="en")

print("Say something...")
while (True): print(recorder.text(), end=" ", flush=True)

0 comments on commit cee57e3

Please sign in to comment.