Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions RELEASES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@

first release version.

## v0.1.8

* add speaker and listener with audio rate conversion

## v0.1.7

* update speaker and listener with pyaudio device_index argument
Expand Down
10 changes: 7 additions & 3 deletions ghostos/framework/audio/pyaudio_io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

def get_pyaudio_pcm16_listener(
rate: int = 24000,
output_rate: int = 24000,
interval: float = 0.5,
channels: int = 1,
chunk_size: int = 1024,
Expand All @@ -15,7 +16,8 @@ def get_pyaudio_pcm16_listener(
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
from ghostos.framework.audio.pyaudio_io.listener import PyAudioPCM16Listener
return PyAudioPCM16Listener(
rate=rate,
sample_rate=rate,
output_rate=output_rate,
interval=interval,
channels=channels,
chunk_size=chunk_size,
Expand All @@ -24,7 +26,8 @@ def get_pyaudio_pcm16_listener(


def get_pyaudio_pcm16_speaker(
rate: int = 24000,
input_rate: int = 24000,
output_rate: int = 24000,
buffer_size: int = 1024 * 5,
channels: int = 1,
output_device_index: Union[int, None] = None,
Expand All @@ -35,7 +38,8 @@ def get_pyaudio_pcm16_speaker(
raise ImportError(f"pyaudio package is required. run `pip install ghostos[audio]`")
from ghostos.framework.audio.pyaudio_io.speaker import PyAudioPCM16Speaker
return PyAudioPCM16Speaker(
rate=rate,
input_rate=input_rate,
output_rate=output_rate,
buffer_size=buffer_size,
channels=channels,
output_device_index=output_device_index,
Expand Down
7 changes: 5 additions & 2 deletions ghostos/framework/audio/pyaudio_io/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@

if __name__ == '__main__':

listener = PyAudioPCM16Listener()
listener = PyAudioPCM16Listener(
sample_rate=44100,
output_rate=24000,
)
ticker = Timeleft(0)

heard = BytesIO()
Expand Down Expand Up @@ -46,7 +49,7 @@ def read() -> Union[bytes, None]:
return heard.read(1024)


speaker = PyAudioPCM16Speaker()
speaker = PyAudioPCM16Speaker(input_rate=24000, output_rate=44100)
print("start speaking, %f" % ticker.passed())
with speaker.speak(read) as speaking:
speaking.wait()
Expand Down
36 changes: 28 additions & 8 deletions ghostos/framework/audio/pyaudio_io/listener.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
try:
from pyaudio import PyAudio, paInt16
from scipy.signal import resample
except ImportError:
raise ImportError(f"Pyaudio is required, please install pyaudio or ghostos[audio] first")

import numpy as np
from typing import Callable, Optional
from ghostos.abcd.realtime import Listener, Listening
from threading import Thread, Event
Expand All @@ -18,25 +20,28 @@ class PyAudioPCM16Listener(Listener):

def __init__(
self,
rate: int = 24000,
sample_rate: int = 24000,
output_rate: int = 24000,
chunk_size: int = CHUNK,
interval: float = 0.5,
channels: int = CHANNELS,
input_device_index: Optional[int] = None,
):
self.rate = rate
self.sample_rate = sample_rate
self.output_rate = output_rate
self.chunk_size = chunk_size
self.stream = PyAudio().open(
format=paInt16,
channels=channels,
rate=self.rate,
rate=self.sample_rate,
input=True,
input_device_index=input_device_index,
)
self.interval = interval

def listen(self, sender: Callable[[bytes], None]) -> Listening:
return PyAudioPCM16Listening(self.stream, sender, self.rate, self.chunk_size, self.interval)
return PyAudioPCM16Listening(self.stream, sender, self.sample_rate, self.output_rate, self.chunk_size,
self.interval)

def __del__(self):
self.stream.close()
Expand All @@ -48,14 +53,16 @@ def __init__(
self,
stream,
sender: Callable[[bytes], None],
rate: int = 24000,
sample_rate: int = 24000,
output_rate: int = 24000,
chunk: int = CHUNK,
interval: float = 0.5,
):
self.sender = sender
self.stream = stream
self.interval = interval
self.rate = rate
self.sample_rate = sample_rate
self.output_rate = output_rate
self.chunk = chunk
self.stopped = Event()
self.thread = Thread(target=self._listening)
Expand All @@ -64,12 +71,25 @@ def _listening(self):
self.stream.start_stream()
while not self.stopped.is_set():
buffer = BytesIO()
for i in range(int((self.rate / self.chunk) * self.interval)):
for i in range(int((self.sample_rate / self.chunk) * self.interval)):
data = self.stream.read(self.chunk, exception_on_overflow=False)
buffer.write(data)
self.sender(buffer.getvalue())
parsed = self._parse_output_data(buffer.getvalue())
self.sender(parsed)
self.stream.stop_stream()

def _parse_output_data(self, data: bytes) -> bytes:
    """Resample captured 16-bit PCM audio from ``sample_rate`` to ``output_rate``.

    :param data: raw little-endian int16 PCM bytes read from the input stream.
    :return: int16 PCM bytes at ``output_rate``. ``data`` is returned untouched
        when both rates are equal or when there is nothing to convert.
    """
    # Nothing to do when the rates match; an empty buffer (e.g. when the
    # interval is shorter than one chunk) cannot be resampled either.
    if self.sample_rate == self.output_rate or not data:
        return data
    audio_data = np.frombuffer(data, dtype=np.int16)
    num_samples = int(len(audio_data) * self.output_rate / self.sample_rate)
    if num_samples <= 0:
        return b""

    # Resample with scipy's FFT-based resampler (returns floats).
    # NOTE(review): the buffer is treated as one flat sample stream, so
    # interleaved multi-channel audio is not resampled per channel.
    resampled_audio = resample(audio_data, num_samples)

    # FFT resampling can overshoot the int16 range; round and clip first so
    # the cast does not wrap around, then export real bytes for the sender.
    clipped = np.clip(np.rint(resampled_audio), -32768, 32767)
    return clipped.astype(np.int16).tobytes()

def __enter__(self):
self.thread.start()

Expand Down
39 changes: 32 additions & 7 deletions ghostos/framework/audio/pyaudio_io/speaker.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
try:
from pyaudio import PyAudio, paInt16
from scipy.signal import resample
except ImportError:
raise ImportError(f"Pyaudio is required, please install pyaudio or ghostos[audio] first")

import numpy as np
from typing import Callable, Union
from ghostos.abcd.realtime import Speaker, Speaking
from threading import Thread, Event
Expand All @@ -12,33 +14,43 @@ class PyAudioPCM16Speaker(Speaker):

def __init__(
self,
rate: int = 24000,
input_rate: int = 24000,
output_rate: int = 24000,
buffer_size: int = 4096,
channels: int = 1,
output_device_index: Union[int, None] = None,
):
self.rate = rate
self.input_rate = input_rate
self.output_rate = output_rate
self.buffer_size = buffer_size
self.stream = PyAudio().open(
format=paInt16,
channels=channels,
rate=self.rate,
rate=self.output_rate,
output=True,
output_device_index=output_device_index,
)

def speak(self, queue: Callable[[], Union[bytes, None]]) -> Speaking:
return PyAudioPCM16Speaking(self.stream, queue, self.rate, self.buffer_size)
return PyAudioPCM16Speaking(self.stream, queue, self.input_rate, self.output_rate, self.buffer_size)

def __del__(self):
self.stream.close()


class PyAudioPCM16Speaking(Speaking):

def __init__(self, stream, queue: Callable[[], Union[bytes, None]], rate: int = 24000, buffer_size: int = 0):
def __init__(
self,
stream,
queue: Callable[[], Union[bytes, None]],
input_rate: int = 24000,
output_rate: int = 24000,
buffer_size: int = 0,
):
self.stream = stream
self.rate = rate
self.input_rate = input_rate
self.output_rate = output_rate
self.buffer_size = buffer_size
self.queue = queue
self.stop = Event()
Expand All @@ -52,9 +64,22 @@ def _speaking(self):
data = self.queue()
if not data:
break
self.stream.write(data)
parsed = self._parse_output_data(data)
self.stream.write(parsed)
self._done = True

def _parse_output_data(self, data: bytes) -> bytes:
    """Resample 16-bit PCM audio from ``input_rate`` to ``output_rate`` before playback.

    :param data: raw little-endian int16 PCM bytes pulled from the speaking queue.
    :return: int16 PCM bytes at ``output_rate``. ``data`` is returned untouched
        when both rates are equal or when there is nothing to convert.
    """
    # Nothing to do when the rates match or the chunk is empty.
    if self.input_rate == self.output_rate or not data:
        return data
    audio_data = np.frombuffer(data, dtype=np.int16)
    num_samples = int(len(audio_data) * self.output_rate / self.input_rate)
    if num_samples <= 0:
        return b""

    # Resample with scipy's FFT-based resampler (returns floats).
    # NOTE(review): the buffer is treated as one flat sample stream, so
    # interleaved multi-channel audio is not resampled per channel.
    resampled_audio = resample(audio_data, num_samples)

    # FFT resampling can overshoot the int16 range; round and clip first so
    # the cast does not wrap around, then export real bytes for the stream.
    clipped = np.clip(np.rint(resampled_audio), -32768, 32767)
    return clipped.astype(np.int16).tobytes()

def __enter__(self):
self.thread.start()
return self
Expand Down
1 change: 1 addition & 0 deletions ghostos/framework/openai_realtime/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ def save_audio_data(self, item_id: str, audio_data: bytes) -> None:
with wave.open(buffer, 'wb') as f:
f.setnchannels(1)
f.setsampwidth(2)
# todo: save rate by configs
f.setframerate(24000)
f.writeframes(audio_data)

Expand Down
4 changes: 3 additions & 1 deletion ghostos/prototypes/streamlitapp/pages/chat_with_ghost.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,13 +259,15 @@ def get_realtime_app(conversation: Conversation) -> Optional[RealtimeApp]:
audio_input = app_conf.audio_input
audio_output = app_conf.audio_output
speaker = get_pyaudio_pcm16_speaker(
rate=audio_output.sample_rate,
input_rate=audio_output.input_rate,
output_rate=audio_output.output_rate,
buffer_size=audio_output.buffer_size,
channels=audio_output.channels,
output_device_index=audio_output.output_device_index,
)
listener = get_pyaudio_pcm16_listener(
rate=audio_input.sample_rate,
output_rate=audio_input.output_rate,
interval=audio_input.interval,
channels=audio_input.channels,
chunk_size=audio_input.chunk_size,
Expand Down
4 changes: 3 additions & 1 deletion ghostos/prototypes/streamlitapp/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@ def get_container() -> Container:

class AudioInputConf(BaseModel):
    """Audio capture settings handed to the PyAudio PCM16 listener by the realtime app."""
    # Passed as the listener's `rate`, i.e. the rate the input stream is opened with.
    sample_rate: int = Field(24000)
    # Rate of the audio delivered downstream; captured audio is resampled when it differs.
    output_rate: int = Field(24000)
    # Passed as the listener's `interval`; together with the rate and chunk size it sets
    # how many chunks are read before each batch is sent.
    interval: float = Field(0.5)
    # Channel count passed to the listener.
    channels: int = Field(1)
    # Number of frames read from the stream per call.
    chunk_size: int = Field(1024)
    # presumably forwarded as the PyAudio input device index (None = not specified) — confirm against caller
    input_device_index: Union[int, None] = Field(None)


class AudioOutputConf(BaseModel):
sample_rate: int = Field(24000)
input_rate: int = Field(24000)
output_rate: int = Field(24000)
channels: int = Field(1)
buffer_size: int = Field(1024 * 5)
output_device_index: Union[int, None] = Field(None)
Expand Down
2 changes: 0 additions & 2 deletions ghostos/prototypes/streamlitapp/widgets/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,6 @@ def render_message_item(msg: Message, debug: bool):
render_user_message(msg, debug)
elif msg.role == Role.SYSTEM.value:
render_sys_message(msg, debug)
elif msg.role == Role.FUNCTION.value:
render_func_message(msg, debug)
else:
render_other_message(msg, debug)

Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,14 @@ spherov2 = { version = "^0.12.1", optional = true }
bleak = [
{ version = "^0.22.3", python = ">=3.10,<3.14", optional = true }
]
scipy = { version = "^1.15.1", optional = true }

[tool.poetry.scripts]
ghostos = "ghostos.scripts.cli:main"

[tool.poetry.extras]
realtime = ['pyaudio']
sphero = ["spherov2", "bleak", "pyaudio"]
realtime = ['pyaudio', "scipy"]
sphero = ["spherov2", "bleak"]


[tool.poetry.group.dev.dependencies]
Expand Down