In [None]:
import sys
sys.path.append("../src")
import importlib
from IPython.display import Audio
import pyaudio
import wave
import time
from scipy.signal import butter, filtfilt
import numpy as np
import matplotlib.pyplot as plt


In [None]:
import logic.utils.io_access as io

In [None]:
# force reload imports
#importlib.reload(at)

In [None]:
def find_seed_device_index():
    """Return the PyAudio index of the first matching input device.

    A device matches when it reports at least one input channel and its name
    contains the text "seeed-2mic-voicecard".

    Returns:
        int: index of the first matching device.

    Raises:
        Exception: if no input-capable device name contains the target text.
    """
    target_description = "seeed-2mic-voicecard"
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            # `or` fallbacks keep a missing/None entry from breaking the
            # comparison or the substring test.
            if (device_info.get('maxInputChannels') or 0) <= 0:
                continue
            if target_description in (device_info.get('name') or ''):
                return i
    finally:
        # Release the PyAudio instance on every exit path, including when a
        # device query raises.
        p.terminate()

    raise Exception(
        f"Device with description containing '{target_description}' not found")
# Resolve the capture device once at cell execution; SpeechAudioStreamObservable
# reads this module-level value when it opens its stream.
device_index = find_seed_device_index()

In [None]:
def bytes_to_np(bytes_data):
    """Interpret a raw byte buffer as a 1-D array of float32 samples."""
    samples = np.frombuffer(bytes_data, dtype=np.float32)
    return samples


def np_to_bytes(np_data):
    """Cast the samples to float32 and return their raw byte representation."""
    as_float32 = np_data.astype(np.float32)
    return as_float32.tobytes()


def filter_human_speech_only(np_channel, sample_rate):
    """Band-pass one audio channel to the 300-3400 Hz band.

    The sample scale is chosen from the input dtype:

    * Integer input is treated as int16-scaled PCM. It is divided by 32768,
      filtered, scaled back, clipped to the int16 range and returned as int16.
    * Floating-point input is treated as already normalised to [-1, 1]. It is
      filtered as-is, clipped to [-1, 1] and returned in the input's float
      dtype. Without this branch, float samples would be shrunk by 32768 and
      then truncated to int16, leaving only 0/±1.

    Args:
        np_channel: 1-D array-like of samples for a single channel.
        sample_rate: sampling rate in Hz; used to normalise the band edges.

    Returns:
        numpy.ndarray: the filtered channel (int16 for integer input, the
        input float dtype for floating-point input).
    """
    samples = np.asarray(np_channel)
    is_float_input = np.issubdtype(samples.dtype, np.floating)

    # Bring integer PCM into the [-1, 1) range; float input is used unchanged.
    if is_float_input:
        normalized = samples
    else:
        normalized = samples / 32768.0

    # Filter design parameters (band edges as a fraction of Nyquist)
    order = 5
    nyquist = 0.5 * sample_rate
    low = 300.0 / nyquist
    high = 3400.0 / nyquist

    # Design and apply the filter (forward-backward, via filtfilt)
    b, a = butter(order, [low, high], btype='band')
    y = filtfilt(b, a, normalized)

    if is_float_input:
        return np.clip(y, -1.0, 1.0).astype(samples.dtype)

    # Scale back and clip the values to int16 range before converting
    y = np.clip(y * 32768.0, -32768, 32767)
    return y.astype(np.int16)


def volume_boost(np_channel, volume_ratio):
    """Multiply the samples by `volume_ratio`, then clamp them to [-1, 1]."""
    boosted = np_channel * volume_ratio
    return np.clip(boosted, -1, 1)


def seperate_channels(np_data):
    """Split interleaved samples into (left, right).

    Even positions form the left channel, odd positions the right channel.
    """
    even_positions = slice(None, None, 2)
    odd_positions = slice(1, None, 2)
    return np_data[even_positions], np_data[odd_positions]


def merge_two_channels(np_left_channel, np_right_channel):
    """Average the two channels element-wise into a single channel."""
    channel_sum = np_left_channel + np_right_channel
    return channel_sum / 2


In [None]:
# Notebook-wide capture settings read by the stream and observer classes below.
input_channels_count = 2  # channel count requested when the input stream is opened
output_channels_count = 1  # channel count the observer writes into the WAV header
sample_rate = 44100  # used as the stream rate and as the WAV frame rate
record_seconds = 5  # observer duration and how long the recording cell sleeps
testfile_path = io.get_path('data', 'test.wav')  # WAV destination, resolved by the project's io helper

In [None]:
class SpeechAudioStreamObservable:
    """Callback-driven PyAudio input stream that forwards audio to observers.

    Every captured buffer is read as interleaved float32 samples, split into
    two channels, averaged into a single channel and passed to each registered
    observer's `on_received` as raw float32 bytes.

    The stream is opened in the constructor but does not capture until
    `start()` is called. `stop()` stops and closes the stream and terminates
    the PyAudio instance.
    """

    def __init__(self):
        # Settings come from the notebook-level configuration values.
        self.rate = sample_rate
        self.channels = input_channels_count
        self.format = pyaudio.paFloat32
        self.frames_per_buffer = 1024
        self.input_device_index = device_index
        # Created before the stream is opened so `_callback` always finds it.
        self.observers = []

        self.p = pyaudio.PyAudio()
        try:
            self.stream = self.p.open(
                format=self.format,
                channels=self.channels,
                rate=self.rate,
                input_device_index=self.input_device_index,
                input=True,
                frames_per_buffer=self.frames_per_buffer,
                # PyAudio starts streams on open by default; keep this one
                # idle so capture begins at start(), after observers are
                # registered.
                start=False,
                stream_callback=self._callback
            )
        except Exception:
            # Do not leak the PyAudio instance when the device cannot be opened.
            self.p.terminate()
            raise

    def _callback(self, in_data, frame_count, time_info, status):
        """PyAudio stream callback: downmix one buffer and notify observers.

        Returns:
            tuple: (mono float32 bytes, pyaudio.paContinue).
        """
        # Convert to np & seperate stereo channels
        np_input = bytes_to_np(in_data)
        np_left, np_right = seperate_channels(np_input)

        # Filter to human speech frequencies
        # WIP: We need fix whitenoise first, then test again
        #np_left = filter_human_speech_only(np_left, self.rate)
        #np_right = filter_human_speech_only(np_right, self.rate)

        # Volume boost
        #np_left = volume_boost(np_left, volume_rate)
        #np_right = volume_boost(np_right, volume_rate)

        # Merge channels to single mono channel & back to binary
        np_input = merge_two_channels(np_left, np_right)
        #np_input = np_left
        out_data = np_to_bytes(np_input)

        # Notify observers. Iterate over a snapshot so an add/remove from
        # another thread cannot disturb the loop.
        for observer in list(self.observers):
            observer.on_received(out_data)
        return (out_data, pyaudio.paContinue)

    def start(self):
        """Start capturing; errors are reported with print, not raised."""
        try:
            self.stream.start_stream()
        except Exception as ex:
            print(f"An error occurred while starting the stream: {ex}")

    def stop(self):
        """Stop capturing and release the stream and the PyAudio instance.

        Each release step runs even if an earlier one fails; a failure is
        reported with print, not raised.
        """
        try:
            try:
                self.stream.stop_stream()
            finally:
                try:
                    self.stream.close()
                finally:
                    self.p.terminate()
        except Exception as ex:
            print(f"An error occurred while stopping the stream: {ex}")

    def add_observer(self, observer):
        """Register an object whose `on_received(bytes)` gets each buffer."""
        self.observers.append(observer)

    def remove_observer(self, observer):
        """Unregister an observer; raises ValueError if it was not registered."""
        self.observers.remove(observer)



    
class AudioDataObserver:
    """Collects float32 audio buffers and saves them as a 16-bit PCM WAV file.

    Buffers passed to `on_received` are appended unchanged to an in-memory
    bytearray. Once at least `duration` seconds' worth of frames have been
    received, everything collected so far is converted to int16 and written
    to the configured WAV path. The file is written once.
    """

    # Incoming buffers hold float32 samples: 4 bytes each.
    BYTES_PER_INPUT_SAMPLE = 4
    # The WAV file is written as 16-bit PCM: 2 bytes per sample.
    WAV_SAMPLE_WIDTH = 2

    def __init__(self, duration):
        """
        Args:
            duration: number of seconds to collect before the WAV is written.
        """
        self.filename = testfile_path
        self.rate = sample_rate
        self.channels = output_channels_count
        self.duration = duration
        self.audio_data = bytearray()
        self.frames = int(sample_rate * duration)
        self.frame_count = 0
        # Set once the WAV file has been written, so it is not rewritten on
        # every later buffer.
        self._saved = False

    def on_received(self, audio_data):
        """Append one buffer of float32 bytes; write the WAV when enough arrived."""
        self.audio_data.extend(audio_data)
        bytes_per_frame = self.channels * self.BYTES_PER_INPUT_SAMPLE
        self.frame_count += len(audio_data) // bytes_per_frame
        if self.frame_count >= self.frames and not self._saved:
            self._write_wav()
            self._saved = True

    def _write_wav(self):
        """Convert the collected float32 samples to int16 and write the WAV file."""
        # Copy to bytes first so no numpy view keeps the growing bytearray pinned.
        samples = np.frombuffer(bytes(self.audio_data), dtype=np.float32)
        # Map [-1, 1] floats onto the int16 range (little-endian, as WAV expects).
        pcm = (np.clip(samples, -1.0, 1.0) * 32767.0).astype('<i2')
        with wave.open(self.filename, 'wb') as wf:
            wf.setnchannels(self.channels)
            wf.setsampwidth(self.WAV_SAMPLE_WIDTH)
            wf.setframerate(self.rate)
            wf.writeframes(pcm.tobytes())

    def get_audio_data(self):
        """Return the collected audio as a bytearray of raw float32 samples."""
        return self.audio_data

In [None]:
# Record `record_seconds` seconds of audio
audio_stream_observable = SpeechAudioStreamObservable()
audio_stream_observer = AudioDataObserver(record_seconds)
audio_stream_observable.add_observer(audio_stream_observer)

# Audio arrives in whole buffers, so sleeping for exactly `record_seconds`
# can stop the stream just short of the requested duration. Wait a little
# longer so the final buffer is delivered.
stop_grace_seconds = 0.5

# Start the audio stream
audio_stream_observable.start()
try:
    print(f"Recording audio for {record_seconds} seconds...")
    time.sleep(record_seconds + stop_grace_seconds)
finally:
    print("Done recording.")
    audio_stream_observable.stop()

# Play the recorded audio. The observer returns raw float32 bytes, so decode
# them to samples first; Audio does not read a raw byte buffer as float32 audio.
recording_data = audio_stream_observer.get_audio_data()
Audio(bytes_to_np(recording_data), rate=sample_rate)


In [None]:
np_input = bytes_to_np(recording_data)

# Always de-interleave so np_left / np_right exist for the cells that follow.
np_left, np_right = seperate_channels(np_input)

if output_channels_count == 2:
    # In case of two output channels: play each channel separately
    display(Audio(np_left, rate=sample_rate))
    display(Audio(np_right, rate=sample_rate))
else:
    # In case of one output channel: the samples are not interleaved, so play
    # the recording as-is (the split halves would each hold every other sample)
    display(Audio(np_input, rate=sample_rate))


In [None]:
# Visualize the recording: one subplot per de-interleaved channel
fig, axs = plt.subplots(2, 1, figsize=(10, 6))

channel_plots = (
    (axs[0], np_left, 'Left Channel'),
    (axs[1], np_right, 'Right Channel'),
)
for channel_ax, channel_samples, channel_title in channel_plots:
    channel_ax.plot(channel_samples)
    channel_ax.set_title(channel_title)
    channel_ax.set_xlabel('Sample number')
    channel_ax.set_ylabel('Amplitude')

plt.tight_layout()
plt.show()

In [None]:
# Play np_right on its own: the odd-indexed samples that seperate_channels
# split off from the recording.
Audio(np_right, rate=sample_rate)