In [None]:
# """PyAudio Example: Play a wave file."""

# import wave
# import sys

# import pyaudio


# CHUNK = 1024

# # if len(sys.argv) < 2:
# #     print(f'Plays a wave file. Usage: {sys.argv[0]} filename.wav')
# #     sys.exit(-1)

# with wave.open(r'/Users/olaogunade/Downloads/posh & becks - oakland.wav', 'rb') as wf:
#     # Instantiate PyAudio and initialize PortAudio system resources (1)
#     p = pyaudio.PyAudio()

#     # Open stream (2)
#     stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
#                     channels=wf.getnchannels(),
#                     rate=wf.getframerate(),
#                     output=True)

#     # Play samples from the wave file (3)
#     while len(data := wf.readframes(CHUNK)):  # Requires Python 3.8+ for :=
#         stream.write(data)

#     # Close stream (4)
#     stream.close()

#     # Release PortAudio system resources (5)
#     p.terminate()

In [1]:
import pyaudio 
import wave
import numpy as np
import os
from pydub import AudioSegment
from kafka import KafkaProducer
from kafka.errors import KafkaError


def record_audio_as_wav(filename):
    """Record audio from a PyAudio input stream until sustained silence, then save it as WAV.

    Audio is captured as 16-bit stereo at 44.1 kHz in chunks of 1024 frames.
    After every chunk the mean absolute sample value is compared against
    ``silence_threshold``; once more than ``silence_duration`` seconds' worth
    of consecutive chunks fall below it, recording stops. The silence counter
    starts with the very first chunk, so staying quiet right after the prompt
    also ends the recording.

    :param filename: Output path WITHOUT extension; ``".wav"`` is appended.
    :return: ``filename`` unchanged (still without the extension).
    """
    chunk = 1024  # Number of audio frames per read
    sample_format = pyaudio.paInt16  # 16-bit format
    channels = 2  # Stereo
    fs = 44100  # Sample rate (CD quality)
    silence_threshold = 500  # Adjust based on noise levels
    silence_duration = 2  # Stop after 2 seconds of silence

    # Number of consecutive quiet chunks that amounts to `silence_duration` seconds.
    max_silent_chunks = fs / chunk * silence_duration

    frames = []
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive,
        # rather than after terminate() as before.
        sample_width = p.get_sample_size(sample_format)

        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            silent_chunks = 0
            print("Recording... Speak now!")

            while True:
                # Overflows are ignored so a slow iteration drops audio instead of raising.
                data = stream.read(chunk, exception_on_overflow=False)
                frames.append(data)

                # Widen to int32 before abs(): abs(-32768) wraps around in int16.
                audio_data = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                volume = np.abs(audio_data).mean()  # Average volume level of this chunk

                if volume < silence_threshold:
                    silent_chunks += 1
                else:
                    silent_chunks = 0  # Sound detected: restart the silence count

                if silent_chunks > max_silent_chunks:
                    print("Silence detected. Stopping recording.")
                    break
        finally:
            # Always release the stream, even if reading fails part-way through.
            try:
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        # Release PortAudio resources no matter how recording ended.
        p.terminate()

    # Save the audio; the context manager closes the file even if writing fails.
    with wave.open(filename + ".wav", 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(fs)
        wf.writeframes(b''.join(frames))

    print("Recording saved as", filename+".wav")

    return filename


def wav_to_mp3(filename):
    """Convert ``<filename>.wav`` to ``<filename>.mp3`` and delete the WAV file.

    Conversion is done for storage reasons: MP3 uses lossy compression, which
    reduces file size by up to 90% without significant quality loss. WAV files
    are also not always supported on mobile devices, browsers, or online
    platforms, whilst MP3 is widely supported and works on almost all of them.

    Note: this sets ``AudioSegment.converter`` (a class-level attribute) to a
    fixed ffmpeg location on every call.

    :param filename: Path WITHOUT extension of the WAV file to convert.
    :return: Path of the MP3 file that was written (``filename + ".mp3"``).
    :raises Exception: Any error from reading the WAV or exporting the MP3 now
        propagates. Previously the error text was returned in place of the
        path, which callers then tried to open as a file.
    """
    AudioSegment.converter = "/usr/local/bin/ffmpeg"  # Adjust path if needed

    wav_path = filename + ".wav"
    mp3_path = filename + ".mp3"

    sound = AudioSegment.from_wav(wav_path)

    # export() hands back the output file object; close it so the handle is not leaked.
    exported = sound.export(mp3_path, format="mp3")
    exported.close()
    print("Recording saved as", mp3_path)

    # Cleaning up the WAV is best-effort: the MP3 already exists, so a failed
    # delete must not hide the successful conversion from the caller.
    try:
        os.remove(wav_path)
        print("Recording deleted: ", wav_path)
    except OSError as e:
        print("Could not delete", wav_path, "-", e)

    return mp3_path
    


def send_audio_stream_to_kafka(file_path, topic='audio_events', chunk_size=65536):
    """
    Streams an MP3 file in chunks to a Kafka topic instead of loading it into memory.

    After the last chunk, a final message with the value ``b"end"`` is sent to
    the same topic. Kafka errors are printed rather than raised; errors opening
    or reading the file propagate to the caller.

    :param file_path: Path to the MP3 file.
    :param topic: Kafka topic to send the data to.
    :param chunk_size: Size of each chunk in bytes (default 64 KB).
    """
    # Bound before the try block so `finally` can tell whether the producer was
    # ever created; the constructor itself can raise (e.g. no broker reachable).
    producer = None
    try:
        producer = KafkaProducer(
            bootstrap_servers='localhost:9092'
        )

        with open(file_path, 'rb') as audio_file:
            chunk_count = 0
            while chunk := audio_file.read(chunk_size):
                producer.send(topic, value=chunk)
                chunk_count += 1
                print(f"Sent chunk {chunk_count} to Kafka")

        print(f"Finished streaming {file_path} to Kafka.")
        producer.send(topic, b"end")

    except KafkaError as e:
        print(f"Kafka Error: {e}")

    finally:
        if producer is not None:
            # Make sure the producer is closed even if flushing fails.
            try:
                producer.flush()
            finally:
                producer.close()

def main(filename):
    """Run the full pipeline: record a WAV, convert it to MP3, stream it to Kafka.

    :param filename: Base path (without extension) used for the recording.
    """
    print('Wait for the onscreen prompt before you start speaking. Please record in a noise-free environment...')

    # Step 1: capture microphone audio into "<base>.wav" using the pyaudio library.
    recorded_base = record_audio_as_wav(filename)

    # Step 2: re-encode as MP3, which takes up less storage, is sufficiently
    # high quality and is compatible with a host of devices.
    mp3_file = wav_to_mp3(recorded_base)

    # Step 3: publish the MP3 bytes to the 'audio_events' topic in 64 KB chunks.
    send_audio_stream_to_kafka(mp3_file, topic='audio_events', chunk_size=65536)






In [1]:
import pyaudio 
import time
import wave
import numpy as np
from kafka import KafkaProducer
from kafka.errors import KafkaError
    


def send_audio_stream_to_kafka(snippet, topic='audio_events'):
    """
    Sends a single audio snippet to a Kafka topic.

    A new producer connected to ``localhost:9092`` is created on every call and
    is flushed and closed before returning. Kafka errors are printed rather
    than raised.

    :param snippet: Message value to publish (the bytes of one audio chunk, or
        the ``b"end"`` marker sent when recording stops).
    :param topic: Kafka topic to send the data to.
    """
    # Bound before the try block so `finally` can tell whether the producer was
    # ever created; the constructor itself can raise (e.g. no broker reachable).
    producer = None
    try:
        producer = KafkaProducer(
            bootstrap_servers='localhost:9092'
        )
        producer.send(topic, value=snippet)
        print("Sent voice snippet to Kafka")

    except KafkaError as e:
        print(f"Kafka Error: {e}")

    finally:
        if producer is not None:
            # Make sure the producer is closed even if flushing fails.
            try:
                producer.flush()
            finally:
                producer.close()


def record_audio_as_wav():
    """Stream microphone audio to Kafka chunk by chunk until sustained silence.

    Despite the name, nothing is written to disk: every chunk read from the
    input stream (16-bit stereo, 44.1 kHz, 1024 frames per chunk) is passed
    straight to ``send_audio_stream_to_kafka``. Once more than
    ``silence_duration`` seconds' worth of consecutive chunks have a mean
    absolute sample value below ``silence_threshold``, a final ``b"end"``
    message is sent and recording stops.

    :return: The string ``"Complete..."``.
    """
    chunk = 1024  # Number of audio frames per read
    sample_format = pyaudio.paInt16  # 16-bit format
    channels = 2  # Stereo
    fs = 44100  # Sample rate (CD quality)
    silence_threshold = 500  # Adjust based on noise levels
    silence_duration = 2  # Stop after 2 seconds of silence

    # Number of consecutive quiet chunks that amounts to `silence_duration` seconds.
    max_silent_chunks = fs / chunk * silence_duration

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=fs,
                        frames_per_buffer=chunk,
                        input=True)
        try:
            silent_chunks = 0
            time.sleep(2)
            print("Recording... Speak now!")

            while True:
                # Overflows are ignored so a slow iteration drops audio instead of raising.
                data = stream.read(chunk, exception_on_overflow=False)
                # NOTE(review): the helper builds, flushes and closes a producer on
                # every call, i.e. once per chunk here - consider reusing one producer.
                send_audio_stream_to_kafka(data, topic='audio_events')

                # Widen to int32 before abs(): abs(-32768) wraps around in int16.
                audio_data = np.frombuffer(data, dtype=np.int16).astype(np.int32)
                volume = np.abs(audio_data).mean()  # Average volume level of this chunk

                if volume < silence_threshold:
                    silent_chunks += 1
                else:
                    silent_chunks = 0  # Sound detected: restart the silence count

                if silent_chunks > max_silent_chunks:
                    print("Silence detected. Stopping recording.")
                    # Send the b"end" marker after the last audio chunk.
                    send_audio_stream_to_kafka(b"end", topic='audio_events')
                    break
        finally:
            # Always release the stream, even if reading or sending fails part-way.
            try:
                stream.stop_stream()
            finally:
                stream.close()
    finally:
        # Release PortAudio resources no matter how recording ended.
        p.terminate()

    return "Complete..."




