In [None]:
import requests
import json
import base64
import numpy as np
import soundfile as sf
from IPython.display import display, Audio

def audio_to_base64(audio_path):
    """Return the contents of the file at ``audio_path`` as base64 text.

    The file is opened in binary mode, read in full, base64-encoded, and the
    encoded bytes are decoded to a ``str``.
    """
    with open(audio_path, "rb") as source:
        raw_bytes = source.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')

def remove_chunks_wav_header(chunks):
    """Join audio chunks into one byte string, stripping per-chunk WAV headers.

    Args:
        chunks: Iterable of ``bytes`` objects.

    Returns:
        A single ``bytes`` object. A chunk that starts with the ``RIFF`` magic
        has its first 44 bytes removed; any other chunk is kept unchanged, so
        header-less data is no longer silently discarded.
    """
    # Fixed header length assumed by this script; a WAV file with extra
    # sub-chunks before the sample data would need real header parsing.
    header_size = 44
    return b"".join(
        chunk[header_size:] if chunk[:4] == b'RIFF' else chunk
        for chunk in chunks
    )

# Service endpoint; "host:port" has to be replaced with the real server address.
url = "http://host:port/audio"

# Sample rate passed to the audio player and to the WAV writer further down.
samplerate = 22050

# The input audio file is sent to the service as base64 text.
audio_path = "audio.wav"
audio_base64 = audio_to_base64(audio_path)

# JSON body of the POST request.
params = {
    "prompt": audio_base64,
    "prompt_type": "audio",
    "temperature": 0.2,
    "top_p": 0.8,
    "max_new_tokens": 2000,
}

audio_chunks = []
# Remains None unless the server sends an audio message with state "end",
# so the save step below can tell whether there is anything to write.
audio_complete = None

# The response is consumed line by line; each non-empty line is parsed as one
# JSON message. The context manager releases the connection afterwards.
with requests.post(url, json=params, stream=True) as response:
    # Fail with a clear HTTP error instead of trying to parse an error body.
    response.raise_for_status()

    for line in response.iter_lines():
        if not line:
            # Skip blank lines in the stream.
            continue

        line_data = json.loads(line.decode('utf-8'))
        if line_data["type"] == "text":
            text = line_data["content"]
            print("Text received:", text)

        elif line_data["type"] == "audio":
            if line_data["state"] == "stream":
                # Incremental audio chunk: play it right away and keep it so
                # the full stream can be reassembled later.
                audio_chunk = base64.b64decode(line_data["content"])
                print(f"Received audio chunk of size {len(audio_chunk)} bytes.")

                display(Audio(audio_chunk, rate=samplerate))

                audio_chunks.append(audio_chunk)
            elif line_data["state"] == "end":
                # Final message carrying the complete audio in one piece.
                audio_complete = base64.b64decode(line_data["content"])
                print(f"Received complete audio of size {len(audio_complete)} bytes.")


# *****stream*****
# Rebuild one audio file from the streamed chunks.
processed_audio_data = remove_chunks_wav_header(audio_chunks)
# The payload is interpreted as 16-bit PCM samples
# (assumed to match the server's output format - TODO confirm).
audio_array = np.frombuffer(processed_audio_data, dtype=np.int16)

sf.write("stream_complete_audio.wav", audio_array, samplerate)
print("Audio saved as stream_complete_audio.wav")


# *****complete audio*****
# The complete audio is written to disk exactly as received.
if audio_complete is not None:
    with open("complete_audio.wav", "wb") as f:
        f.write(audio_complete)
    print("Complete audio saved as complete_audio.wav")
else:
    # Previously this path raised NameError because audio_complete was unbound.
    print("No complete audio was received; complete_audio.wav was not written.")