In [36]:
# OpenAI API test (non-streaming)
from openai import OpenAI
from IPython.display import Audio, display

# Initialize the client
client = OpenAI(
    api_key="dummy-key",
    base_url="http://localhost:8000/v1"
)

# Generate audio
response = client.audio.speech.create(
    model="tts-1",
    voice="echo",
    input= """
    以下是一些中英文对照的话语。 
    1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 
    2. 你好呀，最近怎么样？Hello there, how have you been recently? 
    3. 别放弃，你能做到的！Don't give up, you can do it! 
    4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.
    """,
    response_format="wav"
)

# Get audio binary data
audio_data = response.content  # response.content is of type bytes

# Display and play in the Notebook
display(Audio(audio_data, autoplay=False))

In [37]:
# Test using the requests module, streaming mode
import requests
from IPython.display import Audio, display
import io

payload = {
    "model": "tts-1",
    "input": """
    以下是一些中英文对照的话语。 
    1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 
    2. 你好呀，最近怎么样？Hello there, how have you been recently? 
    3. 别放弃，你能做到的！Don't give up, you can do it! 
    4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.
    """,
    "voice": "echo",
    "response_format": "wav", 
    "stream": True
}

try:
    response = requests.post("http://localhost:8000/v1/audio/speech", json=payload, stream=True)
    response.raise_for_status()  # Check the status code
    
    audio_buffer = io.BytesIO()
    for chunk in response.iter_content(chunk_size=8192):
        if chunk:
            audio_buffer.write(chunk)
    
    audio_buffer.seek(0)
    display(Audio(audio_buffer.getvalue(), autoplay=False))
    print("Audio has been loaded into the Notebook and can be played manually")
except requests.exceptions.RequestException as e:
    print(f"Request failed: {str(e)}")
    if hasattr(e.response, "text"):
        print(f"Error details: {e.response.text}")

Audio has been loaded into the Notebook and can be played manually


In [38]:
import subprocess

# Use pipeline to implement streaming playback, WAV format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "wav", "stream": true}\' '
    '-s | mpv --no-video -'
)
subprocess.run(cmd, shell=True, check=True)

[file] Reading from stdin...
[ffmpeg/demuxer] wav: Ignoring maximum wav data size, file may be invalid
● Audio  --aid=1  (pcm_s16le 1ch 24000 Hz 384 kbps)
AO: [pipewire] 24000Hz mono 1ch s16
A: 00:00:00 / 00:00:04 (0%) Cache: 4.0s/212KB
A: 00:00:00 / 00:00:04 (0%) Cache: 3.9s/208KB
A: 00:00:00 / 00:00:04 (1%) Cache: 3.8s/203KB
A: 00:00:00 / 00:00:04 (3%) Cache: 3.8s/199KB
A: 00:00:00 / 00:00:04 (5%) Cache: 3.7s/194KB
A: 00:00:00 / 00:00:04 (7%) Cache: 3.6s/190KB
A: 00:00:00 / 00:00:05 (7%) Cache: 5.0s/267KB
A: 00:00:00 / 00:00:05 (8%) Cache: 4.9s/262KB
A: 00:00:00 / 00:00:05 (9%) Cache: 4.9s/258KB
A: 00:00:00 / 00:00:05 (11%) Cache: 4.8s/253KB
A: 00:00:00 / 00:00:05 (12%) Cache: 4.7s/249KB
A: 00:00:00 / 00:00:05 (14%) Cache: 4.6s/244KB
A: 00:00:00 / 00:00:05 (15%) Cache: 4.5s/240KB
A: 00:00:00 / 00:00:07 (13%) Cache: 5.9s/312KB
A: 00:00:01 / 00:00:07 (14%) Cache: 5.8s/308KB
A: 00:00:01 / 00:00:07 (16%) Cache: 5.7s/303KB
A: 00:00:01 / 00:00:07 (17%) Cache: 5.6s/299KB
A: 00:00:01 / 00:00

CompletedProcess(args='curl -X POST "http://localhost:8000/v1/audio/speech" -H "Content-Type: application/json" -d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "wav", "stream": true}\' -s | mpv --no-video -', returncode=0)

In [39]:
import subprocess

# Use pipeline to implement streaming playback, MP3 format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "mp3", "stream": true}\' '
    '-s | mpv --no-video -'
)
subprocess.run(cmd, shell=True, check=True)

[file] Reading from stdin...
[ffmpeg/demuxer] mp3: invalid concatenated file detected - using bitrate for duration
● Audio  --aid=1  (mp3 2ch 48000 Hz 128 kbps)
AO: [pipewire] 48000Hz stereo 2ch floatp
A: 00:00:00 / 00:00:04 (0%) Cache: 3.8s/142KB
A: 00:00:00 / 00:00:04 (0%) Cache: 3.7s/140KB
A: 00:00:00 / 00:00:04 (0%) Cache: 3.7s/137KB
A: 00:00:00 / 00:00:04 (0%) Cache: 3.6s/136KB
A: 00:00:00 / 00:00:04 (1%) Cache: 3.6s/134KB
[ffmpeg/audio] mp3float: Header missing
A: 00:00:00 / 00:00:04 (1%) Cache: 3.6s/134KB
Error decoding audio.
A: 00:00:00 / 00:00:04 (1%) Cache: 3.6s/134KB
A: 00:00:00 / 00:00:04 (3%) Cache: 3.5s/131KB
A: 00:00:00 / 00:00:04 (5%) Cache: 3.4s/128KB
A: 00:00:00 / 00:00:04 (7%) Cache: 3.4s/126KB
A: 00:00:00 / 00:00:04 (8%) Cache: 3.3s/123KB
A: 00:00:00 / 00:00:04 (10%) Cache: 3.2s/121KB
A: 00:00:00 / 00:00:04 (12%) Cache: 3.2s/118KB
A: 00:00:00 / 00:00:04 (13%) Cache: 3.1s/116KB
A: 00:00:00 / 00:00:04 (14%) Cache: 3.0s/114KB
A: 00:00:00 / 00:00:04 (16%) Cache: 3.0s/1

CompletedProcess(args='curl -X POST "http://localhost:8000/v1/audio/speech" -H "Content-Type: application/json" -d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "mp3", "stream": true}\' -s | mpv --no-video -', returncode=0)

In [40]:
import subprocess

# Use pipeline to implement streaming playback, OGG format
cmd = (
    'curl -X POST "http://localhost:8000/v1/audio/speech" '
    '-H "Content-Type: application/json" '
    '-d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "ogg", "stream": true}\' '
    '-s | mpv --no-video -'
)
subprocess.run(cmd, shell=True, check=True)

[file] Reading from stdin...
● Audio  --aid=1  (vorbis 2ch 48000 Hz 112 kbps)
[lavf] Linearizing discontinuity: 0.000000 -> 0.486667
[lavf] Linearizing discontinuity: 0.486667 -> 0.973333
[lavf] Linearizing discontinuity: 0.973333 -> 1.457333
[lavf] Linearizing discontinuity: 1.457333 -> 1.944000
[lavf] Linearizing discontinuity: 1.944000 -> 2.441333
[lavf] Linearizing discontinuity: 2.441333 -> 2.922667
[lavf] Linearizing discontinuity: 2.922667 -> 3.409333
AO: [pipewire] 48000Hz stereo 2ch floatp
A: 00:00:00 / 00:00:03 (0%) Cache: 3.2s/175KB
A: 00:00:00 / 00:00:03 (0%) Cache: 3.1s/174KB
A: 00:00:00 / 00:00:03 (0%) Cache: 3.1s/172KB
A: 00:00:00 / 00:00:03 (0%) Cache: 3.0s/171KB
A: 00:00:00 / 00:00:03 (1%) Cache: 2.9s/168KB
A: 00:00:00 / 00:00:03 (2%) Cache: 2.9s/160KB
A: 00:00:00 / 00:00:03 (4%) Cache: 2.9s/159KB
A: 00:00:00 / 00:00:03 (6%) Cache: 2.8s/156KB
A: 00:00:00 / 00:00:03 (8%) Cache: 2.8s/154KB
A: 00:00:00 / 00:00:03 (9%) Cache: 2.7s/152KB
A: 00:00:00 / 00:00:03 (11%) Cache: 

CompletedProcess(args='curl -X POST "http://localhost:8000/v1/audio/speech" -H "Content-Type: application/json" -d \'{"model": "tts-1", "input": "以下是一些中英文对照的话语。 1. 早上好！希望你有美好的一天。Good morning! Wish you a wonderful day. 2. 你好呀，最近怎么样？Hello there, how have you been recently? 3. 别放弃，你能做到的！Dont give up, you can do it! 4. 继续努力，你的付出会有回报的。Keep up the good work, your efforts will pay off.", "voice": "echo", "response_format": "ogg", "stream": true}\' -s | mpv --no-video -', returncode=0)