In [2]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
import websockets
import json
import base64
import numpy as np
import soundfile as sf

async def test_tts():
    uri = "wss://2bfd8123d8a7.ngrok-free.app"  # Your server must be running first
    async with websockets.connect(uri) as websocket:
        # Send text chunks
        messages = [
            {"text": "Hello, this is a test of ", "flush": False},
            {"text": "the TTS websocket server.", "flush": True}  # flush=True triggers audio generation
        ]

        for msg in messages:
            await websocket.send(json.dumps(msg))
            print(f"Sent: {msg}")

        # Receive audio responses
        chunk_count = 0
        while True:
            try:
                response = await websocket.recv()
                data = json.loads(response)

                audio_b64 = data["audio"]
                audio_bytes = base64.b64decode(audio_b64)
                audio_np = np.frombuffer(audio_bytes, dtype=np.int16)

                # Save each chunk separately
                filename = f"output_chunk_{chunk_count}.wav"
                sf.write(filename, audio_np, 44100)
                print(f"Saved audio chunk: {filename}")

                # Print alignment info
                alignment = data["alignment"]
                print("Chars:", "".join(alignment["chars"]))
                print("Start times (ms):", alignment["char_start_times_ms"])
                print("Durations (ms):", alignment["char_durations_ms"])
                print("-" * 40)

                chunk_count += 1

            except websockets.ConnectionClosed:
                print("Connection closed")
                break

# Run inside notebook-friendly loop
loop = asyncio.get_event_loop()
loop.run_until_complete(test_tts())


Sent: {'text': 'Hello, this is a test of ', 'flush': False}
Sent: {'text': 'the TTS websocket server.', 'flush': True}
Saved audio chunk: output_chunk_0.wav
Chars: Hello, this is a
Start times (ms): [0, 80, 160, 240, 320, 400, 480, 560, 640, 720, 800, 880, 960, 1040, 1120, 1200]
Durations (ms): [80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80]
----------------------------------------
Saved audio chunk: output_chunk_1.wav
Chars: test of the TTS
Start times (ms): [0, 87, 175, 262, 350, 437, 525, 612, 700, 787, 875, 962, 1050, 1137, 1225]
Durations (ms): [87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87, 87]
----------------------------------------
Saved audio chunk: output_chunk_2.wav
Chars: websocket server.
Start times (ms): [0, 77, 154, 232, 309, 386, 463, 540, 617, 695, 772, 849, 926, 1003, 1080, 1158, 1235]
Durations (ms): [77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77]
----------------------------------------


KeyboardInterrupt: 

Connection closed


In [1]:
import nest_asyncio
nest_asyncio.apply()
import asyncio
import websockets
import json
import base64
import numpy as np
import soundfile as sf
import time

async def test_tts():
    uri = "ws://localhost:8111"  # Your server must be running first
    async with websockets.connect(uri) as websocket:
        # Send multiple sentences as separate chunks
        messages = [
            {"text": "Hello, this is a test of ", "flush": True},
            {"text": "the TTS websocket server.", "flush": True},
            {"text": "We are sending five sentences.", "flush": True},
            {"text": "Each sentence will be processed individually.", "flush": True},
            {"text": "This allows us to measure latency per chunk.", "flush": True},
        ]

        for msg in messages:
            send_time = time.time()  # Timestamp before sending
            await websocket.send(json.dumps(msg))
            print(f"Sent: {msg['text']}")

            # Receive audio responses for this chunk
            try:
                response = await websocket.recv()
                recv_time = time.time()  # Timestamp when first response arrives
                latency_ms = (recv_time - send_time) * 1000
                print(f"Latency for this chunk: {latency_ms:.1f} ms")

                data = json.loads(response)
                audio_b64 = data["audio"]
                audio_bytes = base64.b64decode(audio_b64)
                audio_np = np.frombuffer(audio_bytes, dtype=np.int16)

                # Save each chunk separately
                chunk_index = messages.index(msg)
                filename = f"output_chunk_{chunk_index}.wav"
                sf.write(filename, audio_np, 44100)
                print(f"Saved audio chunk: {filename}")

                # Print alignment info
                alignment = data["alignment"]
                print("Chars:", "".join(alignment["chars"]))
                print("Start times (ms):", alignment["char_start_times_ms"])
                print("Durations (ms):", alignment["char_durations_ms"])
                print("-" * 40)

            except websockets.ConnectionClosed:
                print("Connection closed")
                break

# Run inside notebook-friendly loop
loop = asyncio.get_event_loop()
loop.run_until_complete(test_tts())


Sent: Hello, this is a test of 
Latency for this chunk: 1482.3 ms
Saved audio chunk: output_chunk_0.wav
Chars: Hello, this is a
Start times (ms): [0, 78, 156, 234, 312, 390, 468, 546, 624, 702, 780, 858, 936, 1014, 1092, 1170]
Durations (ms): [78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, 78]
----------------------------------------
Sent: the TTS websocket server.
Latency for this chunk: 371.6 ms
Saved audio chunk: output_chunk_1.wav
Chars: test of
Start times (ms): [0, 105, 210, 315, 421, 526, 631]
Durations (ms): [105, 105, 105, 105, 105, 105, 105]
----------------------------------------
Sent: We are sending five sentences.
Latency for this chunk: 802.3 ms
Saved audio chunk: output_chunk_2.wav
Chars: the TTS websocket server.
Start times (ms): [0, 83, 166, 250, 333, 416, 499, 582, 666, 749, 832, 915, 998, 1082, 1165, 1248, 1331, 1414, 1498, 1581, 1664, 1747, 1830, 1914, 1997]
Durations (ms): [83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 83, 8