In [None]:
import asyncio
import nest_asyncio
import websockets
import json
import soundfile
import time

# NOTE(review): presumably required so that asyncio.run() below works when an
# event loop is already running (e.g. inside a Jupyter kernel) — confirm.
nest_asyncio.apply()

# WebSocket endpoint of the streaming ASR pipeline that stream_asr() connects to.
URI = "ws://localhost:7890/asr/streaming/pipeline"

async def send_audio_chunks(websocket, speech, chunk_frames: int = 9600,
                            chunk_interval: float = 0.6):
    """Stream ``speech`` over ``websocket`` in fixed-size chunks, then send an end marker.

    Each chunk is sent as a JSON text message with the keys ``chunk`` (list of
    samples), ``status`` (``"processing"``) and ``is_final`` (true only for the
    last audio chunk).  After all audio has been sent, a final message
    ``{"status": "end", "chunk": []}`` is sent.

    Args:
        websocket: Object exposing an awaitable ``send(str)`` method.
        speech: Array of samples supporting ``len()``, slicing and ``.tolist()``
            (e.g. a NumPy array as returned by ``soundfile.read``).
        chunk_frames: Number of samples per chunk; must be positive.
        chunk_interval: Seconds to wait before sending each chunk.  The
            defaults (9600 frames every 0.6 s) presumably emulate real-time
            16 kHz audio — confirm against the server's expectations.

    Raises:
        ValueError: If ``chunk_frames`` is not positive.
    """
    if chunk_frames <= 0:
        # Fail loudly instead of dividing by zero or silently sending no audio.
        raise ValueError(
            f"chunk_frames must be a positive integer, got {chunk_frames}"
        )

    # Ceil-division: number of chunks needed to cover every sample (0 if empty).
    total_chunk_num = (len(speech) + chunk_frames - 1) // chunk_frames
    for i in range(total_chunk_num):
        # Pace the stream so chunks arrive at a controlled rate.
        await asyncio.sleep(chunk_interval)
        speech_chunk = speech[i * chunk_frames:(i + 1) * chunk_frames]
        data = {
            "chunk": speech_chunk.tolist(),
            "status": "processing",
            "is_final": i == total_chunk_num - 1,
        }
        await websocket.send(json.dumps(data))
        print(f"Sent: success {str(speech_chunk[:5])}...")  # Print first 5 samples for brevity

    # Tell the server that no more audio will follow.
    end_message = {"status": "end", "chunk": []}
    await websocket.send(json.dumps(end_message))
    print(f"Sent: {end_message}")

async def receive_responses(websocket):
    """Collect every JSON message from ``websocket`` until the connection closes.

    Each received message is decoded with ``json.loads`` and stored as a record
    holding the arrival timestamp, the decoded payload, and its ``text`` /
    ``is_partial`` fields (defaulting to ``""`` / ``False`` when absent).

    Returns:
        The list of records, in arrival order.
    """
    collected = []
    while True:
        # Only recv() can signal the closed connection, so guard just that call.
        try:
            raw_message = await websocket.recv()
        except websockets.ConnectionClosed:
            print("WebSocket connection closed.")
            return collected

        response = json.loads(raw_message)
        record = {
            "action": "receive",
            "timestamp": time.time(),
            "data": response,
            "text": response.get("text", ""),
            "is_partial": response.get("is_partial", False),
        }
        collected.append(record)
        print(f"Received: {response}")

async def stream_asr(speech, chunk_frames: int = 9600, uri=None):
    """Send ``speech`` to the ASR WebSocket endpoint and return what it replies.

    Opens one WebSocket connection and runs ``send_audio_chunks`` and
    ``receive_responses`` concurrently on it, so responses are collected
    while audio is still being sent.

    Args:
        speech: Audio samples, forwarded unchanged to ``send_audio_chunks``.
        chunk_frames: Number of samples per chunk sent to the server.
        uri: WebSocket endpoint to connect to; when ``None`` (the default)
            the module-level ``URI`` is used.

    Returns:
        The list of records produced by ``receive_responses``.
    """
    target_uri = URI if uri is None else uri
    async with websockets.connect(target_uri) as websocket:
        # gather() returns results in argument order: (sender, receiver).
        _, responses = await asyncio.gather(
            send_audio_chunks(websocket, speech, chunk_frames),
            receive_responses(websocket),
        )
    return responses

# Script entry point: stream one recorded WAV file through the ASR service
# and print the collected responses.
if __name__ == "__main__":

    # NOTE(review): machine-specific absolute path — adjust for your environment.
    wav_file = '/data1/wanghuan/audio_services/cache/recording.wav'
    # soundfile.read returns a pair; the second element is discarded here.
    speech, _ = soundfile.read(wav_file)
    # Uses the default chunk size of stream_asr (9600 frames per chunk).
    res = asyncio.run(stream_asr(speech))
    print('Got result', res)

Sent: success [0. 0. 0. 0. 0.]...
Sent: success [0. 0. 0. 0. 0.]...
Sent: success [ 0.16900635  0.31655884 -0.48492432 -0.35516357  0.07397461]...
Sent: success [ 0.00326538  0.0012207  -0.00137329 -0.00039673  0.0005188 ]...
Received: {'conversation_id': '8ccada5c-0df7-4cb9-9432-d53e42c0aecf', 'chunk_id': 1, 'start_time': 0, 'end_time': 600, 'text': '', 'is_partial': True}
Received: {'conversation_id': '8ccada5c-0df7-4cb9-9432-d53e42c0aecf', 'chunk_id': 2, 'start_time': 600, 'end_time': 1200, 'text': '', 'is_partial': True}
Received: {'conversation_id': '8ccada5c-0df7-4cb9-9432-d53e42c0aecf', 'chunk_id': 3, 'start_time': 1200, 'end_time': 1800, 'text': '你好你', 'is_partial': True}
Sent: success [ 0.00030518  0.00024414 -0.00018311 -0.00021362  0.        ]...
Received: {'conversation_id': '8ccada5c-0df7-4cb9-9432-d53e42c0aecf', 'chunk_id': 4, 'start_time': 1800, 'end_time': 2400, 'text': '好', 'is_partial': True}
Sent: success [-6.10351562e-05 -9.15527344e-04 -1.15966797e-03  1.83105469e-