diff --git a/src/lib/common/ProfileDropdown.svelte b/src/lib/common/ProfileDropdown.svelte index 41d9c802..00fa0681 100644 --- a/src/lib/common/ProfileDropdown.svelte +++ b/src/lib/common/ProfileDropdown.svelte @@ -42,7 +42,8 @@ > handleAvatarLoad(e)} /> diff --git a/src/lib/common/audio-player/AudioPlayer.svelte b/src/lib/common/audio-player/AudioPlayer.svelte index 6de707bb..5e5f730e 100644 --- a/src/lib/common/audio-player/AudioPlayer.svelte +++ b/src/lib/common/audio-player/AudioPlayer.svelte @@ -242,7 +242,7 @@ if (loop === "none") { if (order === "list") { if ($playList.playingIndex < audios.length - 1) { - const promise = buildNextSongPromise(nextIdx); + const promise = buildNextAudioPromise(nextIdx); promise.then(() => play()); } else { $playList.playingIndex = ($playList.playingIndex + 1) % audios.length; @@ -257,13 +257,13 @@ } else { targetIdx = randomIdx; } - const promise = buildNextSongPromise(targetIdx); + const promise = buildNextAudioPromise(targetIdx); promise.then(() => play()); } } else if (loop === "one") { player.currentTime = 0; } else if (loop === "all") { - const promise = buildNextSongPromise(nextIdx); + const promise = buildNextAudioPromise(nextIdx); promise.then(() => play()); } }; @@ -271,7 +271,7 @@ /** * @param {number} idx */ - function buildNextSongPromise(idx) { + function buildNextAudioPromise(idx) { return new Promise((/** @type {any} */ resolve) => { $playList.playingIndex = idx; player.currentTime = 0; @@ -287,8 +287,8 @@ /** * @param {number} idx */ - function switchSong(idx) { - const promise = buildNextSongPromise(idx); + function switchAudio(idx) { + const promise = buildNextAudioPromise(idx); if (autoPlayNextOnClick) { promise.then(() => { play(); @@ -490,7 +490,7 @@ {#each $audioList as song, idx} -
  • switchSong(idx) }> +
  • switchAudio(idx) }> {#if idx === $playList.playingIndex} {/if} diff --git a/src/lib/helpers/realtime/pcmProcessor.js b/src/lib/helpers/realtime/pcmProcessor.js new file mode 100644 index 00000000..948973e8 --- /dev/null +++ b/src/lib/helpers/realtime/pcmProcessor.js @@ -0,0 +1,61 @@ + +export const AudioRecordingWorklet = ` +class AudioProcessingWorklet extends AudioWorkletProcessor { + + // send and clear buffer every 2048 samples, + // which at 16khz is about 8 times a second, + // or at 24khz is about 11 times a second + buffer = new Int16Array(2048); + + // current write index + bufferWriteIndex = 0; + + speaking = false; + threshold = 0.1; + + constructor() { + super(); + } + + /** + * @param inputs Float32Array[][] [input#][channel#][sample#] so to access first inputs 1st channel inputs[0][0] + * @param outputs Float32Array[][] + */ + process(inputs) { + if (inputs[0].length) { + const channel0 = inputs[0][0]; + this.speaking = channel0.some(sample => Math.abs(sample) >= this.threshold); + this.processChunk(channel0); + } + return true; + } + + sendAndClearBuffer(){ + this.port.postMessage({ + event: "chunk", + data: { + speaking: this.speaking, + int16arrayBuffer: this.buffer.slice(0, this.bufferWriteIndex).buffer, + }, + }); + this.bufferWriteIndex = 0; + } + + processChunk(float32Array) { + const l = float32Array.length; + + for (let i = 0; i < l; i++) { + // convert float32 -1 to 1 to int16 -32768 to 32767 + const int16Value = float32Array[i] * 32768; + this.buffer[this.bufferWriteIndex++] = int16Value; + if (this.bufferWriteIndex >= this.buffer.length) { + this.sendAndClearBuffer(); + } + } + + if (this.bufferWriteIndex >= this.buffer.length) { + this.sendAndClearBuffer(); + } + } +} +`; \ No newline at end of file diff --git a/src/lib/scss/custom/pages/_agent.scss b/src/lib/scss/custom/pages/_agent.scss index a16d0ecd..2cddf4b5 100644 --- a/src/lib/scss/custom/pages/_agent.scss +++ b/src/lib/scss/custom/pages/_agent.scss @@ -100,7 +100,7 @@ .agent-prompt-header { background-color: white; - padding: 20px; + padding: 15px; } .agent-prompt-body { diff --git a/src/lib/services/llm-realtime-service.js b/src/lib/services/llm-realtime-service.js index e90f2e6a..f5121575 100644 --- a/src/lib/services/llm-realtime-service.js +++ b/src/lib/services/llm-realtime-service.js @@ -1,7 +1,6 @@ import { endpoints } from '$lib/services/api-endpoints.js'; import { replaceUrl } from '$lib/helpers/http'; import axios from 'axios'; -import { json } from '@sveltejs/kit'; export const llmRealtime = { /** @type {RTCPeerConnection} */ diff --git a/src/lib/services/realtime-chat-service.js b/src/lib/services/realtime-chat-service.js new file mode 100644 index 00000000..df26040f --- /dev/null +++ b/src/lib/services/realtime-chat-service.js @@ -0,0 +1,237 @@ +import { PUBLIC_SERVICE_URL } from "$env/static/public"; +import { AudioRecordingWorklet } from "$lib/helpers/realtime/pcmProcessor"; + +// @ts-ignore +const AudioContext = window.AudioContext || window.webkitAudioContext; + +const sampleRate = 24000; + +/** @type {AudioContext} */ +let audioCtx = new AudioContext(); + +/** @type {any[]} */ +let audioQueue = []; + +/** @type {boolean} */ +let isPlaying = false; + +/** @type {WebSocket | null} */ +let socket = null; + +/** @type {MediaStream | null} */ +let mediaStream = null; + +/** @type {AudioWorkletNode | null} */ +let workletNode = null; + +/** @type {MediaStreamAudioSourceNode | null} */ +let micSource = null; + +export const realtimeChat = { + + /** + * @param {string} agentId + * @param {string} conversationId + */ + start(agentId, conversationId) { + reset(); + const wsUrl = buildWebsocketUrl(); + socket = new WebSocket(`${wsUrl}/chat/stream/${agentId}/${conversationId}`); + + socket.onopen = async () => { + console.log("WebSocket connected"); + + socket?.send(JSON.stringify({ + event: "start" + })); + + mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true }); + audioCtx = new AudioContext({ sampleRate: sampleRate }); + + const workletName = "audio-recorder-worklet"; + const src = createWorkletFromSrc(workletName, AudioRecordingWorklet); + await audioCtx.audioWorklet.addModule(src); + + workletNode = new AudioWorkletNode(audioCtx, workletName); + micSource = audioCtx.createMediaStreamSource(mediaStream); + micSource.connect(workletNode); + + workletNode.port.onmessage = event => { + const arrayBuffer = event.data.data.int16arrayBuffer; + if (arrayBuffer && socket?.readyState === WebSocket.OPEN) { + if (event.data.data.speaking) { + reset(); + } + const arrayBufferString = arrayBufferToBase64(arrayBuffer); + socket.send(JSON.stringify({ + event: 'media', + body: { + payload: arrayBufferString + } + })); + } + }; + }; + + socket.onmessage = (/** @type {MessageEvent} */ e) => { + try { + const json = JSON.parse(e.data); + if (json.event === 'media' && !!json.media.payload) { + const data = json.media.payload; + enqueueAudioChunk(data); + } + } catch { + // console.error('Error when parsing message'); + } + }; + + socket.onclose = () => { + console.log("Websocket closed"); + }; + + socket.onerror = (/** @type {Event} */ e) => { + console.error('WebSocket error', e); + }; + }, + + stop() { + reset(); + + if (mediaStream) { + mediaStream.getTracks().forEach(t => t.stop()); + mediaStream = null; + } + + if (workletNode) { + micSource?.disconnect(workletNode); + workletNode.port.close(); + workletNode.disconnect(); + micSource = null; + workletNode = null; + } + + if (socket?.readyState === WebSocket.OPEN) { + socket.send(JSON.stringify({ + event: 'disconnect' + })); + socket.close(); + socket = null; + } + } +}; + + +function buildWebsocketUrl() { + let url = ''; + const host = PUBLIC_SERVICE_URL.split('://'); + + if (PUBLIC_SERVICE_URL.startsWith('https')) { + url = `wss:${host[1]}`; + } else if (PUBLIC_SERVICE_URL.startsWith('http')) { + url = `ws:${host[1]}`; + } + + return url; +} + +function reset() { + isPlaying = false; + audioQueue = []; +} + +/** + * @param {string} base64Audio + */ +function enqueueAudioChunk(base64Audio) { + const arrayBuffer = base64ToArrayBuffer(base64Audio); + const float32Data = convert16BitPCMToFloat32(arrayBuffer); + + const audioBuffer = audioCtx.createBuffer(1, float32Data.length, sampleRate); + audioBuffer.getChannelData(0).set(float32Data); + audioQueue.push(audioBuffer); + + if (!isPlaying) { + playNext(); + } +} + +function playNext() { + if (audioQueue.length === 0) { + isPlaying = false; + return; + } + + isPlaying = true; + const buffer = audioQueue.shift(); + + const source = audioCtx.createBufferSource(); + source.buffer = buffer; + source.connect(audioCtx.destination); + source.onended = () => { + playNext(); + }; + source.start(); +} + + +/** + * @param {string} workletName + * @param {string} workletSrc + */ +function createWorkletFromSrc(workletName, workletSrc) { + const script = new Blob( + [`registerProcessor("${workletName}", ${workletSrc})`], + { + type: "application/javascript", + }, + ); + + return URL.createObjectURL(script); +}; + + +/** + * @param {ArrayBuffer} buffer + */ +function arrayBufferToBase64(buffer) { + var binary = ""; + var bytes = new Uint8Array(buffer); + var len = bytes.byteLength; + for (var i = 0; i < len; i++) { + binary += String.fromCharCode(bytes[i]); + } + return btoa(binary); +}; + +/** + * @param {string} base64 + */ +function base64ToArrayBuffer(base64) { + const binaryStr = atob(base64); + const len = binaryStr.length; + const bytes = new Uint8Array(len); + + for (let i = 0; i < len; i++) { + bytes[i] = binaryStr.charCodeAt(i); + } + return bytes.buffer; +}; + +/** + * @param {ArrayBuffer} buffer + */ +function convert16BitPCMToFloat32(buffer) { + const chunk = new Uint8Array(buffer); + const output = new Float32Array(chunk.length / 2); + const dataView = new DataView(chunk.buffer); + + for (let i = 0; i< chunk.length / 2; i++) { + try { + const int16 = dataView.getInt16(i * 2, true); + output[i] = int16 / 32768; + } catch (e) { + console.error(e); + } + } + return output; +}; \ No newline at end of file diff --git a/src/routes/chat/[agentId]/[conversationId]/chat-box.svelte b/src/routes/chat/[agentId]/[conversationId]/chat-box.svelte index ced8e358..25f99160 100644 --- a/src/routes/chat/[agentId]/[conversationId]/chat-box.svelte +++ b/src/routes/chat/[agentId]/[conversationId]/chat-box.svelte @@ -46,7 +46,6 @@ } from '$env/static/public'; import { BOT_SENDERS, LEARNER_ID, TRAINING_MODE, USER_SENDERS, ADMIN_ROLES, IMAGE_DATA_PREFIX } from '$lib/helpers/constants'; import { signalr } from '$lib/services/signalr-service.js'; - import { llmRealtime } from '$lib/services/llm-realtime-service.js'; import { newConversation } from '$lib/services/conversation-service'; import DialogModal from '$lib/common/DialogModal.svelte'; import HeadTitle from '$lib/common/HeadTitle.svelte'; @@ -71,6 +70,7 @@ import PersistLog from './persist-log/persist-log.svelte'; import InstantLog from './instant-log/instant-log.svelte'; import LocalStorageManager from '$lib/helpers/utils/storage-manager'; + import { realtimeChat } from '$lib/services/realtime-chat-service'; const options = { @@ -673,13 +673,11 @@ if (disableSpeech) return; if (!isListening) { - llmRealtime.start(params.agentId, (/** @type {any} */ message) => { - console.log(message); - }); + realtimeChat.start(params.agentId, params.conversationId); isListening = true; microphoneIcon = "microphone"; } else { - llmRealtime.stop(); + realtimeChat.stop(); isListening = false; microphoneIcon = "microphone-off"; } diff --git a/src/routes/page/agent/[agentId]/+page.svelte b/src/routes/page/agent/[agentId]/+page.svelte index ddbf636c..0ca18306 100644 --- a/src/routes/page/agent/[agentId]/+page.svelte +++ b/src/routes/page/agent/[agentId]/+page.svelte @@ -165,15 +165,14 @@ // Templates function formatOriginalTemplates() { - const obj = agentTemplateCmp?.fetchOriginalTemplates(); + const templates = agentTemplateCmp?.fetchOriginalTemplates(); return { - templates: obj.templates || [] + templates: templates || [] } } function fetchTemplates() { - const obj = agentTemplateCmp?.fetchTemplates(); - agent.templates = obj.templates || []; + agent.templates = agentTemplateCmp?.fetchTemplates();; } function refreshTemplates() { diff --git a/src/routes/page/agent/[agentId]/agent-components/agent-template.svelte b/src/routes/page/agent/[agentId]/agent-components/agent-template.svelte index c192d707..70255c2d 100644 --- a/src/routes/page/agent/[agentId]/agent-components/agent-template.svelte +++ b/src/routes/page/agent/[agentId]/agent-components/agent-template.svelte @@ -15,12 +15,11 @@ export let handleAgentChange = () => {}; export const fetchOriginalTemplates = () => { - return { - templates: inner_templates?.map(x => ({ + const templates = inner_templates?.map(x => ({ name: x.name, content: x.content - })) || [] - }; + })) || []; + return templates; }; export const fetchTemplates = () => { @@ -36,9 +35,7 @@ } } - return { - templates: prompts - }; + return prompts; } export const refresh = () => init(); @@ -180,7 +177,7 @@ value={selected_template.content} rows={15} on:input={(e) => changePrompt(e)} - placeholder="Enter your template" + placeholder="Enter your content" /> {/if} diff --git a/src/routes/page/user/me/+page.svelte b/src/routes/page/user/me/+page.svelte index 89d0e02d..77720fa3 100644 --- a/src/routes/page/user/me/+page.svelte +++ b/src/routes/page/user/me/+page.svelte @@ -74,7 +74,8 @@ on:drop={e => handleFileDrop(e)} >