Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions packages/lib/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@palabra-ai/translator",
"version": "0.0.5",
"version": "0.0.6",
"private": false,
"main": "dist/lib.js",
"types": "dist/index.d.ts",
Expand All @@ -20,7 +20,8 @@
},
"dependencies": {
"livekit-client": "2.13.0",
"typed-emitter": "^2.1.0"
"typed-emitter": "^2.1.0",
"ts-deepmerge": "^7.0.3"
},
"devDependencies": {
"@eslint/js": "^9.28.0",
Expand Down
8 changes: 6 additions & 2 deletions packages/lib/src/PalabraClient.model.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { SourceLangCode } from '~/utils/source';
import { TargetLangCode } from '~/utils/target';
import { PipelineConfigManager } from './config/PipelineConfigManager';

export interface ClientCredentialsAuth {
clientId: string;
Expand All @@ -10,16 +11,19 @@ export interface UserTokenAuth {
userToken: string;
}

export interface PalabraClientData {

// eslint-disable-next-line @typescript-eslint/no-explicit-any
export interface PalabraClientData<CM extends PipelineConfigManager<any> = PipelineConfigManager<any>> {
auth:ClientCredentialsAuth | UserTokenAuth;
translateFrom: SourceLangCode;
translateTo: TargetLangCode;
handleOriginalTrack: () => Promise<MediaStreamTrack>;
transportType?: 'webrtc'; // TODO: add websocket transport | 'websocket'
transportType?: 'webrtc';
apiBaseUrl?: string;
intent?:string;
audioContext?:AudioContext;
ignoreAudioContext?:boolean;
configManager?: CM;
}

export type TrackSid = string;
30 changes: 17 additions & 13 deletions packages/lib/src/PalabraClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import { supportsAudioContextSetSinkId, VolumeNode } from './utils';
import { ConnectionState } from 'livekit-client';
import { PipelineConfig } from './config';

export class PalabraClient extends PalabraBaseEventEmitter {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export class PalabraClient<CM extends PipelineConfigManager<any> = PipelineConfigManager<unknown>> extends PalabraBaseEventEmitter {
private translateFrom: SourceLangCode;
private translateTo: TargetLangCode;
private auth: PalabraClientData['auth'];
Expand All @@ -30,7 +31,7 @@ export class PalabraClient extends PalabraBaseEventEmitter {
private originalTrackVolumeNode: VolumeNode | null = null;
public transport: PalabraWebRtcTransport | null = null;
private transportType: PalabraClientData['transportType'];
private configManager: PipelineConfigManager;
private configManager: CM;
private audioContext: AudioContext;


Expand All @@ -50,7 +51,7 @@ export class PalabraClient extends PalabraBaseEventEmitter {

private ignoreAudioContext: PalabraClientData['ignoreAudioContext'];

constructor(data: PalabraClientData) {
constructor(data: PalabraClientData<CM>) {
super();

this.auth = data.auth;
Expand All @@ -61,20 +62,21 @@ export class PalabraClient extends PalabraBaseEventEmitter {

this.transportType = data.transportType ?? 'webrtc';

this.initConfig();

this.shouldPlayTranslation = false;

this.ignoreAudioContext = data.ignoreAudioContext ?? false;

if (data.audioContext) {
this.audioContext = data.audioContext;
if (data.configManager) {
this.configManager = data.configManager;
}

this.initConfig();

this.initAudioContext(data.audioContext);
}

public async startTranslation(): Promise<boolean> {
try {
this.initAudioContext();
await this.wrapOriginalTrack();
const transport = await this.createSession();
this.initTransportHandlers();
Expand All @@ -93,7 +95,6 @@ export class PalabraClient extends PalabraBaseEventEmitter {
await this.deleteSession();
this.transport = null;
this.stopPlayback();
this.closeAudioContext();
this.cleanUnusedTracks([]);
this.translationStatus = 'stopped';
this.cleanupOriginalTrack();
Expand Down Expand Up @@ -247,7 +248,7 @@ export class PalabraClient extends PalabraBaseEventEmitter {
return this.transport;
}

public getConfigManager() {
public getConfigManager(): CM {
return this.configManager;
}

Expand Down Expand Up @@ -317,6 +318,7 @@ export class PalabraClient extends PalabraBaseEventEmitter {

/**
 * Full teardown of the client.
 *
 * Awaits stopTranslation(), then calls closeAudioContext(), then calls
 * initConfig() again. The three steps run strictly in that order.
 *
 * NOTE(review): closeAudioContext() is called unconditionally; its body is not
 * visible in this hunk — confirm how it treats an AudioContext that the caller
 * supplied through the constructor.
 * NOTE(review): this change appears to move the initAudioContext() call from
 * startTranslation() into the constructor — confirm that a client can still be
 * started again after cleanup() has run.
 */
public async cleanup() {
await this.stopTranslation();
// The audio context is closed here, as part of cleanup.
this.closeAudioContext();
// Re-runs the config initialisation (source language + translation target).
this.initConfig();
}

Expand All @@ -337,9 +339,9 @@ export class PalabraClient extends PalabraBaseEventEmitter {
});
}

private async initAudioContext() {
private async initAudioContext(audioContext?: AudioContext) {
if (this.audioContext || this.ignoreAudioContext) return;
this.audioContext = new AudioContext();
this.audioContext = audioContext ?? new AudioContext();
}

private closeAudioContext() {
Expand All @@ -348,7 +350,9 @@ export class PalabraClient extends PalabraBaseEventEmitter {
}

private initConfig() {
this.configManager = new PipelineConfigManager(this.transportType);
if (!this.configManager) {
this.configManager = new PipelineConfigManager() as CM;
}

this.configManager.setSourceLanguage(this.translateFrom as SourceLangCode);
this.configManager.addTranslationTarget({ target_language: this.translateTo as TargetLangCode });
Expand Down
83 changes: 83 additions & 0 deletions packages/lib/src/__tests__/PalabraClient.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import { PalabraClient } from '../PalabraClient';
import type { TargetLangCode } from '../utils/target';
import type { SourceLangCode } from '../utils/source';
import { EVENT_START_TRANSLATION, EVENT_STOP_TRANSLATION } from '../transport/PalabraWebRtcTransport.model';
import { PipelineConfigManager } from '~/config';

// Mock MediaStreamTrack for tests
class MockMediaStreamTrack {
Expand Down Expand Up @@ -140,6 +141,88 @@ describe('PalabraClient', () => {
client = new PalabraClient(baseConstructorData);
});

describe('ConfigManager', () => {
  // A manager passed to the constructor is the one handed back by
  // getConfigManager(): writes through either reference are visible via the other.
  it('should set value and get value with extensions', () => {
    const extendedManager = new PipelineConfigManager({ initialExtension: { testProp: 12 } });
    const translator = new PalabraClient({
      ...baseConstructorData,
      configManager: extendedManager,
    });

    // Initial extension value is readable through the client.
    expect(translator.getConfigManager().getValue('testProp')).toBe(12);

    // Write through the client, read through the client.
    translator.getConfigManager().setValue('testProp', 13);
    expect(translator.getConfigManager().getValue('testProp')).toBe(13);

    // Write through the original reference, read through the client.
    extendedManager.setValue('testProp', 14);
    expect(translator.getConfigManager().getValue('testProp')).toBe(14);
  });

  // Without a supplied manager the client exposes a default one.
  it('should set value and get value without extensions', () => {
    const translator = new PalabraClient({ ...baseConstructorData });

    expect(translator.getConfigManager().getValue('preprocessing.enable_vad')).toBe(true);
  });
});

describe('AudioContext', () => {
  // Shape used to reach the private closeAudioContext() method when spying.
  type WithCloseAudioContext = { closeAudioContext: () => void };

  // Stopping a translation must leave the audio context alone.
  it('should not close AudioContext in stopTranslation', async () => {
    const closeSpy = vi
      .spyOn(client as unknown as WithCloseAudioContext, 'closeAudioContext')
      .mockImplementation(() => undefined);

    await client.stopTranslation();

    expect(closeSpy).not.toHaveBeenCalled();
  });

  // Same expectation, observed on a caller-supplied context's own close().
  it('should not call close method on AudioContext in stopTranslation', async() => {
    const providedContext = new AudioContext();
    const nativeCloseSpy = vi
      .spyOn(providedContext, 'close')
      .mockImplementation(() => undefined);
    const clientWithContext = new PalabraClient({
      ...baseConstructorData,
      audioContext: providedContext,
    });

    await clientWithContext.stopTranslation();

    expect(nativeCloseSpy).not.toHaveBeenCalled();
  });

  // cleanup() is the place where the context gets closed.
  it('should close AudioContext in cleanup', async () => {
    const closeSpy = vi
      .spyOn(client as unknown as WithCloseAudioContext, 'closeAudioContext')
      .mockImplementation(() => undefined);

    await client.cleanup();

    expect(closeSpy).toHaveBeenCalled();
  });

  // ignoreAudioContext wins even when a context is passed in.
  it('should ignore AudioContext creation when ignoreAudioContext is true and audioContext is provided', async() => {
    const providedContext = new AudioContext();
    const clientIgnoringContext = new PalabraClient({
      ...baseConstructorData,
      audioContext: providedContext,
      ignoreAudioContext: true,
    });

    // @ts-expect-error: audioContext is private
    expect(clientIgnoringContext.audioContext).toBeUndefined();
  });

  // With no context supplied and ignoring disabled, one exists after construction.
  it('should create a new AudioContext when ignoreAudioContext is false and audioContext is not provided', async() => {
    const clientWithoutContext = new PalabraClient({
      ...baseConstructorData,
      ignoreAudioContext: false,
    });

    // @ts-expect-error: audioContext is private
    expect(clientWithoutContext.audioContext).toBeDefined();
  });

  // A marker property proves the stored context is the very instance passed in.
  it('should use provided AudioContext', async() => {
    const providedContext = new AudioContext();
    // @ts-expect-error field for test
    providedContext.field = 'test';

    const clientWithContext = new PalabraClient({
      ...baseConstructorData,
      audioContext: providedContext,
      ignoreAudioContext: false,
    });

    // @ts-expect-error: audioContext is private
    expect(clientWithContext.audioContext).toBeDefined();
    // @ts-expect-error: field for test
    expect(clientWithContext.audioContext.field).toBe('test');
  });

  // A context is still present once a translation has been started.
  it('should create a new AudioContext when audioContext is not provided', async() => {
    const clientWithoutContext = new PalabraClient(baseConstructorData);

    await clientWithoutContext.startTranslation();

    // @ts-expect-error: audioContext is private
    expect(clientWithoutContext.audioContext).toBeDefined();
  });
});

// Smoke checks: the shared client and its basic accessors are available.
it('should get api client', () => {
  const apiClient = client.getApiClient();

  expect(apiClient).toBeDefined();
});

it('should get config manager', () => {
  const configManager = client.getConfigManager();

  expect(configManager).toBeDefined();
});

it('should create a new PalabraClient', () => {
  const sharedClient = client;

  expect(sharedClient).toBeDefined();
});
Expand Down
59 changes: 2 additions & 57 deletions packages/lib/src/config/PipelineConfig.model.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export interface StreamConfigBase {
}

export interface StreamConfigWebRtc extends StreamConfigBase {
content_type: 'audio';
source?: {
type: 'webrtc';
};
Expand Down Expand Up @@ -43,55 +44,18 @@ export interface PreprocessingConfig {

export interface SentenceSplitterConfig {
enabled: boolean;
splitter_model: 'auto' | string;
advanced: {
min_sentence_characters: number;
min_sentence_seconds: number;
min_split_interval: number;
context_size: number;
segments_after_restart: number;
step_size: number;
max_steps_without_eos: number;
force_end_of_segment: number;
};
}

export interface VerificationConfig {
verification_model: 'auto' | string;
allow_verification_glossaries: boolean;
auto_transcription_correction: boolean;
transcription_correction_style: string | null;
}

export interface TranscriptionAdvancedConfig {
filler_phrases: {
enabled: boolean;
min_transcription_len: number;
min_transcription_time: number;
phrase_chance: number;
};
ignore_languages: SourceLangCode[];
}

export interface TranscriptionConfig {
source_language: SourceLangCode;
detectable_languages: SourceLangCode[];
asr_model: 'auto' | string;
denoise: 'none' | string;
allow_hotwords_glossaries: boolean;
supress_numeral_tokens: boolean;
diarize_speakers: boolean;
priority: 'normal' | string;
min_alignment_score: number;
max_alignment_cer: number;
segment_confirmation_silence_threshold: number;
only_confirm_by_silence: boolean;
batched_inference: boolean;
force_detect_language: boolean;
calculate_voice_loudness: boolean;
sentence_splitter: SentenceSplitterConfig;
verification: VerificationConfig;
advanced: TranscriptionAdvancedConfig;
}

export type AddTranslationArgs = Partial<Omit<TranslationConfig, 'target_language'>> & Pick<TranslationConfig, 'target_language'>;
Expand All @@ -102,33 +66,15 @@ export interface VoiceTimbreDetectionConfig {
low_timbre_voices: string[];
}

export interface SpeechGenerationAdvancedConfig {
f0_variance_factor: number;
energy_variance_factor: number;
with_custom_stress: boolean;
}

export interface SpeechGenerationConfig {
tts_model: 'auto' | string;
voice_cloning: boolean;
voice_cloning_mode: 'static_10' | string;
denoise_voice_samples: boolean;
voice_id: string;
voice_timbre_detection: VoiceTimbreDetectionConfig;
speech_tempo_auto: boolean;
speech_tempo_timings_factor: number;
speech_tempo_adjustment_factor: number;
advanced: SpeechGenerationAdvancedConfig;
}

export interface TranslationConfig {
target_language: TargetLangCode;
allowed_source_languages: SourceLangCode[];
translation_model: 'auto' | 'alpha' | string;
allow_translation_glossaries: boolean;
style: string | null;
translate_partial_transcriptions: boolean;
advanced: Record<string, unknown>;
speech_generation: SpeechGenerationConfig;
}

Expand All @@ -146,8 +92,7 @@ export type AllowedMessageTypes = (string
| 'translated_transcription'
| 'partial_translated_transcription'
| 'partial_transcription'
| 'validated_transcription'
| 'pipeline_timings');
| 'validated_transcription');

export interface PipelineConfig {
input_stream: StreamConfig;
Expand Down
Loading