diff --git a/llm-server/src/llm-provider.ts b/llm-server/src/llm-provider.ts
index b3a033b3..bf2d2069 100644
--- a/llm-server/src/llm-provider.ts
+++ b/llm-server/src/llm-provider.ts
@@ -104,13 +104,27 @@ export class LLMProvider {
     return model;
   }
 
-  async chat(input: MessageInput): Promise<string> {
+  async chat(input: MessageInput, timeoutMs: number): Promise<string> {
     try {
       const model = this.getModelInstance(input.model);
-      const completion = await model.chat(input.messages);
-      return completion.choices[0].message.content || '';
+
+      // Set a timeout dynamically based on the provided value
+      const timeoutPromise = new Promise((_, reject) =>
+        setTimeout(
+          () => reject(new Error('Chat request timed out')),
+          timeoutMs,
+        ),
+      );
+
+      // Race between the actual model call and the timeout
+      const completion = await Promise.race([
+        model.chat(input.messages),
+        timeoutPromise,
+      ]);
+
+      return (completion as any).choices[0].message.content || '';
     } catch (error) {
-      this.logger.error('Error in chat:', error);
+      this.logger.error(`Error in chat (Timeout: ${timeoutMs}ms):`, error);
       throw error;
     }
   }
diff --git a/llm-server/src/main.ts b/llm-server/src/main.ts
index 289caf53..a16701c4 100644
--- a/llm-server/src/main.ts
+++ b/llm-server/src/main.ts
@@ -137,7 +137,8 @@ export class App {
       }
     } else {
       // Handle regular response
-      const response = await this.llmProvider.chat(input);
+      // TODO: make the timeout configurable; it currently defaults to 200 seconds.
+      const response = await this.llmProvider.chat(input, 200000);
       res.json({
         model: input.model,
         choices: [
diff --git a/llm-server/src/model/remote-model-instance.ts b/llm-server/src/model/remote-model-instance.ts
index f75876a6..def1e589 100644
--- a/llm-server/src/model/remote-model-instance.ts
+++ b/llm-server/src/model/remote-model-instance.ts
@@ -30,7 +30,6 @@ export class RemoteOpenAIModelEngine implements ModelInstance {
       interval: 1000, // per 1000ms (1 second)
       carryoverConcurrencyCount: true, // Carry over pending tasks
       // FIXME: hack way to set up timeout
-      timeout: 120000, // 120 second timeout to accommodate longer streams
     });
 
     // Log queue events for monitoring
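The change in `chat()` implements the timeout by racing the model call against a timer via `Promise.race`, which is why the winner has to be cast with `as any` and why the timer keeps running after the model responds. A minimal sketch of an alternative, assuming only standard TypeScript (the `withTimeout` helper is hypothetical and not part of this PR), keeps the result typed and clears the timer once the race settles:

```ts
// Hypothetical helper: race a promise against a timer while preserving its type.
// The timeout branch is Promise<never>, so the race resolves to T and no cast is needed.
function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer!: ReturnType<typeof setTimeout>;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Request timed out after ${timeoutMs}ms`)),
      timeoutMs,
    );
  });
  // Clear the pending timer once either side of the race settles.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

// Usage sketch inside LLMProvider.chat(), assuming the same MessageInput shape:
//   const completion = await withTimeout(model.chat(input.messages), timeoutMs);
//   return completion.choices[0].message.content || '';
```

Note that, as in the PR itself, losing the race only rejects the caller's promise; the underlying `model.chat` request is not cancelled and continues in the background.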