diff --git a/llm-server/src/llm-provider.ts b/llm-server/src/llm-provider.ts
index b3a033b3..bf2d2069 100644
--- a/llm-server/src/llm-provider.ts
+++ b/llm-server/src/llm-provider.ts
@@ -104,13 +104,27 @@ export class LLMProvider {
     return model;
   }
 
-  async chat(input: MessageInput): Promise<string> {
+  async chat(input: MessageInput, timeoutMs: number): Promise<string> {
     try {
       const model = this.getModelInstance(input.model);
-      const completion = await model.chat(input.messages);
-      return completion.choices[0].message.content || '';
+
+      // Set a timeout dynamically based on the provided value
+      const timeoutPromise = new Promise((_, reject) =>
+        setTimeout(
+          () => reject(new Error('Chat request timed out')),
+          timeoutMs,
+        ),
+      );
+
+      // Race between the actual model call and the timeout
+      const completion = await Promise.race([
+        model.chat(input.messages),
+        timeoutPromise,
+      ]);
+
+      return (completion as any).choices[0].message.content || '';
     } catch (error) {
-      this.logger.error('Error in chat:', error);
+      this.logger.error(`Error in chat (Timeout: ${timeoutMs}ms):`, error);
       throw error;
     }
   }
diff --git a/llm-server/src/main.ts b/llm-server/src/main.ts
index 289caf53..a16701c4 100644
--- a/llm-server/src/main.ts
+++ b/llm-server/src/main.ts
@@ -137,7 +137,8 @@ export class App {
       }
     } else {
       // Handle regular response
-      const response = await this.llmProvider.chat(input);
+      // TODO: make the timeout configurable; it currently defaults to 200 seconds.
+      const response = await this.llmProvider.chat(input, 200000);
       res.json({
         model: input.model,
         choices: [
diff --git a/llm-server/src/model/remote-model-instance.ts b/llm-server/src/model/remote-model-instance.ts
index f75876a6..def1e589 100644
--- a/llm-server/src/model/remote-model-instance.ts
+++ b/llm-server/src/model/remote-model-instance.ts
@@ -30,7 +30,6 @@ export class RemoteOpenAIModelEngine implements ModelInstance {
       interval: 1000, // per 1000ms (1 second)
       carryoverConcurrencyCount: true, // Carry over pending tasks
       // FIXME: hack way to set up timeout
-      timeout: 120000, // 120 second timeout to accommodate longer streams
     });
 
     // Log queue events for monitoring
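The change in `chat()` implements the timeout by racing the model call against a timer via `Promise.race`, which is why the winner has to be cast with `as any` and why the timer keeps running after the model responds. A minimal sketch of an alternative, assuming only standard TypeScript (the `withTimeout` helper is hypothetical and not part of this PR), keeps the result typed and clears the timer once the race settles:

```ts
// Hypothetical helper: race a promise against a timer while preserving its type.
// The timeout branch is Promise<never>, so the race resolves to T and no cast is needed.
function withTimeout<T>(promise: Promise<T>, timeoutMs: number): Promise<T> {
  let timer!: ReturnType<typeof setTimeout>;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`Request timed out after ${timeoutMs}ms`)),
      timeoutMs,
    );
  });
  // Clear the pending timer once either side of the race settles.
  return Promise.race([promise, timeout]).finally(() => clearTimeout(timer));
}

// Usage sketch inside LLMProvider.chat(), assuming the same MessageInput shape:
//   const completion = await withTimeout(model.chat(input.messages), timeoutMs);
//   return completion.choices[0].message.content || '';
```

Note that, as in the PR itself, losing the race only rejects the caller's promise; the underlying `model.chat` request is not cancelled and continues in the background.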