From 6b46184ff1ec1c4dd1b2216190a7039b593db3ae Mon Sep 17 00:00:00 2001
From: Nihhaar0002 <msnrk2006@gmail.com>
Date: Sun, 23 Nov 2025 12:54:07 +0530
Subject: [PATCH 1/4] Add nano-banana-pro alias for Gemini 3 Pro image generation

---
 src/puter-js/src/modules/AI.js | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js
index c708ea892e..c8b1c5d487 100644
--- a/src/puter-js/src/modules/AI.js
+++ b/src/puter-js/src/modules/AI.js
@@ -804,12 +804,24 @@ class AI{
             options = args[0]
         }
 
-        let AIService = "openai-image-generation"
+        let AIService = "openai-image-generation";
+
+        // Backwards compatibility: "nano-banana" is the legacy alias for the Gemini 2.5 Flash image model
         if (options.model === "nano-banana") 
             options.model = "gemini-2.5-flash-image-preview";
 
-        if (options.model === "gemini-2.5-flash-image-preview")
+        // New alias for Gemini 3 Pro image
+        if (options.model === "nano-banana-pro")
+            options.model = "gemini-3-pro-image-preview";
+
+        // Use Gemini image generation driver for both Gemini image models
+        if (
+            options.model === "gemini-2.5-flash-image-preview" ||
+            options.model === "gemini-3-pro-image-preview"
+        ) {
             AIService = "gemini-image-generation";
+        }
+
         // Call the original chat.complete method
         return await utils.make_driver_method(['prompt'], 'puter-image-generation', AIService, 'generate', {
             responseType: 'blob',

From e4482b6a70d2b3bfb494c68d6be0336cda26da38 Mon Sep 17 00:00:00 2001
From: Nihhaar0002 <msnrk2006@gmail.com>
Date: Sun, 23 Nov 2025 12:59:00 +0530
Subject: [PATCH 2/4] Add gemini-3-pro-image-preview to GeminiImageGenerationService

---
 .../puterai/GeminiImageGenerationService.js    | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/backend/src/modules/puterai/GeminiImageGenerationService.js b/src/backend/src/modules/puterai/GeminiImageGenerationService.js
index a4d55d4bf8..a7097a333a 100644
--- a/src/backend/src/modules/puterai/GeminiImageGenerationService.js
+++ b/src/backend/src/modules/puterai/GeminiImageGenerationService.js
@@ -37,11 +37,15 @@ class GeminiImageGenerationService extends BaseService {
     static MODULES = {
     };
 
-    _construct() {
+    _construct() {
         this.models_ = {
             'gemini-2.5-flash-image-preview': {
                 '1024x1024': 0.039,
             },
+            'gemini-3-pro-image-preview': {
+                // TODO: placeholder price copied from 2.5 flash; confirm the real rate before relying on it
+                '1024x1024': 0.039,
+            },
         };
     }
 
@@ -192,9 +196,10 @@ class GeminiImageGenerationService extends BaseService {
         }
 
         const response = await this.genAI.models.generateContent({
-            model: 'gemini-2.5-flash-image-preview',
-            contents: contents,
+            model,
+            contents,
         });
+
         // Metering usage tracking
         // Gemini usage: always 1 image, resolution, cost, model
         this.meteringService.incrementUsage(actor, usageType, 1);
@@ -222,11 +227,16 @@ class GeminiImageGenerationService extends BaseService {
      * @private
      */
     _getValidRatios(model) {
-        if ( model === 'gemini-2.5-flash-image-preview' ) {
+        if (
+            model === 'gemini-2.5-flash-image-preview' ||
+            model === 'gemini-3-pro-image-preview'
+        ) {
             return [this.constructor.RATIO_SQUARE];
         }
+        return [];
     }
 
+
     _validate_ratio(ratio, model) {
         const validRatios = this._getValidRatios(model);
         return validRatios.includes(ratio);

From 331d25d00d524378254d5ee52a30fc1f04b78e7f Mon Sep 17 00:00:00 2001
From: Nihhaar0002 <msnrk2006@gmail.com>
Date: Mon, 24 Nov 2025 12:46:11 +0530
Subject: [PATCH 3/4] Add streaming usage totals to AI chunks

---
 .../src/modules/puterai/ClaudeService.js      | 55 ++++++++++++++--
 .../src/modules/puterai/lib/OpenAIUtil.js     | 65 +++++++++++++++++--
 2 files changed, 111 insertions(+), 9 deletions(-)

diff --git a/src/backend/src/modules/puterai/ClaudeService.js b/src/backend/src/modules/puterai/ClaudeService.js
index c8b83f52b5..877462c244 100644
--- a/src/backend/src/modules/puterai/ClaudeService.js
+++ b/src/backend/src/modules/puterai/ClaudeService.js
@@ -247,6 +247,50 @@ class ClaudeService extends BaseService {
                     const init_chat_stream = async ({ chatStream }) => {
                         const completion = await anthropic.messages.stream(sdk_params);
                         const usageSum = {};
+                        // Running token totals for this stream; updated once per event in
+                        // the loop below and snapshotted into every block written out.
+                        const runningUsage = {
+                            input_tokens: 0,
+                            output_tokens: 0,
+                            total_tokens: 0,
+                        };
+
+                        // Copy the totals so each payload carries a snapshot rather than a
+                        // reference to the mutable accumulator.
+                        const getUsage = () => ({ ...runningUsage });
+
+                        // Writes one newline-terminated JSON payload to the block's chat stream.
+                        const writeLine = (block, payload) => {
+                            block.chatStream.stream.write(JSON.stringify(payload) + '\n');
+                        };
+
+                        // Replaces addText so every text delta is emitted together with the
+                        // usage totals known at that moment:
+                        // { type: 'text', text, usage: { input_tokens, output_tokens, total_tokens } }
+                        const enhanceTextBlock = (block) => {
+                            block.addText = (text) => {
+                                writeLine(block, { type: 'text', text, usage: getUsage() });
+                            };
+                            return block;
+                        };
+
+                        // Replaces end so the finished tool call is emitted with the usage
+                        // totals. addPartialJSON is deliberately not wrapped: a pass-through
+                        // wrapper adds nothing.
+                        const enhanceToolBlock = (block) => {
+                            block.end = () => {
+                                // A blank argument buffer is serialized as an empty object.
+                                const buffer = (block.buffer || '').trim() === '' ? '{}' : block.buffer;
+                                writeLine(block, {
+                                    ...block.contentBlock,
+                                    input: JSON.parse(buffer),
+                                    ...(block.contentBlock?.text ? {} : { text: '' }),
+                                    type: 'tool_use',
+                                    usage: getUsage(),
+                                });
+                            };
+                            return block;
+                        };
 
                         let message, contentBlock;
                         for await ( const event of completion ) {
@@ -257,6 +301,9 @@ class ClaudeService extends BaseService {
                                 if ( ! usageSum[key] ) usageSum[key] = 0;
                                 usageSum[key] += meteredData[key];
                             });
+                            runningUsage.input_tokens += meteredData.input_tokens || 0;
+                            runningUsage.output_tokens += meteredData.output_tokens || 0;
+                            runningUsage.total_tokens = runningUsage.input_tokens + runningUsage.output_tokens;
 
                             if ( event.type === 'message_start' ) {
                                 message = chatStream.message();
@@ -270,16 +317,16 @@ class ClaudeService extends BaseService {
 
                             if ( event.type === 'content_block_start' ) {
                                 if ( event.content_block.type === 'tool_use' ) {
-                                    contentBlock = message.contentBlock({
+                                    contentBlock = enhanceToolBlock(message.contentBlock({
                                         type: event.content_block.type,
                                         id: event.content_block.id,
                                         name: event.content_block.name,
-                                    });
+                                    }));
                                     continue;
                                 }
-                                contentBlock = message.contentBlock({
+                                contentBlock = enhanceTextBlock(message.contentBlock({
                                     type: event.content_block.type,
-                                });
+                                }));
                                 continue;
                             }
 
diff --git a/src/backend/src/modules/puterai/lib/OpenAIUtil.js b/src/backend/src/modules/puterai/lib/OpenAIUtil.js
index 045d04c840..de2a68dc68 100644
--- a/src/backend/src/modules/puterai/lib/OpenAIUtil.js
+++ b/src/backend/src/modules/puterai/lib/OpenAIUtil.js
@@ -91,6 +91,8 @@ const create_chat_stream_handler = ({
     completion,
     usage_calculator,
 }) => async ({ chatStream }) => {
+    // Streaming chunks now include a running usage object ({ prompt_tokens, completion_tokens, total_tokens })
+    // so downstream consumers can surface live token counts without changing existing fields.
     deviations = Object.assign({
         // affected by: Groq
         index_usage_from_stream_chunk: chunk => chunk.usage,
@@ -106,6 +108,50 @@ const create_chat_stream_handler = ({
     const tool_call_blocks = [];
 
     let last_usage = null;
+    // Running token totals for this stream; refreshed from chunk usage in the loop
+    // below and snapshotted into every block written out.
+    const usage_totals = {
+        prompt_tokens: 0,
+        completion_tokens: 0,
+        total_tokens: 0,
+    };
+
+    // Copy the totals so each payload carries a snapshot, not the mutable accumulator.
+    const getUsage = () => ({ ...usage_totals });
+
+    // Writes one newline-terminated JSON payload to the block's chat stream.
+    const writeLine = (block, payload) => {
+        block.chatStream.stream.write(JSON.stringify(payload) + '\n');
+    };
+
+    // Replaces addText so every text delta is emitted together with the usage
+    // totals known at that moment.
+    const enhanceTextBlock = (block) => {
+        block.addText = (text) => {
+            writeLine(block, { type: 'text', text, usage: getUsage() });
+        };
+        return block;
+    };
+
+    // Replaces end so the finished tool call is emitted with the usage totals.
+    // addPartialJSON is deliberately not wrapped: a pass-through wrapper adds nothing.
+    const enhanceToolBlock = (block) => {
+        block.end = () => {
+            // A blank argument buffer is serialized as an empty object.
+            // NOTE(review): presumably block.buffer holds the accumulated partial JSON — confirm.
+            const buffer = (block.buffer || '').trim() === '' ? '{}' : block.buffer;
+            writeLine(block, {
+                ...block.contentBlock,
+                input: JSON.parse(buffer),
+                ...(block.contentBlock?.text ? {} : { text: '' }),
+                type: 'tool_use',
+                usage: getUsage(),
+            });
+        };
+        return block;
+    };
+
+    textblock = enhanceTextBlock(textblock);
     for await ( let chunk of completion ) {
         chunk = deviations.chunk_but_like_actually(chunk);
         if ( process.env.DEBUG ) {
@@ -115,7 +161,15 @@ const create_chat_stream_handler = ({
                             delta && JSON.stringify(delta));
         }
         const chunk_usage = deviations.index_usage_from_stream_chunk(chunk);
-        if ( chunk_usage ) last_usage = chunk_usage;
+        if ( chunk_usage ) {
+            // Stream usage is a cumulative figure for the whole request, not a per-chunk
+            // delta, so take the latest snapshot; summing over-counts multi-report providers.
+            usage_totals.prompt_tokens = chunk_usage.prompt_tokens ?? usage_totals.prompt_tokens;
+            usage_totals.completion_tokens = chunk_usage.completion_tokens ?? usage_totals.completion_tokens;
+            usage_totals.total_tokens = chunk_usage.total_tokens
+                ?? (usage_totals.prompt_tokens + usage_totals.completion_tokens);
+            last_usage = chunk_usage;
+        }
         if ( chunk.choices.length < 1 ) continue;
 
         const choice = chunk.choices[0];
@@ -124,7 +178,7 @@ const create_chat_stream_handler = ({
             if ( mode === 'tool' ) {
                 toolblock.end();
                 mode = 'text';
-                textblock = message.contentBlock({ type: 'text' });
+                textblock = enhanceTextBlock(message.contentBlock({ type: 'text' }));
             }
             textblock.addText(choice.delta.content);
             continue;
@@ -138,11 +192,11 @@ const create_chat_stream_handler = ({
             }
             for ( const tool_call of tool_calls ) {
                 if ( ! tool_call_blocks[tool_call.index] ) {
-                    toolblock = message.contentBlock({
+                    toolblock = enhanceToolBlock(message.contentBlock({
                         type: 'tool_use',
                         id: tool_call.id,
                         name: tool_call.function.name,
-                    });
+                    }));
                     tool_call_blocks[tool_call.index] = toolblock;
                 } else {
                     toolblock = tool_call_blocks[tool_call.index];
@@ -152,8 +206,9 @@ const create_chat_stream_handler = ({
         }
     }
 
+    const final_usage = last_usage ?? getUsage();
     // TODO DS: this is a bit too abstracted... this is basically just doing the metering now
-    usage_calculator({ usage: last_usage });
+    usage_calculator({ usage: final_usage });
 
     if ( mode === 'text' ) textblock.end();
     if ( mode === 'tool' ) toolblock.end();

From eb900be8846545bf4952e9ce5704ce061eee4ad8 Mon Sep 17 00:00:00 2001
From: Nihhaar Saini <msnrk2006@hmail.com>
Date: Mon, 24 Nov 2025 13:55:45 +0530
Subject: [PATCH 4/4] Tidy OpenAI streamed chat completion request (no functional change)

---
 .../OpenAICompletionService.mjs               | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
index bd845f37e7..db22d43d71 100644
--- a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
+++ b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs
@@ -253,17 +253,18 @@ export class OpenAICompletionService {
         messages = await OpenAIUtil.process_input_messages(messages);
 
         const completion = await this.#openAi.chat.completions.create({
-            user: user_private_uid,
-            messages: messages,
-            model: model,
-            ...(tools ? { tools } : {}),
-            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
-            ...(temperature ? { temperature } : {}),
-            stream,
-            ...(stream ? {
-                stream_options: { include_usage: true },
-            } : {}),
-        });
+            user: user_private_uid,
+            messages: messages,
+            model: model,
+            ...(tools ? { tools } : {}),
+            ...(max_tokens ? { max_completion_tokens: max_tokens } : {}),
+            ...(temperature ? { temperature } : {}),
+            stream,
+            // When streaming, request a final chunk carrying token usage for the whole request
+            ...(stream ? {
+                stream_options: { include_usage: true },
+            } : {}),
+        });
         // TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
         // for now I'm overloading this usage calculator to handle the future promise resolution...
         return OpenAIUtil.handle_completion_output({