From 6b46184ff1ec1c4dd1b2216190a7039b593db3ae Mon Sep 17 00:00:00 2001 From: Nihhaar0002 Date: Sun, 23 Nov 2025 12:54:07 +0530 Subject: [PATCH 1/4] Update AI.js --- src/puter-js/src/modules/AI.js | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/puter-js/src/modules/AI.js b/src/puter-js/src/modules/AI.js index c708ea892e..c8b1c5d487 100644 --- a/src/puter-js/src/modules/AI.js +++ b/src/puter-js/src/modules/AI.js @@ -804,12 +804,24 @@ class AI{ options = args[0] } - let AIService = "openai-image-generation" + let AIService = "openai-image-generation"; + + // Backwards compat: old alias if (options.model === "nano-banana") options.model = "gemini-2.5-flash-image-preview"; - if (options.model === "gemini-2.5-flash-image-preview") + // New alias for Gemini 3 Pro image + if (options.model === "nano-banana-pro") + options.model = "gemini-3-pro-image-preview"; + + // Use Gemini image generation driver for both Gemini image models + if ( + options.model === "gemini-2.5-flash-image-preview" || + options.model === "gemini-3-pro-image-preview" + ) { AIService = "gemini-image-generation"; + } + // Call the original chat.complete method return await utils.make_driver_method(['prompt'], 'puter-image-generation', AIService, 'generate', { responseType: 'blob', From e4482b6a70d2b3bfb494c68d6be0336cda26da38 Mon Sep 17 00:00:00 2001 From: Nihhaar0002 Date: Sun, 23 Nov 2025 12:59:00 +0530 Subject: [PATCH 2/4] Update GeminiImageGenerationService.js --- .../puterai/GeminiImageGenerationService.js | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/backend/src/modules/puterai/GeminiImageGenerationService.js b/src/backend/src/modules/puterai/GeminiImageGenerationService.js index a4d55d4bf8..a7097a333a 100644 --- a/src/backend/src/modules/puterai/GeminiImageGenerationService.js +++ b/src/backend/src/modules/puterai/GeminiImageGenerationService.js @@ -37,11 +37,15 @@ class GeminiImageGenerationService extends 
BaseService { static MODULES = { }; - _construct() { + _construct() { this.models_ = { 'gemini-2.5-flash-image-preview': { '1024x1024': 0.039, }, + 'gemini-3-pro-image-preview': { + // using same price as 2.5 flash for now + '1024x1024': 0.039, + }, }; } @@ -192,9 +196,10 @@ class GeminiImageGenerationService extends BaseService { } const response = await this.genAI.models.generateContent({ - model: 'gemini-2.5-flash-image-preview', - contents: contents, + model, + contents, }); + // Metering usage tracking // Gemini usage: always 1 image, resolution, cost, model this.meteringService.incrementUsage(actor, usageType, 1); @@ -222,11 +227,16 @@ class GeminiImageGenerationService extends BaseService { * @private */ _getValidRatios(model) { - if ( model === 'gemini-2.5-flash-image-preview' ) { + if ( + model === 'gemini-2.5-flash-image-preview' || + model === 'gemini-3-pro-image-preview' + ) { return [this.constructor.RATIO_SQUARE]; } + return []; } + _validate_ratio(ratio, model) { const validRatios = this._getValidRatios(model); return validRatios.includes(ratio); From 331d25d00d524378254d5ee52a30fc1f04b78e7f Mon Sep 17 00:00:00 2001 From: Nihhaar0002 Date: Mon, 24 Nov 2025 12:46:11 +0530 Subject: [PATCH 3/4] Add streaming usage totals to AI chunks --- .../src/modules/puterai/ClaudeService.js | 55 ++++++++++++++-- .../src/modules/puterai/lib/OpenAIUtil.js | 65 +++++++++++++++++-- 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/backend/src/modules/puterai/ClaudeService.js b/src/backend/src/modules/puterai/ClaudeService.js index c8b83f52b5..877462c244 100644 --- a/src/backend/src/modules/puterai/ClaudeService.js +++ b/src/backend/src/modules/puterai/ClaudeService.js @@ -247,6 +247,50 @@ class ClaudeService extends BaseService { const init_chat_stream = async ({ chatStream }) => { const completion = await anthropic.messages.stream(sdk_params); const usageSum = {}; + const runningUsage = { + input_tokens: 0, + output_tokens: 0, + total_tokens: 0, + }; 
+ + // Each emitted content block now carries a snapshot of the running (cumulative) usage + // totals ({ input_tokens, output_tokens, total_tokens }) for live metering. + const getUsage = () => ({ + input_tokens: runningUsage.input_tokens, + output_tokens: runningUsage.output_tokens, + total_tokens: runningUsage.total_tokens, + }); + + const enhanceTextBlock = (block) => { + block.addText = (text) => { + const payload = { + type: 'text', + text, + usage: getUsage(), + }; + block.chatStream.stream.write(JSON.stringify(payload) + '\n'); + }; + return block; + }; + + const enhanceToolBlock = (block) => { + const originalAddPartialJSON = block.addPartialJSON?.bind(block); + if ( originalAddPartialJSON ) { + block.addPartialJSON = (partial_json) => originalAddPartialJSON(partial_json); + } + block.end = () => { + const buffer = (block.buffer || '').trim() === '' ? '{}' : block.buffer; + const payload = { + ...block.contentBlock, + input: JSON.parse(buffer), + ...(block.contentBlock?.text ? {} : { text: '' }), + type: 'tool_use', + usage: getUsage(), + }; + block.chatStream.stream.write(JSON.stringify(payload) + '\n'); + }; + return block; + }; let message, contentBlock; for await ( const event of completion ) { @@ -257,6 +301,9 @@ class ClaudeService extends BaseService { if ( ! 
usageSum[key] ) usageSum[key] = 0; usageSum[key] += meteredData[key]; }); + runningUsage.input_tokens += meteredData.input_tokens || 0; + runningUsage.output_tokens += meteredData.output_tokens || 0; + runningUsage.total_tokens = runningUsage.input_tokens + runningUsage.output_tokens; if ( event.type === 'message_start' ) { message = chatStream.message(); @@ -270,16 +317,16 @@ class ClaudeService extends BaseService { if ( event.type === 'content_block_start' ) { if ( event.content_block.type === 'tool_use' ) { - contentBlock = message.contentBlock({ + contentBlock = enhanceToolBlock(message.contentBlock({ type: event.content_block.type, id: event.content_block.id, name: event.content_block.name, - }); + })); continue; } - contentBlock = message.contentBlock({ + contentBlock = enhanceTextBlock(message.contentBlock({ type: event.content_block.type, - }); + })); continue; } diff --git a/src/backend/src/modules/puterai/lib/OpenAIUtil.js b/src/backend/src/modules/puterai/lib/OpenAIUtil.js index 045d04c840..de2a68dc68 100644 --- a/src/backend/src/modules/puterai/lib/OpenAIUtil.js +++ b/src/backend/src/modules/puterai/lib/OpenAIUtil.js @@ -91,6 +91,8 @@ const create_chat_stream_handler = ({ completion, usage_calculator, }) => async ({ chatStream }) => { + // Streaming chunks now include a running usage object ({ prompt_tokens, completion_tokens, total_tokens }) + // so downstream consumers can surface live token counts without changing existing fields. 
deviations = Object.assign({ // affected by: Groq index_usage_from_stream_chunk: chunk => chunk.usage, @@ -106,6 +108,50 @@ const create_chat_stream_handler = ({ const tool_call_blocks = []; let last_usage = null; + const usage_totals = { + prompt_tokens: 0, + completion_tokens: 0, + total_tokens: 0, + }; + + const getUsage = () => ({ + prompt_tokens: usage_totals.prompt_tokens, + completion_tokens: usage_totals.completion_tokens, + total_tokens: usage_totals.total_tokens, + }); + + const enhanceTextBlock = (block) => { + block.addText = (text) => { + const payload = { + type: 'text', + text, + usage: getUsage(), + }; + block.chatStream.stream.write(JSON.stringify(payload) + '\n'); + }; + return block; + }; + + const enhanceToolBlock = (block) => { + const originalAddPartialJSON = block.addPartialJSON?.bind(block); + if ( originalAddPartialJSON ) { + block.addPartialJSON = (partial_json) => originalAddPartialJSON(partial_json); + } + block.end = () => { + const buffer = (block.buffer || '').trim() === '' ? '{}' : block.buffer; + const payload = { + ...block.contentBlock, + input: JSON.parse(buffer), + ...(block.contentBlock?.text ? {} : { text: '' }), + type: 'tool_use', + usage: getUsage(), + }; + block.chatStream.stream.write(JSON.stringify(payload) + '\n'); + }; + return block; + }; + + textblock = enhanceTextBlock(textblock); for await ( let chunk of completion ) { chunk = deviations.chunk_but_like_actually(chunk); if ( process.env.DEBUG ) { @@ -115,7 +161,15 @@ const create_chat_stream_handler = ({ delta && JSON.stringify(delta)); } const chunk_usage = deviations.index_usage_from_stream_chunk(chunk); - if ( chunk_usage ) last_usage = chunk_usage; + if ( chunk_usage ) { + usage_totals.prompt_tokens += chunk_usage.prompt_tokens ?? 0; + usage_totals.completion_tokens += chunk_usage.completion_tokens ?? 
0; + usage_totals.total_tokens = usage_totals.prompt_tokens + usage_totals.completion_tokens; + last_usage = { + ...chunk_usage, + ...getUsage(), + }; + } if ( chunk.choices.length < 1 ) continue; const choice = chunk.choices[0]; @@ -124,7 +178,7 @@ const create_chat_stream_handler = ({ if ( mode === 'tool' ) { toolblock.end(); mode = 'text'; - textblock = message.contentBlock({ type: 'text' }); + textblock = enhanceTextBlock(message.contentBlock({ type: 'text' })); } textblock.addText(choice.delta.content); continue; @@ -138,11 +192,11 @@ const create_chat_stream_handler = ({ } for ( const tool_call of tool_calls ) { if ( ! tool_call_blocks[tool_call.index] ) { - toolblock = message.contentBlock({ + toolblock = enhanceToolBlock(message.contentBlock({ type: 'tool_use', id: tool_call.id, name: tool_call.function.name, - }); + })); tool_call_blocks[tool_call.index] = toolblock; } else { toolblock = tool_call_blocks[tool_call.index]; @@ -152,8 +206,9 @@ const create_chat_stream_handler = ({ } } + const final_usage = last_usage ?? getUsage(); // TODO DS: this is a bit too abstracted... 
this is basically just doing the metering now - usage_calculator({ usage: last_usage }); + usage_calculator({ usage: final_usage }); if ( mode === 'text' ) textblock.end(); if ( mode === 'tool' ) toolblock.end(); From eb900be8846545bf4952e9ce5704ce061eee4ad8 Mon Sep 17 00:00:00 2001 From: Nihhaar Saini Date: Mon, 24 Nov 2025 13:55:45 +0530 Subject: [PATCH 4/4] Enable usage in OpenAI streamed chat completions --- .../OpenAICompletionService.mjs | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs index bd845f37e7..db22d43d71 100644 --- a/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs +++ b/src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs @@ -253,17 +253,18 @@ export class OpenAICompletionService { messages = await OpenAIUtil.process_input_messages(messages); const completion = await this.#openAi.chat.completions.create({ - user: user_private_uid, - messages: messages, - model: model, - ...(tools ? { tools } : {}), - ...(max_tokens ? { max_completion_tokens: max_tokens } : {}), - ...(temperature ? { temperature } : {}), - stream, - ...(stream ? { - stream_options: { include_usage: true }, - } : {}), - }); + user: user_private_uid, + messages: messages, + model: model, + ...(tools ? { tools } : {}), + ...(max_tokens ? { max_completion_tokens: max_tokens } : {}), + ...(temperature ? { temperature } : {}), + stream, + ...(stream ? { + stream_options: { include_usage: true }, + } : {}), +}); + // TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service // for now I'm overloading this usage calculator to handle the future promise resolution... return OpenAIUtil.handle_completion_output({