diff --git a/apps/web/src/lib/ai-gateway/models.test.ts b/apps/web/src/lib/ai-gateway/models.test.ts index 8d7d81911..c4a2d94ab 100644 --- a/apps/web/src/lib/ai-gateway/models.test.ts +++ b/apps/web/src/lib/ai-gateway/models.test.ts @@ -12,7 +12,12 @@ import { claude_sonnet_4_6_stealth_model, claude_opus_4_6_stealth_model, } from './providers/anthropic.constants'; -import { isAlibabaDirectModel, qwen36_plus_model, qwen37_max_model } from './providers/qwen'; +import { + isAlibabaDirectModel, + qwen36_plus_model, + qwen37_max_model, + qwen37_plus_model, +} from './providers/qwen'; describe('isFreeModel', () => { describe('free models', () => { @@ -79,6 +84,14 @@ describe('isFreeModel', () => { expect(getInferenceProvider(qwen37_max_model)).toBe('alibaba'); }); + test('routes Qwen3.7 Plus directly through Alibaba', () => { + expect(findKiloExclusiveModel(qwen37_plus_model.public_id)).toBe(qwen37_plus_model); + expect(isAlibabaDirectModel(qwen37_plus_model.public_id)).toBe(true); + expect(qwen37_plus_model.gateway).toBe('alibaba'); + expect(qwen37_plus_model.internal_id).toBe('qwen3.7-plus'); + expect(getInferenceProvider(qwen37_plus_model)).toBe('alibaba'); + }); + test('requires data collection for paid training-enabled offerings', () => { expect( isKiloExclusiveModelRequiringDataCollection(claude_opus_4_7_stealth_model.public_id) diff --git a/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts b/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts index 0f2be61a4..ac1fe5c88 100644 --- a/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts +++ b/apps/web/src/lib/ai-gateway/processUsage.calculatKiloExclusiveCost.test.ts @@ -9,6 +9,7 @@ import { qwen36_max_preview_model, qwen36_plus_model, qwen37_max_model, + qwen37_plus_model, } from '@/lib/ai-gateway/providers/qwen'; const makeUsage = (overrides: Partial = {}): JustTheCostsUsageStats => ({ @@ -41,6 +42,44 @@ describe('calculatKiloExclusiveCost_mUsd with qwen3.7-max', () => { }); }); +describe('calculatKiloExclusiveCost_mUsd with qwen3.7-plus', () => { + test('uses direct Alibaba pricing with the Kilo discount in the <=256k tier', () => { + const result = calculateKiloExclusiveCost_mUsd( + qwen37_plus_model, + makeUsage({ inputTokens: 100_000, outputTokens: 10_000 }) + ); + + expect(result).toBe(Math.round(100_000 * 0.26 + 10_000 * 1.04)); + }); + + test('charges explicit cache reads and writes at discounted rates', () => { + const result = calculateKiloExclusiveCost_mUsd( + qwen37_plus_model, + makeUsage({ inputTokens: 100_000, cacheHitTokens: 20_000, cacheWriteTokens: 30_000 }) + ); + + expect(result).toBe(Math.round(50_000 * 0.26 + 20_000 * 0.026 + 30_000 * 0.325)); + }); + + test('uses direct Alibaba pricing with the Kilo discount in the >256k tier', () => { + const result = calculateKiloExclusiveCost_mUsd( + qwen37_plus_model, + makeUsage({ inputTokens: 300_000, outputTokens: 10_000 }) + ); + + expect(result).toBe(Math.round(300_000 * 0.78 + 10_000 * 3.12)); + }); + + test('moves to the long-context tier above the 256k boundary', () => { + expect( + calculateKiloExclusiveCost_mUsd(qwen37_plus_model, makeUsage({ inputTokens: 262_144 })) + ).toBe(Math.round(262_144 * 0.26)); + expect( + calculateKiloExclusiveCost_mUsd(qwen37_plus_model, makeUsage({ inputTokens: 262_145 })) + ).toBe(Math.round(262_145 * 0.78)); + }); +}); + describe('calculatKiloExclusiveCost_mUsd with qwen3.6-plus', () => { // Pre-discount prices from Qwen pricing page (35% Kilo discount applied in code): // diff --git a/apps/web/src/lib/ai-gateway/providers/qwen.ts b/apps/web/src/lib/ai-gateway/providers/qwen.ts index bdd7c6285..3195a82af 100644 --- a/apps/web/src/lib/ai-gateway/providers/qwen.ts +++ b/apps/web/src/lib/ai-gateway/providers/qwen.ts @@ -96,6 +96,41 @@ export const qwen37_max_model: KiloExclusiveModel = { inference_provider_restriction: [], }; +export const qwen37_plus_model: KiloExclusiveModel = { + public_id: 'qwen/qwen3.7-plus', + display_name: 'Qwen: Qwen3.7 Plus', + description: + "Qwen3.7-Plus is Alibaba's native multimodal agent model for visual-language reasoning, agentic coding, tool use, and productivity workflows. It supports text, image, and video inputs. Note: a surcharge applies to long-context workloads exceeding 256K input tokens.", + context_length: 1_000_000, + max_completion_tokens: 65_536, + status: 'public', + flags: ['reasoning', 'vision'], + gateway: 'alibaba', + internal_id: 'qwen3.7-plus', + pricing: makeTieredPricing([ + { + maxInputTokens: TOKENS_256K, + undiscounted: { + prompt_per_million: 0.4, + completion_per_million: 1.6, + input_cache_read_per_million: 0.04, + input_cache_write_per_million: 0.5, + }, + }, + { + maxInputTokens: TOKENS_1M, + undiscounted: { + prompt_per_million: 1.2, + completion_per_million: 4.8, + input_cache_read_per_million: 0.12, + input_cache_write_per_million: 1.5, + }, + }, + ]), + exclusive_to: [], + inference_provider_restriction: [], +}; + export const qwen36_plus_model: KiloExclusiveModel = { public_id: 'qwen/qwen3.6-plus', display_name: 'Qwen: Qwen3.6 Plus', @@ -262,6 +297,7 @@ export const qwen36_27b_model: KiloExclusiveModel = { export const alibabaDirectModels: ReadonlyArray = [ qwen37_max_model, + qwen37_plus_model, qwen36_plus_model, qwen36_flash_model, qwen36_max_preview_model,