Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 104 additions & 21 deletions js/owrap.ai.js
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ OpenWrap.ai.prototype.__gpttypes = {
if (isDef(aResponse.usage.prompt_tokens)) tokens.prompt = aResponse.usage.prompt_tokens
if (isDef(aResponse.usage.completion_tokens)) tokens.completion = aResponse.usage.completion_tokens
if (isDef(aResponse.usage.total_tokens)) tokens.total = aResponse.usage.total_tokens
if (isMap(aResponse.usage.prompt_tokens_details)) {
if (isDef(aResponse.usage.prompt_tokens_details.cached_tokens)) tokens.cached = aResponse.usage.prompt_tokens_details.cached_tokens
if (isDef(aResponse.usage.prompt_tokens_details.audio_tokens)) tokens.audio = aResponse.usage.prompt_tokens_details.audio_tokens
}
if (isMap(aResponse.usage.completion_tokens_details)) {
if (isDef(aResponse.usage.completion_tokens_details.reasoning_tokens)) tokens.reasoning = aResponse.usage.completion_tokens_details.reasoning_tokens
}
if (Object.keys(tokens).length > 0) stats.tokens = tokens
stats.usage = aResponse.usage
}
Expand Down Expand Up @@ -2258,6 +2265,7 @@ OpenWrap.ai.prototype.__gpttypes = {
aOptions.temperature = _$(aOptions.temperature, "aOptions.temperature").isNumber().default(0.7)
aOptions.url = _$(aOptions.url, "aOptions.url").isString().default("https://api.anthropic.com/")
aOptions.headers = _$(aOptions.headers, "aOptions.headers").isMap().default({})
aOptions.promptCaching = _$(aOptions.promptCaching, "aOptions.promptCaching").isBoolean().default(false)
// If noSystem=true it will not output the system messages
aOptions.noSystem = _$(aOptions.noSystem, "aOptions.noSystem").isBoolean().default(true)

Expand All @@ -2266,6 +2274,7 @@ OpenWrap.ai.prototype.__gpttypes = {
var _model = aOptions.model
var _temperature = aOptions.temperature
var _noSystem = aOptions.noSystem
var _promptCaching = aOptions.promptCaching
var _lastStats = __
var _debugCh = __
var _resetStats = () => { _lastStats = __ }
Expand All @@ -2288,6 +2297,8 @@ OpenWrap.ai.prototype.__gpttypes = {
if (isDef(aResponse.usage.input_tokens)) tokens.prompt = aResponse.usage.input_tokens
if (isDef(aResponse.usage.output_tokens)) tokens.completion = aResponse.usage.output_tokens
if (isDef(aResponse.usage.total_tokens)) tokens.total = aResponse.usage.total_tokens
if (isDef(aResponse.usage.cache_creation_input_tokens)) tokens.cacheCreation = aResponse.usage.cache_creation_input_tokens
if (isDef(aResponse.usage.cache_read_input_tokens)) tokens.cacheRead = aResponse.usage.cache_read_input_tokens
if (Object.keys(tokens).length > 0) stats.tokens = tokens
stats.usage = aResponse.usage
}
Expand Down Expand Up @@ -2362,6 +2373,66 @@ OpenWrap.ai.prototype.__gpttypes = {
if (isUnDef(aResult) || aResult === null) return ""
return stringify(aResult, __, "")
}
// Adds an Anthropic `cache_control: { type: "ephemeral" }` marker to the most recent
// cacheable user message of aMessages, so the conversation prefix up to that point can be cached.
//
// aMessages: array of message maps ({ role, content }); content may be a string, a single
//            content-block map or an array of content-block maps.
// Returns aMessages itself. The selected message is modified IN PLACE, so callers that must
// keep the original conversation untouched should pass a copy.
//
// Nothing is changed when prompt caching is disabled, when aMessages is not a non-empty array
// or when no user message carries cacheable content. A content block counts as cacheable when
// it is a map whose type is not "tool_result"; user messages made only of tool results are skipped.
var _applyPromptCacheControlToMessages = aMessages => {
  if (!_promptCaching || !isArray(aMessages) || aMessages.length === 0) return aMessages

  // A missing type is also accepted here since undefined !== "tool_result"
  var _isCacheableBlock = aBlock => isMap(aBlock) && aBlock.type !== "tool_result"
  // Index of the last element of aList accepted by aPredicate (-1 when none is)
  var _lastIndexWhere = (aList, aPredicate) => {
    for (var idx = aList.length - 1; idx >= 0; idx--) {
      if (aPredicate(aList[idx])) return idx
    }
    return -1
  }

  // Search backwards: the marker goes on the latest user message that can carry it
  var _msgIdx = _lastIndexWhere(aMessages, aMsg => {
    if (!isMap(aMsg) || aMsg.role !== "user") return false
    if (isString(aMsg.content)) return true
    if (isMap(aMsg.content)) return _isCacheableBlock(aMsg.content)
    return isArray(aMsg.content) && aMsg.content.some(_isCacheableBlock)
  })
  if (_msgIdx < 0) return aMessages

  var _msg = aMessages[_msgIdx]
  if (isString(_msg.content)) {
    // Plain text has to become a content block to be able to hold the marker
    _msg.content = [{ type: "text", text: _msg.content, cache_control: { type: "ephemeral" } }]
  } else if (isMap(_msg.content)) {
    _msg.content = [ merge(_msg.content, { cache_control: { type: "ephemeral" } }) ]
  } else if (isArray(_msg.content)) {
    // The selection above only accepts arrays holding a cacheable block, so one is always found;
    // the guard simply avoids writing to index -1 should that invariant ever change.
    var _blockIdx = _lastIndexWhere(_msg.content, _isCacheableBlock)
    if (_blockIdx >= 0) {
      _msg.content[_blockIdx] = merge(_msg.content[_blockIdx], { cache_control: { type: "ephemeral" } })
    }
  }
  return aMessages
}
// Builds the value for the request body "system" field.
//
// _systemText  : the system prompt text (anything other than a non-empty string counts as empty).
// _isJsonPrompt: when truthy, "output json" is appended (after a blank line if there is text).
// Returns undefined when there is no text to send, a single cache-marked text block in an
// array when prompt caching is enabled, or the plain string otherwise.
var _buildSystemField = (_systemText, _isJsonPrompt) => {
  var _hasText = aValue => isString(aValue) && aValue.length > 0

  var _finalText = _systemText
  if (_isJsonPrompt) {
    // Keeps the JSON steering instruction used for Anthropic requests
    var _prefix = _hasText(_systemText) ? _systemText + "\n\n" : ""
    _finalText = _prefix + "output json"
  }

  if (!_hasText(_finalText)) return __

  return _promptCaching
    ? [{ type: "text", text: _finalText, cache_control: { type: "ephemeral" } }]
    : _finalText
}

var _r = {
conversation: [],
Expand Down Expand Up @@ -2532,6 +2603,8 @@ OpenWrap.ai.prototype.__gpttypes = {

var systemMsgs = msgs.filter(m => m.role == "system");
var bodyMessages = (_noSystem ? msgs.filter(m => m.role != "system") : msgs.slice());
if (_promptCaching) bodyMessages = clone(bodyMessages)
bodyMessages = _applyPromptCacheControlToMessages(bodyMessages)

_r.conversation = msgs;

Expand Down Expand Up @@ -2559,10 +2632,9 @@ OpenWrap.ai.prototype.__gpttypes = {
})
.filter(s => isString(s) && s.length > 0)
.join("\n")
if (_systemText.length > 0) body.system = _systemText
}
if (aJsonFlag) {
body.system = (isString(body.system) && body.system.length > 0 ? body.system + "\n\n" : "") + "output json"
body.system = _buildSystemField(_systemText, aJsonFlag)
} else if (aJsonFlag) {
body.system = _buildSystemField("", true)
}

body = merge(body, aOptions.params)
Expand Down Expand Up @@ -2681,6 +2753,8 @@ OpenWrap.ai.prototype.__gpttypes = {

var systemMsgs = msgs.filter(m => m.role == "system");
var bodyMessages = (_noSystem ? msgs.filter(m => m.role != "system") : msgs.slice());
if (_promptCaching) bodyMessages = clone(bodyMessages)
bodyMessages = _applyPromptCacheControlToMessages(bodyMessages)

_r.conversation = msgs;

Expand All @@ -2707,10 +2781,9 @@ OpenWrap.ai.prototype.__gpttypes = {
})
.filter(s => isString(s) && s.length > 0)
.join("\n")
if (_systemText.length > 0) body.system = _systemText
}
if (aJsonFlag) {
body.system = (isString(body.system) && body.system.length > 0 ? body.system + "\n\n" : "") + "output json"
body.system = _buildSystemField(_systemText, aJsonFlag)
} else if (aJsonFlag) {
body.system = _buildSystemField("", true)
}

body = merge(body, aOptions.params)
Expand Down Expand Up @@ -2925,14 +2998,16 @@ OpenWrap.ai.prototype.__gpttypes = {
aVerb = _$(aVerb, "aVerb").isString().default("POST")

var _h = new ow.obj.http(__, __, __, __, __, __, __, { timeout: _timeout })
var _reqHeaders = merge(aOptions.headers, {
"x-api-key" : Packages.openaf.AFCmdBase.afc.dIP(_key),
"anthropic-version": "2023-06-01",
Accept : "*/*"
})
if (_promptCaching) _reqHeaders["anthropic-beta"] = "prompt-caching-2024-07-31"
var __m = {
conTimeout : 60000,
httpClient : _h,
requestHeaders: merge(aOptions.headers, {
"x-api-key" : Packages.openaf.AFCmdBase.afc.dIP(_key),
"anthropic-version": "2023-06-01",
Accept : "*/*"
})
requestHeaders: _reqHeaders
}
_h.close()

Expand Down Expand Up @@ -2961,14 +3036,16 @@ OpenWrap.ai.prototype.__gpttypes = {
aVerb = _$(aVerb, "aVerb").isString().default("POST")

var _h = new ow.obj.http(__, __, __, __, __, __, __, { timeout: _timeout })
var _reqHeaders = merge(aOptions.headers, {
"x-api-key" : Packages.openaf.AFCmdBase.afc.dIP(_key),
"anthropic-version": "2023-06-01",
Accept : "text/event-stream"
})
if (_promptCaching) _reqHeaders["anthropic-beta"] = "prompt-caching-2024-07-31"
var __m = {
conTimeout : 60000,
httpClient : _h,
requestHeaders: merge(aOptions.headers, {
"x-api-key" : Packages.openaf.AFCmdBase.afc.dIP(_key),
"anthropic-version": "2023-06-01",
Accept : "text/event-stream"
})
requestHeaders: _reqHeaders
}
_h.close()

Expand Down Expand Up @@ -3121,6 +3198,7 @@ OpenWrap.ai.prototype.agent = function(aOptions) {
* - params: extra request body parameters merged into prompt, image and embedding calls.\
* - noSystem: when true, system messages are converted to developer messages where supported (defaults to true).\
* - noResponseFormat: when true, disables OpenAI-compatible JSON response_format injection.\
* - promptCaching: when true enables Anthropic prompt caching headers and cache_control markers (defaults to false).\
* \
* OpenAI-compatible transport options:\
* - apiVersion: API version/path segment for OpenAI-compatible routes (defaults to "v1"). In Azure legacy mode this becomes the api-version query parameter. In Foundry mode, "v1" uses the /openai/v1 path; dated versions use the /models route with api-version.\
Expand All @@ -3134,6 +3212,8 @@ OpenWrap.ai.prototype.agent = function(aOptions) {
* - Azure OpenAI legacy: new ow.ai.gpt("openai", { key: "...", url: "https://RESOURCE.openai.azure.com", mode: "azure-openai-legacy", deployment: "DEPLOYMENT", apiVersion: "2024-10-21" })\
* - Azure AI Foundry v1: new ow.ai.gpt("openai", { key: "...", url: "https://RESOURCE.services.ai.azure.com", mode: "foundry", model: "DEPLOYMENT" })\
* - Azure AI Foundry dated API: new ow.ai.gpt("openai", { key: "...", url: "https://RESOURCE.services.ai.azure.com/models", mode: "foundry", apiVersion: "2024-05-01-preview", model: "DEPLOYMENT" })\
* \
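* Usage stats note: `getLastStats()` includes, when reported by the provider, OpenAI cached prompt tokens (`tokens.cached`), audio prompt tokens (`tokens.audio`) and reasoning tokens (`tokens.reasoning`), and Anthropic cache creation/read tokens (`tokens.cacheCreation`, `tokens.cacheRead`).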
* </odoc>
*/
OpenWrap.ai.prototype.gpt = function(aType, aOptions) {
Expand Down Expand Up @@ -3229,7 +3309,8 @@ OpenWrap.ai.prototype.gpt.prototype.getEmbeddings = function(aInput, aDimensions
/**
* <odoc>
* <key>ow.ai.gpt.getLastStats() : Map</key>
* Returns the latest usage statistics reported by the underlying GPT model for the most recent prompt request.
* Returns the latest usage statistics reported by the underlying GPT model for the most recent prompt request
* (including provider-specific fields such as OpenAI cached prompt tokens and Anthropic cache read/creation tokens when available).
* </odoc>
*/
OpenWrap.ai.prototype.gpt.prototype.getLastStats = function() {
Expand Down Expand Up @@ -3555,7 +3636,8 @@ OpenWrap.ai.prototype.gpt.prototype.addUserPrompt = function(aPrompt) {
/**
* <odoc>
* <key>ow.ai.gpt.addSystemPrompt(aPrompt) : ow.ai.gpt</key>
* Adds aPrompt (a string or an array of strings) with aRole (defaults to "user") to the current conversation.
* Adds aPrompt (a string or an array of strings) as a system prompt to the current conversation.
* For Gemini, large system instructions may be implicitly cached by the provider. For Anthropic, explicit cache markers are sent when `promptCaching` is enabled.
* </odoc>
*/
OpenWrap.ai.prototype.gpt.prototype.addSystemPrompt = function(aPrompt) {
Expand Down Expand Up @@ -3793,6 +3875,7 @@ OpenWrap.ai.prototype.gpt.prototype.codePrompt = function(aPrompt, aModel, aTemp
* - instructions: a string or an array of strings with the instructions for the model (e.g. "json", "boolean", "sql", "js", "path")\
* - headers: a map with the headers to use in the requests (e.g. { "Content-Type": "application/json" })\
* - params: a map with the parameters to use in the requests (e.g. { "max_tokens": 1000, "top_p": 1, "frequency_penalty": 0, "presence_penalty": 0 })\
* - promptCaching: when true enables Anthropic prompt caching headers and cache_control markers (defaults to false)\
* \
* For type "openai", options can also include:\
* - mode: transport mode ("openai", "azure-openai-v1", "azure-openai-legacy" or "foundry").\
Expand All @@ -3810,7 +3893,7 @@ OpenWrap.ai.prototype.gpt.prototype.codePrompt = function(aPrompt, aModel, aTemp
* If aModel is not provided, it will try to get the model from the environment variable "OAF_MODEL" with the map in JSON or SLON format.
* \
* The returned object also exposes helper methods to inspect vendor usage information: `getLastStats`/`lastStats` (map with the latest statistics), `promptWithStats`,
* `promptJSONWithStats` and `rawPromptWithStats` (returning `{ response, stats }`).
* `promptJSONWithStats` and `rawPromptWithStats` (returning `{ response, stats }`). OpenAI-compatible models can expose cached prompt tokens via `stats.tokens.cached`.
* </odoc>
*/
global.$gpt = function(aModel) {
Expand Down
Loading
Loading