diff --git a/.goreleaser.yml b/.goreleaser.yml
index 20ee3db3..903cdba5 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -144,7 +144,7 @@ brews:
#
# Default depends on the client.
# Templates: allowed
- url_template: "https://github.mycompany.com/foo/bar/releases/download/{{ .Tag }}/{{ .ArtifactName }}"
+ url_template: "https://github.com/einstack/glide/releases/download/{{ .Tag }}/{{ .ArtifactName }}"
# Allows you to set a custom download strategy. Note that you'll need
# to implement the strategy and add it to your tap repository.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 550a2919..e596df47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,21 @@ The changelog consists of three categories:
- **Improvements** - bugfixes, performance and other types of improvements to existing functionality
- **Miscellaneous** - all other updates like build, release, CLI, etc.
+## 0.0.1 (Jan 31st, 2024)
+
+### Features
+
+- ✨ #81: Allow overriding the chat message for specific models (@mkrueger12)
+
+### Improvements
+
+- 🔧 #78: Normalize response latency by response token count (@roma-glushko)
+- π #112 added the CLI banner info (@roma-glushko)
+
+### Miscellaneous
+
+- π #114 Make links actual across the project (@roma-glushko)
+
## 0.0.1-rc.2 (Jan 22nd, 2024)
### Improvements
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index a13ebf99..980d59bf 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -59,7 +59,7 @@ a project may be further defined and clarified by project maintainers.
## Enforcement
Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at roman.glushko.m@gmail.com. All
+reported by contacting the project team at [contact@einstack.ai](mailto:contact@einstack.ai). All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
diff --git a/README.md b/README.md
index f30bad23..7b7572ef 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,16 @@
# Glide: Cloud-Native LLM Gateway for Seamless LLMOps
-
+
-[![LICENSE](https://img.shields.io/github/license/modelgateway/glide.svg?style=flat-square&color=%233f90c8)](https://github.com/modelgateway/glide/blob/main/LICENSE)
+
[![codecov](https://codecov.io/github/EinStack/glide/graph/badge.svg?token=F7JT39RHX9)](https://codecov.io/github/EinStack/glide)
+[![Discord](https://img.shields.io/discord/1181281407813828710)](https://discord.gg/pt53Ej7rrc)
+[![Documentation](https://img.shields.io/badge/build-view-violet%20?style=flat&logo=books&label=docs&link=https%3A%2F%2Fglide.einstack.ai%2F)](https://glide.einstack.ai/)
+[![LICENSE](https://img.shields.io/github/license/EinStack/glide.svg?style=flat-square&color=%233f90c8)](https://github.com/EinStack/glide/blob/main/LICENSE)
+[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FEinStack%2Fglide.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FEinStack%2Fglide?ref=badge_shield)
+
+---
Glide is your go-to cloud-native LLM gateway, delivering high-performance LLMOps in a lightweight, all-in-one package.
@@ -14,9 +20,11 @@ so you can dive into tackling your core challenges.
Glide sits between your application and model providers to seamlessly handle various LLMOps tasks like
model failover, caching, key management, etc.
+
+
Take a look at the develop branch.
-Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
+Check out our [documentation](https://glide.einstack.ai)!
> [!Warning]
> Glide is under active development right now. Give us a star to support the project β¨
@@ -28,7 +36,7 @@ Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
- Support **popular LLM providers**.
- **High performance**. Performance is our priority. We want to keep Glide "invisible" for your latency-wise, while providing rich functionality.
- **Production-ready observability** via OpenTelemetry, emit metrics on models health, allows whitebox monitoring.
-- Straightforward and simple maintenance and configuration, centrilized API key control & management & rotation, etc.
+- Straightforward and simple maintenance and configuration, centralized API key control & management & rotation, etc.
## Supported Providers
@@ -48,7 +56,7 @@ Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
Routers are a core functionality of Glide. Think of routers as a group of models with some predefined logic. For example, the resilience router allows a user to define a set of backup models should the initial model fail. Another example, would be to leverage the least-latency router to make latency sensitive LLM calls in the most efficient manner.
-Detailed info on routers can be found [here](https://backlandlabs.mintlify.app/essentials/routers).
+Detailed info on routers can be found [here](https://glide.einstack.ai/essentials/routers).
#### Available Routers
@@ -62,43 +70,47 @@ Detailed info on routers can be found [here](https://backlandlabs.mintlify.app/e
## Get Started
-#### Install
+### Installation
-The easiest way to deploy Glide is to build from source.
+The easiest way to deploy Glide is to use our [demo repository](https://github.com/EinStack/glide-demo.git) and [docker-compose](https://docs.docker.com/compose/).
-Steps to build a container with Docker can be found [here](https://backlandlabs.mintlify.app/introduction#install-and-deploy).
+### 1. Clone the demo repository
+
+```bash
+git clone https://github.com/EinStack/glide-demo.git
+```
+
+### 2. Init Configs
+
+The demo repository comes with a basic config. Additionally, you need to init your secrets by running:
+
+```bash
+make init # from the demo root
+```
-#### Set Configuration File
+This will create the `secrets` directory with one `.OPENAI_API_KEY` file that you need to put your key into.
-Find detailed information on configuration [here](https://backlandlabs.mintlify.app/essentials/configuration).
+### 3. Start Glide
-```yaml
-telemetry:
- logging:
- level: debug # debug, info, warn, error, fatal
- encoding: console
+After that, just use docker compose via this command to start your demo environment:
-routers:
- language:
- - id: myrouter
- models:
- - id: openai
- openai:
- api_key: ""
+```bash
+make up
```
-#### Sample API Request to `/chat` endpoint
+### 4. Sample API Request to `/chat` endpoint
-See [API Reference](https://backlandlabs.mintlify.app/api-reference/introduction) for more details.
+See [API Reference](https://glide.einstack.ai/api-reference/introduction) for more details.
```json
{
+ "model": "gpt-3.5-turbo", # this is not required but can be used to specify different prompts to different models
"message":
{
"role": "user",
"content": "Where was it played?"
},
- "messageHistory": [
+ "messageHistory": [
{"role": "system", "content": "You are a helpful assistant."},
{"role": "user", "content": "Who won the world series in 2020?"},
{"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}
@@ -108,13 +120,60 @@ See [API Reference](https://backlandlabs.mintlify.app/api-reference/introduction
### API Docs
-Once deployed, Glide comes with OpenAPI documentation that is accessible via http://127.0.0.1:9099/v1/swagger/index.html
+Finally, Glide comes with OpenAPI documentation that is accessible via http://127.0.0.1:9099/v1/swagger/index.html
+
+That's it π
+
+Use [our documentation](https://glide.einstack.ai) to further learn about Glide capabilities and configs.
+
+---
+
+Other ways to install Glide are available:
+
+### Homebrew (macOS)
+
+```bash
+brew tap einstack/tap
+brew install einstack/tap/glide
+```
+
+### Snapcraft (Linux)
+
+Coming Soon
+
+### Docker Images
+
+Glide provides official images in our [GHCR](https://github.com/EinStack/glide/pkgs/container/glide):
+
+- Alpine 3.19:
+```bash
+docker pull ghcr.io/einstack/glide:latest-alpine
+```
+
+- Ubuntu 22.04 LTS:
+```bash
+docker pull ghcr.io/einstack/glide:latest-ubuntu
+```
+
+- Google Distroless (non-root):
+```bash
+docker pull ghcr.io/einstack/glide:latest-distroless
+```
+
+- RedHat UBI 8.9 Micro:
+```bash
+docker pull ghcr.io/einstack/glide:latest-redhat
+```
+
+### Helm Chart
+
+Coming Soon
## Community
- Join [Discord](https://discord.gg/pt53Ej7rrc) for real-time discussion
-Open [an issue](https://github.com/modelgateway/glide/issues) or start [a discussion](https://github.com/modelgateway/glide/discussions)
+Open [an issue](https://github.com/EinStack/glide/issues) or start [a discussion](https://github.com/EinStack/glide/discussions)
if there is a feature or an enhancement you'd like to see in Glide.
## Contribute
@@ -126,6 +185,12 @@ if there is a feature or an enhancement you'd like to see in Glide.
Thanks everyone for already put their effort to make Glide better and more feature-rich:
-
+
+
+## License
+
+Apache 2.0
+
+[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FEinStack%2Fglide.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2FEinStack%2Fglide?ref=badge_large)
diff --git a/ROADMAP.md b/ROADMAP.md
index a6e7eecf..facda76a 100644
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -12,9 +12,9 @@ This document describes the current status and the upcoming milestones of the Gl
| :---: | :--- | :---: |
| π | **Unified Chat Endpoint Support** | 4 / 4 |
| π | **Fallback Routing Strategy** | 1 / 1 |
-| π | **Priority, Round Robin, Weighted Round Robin, Least Latency** | 2 / 4 |
-| π | **Documentation** | 1 / 1 |
-| π | **Private Preview** | 4 / 5 |
+| π | **Priority, Round Robin, Weighted Round Robin, Least Latency** | 4 / 4 |
+| π | **Documentation** | 1 / 1 |
+| π | **Private Preview** | 4.5 / 5 |
| π | **Streaming Support** | 0 / 4 |
| π | **Embedding Support** | 0 / 4 |
| π | **Caching** | 0 / 1 |
diff --git a/SECURITY.md b/SECURITY.md
index 05beb8da..ba0fb400 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -3,4 +3,4 @@
We want to keep Glide safe for everyone.
If you've discovered a security vulnerability in Glide,
-we appreciate your help in disclosing it to us in a responsible manner, using this email: roman.glushko.m@gmail.com
+we appreciate your help in disclosing it to us in a responsible manner, using this email: [contact@einstack.ai](mailto:contact@einstack.ai)
diff --git a/docs/docs.go b/docs/docs.go
index f5a9cc48..5ede09c8 100644
--- a/docs/docs.go
+++ b/docs/docs.go
@@ -10,12 +10,13 @@ const docTemplate = `{
"description": "{{escape .Description}}",
"title": "{{.Title}}",
"contact": {
- "name": "Glide Community",
- "url": "https://github.com/modelgateway/glide"
+ "name": "EinStack Community",
+ "url": "https://github.com/EinStack/glide/",
+ "email": "contact@einstack.ai"
},
"license": {
"name": "Apache 2.0",
- "url": "https://github.com/modelgateway/glide/blob/develop/LICENSE"
+ "url": "https://github.com/EinStack/glide/blob/develop/LICENSE"
},
"version": "{{.Version}}"
},
@@ -626,6 +627,17 @@ const docTemplate = `{
}
}
},
+ "schemas.OverrideChatRequest": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "$ref": "#/definitions/schemas.ChatMessage"
+ },
+ "model_id": {
+ "type": "string"
+ }
+ }
+ },
"schemas.ProviderResponse": {
"type": "object",
"properties": {
@@ -639,11 +651,11 @@ const docTemplate = `{
}
},
"tokenCount": {
- "$ref": "#/definitions/schemas.TokenCount"
+ "$ref": "#/definitions/schemas.TokenUsage"
}
}
},
- "schemas.TokenCount": {
+ "schemas.TokenUsage": {
"type": "object",
"properties": {
"promptTokens": {
@@ -668,6 +680,9 @@ const docTemplate = `{
"items": {
"$ref": "#/definitions/schemas.ChatMessage"
}
+ },
+ "override": {
+ "$ref": "#/definitions/schemas.OverrideChatRequest"
}
}
},
@@ -700,16 +715,20 @@ const docTemplate = `{
}
}
}
+ },
+ "externalDocs": {
+ "description": "Documentation",
+ "url": "https://glide.einstack.ai/"
}
}`
// SwaggerInfo holds exported Swagger Info so clients can modify it
var SwaggerInfo = &swag.Spec{
- Version: "1.0",
+ Version: "0.0.1",
Host: "localhost:9099",
BasePath: "/",
Schemes: []string{"http"},
- Title: "Glide Gateway",
+ Title: "Glide",
Description: "API documentation for Glide, an open-source lightweight high-performance model gateway",
InfoInstanceName: "swagger",
SwaggerTemplate: docTemplate,
diff --git a/docs/images/marketecture.svg b/docs/images/marketecture.svg
new file mode 100644
index 00000000..9928b068
--- /dev/null
+++ b/docs/images/marketecture.svg
@@ -0,0 +1,311 @@
+
diff --git a/docs/logo/glide.png b/docs/logo/glide.png
new file mode 100644
index 00000000..48eb15a8
Binary files /dev/null and b/docs/logo/glide.png differ
diff --git a/docs/logo/glide_bird_in_circle.png b/docs/logo/glide_bird_in_circle.png
new file mode 100644
index 00000000..1e8ffaad
Binary files /dev/null and b/docs/logo/glide_bird_in_circle.png differ
diff --git a/docs/logo/glide_bird_title.png b/docs/logo/glide_bird_title.png
new file mode 100644
index 00000000..0b152207
Binary files /dev/null and b/docs/logo/glide_bird_title.png differ
diff --git a/docs/logo/glide_no_bgd.png b/docs/logo/glide_no_bgd.png
new file mode 100644
index 00000000..2563bf54
Binary files /dev/null and b/docs/logo/glide_no_bgd.png differ
diff --git a/docs/logo/glide_title.png b/docs/logo/glide_title.png
new file mode 100644
index 00000000..9f8e4af7
Binary files /dev/null and b/docs/logo/glide_title.png differ
diff --git a/docs/logo/glide_title_underscore.png b/docs/logo/glide_title_underscore.png
new file mode 100644
index 00000000..1412a60b
Binary files /dev/null and b/docs/logo/glide_title_underscore.png differ
diff --git a/docs/swagger.json b/docs/swagger.json
index c6bf90fd..8146a113 100644
--- a/docs/swagger.json
+++ b/docs/swagger.json
@@ -5,16 +5,17 @@
"swagger": "2.0",
"info": {
"description": "API documentation for Glide, an open-source lightweight high-performance model gateway",
- "title": "Glide Gateway",
+ "title": "Glide",
"contact": {
- "name": "Glide Community",
- "url": "https://github.com/modelgateway/glide"
+ "name": "EinStack Community",
+ "url": "https://github.com/EinStack/glide/",
+ "email": "contact@einstack.ai"
},
"license": {
"name": "Apache 2.0",
- "url": "https://github.com/modelgateway/glide/blob/develop/LICENSE"
+ "url": "https://github.com/EinStack/glide/blob/develop/LICENSE"
},
- "version": "1.0"
+ "version": "0.0.1"
},
"host": "localhost:9099",
"basePath": "/",
@@ -623,6 +624,17 @@
}
}
},
+ "schemas.OverrideChatRequest": {
+ "type": "object",
+ "properties": {
+ "message": {
+ "$ref": "#/definitions/schemas.ChatMessage"
+ },
+ "model_id": {
+ "type": "string"
+ }
+ }
+ },
"schemas.ProviderResponse": {
"type": "object",
"properties": {
@@ -636,11 +648,11 @@
}
},
"tokenCount": {
- "$ref": "#/definitions/schemas.TokenCount"
+ "$ref": "#/definitions/schemas.TokenUsage"
}
}
},
- "schemas.TokenCount": {
+ "schemas.TokenUsage": {
"type": "object",
"properties": {
"promptTokens": {
@@ -665,6 +677,9 @@
"items": {
"$ref": "#/definitions/schemas.ChatMessage"
}
+ },
+ "override": {
+ "$ref": "#/definitions/schemas.OverrideChatRequest"
}
}
},
@@ -697,5 +712,9 @@
}
}
}
+ },
+ "externalDocs": {
+ "description": "Documentation",
+ "url": "https://glide.einstack.ai/"
}
}
\ No newline at end of file
diff --git a/docs/swagger.yaml b/docs/swagger.yaml
index 0ff8a476..c0b25776 100644
--- a/docs/swagger.yaml
+++ b/docs/swagger.yaml
@@ -342,6 +342,13 @@ definitions:
or assistant.
type: string
type: object
+ schemas.OverrideChatRequest:
+ properties:
+ message:
+ $ref: '#/definitions/schemas.ChatMessage'
+ model_id:
+ type: string
+ type: object
schemas.ProviderResponse:
properties:
message:
@@ -351,9 +358,9 @@ definitions:
type: string
type: object
tokenCount:
- $ref: '#/definitions/schemas.TokenCount'
+ $ref: '#/definitions/schemas.TokenUsage'
type: object
- schemas.TokenCount:
+ schemas.TokenUsage:
properties:
promptTokens:
type: number
@@ -370,6 +377,8 @@ definitions:
items:
$ref: '#/definitions/schemas.ChatMessage'
type: array
+ override:
+ $ref: '#/definitions/schemas.OverrideChatRequest'
type: object
schemas.UnifiedChatResponse:
properties:
@@ -390,18 +399,22 @@ definitions:
router:
type: string
type: object
+externalDocs:
+ description: Documentation
+ url: https://glide.einstack.ai/
host: localhost:9099
info:
contact:
- name: Glide Community
- url: https://github.com/modelgateway/glide
+ email: contact@einstack.ai
+ name: EinStack Community
+ url: https://github.com/EinStack/glide/
description: API documentation for Glide, an open-source lightweight high-performance
model gateway
license:
name: Apache 2.0
- url: https://github.com/modelgateway/glide/blob/develop/LICENSE
- title: Glide Gateway
- version: "1.0"
+ url: https://github.com/EinStack/glide/blob/develop/LICENSE
+ title: Glide
+ version: 0.0.1
paths:
/v1/health/:
get:
diff --git a/main.go b/main.go
index eaa16bdd..0173f5ff 100644
--- a/main.go
+++ b/main.go
@@ -6,15 +6,19 @@ import (
"glide/pkg/cmd"
)
-// @title Glide Gateway
-// @version 1.0
+// @title Glide
+// @version 0.0.1
// @description API documentation for Glide, an open-source lightweight high-performance model gateway
-// @contact.name Glide Community
-// @contact.url https://github.com/modelgateway/glide
+// @contact.name EinStack Community
+// @contact.url https://github.com/EinStack/glide/
+// @contact.email contact@einstack.ai
// @license.name Apache 2.0
-// @license.url https://github.com/modelgateway/glide/blob/develop/LICENSE
+// @license.url https://github.com/EinStack/glide/blob/develop/LICENSE
+
+// @externalDocs.description Documentation
+// @externalDocs.url https://glide.einstack.ai/
// @host localhost:9099
// @BasePath /
diff --git a/pkg/api/http/handlers.go b/pkg/api/http/handlers.go
index 9db2e5fc..e25bd6ca 100644
--- a/pkg/api/http/handlers.go
+++ b/pkg/api/http/handlers.go
@@ -33,10 +33,12 @@ type Handler = func(ctx context.Context, c *app.RequestContext)
// @Router /v1/language/{router}/chat [POST]
func LangChatHandler(routerManager *routers.RouterManager) Handler {
return func(ctx context.Context, c *app.RequestContext) {
+ // Unmarshal request body
var req *schemas.UnifiedChatRequest
err := json.Unmarshal(c.Request.Body(), &req)
if err != nil {
+ // Return bad request error
c.JSON(consts.StatusBadRequest, ErrorSchema{
Message: err.Error(),
})
@@ -44,8 +46,10 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler {
return
}
+ // Bind JSON to request
err = c.BindJSON(&req)
if err != nil {
+ // Return bad request error
c.JSON(consts.StatusBadRequest, ErrorSchema{
Message: err.Error(),
})
@@ -53,10 +57,12 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler {
return
}
+ // Get router ID from path
routerID := c.Param("router")
router, err := routerManager.GetLangRouter(routerID)
if errors.Is(err, routers.ErrRouterNotFound) {
+ // Return not found error
c.JSON(consts.StatusNotFound, ErrorSchema{
Message: err.Error(),
})
@@ -64,9 +70,10 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler {
return
}
+ // Chat with router
resp, err := router.Chat(ctx, req)
if err != nil {
- // TODO: do a better handling, not everything is going to be an internal error
+ // Return internal server error
c.JSON(consts.StatusInternalServerError, ErrorSchema{
Message: err.Error(),
})
@@ -74,6 +81,7 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler {
return
}
+ // Return chat response
c.JSON(consts.StatusOK, resp)
}
}
diff --git a/pkg/api/schemas/language.go b/pkg/api/schemas/language.go
index 068e0588..c06699c5 100644
--- a/pkg/api/schemas/language.go
+++ b/pkg/api/schemas/language.go
@@ -2,8 +2,14 @@ package schemas
// UnifiedChatRequest defines Glide's Chat Request Schema unified across all language models
type UnifiedChatRequest struct {
- Message ChatMessage `json:"message"`
- MessageHistory []ChatMessage `json:"messageHistory"`
+ Message ChatMessage `json:"message"`
+ MessageHistory []ChatMessage `json:"messageHistory"`
+ Override OverrideChatRequest `json:"override,omitempty"`
+}
+
+type OverrideChatRequest struct {
+ Model string `json:"model_id"`
+ Message ChatMessage `json:"message"`
}
func NewChatFromStr(message string) *UnifiedChatRequest {
@@ -33,10 +39,10 @@ type UnifiedChatResponse struct {
type ProviderResponse struct {
SystemID map[string]string `json:"responseId,omitempty"`
Message ChatMessage `json:"message"`
- TokenCount TokenCount `json:"tokenCount"`
+ TokenUsage TokenUsage `json:"tokenCount"`
}
-type TokenCount struct {
+type TokenUsage struct {
PromptTokens float64 `json:"promptTokens"`
ResponseTokens float64 `json:"responseTokens"`
TotalTokens float64 `json:"totalTokens"`
diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go
index 7a2cbcc0..ffa2b933 100644
--- a/pkg/cmd/cli.go
+++ b/pkg/cmd/cli.go
@@ -9,13 +9,31 @@ import (
var cfgFile string
+const Description = `
+ βββββββ βββ ββββββββββ ββββββββ
+ββββββββ βββ βββββββββββββββββββ
+βββ βββββββ ββββββ βββββββββ
+βββ ββββββ ββββββ βββββββββ
+ββββββββββββββββββββββββββββββββββββ
+ βββββββ ββββββββββββββββββ ββββββββ
+π¦An open-source, lightweight, high-performance model gateway
+to make your LLM applications production ready π
+
+πDocumentation: https://glide.einstack.ai
+π οΈSource: https://github.com/EinStack/glide
+π¬Discord: https://discord.gg/pt53Ej7rrc
+πBug Tracker: https://github.com/EinStack/glide/issues
+
+ποΈEinStack Community (mailto:contact@einstack.ai), 2024-Present (c)
+`
+
// NewCLI Create a Glide CLI
func NewCLI() *cobra.Command {
// TODO: Chances are we could use the build in flags module in this is all we need from CLI
cli := &cobra.Command{
Use: "glide",
Short: "π¦Glide is an open-source, lightweight, high-performance model gateway",
- Long: "TODO",
+ Long: Description,
Version: pkg.FullVersion,
RunE: func(cmd *cobra.Command, args []string) error {
configProvider, err := config.NewProvider().Load(cfgFile)
diff --git a/pkg/config/provider.go b/pkg/config/provider.go
index da0856d9..ccfb35e2 100644
--- a/pkg/config/provider.go
+++ b/pkg/config/provider.go
@@ -57,7 +57,6 @@ func (p *Provider) Load(configPath string) (*Provider, error) {
}
err = p.validator.Struct(cfg)
-
if err != nil {
return p, p.formatValidationError(configPath, err)
}
diff --git a/pkg/providers/anthropic/chat.go b/pkg/providers/anthropic/chat.go
index 11c742f0..b525bcb9 100644
--- a/pkg/providers/anthropic/chat.go
+++ b/pkg/providers/anthropic/chat.go
@@ -178,7 +178,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
Content: anthropicCompletion.Content[0].Text,
Name: "",
},
- TokenCount: schemas.TokenCount{
+ TokenUsage: schemas.TokenUsage{
PromptTokens: 0, // Anthropic doesn't send prompt tokens
ResponseTokens: 0,
TotalTokens: 0,
diff --git a/pkg/providers/anthropic/client_test.go b/pkg/providers/anthropic/client_test.go
index 7ffb0557..321d38ed 100644
--- a/pkg/providers/anthropic/client_test.go
+++ b/pkg/providers/anthropic/client_test.go
@@ -38,7 +38,6 @@ func TestAnthropicClient_ChatRequest(t *testing.T) {
w.Header().Set("Content-Type", "application/json")
_, err = w.Write(chatResponse)
-
if err != nil {
t.Errorf("error on sending chat response: %v", err)
}
diff --git a/pkg/providers/azureopenai/chat.go b/pkg/providers/azureopenai/chat.go
index 320c90ee..6fda0305 100644
--- a/pkg/providers/azureopenai/chat.go
+++ b/pkg/providers/azureopenai/chat.go
@@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
Content: openAICompletion.Choices[0].Message.Content,
Name: "",
},
- TokenCount: schemas.TokenCount{
+ TokenUsage: schemas.TokenUsage{
PromptTokens: openAICompletion.Usage.PromptTokens,
ResponseTokens: openAICompletion.Usage.CompletionTokens,
TotalTokens: openAICompletion.Usage.TotalTokens,
diff --git a/pkg/providers/azureopenai/client_test.go b/pkg/providers/azureopenai/client_test.go
index 5e96753b..b7800aec 100644
--- a/pkg/providers/azureopenai/client_test.go
+++ b/pkg/providers/azureopenai/client_test.go
@@ -38,7 +38,6 @@ func TestAzureOpenAIClient_ChatRequest(t *testing.T) {
w.Header().Set("Content-Type", "application/json")
_, err = w.Write(chatResponse)
-
if err != nil {
t.Errorf("error on sending chat response: %v", err)
}
diff --git a/pkg/providers/cohere/chat.go b/pkg/providers/cohere/chat.go
index ffcc017c..28712887 100644
--- a/pkg/providers/cohere/chat.go
+++ b/pkg/providers/cohere/chat.go
@@ -195,7 +195,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
Content: cohereCompletion.Text,
Name: "",
},
- TokenCount: schemas.TokenCount{
+ TokenUsage: schemas.TokenUsage{
PromptTokens: cohereCompletion.TokenCount.PromptTokens,
ResponseTokens: cohereCompletion.TokenCount.ResponseTokens,
TotalTokens: cohereCompletion.TokenCount.TotalTokens,
diff --git a/pkg/providers/cohere/client_test.go b/pkg/providers/cohere/client_test.go
index 5e49a3e0..c88fef4a 100644
--- a/pkg/providers/cohere/client_test.go
+++ b/pkg/providers/cohere/client_test.go
@@ -38,7 +38,6 @@ func TestCohereClient_ChatRequest(t *testing.T) {
w.Header().Set("Content-Type", "application/json")
_, err = w.Write(chatResponse)
-
if err != nil {
t.Errorf("error on sending chat response: %v", err)
}
diff --git a/pkg/providers/octoml/chat.go b/pkg/providers/octoml/chat.go
index 00ab6aa0..29ca6b7d 100644
--- a/pkg/providers/octoml/chat.go
+++ b/pkg/providers/octoml/chat.go
@@ -176,7 +176,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
Content: openAICompletion.Choices[0].Message.Content,
Name: "",
},
- TokenCount: schemas.TokenCount{
+ TokenUsage: schemas.TokenUsage{
PromptTokens: openAICompletion.Usage.PromptTokens,
ResponseTokens: openAICompletion.Usage.CompletionTokens,
TotalTokens: openAICompletion.Usage.TotalTokens,
diff --git a/pkg/providers/octoml/client_test.go b/pkg/providers/octoml/client_test.go
index a8f0d625..5e99e3f1 100644
--- a/pkg/providers/octoml/client_test.go
+++ b/pkg/providers/octoml/client_test.go
@@ -38,7 +38,6 @@ func TestOctoMLClient_ChatRequest(t *testing.T) {
w.Header().Set("Content-Type", "application/json")
_, err = w.Write(chatResponse)
-
if err != nil {
t.Errorf("error on sending chat response: %v", err)
}
diff --git a/pkg/providers/openai/chat.go b/pkg/providers/openai/chat.go
index f8a69525..c296c080 100644
--- a/pkg/providers/openai/chat.go
+++ b/pkg/providers/openai/chat.go
@@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche
Content: openAICompletion.Choices[0].Message.Content,
Name: "",
},
- TokenCount: schemas.TokenCount{
+ TokenUsage: schemas.TokenUsage{
PromptTokens: openAICompletion.Usage.PromptTokens,
ResponseTokens: openAICompletion.Usage.CompletionTokens,
TotalTokens: openAICompletion.Usage.TotalTokens,
diff --git a/pkg/providers/openai/client_test.go b/pkg/providers/openai/client_test.go
index d026298a..81c8f4df 100644
--- a/pkg/providers/openai/client_test.go
+++ b/pkg/providers/openai/client_test.go
@@ -38,7 +38,6 @@ func TestOpenAIClient_ChatRequest(t *testing.T) {
w.Header().Set("Content-Type", "application/json")
_, err = w.Write(chatResponse)
-
if err != nil {
t.Errorf("error on sending chat response: %v", err)
}
diff --git a/pkg/providers/provider.go b/pkg/providers/provider.go
index 11e89ae7..4a3774b2 100644
--- a/pkg/providers/provider.go
+++ b/pkg/providers/provider.go
@@ -79,14 +79,13 @@ func (m *LangModel) Weight() int {
}
func (m *LangModel) Chat(ctx context.Context, request *schemas.UnifiedChatRequest) (*schemas.UnifiedChatResponse, error) {
- // TODO: we may want to track time-to-first-byte to "normalize" response latency wrt response size
startedAt := time.Now()
resp, err := m.client.Chat(ctx, request)
- // Do we want to track latency in case of errors as well?
- m.latency.Add(float64(time.Since(startedAt)))
-
if err == nil {
+ // record latency per token to normalize measurements
+ m.latency.Add(float64(time.Since(startedAt)) / resp.ModelResponse.TokenUsage.ResponseTokens)
+
// successful response
resp.ModelID = m.modelID
diff --git a/pkg/routers/router.go b/pkg/routers/router.go
index 5b7747c4..c2149c7a 100644
--- a/pkg/routers/router.go
+++ b/pkg/routers/router.go
@@ -75,6 +75,14 @@ func (r *LangRouter) Chat(ctx context.Context, request *schemas.UnifiedChatReque
langModel := model.(providers.LanguageModel)
+ // Check if there is an override in the request
+ if request.Override != (schemas.OverrideChatRequest{}) {
+ // Override the message if the language model ID matches the override model ID
+ if langModel.ID() == request.Override.Model {
+ request.Message = request.Override.Message
+ }
+ }
+
resp, err := langModel.Chat(ctx, request)
if err != nil {
r.telemetry.Logger.Warn(