diff --git a/.goreleaser.yml b/.goreleaser.yml
index 20ee3db3..903cdba5 100644
--- a/.goreleaser.yml
+++ b/.goreleaser.yml
@@ -144,7 +144,7 @@ brews:
     #
     # Default depends on the client.
     # Templates: allowed
-    url_template: "https://github.mycompany.com/foo/bar/releases/download/{{ .Tag }}/{{ .ArtifactName }}"
+    url_template: "https://github.com/einstack/glide/releases/download/{{ .Tag }}/{{ .ArtifactName }}"
 
     # Allows you to set a custom download strategy. Note that you'll need
     # to implement the strategy and add it to your tap repository.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 550a2919..e596df47 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,21 @@ The changelog consists of three categories:
 - **Improvements** - bugfixes, performance and other types of improvements to existing functionality
 - **Miscellaneous** - all other updates like build, release, CLI, etc.
 
+## 0.0.1 (Jan 31st, 2024)
+
+### Features
+
+- ✨ #81: Allow overriding the chat message for specific models (@mkrueger12)
+
+### Improvements
+
+- πŸ”§ #78: Normalize response latency by response token count (@roma-glushko)
+- πŸ“ #112: Added the CLI banner info (@roma-glushko)
+
+### Miscellaneous
+
+- πŸ“ #114: Updated links across the project (@roma-glushko)
+
 ## 0.0.1-rc.2 (Jan 22nd, 2024)
 
 ### Improvements
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index a13ebf99..980d59bf 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -59,7 +59,7 @@ a project may be further defined and clarified by project maintainers.
 ## Enforcement
 
 Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at roman.glushko.m@gmail.com. All
+reported by contacting the project team at [contact@einstack.ai](mailto:contact@einstack.ai). All
 complaints will be reviewed and investigated and will result in a response that
 is deemed necessary and appropriate to the circumstances. The project team is
 obligated to maintain confidentiality with regard to the reporter of an incident.
diff --git a/README.md b/README.md
index f30bad23..7b7572ef 100644
--- a/README.md
+++ b/README.md
@@ -1,10 +1,16 @@
 # Glide: Cloud-Native LLM Gateway for Seamless LLMOps
[README header image markup omitted; alt text: "Glide GH Header"]
-[![LICENSE](https://img.shields.io/github/license/modelgateway/glide.svg?style=flat-square&color=%233f90c8)](https://github.com/modelgateway/glide/blob/main/LICENSE)
+[![codecov](https://codecov.io/github/EinStack/glide/graph/badge.svg?token=F7JT39RHX9)](https://codecov.io/github/EinStack/glide)
+[![Discord](https://img.shields.io/discord/1181281407813828710)](https://discord.gg/pt53Ej7rrc)
+[![Documentation](https://img.shields.io/badge/build-view-violet%20?style=flat&logo=books&label=docs&link=https%3A%2F%2Fglide.einstack.ai%2F)](https://glide.einstack.ai/)
+[![LICENSE](https://img.shields.io/github/license/EinStack/glide.svg?style=flat-square&color=%233f90c8)](https://github.com/EinStack/glide/blob/main/LICENSE)
+[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FEinStack%2Fglide.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2FEinStack%2Fglide?ref=badge_shield)
+
+---
 
 Glide is your go-to cloud-native LLM gateway, delivering high-performance LLMOps in a lightweight, all-in-one package.
@@ -14,9 +20,11 @@ so you can dive into tackling your core challenges.
 
 Glide sits between your application and model providers to seamlessly handle various LLMOps tasks like model failover, caching, key management, etc.
+
+
 Take a look at the develop branch.
 
-Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
+Check out our [documentation](https://glide.einstack.ai)!
 
 > [!Warning]
 > Glide is under active development right now. Give us a star to support the project ✨
@@ -28,7 +36,7 @@ Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
 - Support **popular LLM providers**.
 - **High performance**. Performance is our priority. We want to keep Glide "invisible" for your latency-wise, while providing rich functionality.
 - **Production-ready observability** via OpenTelemetry, emit metrics on models health, allows whitebox monitoring.
-- Straightforward and simple maintenance and configuration, centrilized API key control & management & rotation, etc.
+- Straightforward and simple maintenance and configuration, centralized API key control & management & rotation, etc.
 
 ## Supported Providers
 
@@ -48,7 +56,7 @@ Check out our [documentation](https://backlandlabs.mintlify.app/introduction)!
 
 Routers are a core functionality of Glide. Think of routers as a group of models with some predefined logic. For example, the resilience router allows a user to define a set of backup models should the initial model fail. Another example, would be to leverage the least-latency router to make latency sensitive LLM calls in the most efficient manner.
 
-Detailed info on routers can be found [here](https://backlandlabs.mintlify.app/essentials/routers).
+Detailed info on routers can be found [here](https://glide.einstack.ai/essentials/routers).
 
 #### Available Routers
 
@@ -62,43 +70,47 @@ Detailed info on routers can be found [here](https://backlandlabs.mintlify.app/e
 
 ## Get Started
 
-#### Install
+### Installation
 
-The easiest way to deploy Glide is to build from source.
+The easiest way to deploy Glide is to use our [demo repository](https://github.com/EinStack/glide-demo.git) and [docker-compose](https://docs.docker.com/compose/).
 
-Steps to build a container with Docker can be found [here](https://backlandlabs.mintlify.app/introduction#install-and-deploy).
+### 1. Clone the demo repository
+
+```bash
+git clone https://github.com/EinStack/glide-demo.git
+```
+
+### 2. Init Configs
+
+The demo repository comes with a basic config.
+Additionally, you need to init your secrets by running:
+
+```bash
+make init # from the demo root
+```
-#### Set Configuration File
+This will create the `secrets` directory with one `.OPENAI_API_KEY` file where you need to put your OpenAI API key.
 
-Find detailed information on configuration [here](https://backlandlabs.mintlify.app/essentials/configuration).
+### 3. Start Glide
 
-```yaml
-telemetry:
-  logging:
-    level: debug # debug, info, warn, error, fatal
-    encoding: console
+After that, use Docker Compose to start your demo environment:
-routers:
-  language:
-    - id: myrouter
-      models:
-        - id: openai
-          openai:
-            api_key: ""
+```bash
+make up
 ```
 
-#### Sample API Request to `/chat` endpoint
+### 4. Sample API Request to `/chat` endpoint
 
-See [API Reference](https://backlandlabs.mintlify.app/api-reference/introduction) for more details.
+See [API Reference](https://glide.einstack.ai/api-reference/introduction) for more details. The `model` field below is not required, but it can be used to send different prompts to specific models.
 
 ```json
 {
+  "model": "gpt-3.5-turbo",
   "message": {
     "role": "user",
     "content": "Where was it played?"
   },
-  "messageHistory": [
+  "messageHistory": [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Who won the world series in 2020?"},
     {"role": "assistant", "content": "The Los Angeles Dodgers won the World Series in 2020."}
@@ -108,13 +120,60 @@ See [API Reference](https://backlandlabs.mintlify.app/api-reference/introduction
 
 ### API Docs
 
-Once deployed, Glide comes with OpenAPI documentation that is accessible via http://127.0.0.1:9099/v1/swagger/index.html
+Finally, Glide comes with OpenAPI documentation that is accessible via http://127.0.0.1:9099/v1/swagger/index.html
+
+That's it πŸ™Œ
+
+Use [our documentation](https://glide.einstack.ai) to further learn about Glide capabilities and configs.
+
+---
+
+Other ways to install Glide are available:
+
+### Homebrew (MacOS)
+
+```bash
+brew tap einstack/tap
+brew install einstack/tap/glide
+```
+
+### Snapcraft (Linux)
+
+Coming Soon
+
+### Docker Images
+
+Glide provides official images in our [GHCR](https://github.com/EinStack/glide/pkgs/container/glide):
+
+- Alpine 3.19:
+```bash
+docker pull ghcr.io/einstack/glide:latest-alpine
+```
+
+- Ubuntu 22.04 LTS:
+```bash
+docker pull ghcr.io/einstack/glide:latest-ubuntu
+```
+
+- Google Distroless (non-root):
+```bash
+docker pull ghcr.io/einstack/glide:latest-distroless
+```
+
+- RedHat UBI 8.9 Micro:
+```bash
+docker pull ghcr.io/einstack/glide:latest-redhat
+```
+
+### Helm Chart
+
+Coming Soon
 
 ## Community
 
 - Join [Discord](https://discord.gg/pt53Ej7rrc) for real-time discussion
 
-Open [an issue](https://github.com/modelgateway/glide/issues) or start [a discussion](https://github.com/modelgateway/glide/discussions)
+Open [an issue](https://github.com/EinStack/glide/issues) or start [a discussion](https://github.com/EinStack/glide/discussions)
 if there is a feature or an enhancement you'd like to see in Glide.
 
 ## Contribute
 
@@ -126,6 +185,12 @@ if there is a feature or an enhancement you'd like to see in Glide.
Thanks everyone for already put their effort to make Glide better and more feature-rich: - + + +## License + +Apache 2.0 + +[![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2FEinStack%2Fglide.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2FEinStack%2Fglide?ref=badge_large) diff --git a/ROADMAP.md b/ROADMAP.md index a6e7eecf..facda76a 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -12,9 +12,9 @@ This document describes the current status and the upcoming milestones of the Gl | :---: | :--- | :---: | | 🍏 | **Unified Chat Endpoint Support** | 4 / 4 | | 🍏 | **Fallback Routing Strategy** | 1 / 1 | -| 🍏 | **Priority, Round Robin, Weighted Round Robin, Least Latency** | 2 / 4 | -| 🍎 | **Documentation** | 1 / 1 | -| 🍎 | **Private Preview** | 4 / 5 | +| 🍏 | **Priority, Round Robin, Weighted Round Robin, Least Latency** | 4 / 4 | +| 🍏 | **Documentation** | 1 / 1 | +| 🍎 | **Private Preview** | 4.5 / 5 | | 🍎 | **Streaming Support** | 0 / 4 | | 🍎 | **Embedding Support** | 0 / 4 | | 🍎 | **Caching** | 0 / 1 | diff --git a/SECURITY.md b/SECURITY.md index 05beb8da..ba0fb400 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -3,4 +3,4 @@ We want to keep Glide safe for everyone. If you've discovered a security vulnerability in Glide, -we appreciate your help in disclosing it to us in a responsible manner, using this email: roman.glushko.m@gmail.com +we appreciate your help in disclosing it to us in a responsible manner, using this email: [contact@einstack.ai](mailto:contact@einstack.ai) diff --git a/docs/docs.go b/docs/docs.go index f5a9cc48..5ede09c8 100644 --- a/docs/docs.go +++ b/docs/docs.go @@ -10,12 +10,13 @@ const docTemplate = `{ "description": "{{escape .Description}}", "title": "{{.Title}}", "contact": { - "name": "Glide Community", - "url": "https://github.com/modelgateway/glide" + "name": "EinStack Community", + "url": "https://github.com/EinStack/glide/", + "email": "contact@einstack.ai" }, "license": { "name": "Apache 2.0", - "url": "https://github.com/modelgateway/glide/blob/develop/LICENSE" + "url": "https://github.com/EinStack/glide/blob/develop/LICENSE" }, "version": "{{.Version}}" }, @@ -626,6 +627,17 @@ const docTemplate = `{ } } }, + "schemas.OverrideChatRequest": { + "type": "object", + "properties": { + "message": { + "$ref": "#/definitions/schemas.ChatMessage" + }, + "model_id": { + "type": "string" + } + } + }, "schemas.ProviderResponse": { "type": "object", "properties": { @@ -639,11 +651,11 @@ const docTemplate = `{ } }, "tokenCount": { - "$ref": "#/definitions/schemas.TokenCount" + "$ref": "#/definitions/schemas.TokenUsage" } } }, - "schemas.TokenCount": { + "schemas.TokenUsage": { "type": "object", "properties": { "promptTokens": { @@ -668,6 +680,9 @@ const docTemplate = `{ "items": { "$ref": "#/definitions/schemas.ChatMessage" } + }, + "override": { + "$ref": "#/definitions/schemas.OverrideChatRequest" } } }, @@ -700,16 +715,20 @@ const docTemplate = `{ } } } + }, + "externalDocs": { + "description": "Documentation", + "url": "https://glide.einstack.ai/" } }` // SwaggerInfo holds exported Swagger Info so clients can modify it var SwaggerInfo = &swag.Spec{ - Version: "1.0", + Version: "0.0.1", Host: "localhost:9099", BasePath: "/", Schemes: []string{"http"}, - Title: "Glide Gateway", + Title: "Glide", Description: "API documentation for Glide, an open-source lightweight high-performance model gateway", InfoInstanceName: "swagger", SwaggerTemplate: docTemplate, diff --git a/docs/images/marketecture.svg b/docs/images/marketecture.svg new file mode 
100644
index 00000000..9928b068
--- /dev/null
+++ b/docs/images/marketecture.svg
@@ -0,0 +1,311 @@
[311 lines of added SVG markup omitted]
diff --git a/docs/logo/glide.png b/docs/logo/glide.png
new file mode 100644
index 00000000..48eb15a8
Binary files /dev/null and b/docs/logo/glide.png differ
diff --git a/docs/logo/glide_bird_in_circle.png b/docs/logo/glide_bird_in_circle.png
new file mode 100644
index 00000000..1e8ffaad
Binary files /dev/null and b/docs/logo/glide_bird_in_circle.png differ
diff --git a/docs/logo/glide_bird_title.png b/docs/logo/glide_bird_title.png
new file mode 100644
index 00000000..0b152207
Binary files /dev/null and b/docs/logo/glide_bird_title.png differ
diff --git a/docs/logo/glide_no_bgd.png b/docs/logo/glide_no_bgd.png
new file mode 100644
index 00000000..2563bf54
Binary files /dev/null and b/docs/logo/glide_no_bgd.png differ
diff --git a/docs/logo/glide_title.png b/docs/logo/glide_title.png
new file mode 100644
index 00000000..9f8e4af7
Binary files /dev/null and b/docs/logo/glide_title.png differ
diff --git a/docs/logo/glide_title_underscore.png b/docs/logo/glide_title_underscore.png
new file mode 100644
index 00000000..1412a60b
Binary files /dev/null and b/docs/logo/glide_title_underscore.png differ
diff --git a/docs/swagger.json b/docs/swagger.json
index c6bf90fd..8146a113 100644
--- a/docs/swagger.json
+++ b/docs/swagger.json
@@ -5,16 +5,17 @@
     "swagger": "2.0",
     "info": {
         "description": "API documentation for Glide, an open-source lightweight high-performance model gateway",
-        "title": "Glide Gateway",
+        "title": "Glide",
         "contact": {
-            "name": "Glide Community",
-            "url": "https://github.com/modelgateway/glide"
+            "name": "EinStack Community",
+            "url": "https://github.com/EinStack/glide/",
+            "email": "contact@einstack.ai"
         },
         "license": {
             "name": "Apache 2.0",
-            "url": "https://github.com/modelgateway/glide/blob/develop/LICENSE"
+            "url": "https://github.com/EinStack/glide/blob/develop/LICENSE"
         },
-        "version": "1.0"
+        "version": "0.0.1"
     },
     "host": "localhost:9099",
     "basePath": "/",
@@ -623,6 +624,17 @@
             }
         }
     },
+    "schemas.OverrideChatRequest": {
+        "type": "object",
+        "properties": {
+            "message": {
+                "$ref": "#/definitions/schemas.ChatMessage"
+            },
+            "model_id": {
+                "type": "string"
+            }
+        }
+    },
     "schemas.ProviderResponse": {
         "type": "object",
         "properties": {
@@ -636,11 +648,11 @@
             }
         },
         "tokenCount": {
-            "$ref": "#/definitions/schemas.TokenCount"
+            "$ref": "#/definitions/schemas.TokenUsage"
         }
     }
 },
-    "schemas.TokenCount": {
+    "schemas.TokenUsage": {
         "type": "object",
         "properties": {
             "promptTokens": {
@@ -665,6 +677,9 @@
         "items": {
             "$ref": "#/definitions/schemas.ChatMessage"
         }
+        },
+        "override": {
+            "$ref": "#/definitions/schemas.OverrideChatRequest"
+        }
     }
 },
@@ -697,5 +712,9 @@
             }
         }
     }
+    },
+    "externalDocs": {
+        "description": "Documentation",
+        "url": "https://glide.einstack.ai/"
+    }
 }
\ No newline at end of file
diff --git a/docs/swagger.yaml b/docs/swagger.yaml
index 0ff8a476..c0b25776 100644
---
a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -342,6 +342,13 @@ definitions: or assistant. type: string type: object + schemas.OverrideChatRequest: + properties: + message: + $ref: '#/definitions/schemas.ChatMessage' + model_id: + type: string + type: object schemas.ProviderResponse: properties: message: @@ -351,9 +358,9 @@ definitions: type: string type: object tokenCount: - $ref: '#/definitions/schemas.TokenCount' + $ref: '#/definitions/schemas.TokenUsage' type: object - schemas.TokenCount: + schemas.TokenUsage: properties: promptTokens: type: number @@ -370,6 +377,8 @@ definitions: items: $ref: '#/definitions/schemas.ChatMessage' type: array + override: + $ref: '#/definitions/schemas.OverrideChatRequest' type: object schemas.UnifiedChatResponse: properties: @@ -390,18 +399,22 @@ definitions: router: type: string type: object +externalDocs: + description: Documentation + url: https://glide.einstack.ai/ host: localhost:9099 info: contact: - name: Glide Community - url: https://github.com/modelgateway/glide + email: contact@einstack.ai + name: EinStack Community + url: https://github.com/EinStack/glide/ description: API documentation for Glide, an open-source lightweight high-performance model gateway license: name: Apache 2.0 - url: https://github.com/modelgateway/glide/blob/develop/LICENSE - title: Glide Gateway - version: "1.0" + url: https://github.com/EinStack/glide/blob/develop/LICENSE + title: Glide + version: 0.0.1 paths: /v1/health/: get: diff --git a/main.go b/main.go index eaa16bdd..0173f5ff 100644 --- a/main.go +++ b/main.go @@ -6,15 +6,19 @@ import ( "glide/pkg/cmd" ) -// @title Glide Gateway -// @version 1.0 +// @title Glide +// @version 0.0.1 // @description API documentation for Glide, an open-source lightweight high-performance model gateway -// @contact.name Glide Community -// @contact.url https://github.com/modelgateway/glide +// @contact.name EinStack Community +// @contact.url https://github.com/EinStack/glide/ +// @contact.email contact@einstack.ai // @license.name Apache 2.0 -// @license.url https://github.com/modelgateway/glide/blob/develop/LICENSE +// @license.url https://github.com/EinStack/glide/blob/develop/LICENSE + +// @externalDocs.description Documentation +// @externalDocs.url https://glide.einstack.ai/ // @host localhost:9099 // @BasePath / diff --git a/pkg/api/http/handlers.go b/pkg/api/http/handlers.go index 9db2e5fc..e25bd6ca 100644 --- a/pkg/api/http/handlers.go +++ b/pkg/api/http/handlers.go @@ -33,10 +33,12 @@ type Handler = func(ctx context.Context, c *app.RequestContext) // @Router /v1/language/{router}/chat [POST] func LangChatHandler(routerManager *routers.RouterManager) Handler { return func(ctx context.Context, c *app.RequestContext) { + // Unmarshal request body var req *schemas.UnifiedChatRequest err := json.Unmarshal(c.Request.Body(), &req) if err != nil { + // Return bad request error c.JSON(consts.StatusBadRequest, ErrorSchema{ Message: err.Error(), }) @@ -44,8 +46,10 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler { return } + // Bind JSON to request err = c.BindJSON(&req) if err != nil { + // Return bad request error c.JSON(consts.StatusBadRequest, ErrorSchema{ Message: err.Error(), }) @@ -53,10 +57,12 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler { return } + // Get router ID from path routerID := c.Param("router") router, err := routerManager.GetLangRouter(routerID) if errors.Is(err, routers.ErrRouterNotFound) { + // Return not found error c.JSON(consts.StatusNotFound, ErrorSchema{ 
Message: err.Error(), }) @@ -64,9 +70,10 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler { return } + // Chat with router resp, err := router.Chat(ctx, req) if err != nil { - // TODO: do a better handling, not everything is going to be an internal error + // Return internal server error c.JSON(consts.StatusInternalServerError, ErrorSchema{ Message: err.Error(), }) @@ -74,6 +81,7 @@ func LangChatHandler(routerManager *routers.RouterManager) Handler { return } + // Return chat response c.JSON(consts.StatusOK, resp) } } diff --git a/pkg/api/schemas/language.go b/pkg/api/schemas/language.go index 068e0588..c06699c5 100644 --- a/pkg/api/schemas/language.go +++ b/pkg/api/schemas/language.go @@ -2,8 +2,14 @@ package schemas // UnifiedChatRequest defines Glide's Chat Request Schema unified across all language models type UnifiedChatRequest struct { - Message ChatMessage `json:"message"` - MessageHistory []ChatMessage `json:"messageHistory"` + Message ChatMessage `json:"message"` + MessageHistory []ChatMessage `json:"messageHistory"` + Override OverrideChatRequest `json:"override,omitempty"` +} + +type OverrideChatRequest struct { + Model string `json:"model_id"` + Message ChatMessage `json:"message"` } func NewChatFromStr(message string) *UnifiedChatRequest { @@ -33,10 +39,10 @@ type UnifiedChatResponse struct { type ProviderResponse struct { SystemID map[string]string `json:"responseId,omitempty"` Message ChatMessage `json:"message"` - TokenCount TokenCount `json:"tokenCount"` + TokenUsage TokenUsage `json:"tokenCount"` } -type TokenCount struct { +type TokenUsage struct { PromptTokens float64 `json:"promptTokens"` ResponseTokens float64 `json:"responseTokens"` TotalTokens float64 `json:"totalTokens"` diff --git a/pkg/cmd/cli.go b/pkg/cmd/cli.go index 7a2cbcc0..ffa2b933 100644 --- a/pkg/cmd/cli.go +++ b/pkg/cmd/cli.go @@ -9,13 +9,31 @@ import ( var cfgFile string +const Description = ` + β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ•— β–ˆβ–ˆβ•—β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— +β–ˆβ–ˆβ•”β•β•β•β•β• β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β–ˆβ–ˆβ•—β–ˆβ–ˆβ•”β•β•β•β•β• +β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— +β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•‘ β–ˆβ–ˆβ•‘β–ˆβ–ˆβ•”β•β•β• +β•šβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•—β–ˆβ–ˆβ•‘β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•”β•β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ•— + β•šβ•β•β•β•β•β• β•šβ•β•β•β•β•β•β•β•šβ•β•β•šβ•β•β•β•β•β• β•šβ•β•β•β•β•β•β• +🐦An open-source, lightweight, high-performance model gateway +to make your LLM applications production ready πŸŽ‰ + +πŸ“šDocumentation: https://glide.einstack.ai +πŸ› οΈSource: https://github.com/EinStack/glide +πŸ’¬Discord: https://discord.gg/pt53Ej7rrc +πŸ›Bug Tracker: https://github.com/EinStack/glide/issues + +πŸ—οΈEinStack Community (mailto:contact@einstack.ai), 2024-Present (c) +` + // NewCLI Create a Glide CLI func NewCLI() *cobra.Command { // TODO: Chances are we could use the build in flags module in this is all we need from CLI cli := &cobra.Command{ Use: "glide", Short: "🐦Glide is an open-source, lightweight, high-performance model gateway", - Long: "TODO", + Long: Description, Version: pkg.FullVersion, RunE: func(cmd *cobra.Command, args []string) error { configProvider, err := config.NewProvider().Load(cfgFile) diff --git a/pkg/config/provider.go b/pkg/config/provider.go index da0856d9..ccfb35e2 100644 --- a/pkg/config/provider.go +++ b/pkg/config/provider.go @@ -57,7 +57,6 @@ func (p *Provider) Load(configPath string) (*Provider, 
error) { } err = p.validator.Struct(cfg) - if err != nil { return p, p.formatValidationError(configPath, err) } diff --git a/pkg/providers/anthropic/chat.go b/pkg/providers/anthropic/chat.go index 11c742f0..b525bcb9 100644 --- a/pkg/providers/anthropic/chat.go +++ b/pkg/providers/anthropic/chat.go @@ -178,7 +178,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche Content: anthropicCompletion.Content[0].Text, Name: "", }, - TokenCount: schemas.TokenCount{ + TokenUsage: schemas.TokenUsage{ PromptTokens: 0, // Anthropic doesn't send prompt tokens ResponseTokens: 0, TotalTokens: 0, diff --git a/pkg/providers/anthropic/client_test.go b/pkg/providers/anthropic/client_test.go index 7ffb0557..321d38ed 100644 --- a/pkg/providers/anthropic/client_test.go +++ b/pkg/providers/anthropic/client_test.go @@ -38,7 +38,6 @@ func TestAnthropicClient_ChatRequest(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, err = w.Write(chatResponse) - if err != nil { t.Errorf("error on sending chat response: %v", err) } diff --git a/pkg/providers/azureopenai/chat.go b/pkg/providers/azureopenai/chat.go index 320c90ee..6fda0305 100644 --- a/pkg/providers/azureopenai/chat.go +++ b/pkg/providers/azureopenai/chat.go @@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche Content: openAICompletion.Choices[0].Message.Content, Name: "", }, - TokenCount: schemas.TokenCount{ + TokenUsage: schemas.TokenUsage{ PromptTokens: openAICompletion.Usage.PromptTokens, ResponseTokens: openAICompletion.Usage.CompletionTokens, TotalTokens: openAICompletion.Usage.TotalTokens, diff --git a/pkg/providers/azureopenai/client_test.go b/pkg/providers/azureopenai/client_test.go index 5e96753b..b7800aec 100644 --- a/pkg/providers/azureopenai/client_test.go +++ b/pkg/providers/azureopenai/client_test.go @@ -38,7 +38,6 @@ func TestAzureOpenAIClient_ChatRequest(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, err = w.Write(chatResponse) - if err != nil { t.Errorf("error on sending chat response: %v", err) } diff --git a/pkg/providers/cohere/chat.go b/pkg/providers/cohere/chat.go index ffcc017c..28712887 100644 --- a/pkg/providers/cohere/chat.go +++ b/pkg/providers/cohere/chat.go @@ -195,7 +195,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche Content: cohereCompletion.Text, Name: "", }, - TokenCount: schemas.TokenCount{ + TokenUsage: schemas.TokenUsage{ PromptTokens: cohereCompletion.TokenCount.PromptTokens, ResponseTokens: cohereCompletion.TokenCount.ResponseTokens, TotalTokens: cohereCompletion.TokenCount.TotalTokens, diff --git a/pkg/providers/cohere/client_test.go b/pkg/providers/cohere/client_test.go index 5e49a3e0..c88fef4a 100644 --- a/pkg/providers/cohere/client_test.go +++ b/pkg/providers/cohere/client_test.go @@ -38,7 +38,6 @@ func TestCohereClient_ChatRequest(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, err = w.Write(chatResponse) - if err != nil { t.Errorf("error on sending chat response: %v", err) } diff --git a/pkg/providers/octoml/chat.go b/pkg/providers/octoml/chat.go index 00ab6aa0..29ca6b7d 100644 --- a/pkg/providers/octoml/chat.go +++ b/pkg/providers/octoml/chat.go @@ -176,7 +176,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche Content: openAICompletion.Choices[0].Message.Content, Name: "", }, - TokenCount: schemas.TokenCount{ + TokenUsage: schemas.TokenUsage{ PromptTokens: openAICompletion.Usage.PromptTokens, 
ResponseTokens: openAICompletion.Usage.CompletionTokens, TotalTokens: openAICompletion.Usage.TotalTokens, diff --git a/pkg/providers/octoml/client_test.go b/pkg/providers/octoml/client_test.go index a8f0d625..5e99e3f1 100644 --- a/pkg/providers/octoml/client_test.go +++ b/pkg/providers/octoml/client_test.go @@ -38,7 +38,6 @@ func TestOctoMLClient_ChatRequest(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, err = w.Write(chatResponse) - if err != nil { t.Errorf("error on sending chat response: %v", err) } diff --git a/pkg/providers/openai/chat.go b/pkg/providers/openai/chat.go index f8a69525..c296c080 100644 --- a/pkg/providers/openai/chat.go +++ b/pkg/providers/openai/chat.go @@ -190,7 +190,7 @@ func (c *Client) doChatRequest(ctx context.Context, payload *ChatRequest) (*sche Content: openAICompletion.Choices[0].Message.Content, Name: "", }, - TokenCount: schemas.TokenCount{ + TokenUsage: schemas.TokenUsage{ PromptTokens: openAICompletion.Usage.PromptTokens, ResponseTokens: openAICompletion.Usage.CompletionTokens, TotalTokens: openAICompletion.Usage.TotalTokens, diff --git a/pkg/providers/openai/client_test.go b/pkg/providers/openai/client_test.go index d026298a..81c8f4df 100644 --- a/pkg/providers/openai/client_test.go +++ b/pkg/providers/openai/client_test.go @@ -38,7 +38,6 @@ func TestOpenAIClient_ChatRequest(t *testing.T) { w.Header().Set("Content-Type", "application/json") _, err = w.Write(chatResponse) - if err != nil { t.Errorf("error on sending chat response: %v", err) } diff --git a/pkg/providers/provider.go b/pkg/providers/provider.go index 11e89ae7..4a3774b2 100644 --- a/pkg/providers/provider.go +++ b/pkg/providers/provider.go @@ -79,14 +79,13 @@ func (m *LangModel) Weight() int { } func (m *LangModel) Chat(ctx context.Context, request *schemas.UnifiedChatRequest) (*schemas.UnifiedChatResponse, error) { - // TODO: we may want to track time-to-first-byte to "normalize" response latency wrt response size startedAt := time.Now() resp, err := m.client.Chat(ctx, request) - // Do we want to track latency in case of errors as well? - m.latency.Add(float64(time.Since(startedAt))) - if err == nil { + // record latency per token to normalize measurements + m.latency.Add(float64(time.Since(startedAt)) / resp.ModelResponse.TokenUsage.ResponseTokens) + // successful response resp.ModelID = m.modelID diff --git a/pkg/routers/router.go b/pkg/routers/router.go index 5b7747c4..c2149c7a 100644 --- a/pkg/routers/router.go +++ b/pkg/routers/router.go @@ -75,6 +75,14 @@ func (r *LangRouter) Chat(ctx context.Context, request *schemas.UnifiedChatReque langModel := model.(providers.LanguageModel) + // Check if there is an override in the request + if request.Override != (schemas.OverrideChatRequest{}) { + // Override the message if the language model ID matches the override model ID + if langModel.ID() == request.Override.Model { + request.Message = request.Override.Message + } + } + resp, err := langModel.Chat(ctx, request) if err != nil { r.telemetry.Logger.Warn(