-
Notifications
You must be signed in to change notification settings - Fork 181
Feat: add Google AI embedding provider support for vector db #107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,92 @@ | ||||||||||||||
| import { GoogleGenerativeAI } from '@google/generative-ai'; | ||||||||||||||
| import { BaseEmbedding, TEmbeddings } from './BaseEmbedding'; | ||||||||||||||
| import { AccessCandidate } from '@sre/Security/AccessControl/AccessCandidate.class'; | ||||||||||||||
| import { getLLMCredentials } from '@sre/LLMManager/LLM.service/LLMCredentials.helper'; | ||||||||||||||
| import { TLLMCredentials, TLLMModel, BasicCredentials } from '@sre/types/LLM.types'; | ||||||||||||||
|
|
||||||||||||||
| const DEFAULT_MODEL = 'gemini-embedding-001'; | ||||||||||||||
|
|
||||||||||||||
| export class GoogleEmbeds extends BaseEmbedding { | ||||||||||||||
| protected client: GoogleGenerativeAI; | ||||||||||||||
|
|
||||||||||||||
| public static models = ['gemini-embedding-001']; | ||||||||||||||
| public canSpecifyDimensions = true; | ||||||||||||||
|
|
||||||||||||||
|
Comment on lines
+12
to
+14
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
💡 Verification agent
🧩 Analysis chain
Dimensions flag: verify end-to-end behavior
🏁 Script executed:
#!/bin/bash
# Where are embedding dimensions enforced?
rg -n -C2 -e 'dimensions?|dimensionality|vector.?size|numDimensions' packages/core/src
Length of output: 13157
🏁 Script executed:
#!/bin/bash
# Show GoogleEmbedding class and its methods
rg -nP --type=ts 'class GoogleEmbedding' -C5 packages/core/src/subsystems/IO/VectorDB.service/embed
Length of output: 101
🏁 Script executed:
#!/bin/bash
# List files in embed directory to locate GoogleEmbedding file
ls -1 packages/core/src/subsystems/IO/VectorDB.service/embed
Length of output: 126
🏁 Script executed:
#!/bin/bash
# Display the contents of GoogleEmbedding.ts to inspect dimension handling
sed -n '1,200p' packages/core/src/subsystems/IO/VectorDB.service/embed/GoogleEmbedding.ts
Length of output: 3614
🏁 Script executed:
#!/bin/bash
sed -n '1,200p' packages/core/src/subsystems/IO/VectorDB.service/embed/BaseEmbedding.ts
Length of output: 3912
Disable dimension override in GoogleEmbedding
🤖 Prompt for AI Agents |
||||||||||||||
| constructor(private settings?: Partial<TEmbeddings>) { | ||||||||||||||
| super({ model: settings?.model ?? DEFAULT_MODEL, ...settings }); | ||||||||||||||
| } | ||||||||||||||
|
Comment on lines
+15
to
+17
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Default model can be unintentionally overridden to undefined
Spreading `settings` after `model` lets a `settings` object whose `model` key is present but `undefined` overwrite the default. Spread `settings` first and set `model` last:
- constructor(private settings?: Partial<TEmbeddings>) {
- super({ model: settings?.model ?? DEFAULT_MODEL, ...settings });
- }
+ constructor(private settings?: Partial<TEmbeddings>) {
+ super({ ...settings, model: settings?.model ?? DEFAULT_MODEL });
+ }
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents |
||||||||||||||
|
|
||||||||||||||
| async embedTexts(texts: string[], candidate: AccessCandidate): Promise<number[][]> { | ||||||||||||||
| const batches = this.chunkArr(this.processTexts(texts), this.chunkSize); | ||||||||||||||
|
|
||||||||||||||
| const batchRequests = batches.map((batch) => { | ||||||||||||||
| return this.embed(batch, candidate); | ||||||||||||||
| }); | ||||||||||||||
| const batchResponses = await Promise.all(batchRequests); | ||||||||||||||
|
|
||||||||||||||
| const embeddings: number[][] = []; | ||||||||||||||
| for (let i = 0; i < batchResponses.length; i += 1) { | ||||||||||||||
| const batch = batches[i]; | ||||||||||||||
| const batchResponse = batchResponses[i]; | ||||||||||||||
| for (let j = 0; j < batch.length; j += 1) { | ||||||||||||||
| embeddings.push(batchResponse[j]); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| return embeddings; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| async embedText(text: string, candidate: AccessCandidate): Promise<number[]> { | ||||||||||||||
| const processedText = this.processTexts([text])[0]; | ||||||||||||||
| const embeddings = await this.embed([processedText], candidate); | ||||||||||||||
| return embeddings[0]; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| protected async embed(texts: string[], candidate: AccessCandidate): Promise<number[][]> { | ||||||||||||||
| let apiKey: string | undefined; | ||||||||||||||
|
|
||||||||||||||
| // Try to get from credentials first | ||||||||||||||
| try { | ||||||||||||||
| const modelInfo: TLLMModel = { | ||||||||||||||
| provider: 'GoogleAI', | ||||||||||||||
| modelId: this.model, | ||||||||||||||
| credentials: this.settings?.credentials as unknown as TLLMCredentials, | ||||||||||||||
| }; | ||||||||||||||
| const credentials = await getLLMCredentials(candidate, modelInfo); | ||||||||||||||
| apiKey = (credentials as BasicCredentials)?.apiKey; | ||||||||||||||
| } catch (e) { | ||||||||||||||
| // If credential system fails, fall back to environment variable | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| // Fall back to environment variable if not found in credentials | ||||||||||||||
| if (!apiKey) { | ||||||||||||||
| apiKey = process.env.GOOGLE_AI_API_KEY; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (!apiKey) { | ||||||||||||||
| throw new Error('Please provide an API key for Google AI embeddings via credentials or GOOGLE_AI_API_KEY environment variable'); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| if (!this.client) { | ||||||||||||||
| this.client = new GoogleGenerativeAI(apiKey); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| try { | ||||||||||||||
| const model = this.client.getGenerativeModel({ model: this.model }); | ||||||||||||||
|
|
||||||||||||||
| const embeddings: number[][] = []; | ||||||||||||||
|
|
||||||||||||||
| for (const text of texts) { | ||||||||||||||
| const result = await model.embedContent(text); | ||||||||||||||
| if (result?.embedding?.values) { | ||||||||||||||
| embeddings.push(result.embedding.values); | ||||||||||||||
| } else { | ||||||||||||||
| throw new Error('Invalid embedding response from Google AI'); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| return embeddings; | ||||||||||||||
| } catch (e) { | ||||||||||||||
| throw new Error(`Google Embeddings API error: ${e.message || e}`); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🛠️ Refactor suggestion
Initialize client field safely
Mark `client` as optional to satisfy strict init checks and match the lazy init pattern used below.
📝 Committable suggestion
🤖 Prompt for AI Agents