-
Notifications
You must be signed in to change notification settings - Fork 0
Add DummySearchProvider Fallback and Settings Improvements #27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
56c90a2
6a0e924
d7cc4d3
16a8693
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
authormaton/ | ||
experimentalCode/.env | ||
.env | ||
|
||
# Ignore Python cache | ||
|
||
__pycache__/ | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -47,6 +47,27 @@ def upsert_vectors(self, vectors: List[List[float]], ids: List[str]): | |||||||||||||||||||||
if not self.index: | ||||||||||||||||||||||
raise RuntimeError("Index is not initialized. Call create_index first.") | ||||||||||||||||||||||
self.index.upsert(vectors=[(id, vec) for id, vec in zip(ids, vectors)]) | ||||||||||||||||||||||
|
||||||||||||||||||||||
def upsert(self, namespace, ids, vectors, metadata=None): | ||||||||||||||||||||||
""" | ||||||||||||||||||||||
Upsert vectors into the index, ensuring index is created and metadata is validated. | ||||||||||||||||||||||
""" | ||||||||||||||||||||||
if self.index is None: | ||||||||||||||||||||||
self.create_index() | ||||||||||||||||||||||
if not (len(ids) == len(vectors)): | ||||||||||||||||||||||
Comment on lines
+55
to
+57
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Guard against a missing embedding dimension before auto-creating the index. Without a configured dimension the index cannot be created reliably, so fail fast with a clear error instead:
- if self.index is None:
- self.create_index()
+ if self.index is None:
+ if self.dimension is None:
+ raise RuntimeError(
+ "embedding dimension is not configured; set settings.embedding_dimension or pass dimension."
+ )
+ self.create_index()
📝 Committable suggestion
Suggested change
🤖 Prompt for AI Agents
|
||||||||||||||||||||||
raise ValueError("ids and vectors must have the same length") | ||||||||||||||||||||||
if metadata is not None and len(metadata) != len(ids): | ||||||||||||||||||||||
raise ValueError("metadata length must match ids/vectors length") | ||||||||||||||||||||||
items = [] | ||||||||||||||||||||||
for i, (id_, vector) in enumerate(zip(ids, vectors)): | ||||||||||||||||||||||
item = { | ||||||||||||||||||||||
"id": id_, | ||||||||||||||||||||||
"values": vector | ||||||||||||||||||||||
} | ||||||||||||||||||||||
if metadata is not None: | ||||||||||||||||||||||
item["metadata"] = metadata[i] | ||||||||||||||||||||||
items.append(item) | ||||||||||||||||||||||
self.index.upsert(vectors=items, namespace=namespace) | ||||||||||||||||||||||
Comment on lines
+57
to
+70
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Add dimension checks, vector normalization, and metadata type validation, for parity with the existing upsert path (presumably `upsert_vectors`; the inline reference was lost in extraction):
- if not (len(ids) == len(vectors)):
+ if len(ids) != len(vectors):
raise ValueError("ids and vectors must have the same length")
- if metadata is not None and len(metadata) != len(ids):
- raise ValueError("metadata length must match ids/vectors length")
- items = []
- for i, (id_, vector) in enumerate(zip(ids, vectors)):
- item = {
- "id": id_,
- "values": vector
- }
- if metadata is not None:
- item["metadata"] = metadata[i]
- items.append(item)
- self.index.upsert(vectors=items, namespace=namespace)
+ if metadata is not None and len(metadata) != len(ids):
+ raise ValueError("metadata length must match ids/vectors length")
+ items: list[dict] = []
+ for i, (id_, vector) in enumerate(zip(ids, vectors)):
+ # Dimension validation + normalization (supports numpy arrays)
+ if self.dimension is not None and len(vector) != self.dimension:
+ raise ValueError(
+ f"vector[{i}] dimensionality {len(vector)} != expected {self.dimension}"
+ )
+ values = vector.tolist() if hasattr(vector, "tolist") else list(vector)
+ if not all(isinstance(x, (int, float)) for x in values):
+ raise TypeError(f"vector[{i}] must be a sequence of numbers")
+ item = {"id": id_, "values": values}
+ if metadata is not None:
+ md = metadata[i]
+ if not isinstance(md, dict):
+ raise TypeError("each metadata entry must be a dict")
+ item["metadata"] = md
+ items.append(item)
+ self.index.upsert(vectors=items, namespace=namespace)
🧰 Tools
🪛 Ruff (0.13.1)
58-58: Avoid specifying long messages outside the exception class (TRY003)
60-60: Avoid specifying long messages outside the exception class (TRY003)
🤖 Prompt for AI Agents
|
||||||||||||||||||||||
|
||||||||||||||||||||||
def query(self, vector: List[float], top_k: int = 5): | ||||||||||||||||||||||
if not self.index: | ||||||||||||||||||||||
|
Uh oh!
There was an error while loading. Please reload this page.