Skip to content
This repository has been archived by the owner on Jul 20, 2024. It is now read-only.

Commit

Permalink
feat: add hybrid_search tool
Browse files Browse the repository at this point in the history
  • Loading branch information
ayoubmrx committed Mar 7, 2024
1 parent bd9b02f commit 0ca00a6
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 0 deletions.
40 changes: 40 additions & 0 deletions hyko_toolkit/models/similarity_search/hybrid_search/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents.base import Document
from metadata import Inputs, Outputs, Params, StartupParams, func


@func.on_startup
async def load(startup_params: StartupParams):
    """Create the embedding model and publish it as a module-level global.

    Registered through ``func.on_startup``. The resulting
    ``HuggingFaceEmbeddings`` instance is stored in the global name
    ``embeddings`` so the execute handler can reuse it.

    Args:
        startup_params: Startup parameters; not read by this handler.
    """
    global embeddings
    model_name = "intfloat/multilingual-e5-small"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)


@func.on_execute
async def main(inputs: Inputs, params: Params) -> Outputs:
    """Perform Hybrid Search on a list of documents based on a given query.

    Two retrievers are built over the same documents, a BM25 retriever and
    a FAISS similarity retriever backed by the global ``embeddings`` model,
    and combined with equal weights in an ``EnsembleRetriever``.

    Args:
        inputs: Provides ``docs`` (the texts to search) and ``query``.
        params: Provides ``bm25_k`` and ``faiss_k``, the number of top
            results requested from the BM25 retriever and from the
            similarity-search retriever respectively.

    Returns:
        Outputs whose ``result`` is the list of page contents of the
        documents returned by the ensemble retriever. The list is empty
        when no input documents were supplied.
    """
    docs = inputs.docs
    if not docs:
        # Building the BM25 and FAISS indexes needs at least one document;
        # with nothing to search, report no matches instead of failing
        # inside the index construction.
        return Outputs(result=[])

    lang_docs = [Document(page_content=text) for text in docs]

    # BM25 (keyword-based ranking)
    bm25_retriever = BM25Retriever.from_documents(lang_docs)
    bm25_retriever.k = params.bm25_k

    # Similarity Search (embedding-based ranking)
    faiss_vectorstore = FAISS.from_documents(documents=lang_docs, embedding=embeddings)
    faiss_retriever = faiss_vectorstore.as_retriever(
        search_kwargs={"k": params.faiss_k}
    )

    # Hybrid Search: both retrievers contribute with the same weight.
    ensemble_retriever = EnsembleRetriever(
        retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
    )
    relevant_documents = ensemble_retriever.get_relevant_documents(inputs.query)
    return Outputs(result=[doc.page_content for doc in relevant_documents])
42 changes: 42 additions & 0 deletions hyko_toolkit/models/similarity_search/hybrid_search/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from pydantic import Field

from hyko_sdk.function import SDKFunction
from hyko_sdk.metadata import CoreModel

# SDK function object for the hybrid-search tool. The model classes below are
# attached to it through its ``set_*`` decorators.
func = SDKFunction(
    description="Perform Hybrid Search on a list of documents based on a given query."
)


@func.set_startup_params
class StartupParams(CoreModel):
    # No startup parameters are declared for this function.
    ...


@func.set_input
class Inputs(CoreModel):
    # Required: the texts that will be searched.
    docs: list[str] = Field(default=..., description="Input Documents.")
    # Required: the query the documents are matched against.
    query: str = Field(
        default=...,
        description="Query or the Question to compare against the input text.",
    )


@func.set_param
class Params(CoreModel):
    # Required: result count requested from the BM25 retriever.
    bm25_k: int = Field(
        default=...,
        description="Number of top results to consider in Best Matching Algorithm (BM25).",
    )
    # Required: result count requested from the similarity-search retriever.
    faiss_k: int = Field(
        default=...,
        description="Number of top results to consider in Similarity Search Algorithm.",
    )


@func.set_output
class Outputs(CoreModel):
    # Required: texts of the retrieved documents.
    result: list[str] = Field(default=..., description="Top K results.")

0 comments on commit 0ca00a6

Please sign in to comment.