In [1]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from sentence_transformers import SentenceTransformer


# Embedding model handed to Chroma as its embedding function (runs on CPU).
model_name = "BAAI/llm-embedder"
model_kwargs = {"device": "cpu"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)

# Open the Chroma collection persisted in the local "llm-embedder" directory.
# NOTE(review): presumably the collection was built with this same embedder;
# confirm, otherwise query and stored vectors are not comparable.
vectorstore = Chroma(persist_directory="llm-embedder", embedding_function=embeddings)

# Second model, used later to re-score retrieved passages via `.encode`.
# NOTE(review): loading this checkpoint with SentenceTransformer logs "No
# sentence-transformers model found ... Creating a new one with MEAN pooling",
# i.e. it is not packaged as a sentence-embedding model. It appears to be a
# cross-encoder re-ranker; if so, `sentence_transformers.CrossEncoder` with
# `.predict([(query, passage), ...])` is the matching API. Switching requires
# changing the code that calls `model.encode` as well - TODO confirm and migrate.
model = SentenceTransformer('BAAI/bge-reranker-base')


No sentence-transformers model found with name BAAI/bge-reranker-base. Creating a new one with MEAN pooling.
Some weights of XLMRobertaModel were not initialized from the model checkpoint at BAAI/bge-reranker-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [2]:
# Baseline: query the vector store directly, without any re-ranking step.
vectorstore.similarity_search("what is buggy?")

[Document(page_content='Sweepstakes Slang\n\nBuggy: Vehicle being raced and also a nickname for the competition.\n\nChute: A section of the freeroll portion of the buggy course (near the southwestern end of Frew Street at its intersection with Schenley Drive) where buggies make the sharp righthand turn from Schenley Drive onto Frew Street.\n\nChute Flagger: Team member who provides a signal for buggy drivers to know when to start the right-hand turn from Schenley Drive onto Frew Street.', metadata={'source': 'Data/Buggy News/01.txt'}),
 Document(page_content="Driver: Person who travels with a buggy and controls the vehicles via steering and braking systems.\n\nPushbar: Structure attached to a buggy that a person pushes to propel that buggy forward.\n\nPusher: Person who propels a buggy via a pushbar along one of the five hills of the buggy course.\n\nShell: Entire outer structure or covering of a buggy that determines that buggy's aerodynamic characteristics.", metadata={'source': 'Dat

In [15]:
from langchain_core.retrievers import BaseRetriever, RetrieverLike, RetrieverOutputLike
from langchain_core.language_models import BaseLLM
from langchain_core.embeddings import Embeddings
from langchain_core.callbacks import CallbackManagerForRetrieverRun
from langchain_core.documents import Document
from typing import List



class CustomRetriever(BaseRetriever):
    """Retriever that re-orders a base retriever's hits with a second embedding model.

    Candidates are fetched from ``vectorstore``. The query and each candidate's
    ``page_content`` are then encoded with ``model`` (normalized embeddings),
    scored by their dot product, and the ``top_n`` highest-scoring documents
    are returned in descending score order.
    """

    # Source of candidate documents. Despite the name this is a retriever /
    # runnable (e.g. ``Chroma(...).as_retriever()``), not a vector store.
    vectorstore: RetrieverLike

    # Model whose ``encode`` output is used to re-score the candidates.
    model: SentenceTransformer

    # Number of candidates requested from the base retriever. It is forwarded
    # as the ``k`` keyword; whether that keyword is honored depends on the base
    # retriever and the installed langchain-core version, so also configure the
    # base retriever itself (e.g. ``search_kwargs={"k": ...}``) to be certain.
    fetch_k: int = 10

    # Maximum number of re-ranked documents to return.
    top_n: int = 4

    def _get_relevant_documents(self, query: str, *, run_manager: CallbackManagerForRetrieverRun) -> List[Document]:
        """Fetch candidates for ``query`` and return the best ``top_n`` after re-scoring.

        Args:
            query: The search string.
            run_manager: Callback manager for this retriever run; its child
                manager is passed to the base retriever so nested runs are traced.

        Returns:
            Up to ``top_n`` documents, highest score first. An empty list is
            returned when the base retriever yields no candidates.
        """
        docs = self.vectorstore.invoke(
            query,
            config={"callbacks": run_manager.get_child()},
            k=self.fetch_k,
        )
        if not docs:
            # Nothing to score; skip encoding so the matrix product below is
            # never attempted on an empty candidate matrix.
            return []

        passages = [doc.page_content for doc in docs]
        passage_vecs = self.model.encode(passages, normalize_embeddings=True)
        query_vec = self.model.encode([query], normalize_embeddings=True)

        # (n_docs, dim) @ (dim, 1) -> one score per candidate.
        similarity = passage_vecs @ query_vec.T
        scores = [float(row[0]) for row in similarity]

        # Sort on the score only: without an explicit key, tied scores would
        # make ``sorted`` compare the Document objects and raise TypeError.
        ranked = sorted(zip(scores, docs), key=lambda pair: pair[0], reverse=True)
        return [doc for _, doc in ranked[: self.top_n]]


In [16]:
# Configure the candidate pool size on the base retriever itself: a `k` passed
# at call time is not reliably forwarded to the vector store, and without
# `search_kwargs` the retriever only returns the store's default number of
# hits, leaving the re-ranker nothing extra to choose from.
custom = CustomRetriever(vectorstore=vectorstore.as_retriever(search_kwargs={"k": 10}), model=model)

In [17]:
# Run the re-ranking retriever through the Runnable entry point
# (`get_relevant_documents` is the deprecated equivalent).
custom.invoke("What is buggy?")

[Document(page_content='Sweepstakes Slang\n\nBuggy: Vehicle being raced and also a nickname for the competition.\n\nChute: A section of the freeroll portion of the buggy course (near the southwestern end of Frew Street at its intersection with Schenley Drive) where buggies make the sharp righthand turn from Schenley Drive onto Frew Street.\n\nChute Flagger: Team member who provides a signal for buggy drivers to know when to start the right-hand turn from Schenley Drive onto Frew Street.', metadata={'source': 'Data/Buggy News/01.txt'}),
 Document(page_content='The Machine\n\nThe basics of a buggy are straightforward, but teams are often secretive in how they build the machines, in particular the way they brake, steer and what types of wheels are used.\n\nEach has a body, pushbar for runners to move the machine up the hills, wheels, a safety harness and driving and braking mechanisms. Some also include fairings, a type of housing around the wheels that help reduce drag, make the vehicle 