In [None]:
import torch
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# --- Configuration ---------------------------------------------------------
query = "molecule"  # free-text query that is embedded and matched against the store
top_k = 10  # number of nearest chunks to retrieve
embedding_name = "BAAI/bge-small-en-v1.5"

# Choose the torch device at runtime instead of hard-coding "mps", so the cell
# still runs on machines without Apple-silicon GPU support.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# --- Embedding model -------------------------------------------------------
embedding_model = HuggingFaceEmbeddings(
    model_name=embedding_name,
    model_kwargs={"device": device},
    encode_kwargs={"normalize_embeddings": True},
)

# --- Load the persisted FAISS store ----------------------------------------
# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle the
# stored docstore. Unpickling can execute arbitrary code, so only point this at
# index directories that were produced locally / by a trusted source.
vector_store = FAISS.load_local(
    "tmp/vector_stores/ex1",
    embedding_model,
    allow_dangerous_deserialization=True,
)

# Basic facts about the loaded index (vector count, dimensionality, metric).
vector_data = {
    "num_vectors": vector_store.index.ntotal,
    "vector_dim": vector_store.index.d,
    "distance_strategy": vector_store.distance_strategy,
}

# --- Query -----------------------------------------------------------------
# Each element of `results` is a (Document, score) tuple.
results = vector_store.similarity_search_with_score(query, k=top_k)

# The three parallel lists are built in the reverse of the returned order, so
# the first hit returned by the store ends up last in each list.
context = [doc.page_content for doc, _score in reversed(results)]
sources = [doc.metadata["source"] for doc, _score in reversed(results)]
scores = [score for _doc, score in reversed(results)]

dict_ = {
    "context": context,
    "sources": sources,
    "scores": scores,
    "store_name": "s",  # NOTE(review): looks like a placeholder value — confirm
}
dict_

{'context': ['. Author manuscript; available in PMC 2017 June 05.\nAuthor Manuscript Author Manuscript Author Manuscript Author ManuscriptMHV-JHM is able to infect neural cells where CEACAM1 expression level is very low, at \nleast in part because its spike can mediate receptor-independent entry (131, 132). Taken \ntogether, the membrane fusion mechanism of MHV spike depends on both proteolysis and \nreceptor binding, and it may or may not depend on the low pH of endosomes; in addition, \nreceptor-independent membrane fusion by MHV-JHM spike contributes to the neutral \ntropism of MHV-JHM.\nResearch on the cell entry mechanism of SARS-CoV has led to novel findings. First, SARS-\nCoV spike is not cleaved by proprotein convertases during virus packaging and hence \nremains intact on mature virions (133, 134). Instead, SARS-CoV enters host cells through \nendocytosis, and its spike is processed by lysosomal proteases (e.g., cathepsin L and \ncathepsin B) (135–137). This is supported by th

In [27]:
# Exploratory: print the API of the raw FAISS index object held by the
# LangChain store (the recorded output shows it is an IndexFlatL2).
help(vector_store.index)

Help on IndexFlatL2 in module faiss.swigfaiss object:

class IndexFlatL2(IndexFlat)
 |  IndexFlatL2(*args)
 |  
 |  Method resolution order:
 |      IndexFlatL2
 |      IndexFlat
 |      IndexFlatCodes
 |      Index
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __getstate__ = index_getstate(self)
 |  
 |  __init__(self, *args)
 |      :type d: int
 |      :param d: dimensionality of the input vectors
 |  
 |  __repr__ = _swig_repr(self)
 |  
 |  __setattr__ = replacement_setattr(self, name, value)
 |  
 |  __setstate__ = index_setstate(self, st)
 |  
 |  clear_l2norms(self)
 |  
 |  get_FlatCodesDistanceComputer(self)
 |       a FlatCodesDistanceComputer offers a distance_to_code method
 |      
 |      The default implementation explicitly decodes the vector with sa_decode.
 |  
 |  sync_l2norms(self)
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __swig_destroy__ = delete_IndexFlatL2(...)
 | 

In [None]:
# Display the size, dimensionality and distance strategy of the loaded store.
dict(
    num_vectors=vector_store.index.ntotal,
    vector_dim=vector_store.index.d,
    distance_strategy=vector_store.distance_strategy,
)

{'num_vectors': 22, 'vector_dim': 384, 'distance_strategy': 0.0}

In [7]:
# `results` is produced by similarity_search_with_score, so every element is a
# (Document, score) tuple. Index into the tuple to inspect the Document class
# itself rather than the builtin tuple type.
help(results[0][0])

Help on Document in module langchain_core.documents.base object:

class Document(BaseMedia)
 |  Document(page_content: 'str', *, id: typing.Annotated[str | None, _PydanticGeneralMetadata(coerce_numbers_to_str=True)] = None, metadata: dict = <factory>, type: Literal['Document'] = 'Document') -> None
 |  
 |  Class for storing a piece of text and associated metadata.
 |  
 |  !!! note
 |      `Document` is for **retrieval workflows**, not chat I/O. For sending text
 |      to an LLM in a conversation, use message types from `langchain.messages`.
 |  
 |  Example:
 |      ```python
 |      from langchain_core.documents import Document
 |  
 |      document = Document(
 |          page_content="Hello, world!", metadata={"source": "https://example.com"}
 |      )
 |      ```
 |  
 |  Method resolution order:
 |      Document
 |      BaseMedia
 |      langchain_core.load.serializable.Serializable
 |      pydantic.main.BaseModel
 |      abc.ABC
 |      builtins.object
 |  
 |  Methods defined

In [11]:
# Exploratory: print the API of the LangChain FAISS vector-store wrapper.
help(vector_store)

Help on FAISS in module langchain_community.vectorstores.faiss object:

class FAISS(langchain_core.vectorstores.base.VectorStore)
 |  FAISS(embedding_function: 'Union[Callable[[str], List[float]], Embeddings]', index: 'Any', docstore: 'Docstore', index_to_docstore_id: 'Dict[int, str]', relevance_score_fn: 'Optional[Callable[[float], float]]' = None, normalize_L2: 'bool' = False, distance_strategy: 'DistanceStrategy' = <DistanceStrategy.EUCLIDEAN_DISTANCE: 'EUCLIDEAN_DISTANCE'>)
 |  
 |  FAISS vector store integration.
 |  
 |  See [The FAISS Library](https://arxiv.org/pdf/2401.08281) paper.
 |  
 |  Setup:
 |      Install ``langchain_community`` and ``faiss-cpu`` python packages.
 |  
 |      .. code-block:: bash
 |  
 |          pip install -qU langchain_community faiss-cpu
 |  
 |  Key init args — indexing params:
 |      embedding_function: Embeddings
 |          Embedding function to use.
 |  
 |  Key init args — client params:
 |      index: Any
 |          FAISS index to use.
 | 

In [14]:
# Small probe query: fetch only the two nearest chunks, each returned as a
# (Document, score) tuple, and display them.
res = vector_store.similarity_search_with_score("molecule", k=2)
res

[(Document(id='6987e0fd-61d2-4a49-ad72-2b29cd5978b1', metadata={'source': '27578435', 'start_index': 62298}, page_content='. [PubMed: 19901337] \n84. Reguera J, Santiago C, Mudgal G, Ordono D, Enjuanes L, Casasnovas JM. Structural bases of \ncoronavirus attachment to host aminopeptidase N and its inhibition by neutralizing antibodies. \nPLOS Pathog. 2012; 8:e1002859. [PubMed: 22876187] \n85. Chen L, Lin YL, Peng G, Li F. Structural basis for multifunctional roles of mammalian aminopep-\ntidase N. PNAS. 2012; 109:17966–71. [PubMed: 23071329] \n86. Wong AH, Zhou D, Rini JM. The X-ray crystal structure of human aminopeptidase N reveals a \nnovel dimer and the basis for peptide processing. J Biol Chem. 2012; 287:36804–13. [PubMed: \n22932899] \n87. Tusell SM, Schittone SA, Holmes KV . Mutational analysis of aminopeptidase N, a receptor for \nseveral group 1 coronaviruses, identifies key determinants of viral host range. J Virol. 2007; \n81:1261–73. [PubMed: 17093189] \n88. Peng GQ, Sun DW,

In [20]:
# First hit of the probe query; element 0 of the (Document, score) tuple is
# the Document itself.
res[0][0]

Document(id='6987e0fd-61d2-4a49-ad72-2b29cd5978b1', metadata={'source': '27578435', 'start_index': 62298}, page_content='. [PubMed: 19901337] \n84. Reguera J, Santiago C, Mudgal G, Ordono D, Enjuanes L, Casasnovas JM. Structural bases of \ncoronavirus attachment to host aminopeptidase N and its inhibition by neutralizing antibodies. \nPLOS Pathog. 2012; 8:e1002859. [PubMed: 22876187] \n85. Chen L, Lin YL, Peng G, Li F. Structural basis for multifunctional roles of mammalian aminopep-\ntidase N. PNAS. 2012; 109:17966–71. [PubMed: 23071329] \n86. Wong AH, Zhou D, Rini JM. The X-ray crystal structure of human aminopeptidase N reveals a \nnovel dimer and the basis for peptide processing. J Biol Chem. 2012; 287:36804–13. [PubMed: \n22932899] \n87. Tusell SM, Schittone SA, Holmes KV . Mutational analysis of aminopeptidase N, a receptor for \nseveral group 1 coronaviruses, identifies key determinants of viral host range. J Virol. 2007; \n81:1261–73. [PubMed: 17093189] \n88. Peng GQ, Sun DW, R

In [3]:
# Build one prompt-ready string with a "source:/content:" entry per hit,
# separated by blank lines.
# `results` holds (Document, score) tuples (it comes from
# similarity_search_with_score), so unpack each tuple instead of reading
# Document attributes off the tuple, which raises AttributeError.
# Note: this rebinds `context`, replacing any earlier value of that name.
context = "\n\n".join(
    f"source:{doc.metadata['source']}\ncontent:{doc.page_content}"
    for doc, _score in results
)
context

"source:27578435\ncontent:. [PubMed: 19901337] \n84. Reguera J, Santiago C, Mudgal G, Ordono D, Enjuanes L, Casasnovas JM. Structural bases of \ncoronavirus attachment to host aminopeptidase N and its inhibition by neutralizing antibodies. \nPLOS Pathog. 2012; 8:e1002859. [PubMed: 22876187] \n85. Chen L, Lin YL, Peng G, Li F. Structural basis for multifunctional roles of mammalian aminopep-\ntidase N. PNAS. 2012; 109:17966–71. [PubMed: 23071329] \n86. Wong AH, Zhou D, Rini JM. The X-ray crystal structure of human aminopeptidase N reveals a \nnovel dimer and the basis for peptide processing. J Biol Chem. 2012; 287:36804–13. [PubMed: \n22932899] \n87. Tusell SM, Schittone SA, Holmes KV . Mutational analysis of aminopeptidase N, a receptor for \nseveral group 1 coronaviruses, identifies key determinants of viral host range. J Virol. 2007; \n81:1261–73. [PubMed: 17093189] \n88. Peng GQ, Sun DW, Rajashankar KR, Qian ZH, Holmes KV , Li F. Crystal structure of mouse \ncoronavirus receptor-bin