## Build a local in-memory vector DB using LangChain

In [1]:
%pip install langchain langgraph langchain_community langchain_huggingface chromadb pypdf

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
Installing collected packages: pypdf
Successfully installed pypdf-5.1.0
Note: you may need to restart the kernel to use updated packages.


In [2]:
from langchain_core.documents import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_loaders import PyPDFLoader

In [5]:

# Build an in-memory vector index over a PDF and run a sample query.
# Pipeline: load PDF -> split into chunks -> embed -> index -> retrieve.

# 1. Load the PDF and parse its content into Document objects.
pdf_loader = PyPDFLoader("A Comprehensive Langchain Guide.pdf")
docs_list = pdf_loader.load()

# 2. Split the documents into smaller, overlapping chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,  # Maximum chunk size, measured by length_function
    chunk_overlap=100,  # Overlap shared by consecutive chunks
    length_function=len,  # len() => sizes are counted in characters
    is_separator_regex=False,  # Separators are literal strings, not regexes
)
doc_splits = text_splitter.split_documents(docs_list)

# 3. Create the embedding model used to vectorise chunks and queries.
embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")

# 4. Initialise the vector store with that embedding model.
vectorstore = InMemoryVectorStore(embedding=embedding)

# 5. Embed and index every chunk.
vectorstore.add_documents(doc_splits)

# 6. Wrap the store in a retriever that returns the top 2 matches.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})

# Example query, run two ways for comparison.
query = "Langchain features for building llm applications"

# Retriever path: invoke() is the supported entry point;
# get_relevant_documents() is deprecated and emits a warning.
results = retriever.invoke(query)

# Direct path: query the store itself, with no explicit k so the
# store's own default number of hits applies.
docs = vectorstore.similarity_search(query)
print(docs)

# Print the retriever's top results, numbered from 1.
for i, result in enumerate(results, 1):
    print(f"Result {i}:")
    print(f"Content: {result.page_content}")


  embedding = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
  from .autonotebook import tqdm as notebook_tqdm


[Document(id='e6d6f875-31ab-44d2-afa2-b696523b8273', metadata={'source': 'A Comprehensive Langchain Guide.pdf', 'page': 0}, page_content='LangChain is a Python library designed tosimplify thedevelopment of applications that utilizelargelanguagemodels(LLMs), suchasthosefromOpenAI, HuggingFace, andotherproviders.As artificial intelligence evolves, LLMs have proven to be powerful tools across industries,enabling applications that generate text, answer questions, summarize documents, and evenassist withdecision-makingprocesses. However, buildingsophisticatedapplicationsusingLLMscan be challenging due to the complexities of chaining'), Document(id='cea2cda6-1243-4432-b72c-3c574b97d9be', metadata={'source': 'A Comprehensive Langchain Guide.pdf', 'page': 1}, page_content='1. LLM Wrappers: These wrappers enable developers to interact with LLMs through a commoninterface, abstracting away the complexities of different APIs. For example, with the sameOpenAIwrapper, youcaneasilyswitchmodelsbychang

  results = retriever.get_relevant_documents(query)


In [6]:
# Re-create the retriever so it returns only the single best match (k=1).
# NOTE: this rebinds the notebook-level `retriever` name.
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})
query = "what are prompttemplates in langchain"

# invoke() is the supported retriever entry point;
# get_relevant_documents() is deprecated and emits a warning.
results = retriever.invoke(query)

# Show the text of each returned chunk.
for result in results:
    print(f"Content: {result.page_content}")

Content: Prompt templatesareideal for building applications where prompts are dynamically generated, suchaschatbots, Q&Asystems, orcontent generationtools.3. Chains: LangChain’s chaining capabilities allowdeveloperstolinkmultiplecomponentstogether,creating workflows where each step relies on the previous one’s output. For instance, asummarization workflowmight involve a pre-processing step, followed by a text generationstep,
