#### Load

In [7]:
from langchain_community.document_loaders import TextLoader

In [8]:
# Loader for the source text file. The encoding is pinned explicitly because
# TextLoader otherwise falls back to the platform default, which is not UTF-8
# on every OS and can raise a decode error on load.
# NOTE(review): assumes the file is UTF-8 encoded — confirm.
loader = TextLoader("./files/LangchainRetrieval.txt", encoding="utf-8")

In [9]:
# Read the file through the loader; the result is handed to the splitter's
# split_documents() below.
text = loader.load()

#### Split

In [10]:
from langchain_text_splitters import CharacterTextSplitter

In [11]:
# Splitter targeting chunks of size 1000 with no overlap between
# consecutive chunks.
# NOTE(review): chunk_size is presumably measured in characters (the library
# default length function) — confirm against the splitter docs.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

In [12]:
# Break the loaded content into chunks; these chunks are what get embedded
# and stored in the vector store below.
docs = splitter.split_documents(text)

#### Embed and store

In [13]:
from langchain_openai import OpenAIEmbeddings

In [14]:
# Embedding model built with library defaults (no model name or credentials
# are passed here).
# NOTE(review): presumably picks up the API key from the environment — confirm.
embedding_function = OpenAIEmbeddings()

In [15]:
from langchain_community.vectorstores.chroma import Chroma

In [16]:
# Build a Chroma vector store from the chunks, using the embedding function
# above to vectorize them.
# NOTE(review): no persist directory is given, so the store is presumably
# in-memory only — confirm.
db = Chroma.from_documents(docs, embedding_function)

#### Query

In [18]:
# Natural-language question used for the similarity search below.
query = "What is text embedding and how does langchain help in doing it?"

In [21]:
# Look up the stored chunks most similar to the query. No result count is
# passed, so the library's default number of matches is returned.
queried_docs = db.similarity_search(query)

In [22]:
# Show the text of the first returned chunk.
print(queried_docs[0].page_content)

Text embedding models
Another key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.

Vector stores
With the rise of embeddings, there has emerged a need for databases to support efficient storage and searching of these embeddings. LangChain provides integrations with over 50 different vectorstores, from open-source local ones to cloud-hosted proprietary ones, allowing you to choose the one best suited for your needs. LangChain exposes a standard interface, allowing you to easily swap between vector stores.


#### Retrievers

In [23]:
# Expose the vector store as a retriever so it can be piped into the chain
# built below.
retriever = db.as_retriever()

In [24]:
from langchain.prompts import ChatPromptTemplate

In [25]:
# Prompt text with two placeholders: {question} (the user's question) and
# {context} (the retrieved passages). Built from adjacent string literals so
# every newline in the final template is explicit.
template = (
    "Answer the following question based only on the following context.\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Context:\n"
    "{context}"
)
prompt = ChatPromptTemplate.from_template(template)

In [27]:
from langchain_openai import ChatOpenAI

In [28]:
# Chat model built with library defaults (no model name or sampling
# parameters are overridden here).
llm = ChatOpenAI()

In [32]:
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [33]:
def format_docs(docs):
    """Join the ``page_content`` of every document into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The page contents in input order, separated by one blank line
        (``"\\n\\n"``). An empty input yields an empty string.
    """
    page_texts = (doc.page_content for doc in docs)
    return "\n\n".join(page_texts)

In [35]:
# Retrieval-augmented pipeline, composed with the `|` operator:
#   1. The dict step feeds the same input (the question string) to both
#      entries: "context" runs the retriever and formats its results with
#      format_docs; "question" forwards the input via RunnablePassthrough.
#   2. The resulting {"context", "question"} mapping fills the prompt template.
#   3. The filled prompt is sent to the chat model.
#   4. StrOutputParser turns the model's reply into a plain string.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [38]:
# Run the full chain on the same question used for the similarity search
# above; reusing `query` avoids keeping two copies of the literal in sync.
chain.invoke(query)

'Text embedding is the process of creating embeddings for documents, which capture the semantic meaning of the text and allow for efficient retrieval of similar pieces of text. LangChain helps in text embedding by providing integrations with various embedding providers and methods, allowing users to choose the best option for their needs. It also offers a standard interface for easy swapping between different models.'