In [32]:
import os
from dotenv import load_dotenv

# Load settings from a .env file (python-dotenv) into the process environment;
# the os.getenv lookups further down in this cell depend on this having run.
load_dotenv()

# Settings this notebook expects to find in the environment (or the .env file).
# The former `os.environ[name] = os.getenv(name)` lines were no-ops when the
# variable existed and raised an opaque "TypeError: str expected, not NoneType"
# when it did not, so they are replaced by an explicit check that names
# whatever is missing instead of crashing the cell.
missing_env_vars = [
    name
    for name in ("OPENAI_API_KEY", "HF_TOKEN", "LANGCHAIN_PROJECT")
    if not os.getenv(name)
]
if missing_env_vars:
    print(f"Warning: environment variable(s) not set: {', '.join(missing_env_vars)}")

## Langsmith Tracking
os.environ["LANGCHAIN_TRACING_V2"] = "true"



In [2]:
## Data ingestion -- scrape the data we need from the website
from langchain_community.document_loaders import WebBaseLoader

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [19]:
# Build a web loader for the LangSmith docs landing page and echo it
# as the cell output.
loader = WebBaseLoader(
    "https://docs.smith.langchain.com/"
)
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x119a4cb78e0>

In [20]:
# Run the loader; the resulting list of Document objects is echoed in full
# as the cell output.
docs = loader.load()
docs

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='\n\n\n\n\nGet started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentLearn the essentials of LangSmith in the new Introduction to LangSmith course!  Enroll for free. API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppQuick StartObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceQuick StartOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nWith LangSmith you

In [21]:
### Load Data --> Docs --> Divide our documents into chunk documents --> Text --> Vectors --> Vector Embeddings --> Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded pages into chunks (chunk_size=1000, chunk_overlap=50)
# and echo every resulting chunk document.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'https://docs.smith.langchain.com/', 'title': 'Get started with LangSmith | 🦜️🛠️ LangSmith', 'description': 'LangSmith is a platform for building production-grade LLM applications.', 'language': 'en'}, page_content='Get started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentLearn the essentials of LangSmith in the new Introduction to LangSmith course!  Enroll for free. API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppQuick StartObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceQuick StartOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nWith LangSmith you can:'),
 

In [22]:
from langchain_openai import OpenAIEmbeddings

# Embedding client created with its default settings; it is handed to the
# vector store when the index is built.
embeddings = OpenAIEmbeddings()

In [23]:
from langchain_community.vectorstores import FAISS

# Build the FAISS vector store from the chunked documents, using the
# embedding client created earlier.
vectorstoredb = FAISS.from_documents(
    documents,
    embeddings,
)

In [26]:
## Query the vector DB
query = "LangSmith is a platform for building production-grade LLM applications."
result = vectorstoredb.similarity_search(query)

# Show the text of the first match that came back.
top_match = result[0]
top_match.page_content

'Get started with LangSmith | 🦜️🛠️ LangSmith\n\n\n\n\n\n\nSkip to main contentLearn the essentials of LangSmith in the new Introduction to LangSmith course!  Enroll for free. API ReferenceRESTPythonJS/TSSearchRegionUSEUGo to AppQuick StartObservabilityEvaluationPrompt EngineeringDeployment (LangGraph Platform)AdministrationSelf-hostingPricingReferenceCloud architecture and scalabilityAuthz and AuthnAuthentication methodsdata_formatsEvaluationDataset transformationsRegions FAQsdk_referenceQuick StartOn this pageGet started with LangSmith\nLangSmith is a platform for building production-grade LLM applications.\nIt allows you to closely monitor and evaluate your application, so you can ship quickly and with confidence.\nWith LangSmith you can:'

- In LangChain, the `create_stuff_documents_chain` function builds a document chain that takes a list of documents, formats them all into a single prompt (it "stuffs" them into the prompt's context variable), and passes that prompt to the LLM. It is typically used for retrieval-augmented generation (RAG) and other tasks that need the model to work over several pieces of text at once. Because everything goes into one prompt, the combined documents must fit within the model's context window.

- Purpose:
  The `create_stuff_documents_chain` function is part of LangChain's abstraction for combining multiple documents in a processing pipeline. It creates a chain that inserts the documents into a prompt and sends that prompt to the model, which makes it possible to:

- Process multiple documents together, in the order they are supplied.
- Support retrieval, generation, or augmentation workflows based on those documents.
- Enable document summarization, QA over multiple documents, or similar tasks that involve extracting insights from a collection of documents.

- Reference: https://api.python.langchain.com/en/latest/chains/langchain.chains.combine_documents.stuff.create_stuff_documents_chain.html

In [45]:
# Retrieval and document chain -- the building block used in most RAG applications.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-4o")

# Single system message; the documents passed to invoke() under the "context"
# key are what the {context} placeholder refers to.
system_message = (
    "What are everyone's favorite colors:\n"
    " Output Format is dictionary key is name and value is color he/she like \n"
    "{context}"
)
prompt = ChatPromptTemplate.from_messages([("system", system_message)])
chain = create_stuff_documents_chain(llm, prompt)

# Small hand-written documents for the demo.
# NOTE: this rebinds `docs`, which earlier in the notebook held the pages
# loaded from the web.
docs = [
    Document(page_content=text)
    for text in (
        "Jesse loves red but not yellow",
        "Jamal loves green but not as much as he loves orange",
        "Shanmukh loves green",
        "Shanmukh hates blue",
        "Shanmukh hates red",
    )
]

chain.invoke({"context": docs})



'```python\nfavorite_colors = {\n    "Jesse": "red",\n    "Jamal": "orange",\n    "Shanmukh": "green"\n}\n```'