In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key = os.getenv("OPENAI_API_KEY")

### **Webbase loader**

In [3]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(temperature=0.7)

In [6]:
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [10]:
loader = WebBaseLoader("https://www.theguardian.com/us")
documents = loader.load()

In [11]:
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [12]:
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    persist_directory="chroma_store"
)

In [14]:
from langchain.chains import RetrievalQA
llm = ChatOpenAI()
retriever = vectorstore.as_retriever()

In [16]:
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)

In [17]:
response = qa_chain.run("What is the lastest news from the Guardian?")
print(response)

  response = qa_chain.run("What is the lastest news from the Guardian?")


The latest news from the Guardian includes headlines about Donald Trump's Memorial Day speech, a car plowing into a crowd at Liverpool FC victory parade, Russia's drone strikes targeting Ukraine, and various other global events.


### **Wikipedia Retrieval**

In [18]:
from langchain.retrievers import WikipediaRetriever
retriever = WikipediaRetriever()

In [21]:
results = retriever.invoke("Python programming language")
print(results)

[Document(metadata={'title': 'Python (programming language)', 'summary': 'Python is a high-level, general-purpose programming language. Its design philosophy emphasizes code readability with the use of significant indentation.\nPython is dynamically type-checked and garbage-collected. It supports multiple programming paradigms, including structured (particularly procedural), object-oriented and functional programming. It is often described as a "batteries included" language due to its comprehensive standard library.\nGuido van Rossum began working on Python in the late 1980s as a successor to the ABC programming language, and he first released it in 1991 as Python 0.9.0. Python 2.0 was released in 2000. Python 3.0, released in 2008, was a major revision not completely backward-compatible with earlier versions. Python 2.7.18, released in 2020, was the last release of Python 2.\nPython consistently ranks as one of the most popular programming languages, and it has gained widespread use i

### **Multi Query Retriever**

In [22]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.retrievers import MultiQueryRetriever

In [23]:
llm = ChatOpenAI(temperature=0.7)   

In [24]:
docs = [
    Document(page_content="LangChain is a framework to build apps with LLMs."),
    Document(page_content="LangChain enables chains, agents, and memory for language model workflows."),
    Document(page_content="You can orchestrate multiple LLM calls using LangChain's chain system."),
    Document(page_content="LangChain integrates with vector stores like FAISS and Chroma."),
    Document(page_content="LangChain helps developers manage tool use, memory, and prompt templates.")
]


In [25]:
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(docs, embeddings)

In [26]:
vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x274b272ef90>

In [29]:
multi_query_retriever=MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
    llm=llm
)

In [31]:
query = "Explain Langchain role"
results = multi_query_retriever.invoke(query)

In [33]:
for i, doc in enumerate(results):
    print(f"[Doc {i+1}] {doc.page_content}\n")

[Doc 1] LangChain is a framework to build apps with LLMs.

[Doc 2] LangChain enables chains, agents, and memory for language model workflows.

[Doc 3] You can orchestrate multiple LLM calls using LangChain's chain system.



### **Contextual Retrieval**

In [35]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

from langchain.vectorstores import FAISS
from langchain.schema import Document

from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [36]:
llm = ChatOpenAI(temperature=0)
embeddings = OpenAIEmbeddings()

In [37]:
documents = [
    Document(page_content="LangChain allows you to build applications using LLMs like GPT-4."),
    Document(page_content="It supports memory, chains, tools, agents, and retrievers."),
    Document(page_content="FAISS is a library used for vector similarity search."),
    Document(page_content="LangChain is highly modular and supports plug-and-play components."),
    Document(page_content="Chroma is another vector store like FAISS, used for storing embeddings.")
]

In [39]:
vectorstore = FAISS.from_documents(documents, embeddings)

In [41]:
base_retriever=vectorstore.as_retriever(search_kwargs={"k": 4})

In [42]:
base_retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002750AE809D0>, search_kwargs={'k': 4})

In [43]:
compressor = LLMChainExtractor.from_llm(llm=llm)

In [44]:
compression_retreiver = ContextualCompressionRetriever(base_retriever=base_retriever,
                                                       base_compressor=compressor)

In [46]:
results = compression_retreiver.invoke("What is LangChain?")

In [47]:
results

[Document(metadata={}, page_content='LangChain allows you to build applications using LLMs like GPT-4.'),
 Document(metadata={}, page_content='LangChain is highly modular and supports plug-and-play components.')]

In [48]:
for i, doc in enumerate(results):
    print(f"[Doc {i+1}] {doc.page_content}\n")

[Doc 1] LangChain allows you to build applications using LLMs like GPT-4.

[Doc 2] LangChain is highly modular and supports plug-and-play components.



In [49]:
print("The End")

The End
