In [None]:
!pip install langchain langchain_openai langchain-community wikipedia tiktoken openai chromadb faiss-cpu

In [None]:
# Install OpenAI if not installed
!pip install openai

# Load key securely without displaying it
import os
from getpass import getpass

os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")


In [None]:
import openai

# Copy the key held in the environment onto the OpenAI SDK's module-level setting
# (stays None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [None]:
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document

In [None]:
# Sample corpus that the vector stores in this notebook are built from.
# Each (topic, text) pair becomes one Document: the text is the page content
# and the topic is stored under metadata["topic"].
# NOTE(review): the "Photosynthesis" text mixes in unrelated sentences —
# presumably on purpose, to give the compression demo something to strip; confirm.
documents = [
    Document(page_content=text, metadata={"topic": topic})
    for topic, text in (
        (
            "Climate Change",
            "Climate change is a long-term alteration of temperature and typical weather patterns in a place.",
        ),
        (
            "Artificial Intelligence",
            "AI enables computers and machines to mimic human intelligence and perform tasks like problem-solving.",
        ),
        (
            "The Great Wall of China",
            "The Great Wall of China was built to protect Chinese states against invasions and raids from nomadic groups.",
        ),
        (
            "Electric Vehicles",
            "Electric vehicles use electric motors powered by batteries, reducing carbon emissions significantly.",
        ),
        (
            "The Human Brain",
            "The human brain is a complex organ responsible for thought, memory, emotion, and sensory processing.",
        ),
        (
            "Photosynthesis",
            "Paris is the capital of France. Photosynthesis is the process used by plants to convert light energy into chemical energy. Biology is an important subject.",
        ),
        (
            "Space Exploration",
            "Space exploration involves investigating outer space using astronomy, space technology, and spacecraft.",
        ),
        (
            "COVID-19 Pandemic",
            "COVID-19 affected global health systems and economies, highlighting the need for preparedness and vaccines.",
        ),
        (
            "Ancient Egypt",
            "Ancient Egypt was a civilization known for pyramids, hieroglyphics, and a deep belief in the afterlife.",
        ),
        (
            "Blockchain Technology",
            "Blockchain is a decentralized ledger system that records transactions in a secure and transparent way.",
        ),
    )
]

In [None]:
from langchain.retrievers.multi_query import MultiQueryRetriever

In [None]:
from langchain_community.vectorstores import FAISS

In [None]:
from langchain_openai import OpenAIEmbeddings, ChatOpenAI

In [None]:
from langchain.retrievers.document_compressors import LLMChainExtractor

In [None]:
from langchain.retrievers import ContextualCompressionRetriever

**Contextual Compression Retriever**

In [None]:
# Embed the sample documents with OpenAI embeddings and index them in FAISS;
# this store backs the contextual-compression demo.
vs = FAISS.from_documents(documents, OpenAIEmbeddings())

In [None]:
# Similarity-search retriever over `vs`, configured with k=1 (one hit per query).
base_retriever = vs.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=1),
)

In [None]:
# Chat model (temperature 0) used to build the LLM-backed extractor that serves
# as the document compressor.
llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo")
compressor = LLMChainExtractor.from_llm(llm=llm)

In [None]:
# Combine the two pieces: `base_retriever` fetches documents and `compressor`
# post-processes what it returns.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=base_retriever,
    base_compressor=compressor,
)

In [None]:
# Run one question through the compression retriever and keep the returned documents.
result_compression = compression_retriever.invoke(
    input="What is Photosynthesis?",
)

In [None]:
# Print the compressed hits as a 1-based numbered list.
for rank, doc in enumerate(result_compression, start=1):
    print(f"{rank}. {doc.page_content}")

**Multi Query Retriever**

In [None]:
# FAISS index over the sample documents, shared by the multi-query and MMR demos.
vector_store_faiss = FAISS.from_documents(documents, OpenAIEmbeddings())

In [None]:
# Baseline for comparison: similarity search configured with k=2.
retriever_similarity = vector_store_faiss.as_retriever(
    search_type="similarity",
    search_kwargs=dict(k=2),
)

In [None]:
# Multi-query retriever layered on the baseline retriever, with a
# temperature-0 chat model supplied as its LLM.
retriever_multi = MultiQueryRetriever.from_llm(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    retriever=retriever_similarity,
)

In [None]:
# Question sent to both the plain similarity retriever and the multi-query retriever.
query = "What protects environment?"

In [None]:
# Hits from the plain similarity retriever, kept for side-by-side comparison.
result_similarity = retriever_similarity.invoke(input=query)

In [None]:
# Hits from the multi-query retriever for the same question.
result_multi = retriever_multi.invoke(input=query)

In [None]:
# Numbered (1-based) listing of the plain similarity results.
for rank, doc in enumerate(result_similarity, start=1):
    print(f"{rank}. {doc.page_content}")

In [None]:
# Numbered (1-based) listing of the multi-query results.
for rank, doc in enumerate(result_multi, start=1):
    print(f"{rank}. {doc.page_content}")

**MMR (Maximal Marginal Relevance) Retriever**

In [None]:
# MMR-based retriever over the same FAISS index, configured with k=2.
# NOTE(review): per the LangChain MMR docs a lambda_mult near 1 should favor
# relevance over diversity — confirm 0.9 is the intended trade-off.
retriever_mmr = vector_store_faiss.as_retriever(
    search_type="mmr",
    search_kwargs=dict(k=2, lambda_mult=0.9),
)

In [None]:
# Question for the MMR retriever (rebinds the shared `query` name).
query = 'Which docs talks about Biology?'

In [None]:
# Documents chosen by the MMR retriever for the current query.
result_faiss = retriever_mmr.invoke(input=query)

In [None]:
# Numbered (1-based) listing of the MMR results.
for rank, doc in enumerate(result_faiss, start=1):
    print(f"{rank}. {doc.page_content}")

**Vector Store Retriever**

In [None]:
# Build the Chroma collection from the sample documents.
# Each document gets a stable, position-based id so that re-running this cell
# in the same kernel overwrites the existing entries instead of adding the
# same documents again (which would make k=2 return duplicates).
# NOTE(review): relies on Chroma writing by id (upsert) and on the collection
# surviving between runs in-process — confirm with the installed version.
vector_store = Chroma.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
    collection_name="my_collection",
    ids=[f"doc-{position}" for position in range(len(documents))],
)

In [None]:
# Retriever over the Chroma store with the default search type and k=2.
retriever_vs = vector_store.as_retriever(
    search_kwargs=dict(k=2),
)

In [None]:
# Question for the Chroma-backed retriever (rebinds the shared `query` name).
query='Why EVs are important?'

In [None]:
# Documents returned by the Chroma-backed retriever for the current query.
result = retriever_vs.invoke(input=query)

In [None]:
# Numbered (1-based) listing of the Chroma results.
for rank, doc in enumerate(result, start=1):
    print(f"{rank}. {doc.page_content}")

**Wikipedia Retriever**

In [None]:
from langchain_community.retrievers import WikipediaRetriever
# Wikipedia-backed retriever: English edition, top_n_results set to 2.
retriever_wk = WikipediaRetriever(
    lang="en",
    top_n_results=2,
)

In [None]:
# Search text handed to the Wikipedia retriever (rebinds the shared `query` name).
query = 'Badr Wars non-existence in non-islamic literature'

In [None]:
# Documents returned by the Wikipedia retriever for the current query.
docs = retriever_wk.invoke(input=query)

In [None]:
# Numbered (1-based) listing of the Wikipedia results.
for rank, doc in enumerate(docs, start=1):
    print(f"{rank}. {doc.page_content}")