**Setup and Installation**

In [None]:
# Setup and Installation
!pip install langchain langchain_openai faiss-cpu chromadb qdrant_client sentence_transformers langchain-community

# Required imports
import os
import numpy as np
from langchain.vectorstores import FAISS, Chroma, Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA

**Set up OpenAI API key**

In [15]:
# Set up OpenAI API key - You'll need to provide your own API key.
# A key that is already exported in the environment is kept as-is; the placeholder is only
# written when OPENAI_API_KEY is missing or empty, so running this cell never replaces a
# real key with the dummy string. Prefer exporting the key outside the notebook so the
# secret is not saved in the file.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = "YOUR_OPENAI_API_KEY"  # Replace with your API key

**Create sample documents**

In [None]:
# Sample corpus shared by every vector store below.
# Each row is (text, source, year, topic); the comprehension turns a row into a Document
# whose metadata carries the "source", "year" and "topic" keys used for filtering/printing.
documents = [
    Document(
        page_content=text,
        metadata={"source": source, "year": year, "topic": topic},
    )
    for text, source, year, topic in (
        (
            "Solar panels convert sunlight directly into electricity through photovoltaic cells.",
            "renewable_textbook", 2022, "solar",
        ),
        (
            "Wind turbines generate electricity by harnessing the power of moving air.",
            "renewable_textbook", 2022, "wind",
        ),
        (
            "Hydroelectric power uses the energy of flowing water to generate electricity.",
            "renewable_textbook", 2021, "hydro",
        ),
        (
            "Geothermal energy utilizes heat from the Earth's core for heating and electricity.",
            "renewable_textbook", 2021, "geothermal",
        ),
        (
            "Biomass energy is derived from organic materials like plants and waste.",
            "renewable_textbook", 2020, "biomass",
        ),
        (
            "Recent advances in perovskite solar cells have reached efficiency levels of 25.2%.",
            "research_paper", 2023, "solar",
        ),
        (
            "Floating offshore wind farms can access stronger, more consistent wind patterns.",
            "research_paper", 2023, "wind",
        ),
        (
            "Green hydrogen production uses renewable electricity to split water molecules.",
            "research_paper", 2022, "hydrogen",
        ),
        (
            "Energy storage solutions are crucial for managing intermittent renewable sources.",
            "policy_brief", 2023, "storage",
        ),
        (
            "Government incentives have accelerated the adoption of renewable energy technologies.",
            "policy_brief", 2023, "policy",
        ),
    )
]

print(f"Created {len(documents)} sample documents for our experiments")

**FAISS Implementation**

In [None]:
# Part 1: Different Vector Store Implementations
# ----------------------------------------------

# Embedding model used by the FAISS, Chroma and Qdrant stores in this notebook.
# Default choice: OpenAI embeddings (requires an API key).
embeddings = OpenAIEmbeddings()

# Free, local alternative (Hugging Face) - uncomment to use it instead of OpenAI:
# embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

print("Embedding model initialized")

# 1. FAISS Implementation
# -----------------------

# Embed the sample documents and index them with FAISS
faiss_vectorstore = FAISS.from_documents(documents, embeddings)
print("FAISS vector store created")

# Round-trip the index through disk: write it to ./faiss_index, then read it back.
faiss_vectorstore.save_local("faiss_index")
# allow_dangerous_deserialization=True is acceptable only because the index being loaded
# was written by this very cell; never pass the flag for files from untrusted sources.
loaded_vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
print("FAISS vector store saved and loaded successfully")

# Plain similarity search: the three closest documents for a free-text query
faiss_results = faiss_vectorstore.similarity_search("renewable energy technologies", k=3)

print("\nFAISS Similarity Search Results:")
for rank, hit in enumerate(faiss_results, start=1):
    meta = hit.metadata
    print(f"{rank}. {hit.page_content}")
    print(f"   Source: {meta['source']}, Year: {meta['year']}\n")

**Chroma Implementation**

In [None]:
# 2. Chroma Implementation
# ------------------------

# Create a Chroma vector store persisted under ./chroma_db.
# Deterministic ids keep this cell safe to re-run: the collection survives on disk between
# runs, and without fixed ids every execution stores another copy of each document under
# fresh random ids, so the filtered search below starts returning duplicates.
# With fixed ids a re-run overwrites the existing entries instead.
# NOTE(review): if ./chroma_db already holds copies from earlier runs, delete it once.
chroma_vectorstore = Chroma.from_documents(
    documents,
    embeddings,
    ids=[f"sample-doc-{position}" for position in range(len(documents))],
    persist_directory="./chroma_db",
    collection_metadata={"hnsw:space": "cosine"}  # Explicitly setting distance metric
)

print("Chroma vector store created and persisted")

# Search with metadata filtering: only documents whose metadata has year == 2023 qualify
chroma_results = chroma_vectorstore.similarity_search(
    "solar technology advancements",
    k=2,
    filter={"year": 2023}
)

print("\nChroma Filtered Search Results (2023 documents only):")
for i, doc in enumerate(chroma_results):
    print(f"{i+1}. {doc.page_content}")
    print(f"   Source: {doc.metadata['source']}, Year: {doc.metadata['year']}\n")

**Qdrant Implementation (In-Memory)**

In [None]:
# 3. Qdrant Implementation (In-Memory)
# ------------------------------------

# Index the sample documents in a Qdrant collection named "renewable_energy";
# location=":memory:" selects Qdrant's in-memory storage.
qdrant_vectorstore = Qdrant.from_documents(
    documents, embeddings, collection_name="renewable_energy", location=":memory:"
)
print("In-memory Qdrant vector store created")

**Common Vector Store Operations, RAG Integration, and Contextual Compression**

In [None]:
# Part 2: Common Vector Store Operations
# --------------------------------------

# 1. Adding new documents to existing store
new_documents = [
    Document(
        page_content="Tidal energy harnesses the power of ocean tides to generate electricity.",
        metadata={"source": "renewable_textbook", "year": 2023, "topic": "tidal"}
    ),
    Document(
        page_content="Agrivoltaics combines solar panel deployment with agricultural land use.",
        metadata={"source": "research_paper", "year": 2023, "topic": "solar"}
    ),
]

# Add to FAISS store.
# NOTE: add_documents mutates faiss_vectorstore in place, so re-running this cell without
# re-creating the store first appends the same two documents a second time.
faiss_vectorstore.add_documents(new_documents)
print("Added new documents to FAISS store")

# 2. Similarity search with scores
results_with_scores = faiss_vectorstore.similarity_search_with_score(
    "innovative solar technologies",
    k=3
)

# The score FAISS returns here is an L2 distance, not a similarity: smaller values mean a
# closer match, so the label below says so explicitly.
print("\nSimilarity Search with Scores:")
for doc, score in results_with_scores:
    print(f"Content: {doc.page_content}")
    print(f"Metadata: {doc.metadata}")
    print(f"Distance (L2, lower = more similar): {score}\n")

# 3. Maximum Marginal Relevance Search (for diversity)
mmr_results = faiss_vectorstore.max_marginal_relevance_search(
    "renewable energy generation",
    k=3,             # Number of results to return
    fetch_k=10,      # Initial pool of results to consider
    lambda_mult=0.5  # Diversity parameter (0 = max diversity, 1 = max relevance)
)

print("\nMaximum Marginal Relevance Search Results:")
for i, doc in enumerate(mmr_results):
    print(f"{i+1}. {doc.page_content}")
    print(f"   Topic: {doc.metadata['topic']}\n")

# Part 3: Integration with RAG pipelines
# --------------------------------------

# Create a retriever from the vector store
retriever = faiss_vectorstore.as_retriever(
    search_type="mmr",  # Using MMR for diverse results
    search_kwargs={"k": 3, "fetch_k": 10}
)

# Create a simple RAG chain ("stuff" = all retrieved documents go into a single prompt)
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),  # You can replace with any other LLM
    chain_type="stuff",
    retriever=retriever
)

# Query the chain.
# Chain.run is deprecated; invoke takes the input under the chain's "query" key and
# returns a dict whose "result" entry holds the answer text that run used to return.
query = "What are different ways to generate electricity from renewable sources?"
try:
    response = qa_chain.invoke({"query": query})["result"]
    print("\nRAG Pipeline Response:")
    print(response)
except Exception as e:
    # Deliberate best-effort: report the failure and keep the rest of the notebook runnable.
    print(f"Error running RAG pipeline: {e}")
    print("To run this section, ensure you have a valid OpenAI API key configured")

# Part 4: Advanced Usage - Contextual Compression
# ----------------------------------------------

# Create a contextual compression retriever to extract only the relevant parts
# of retrieved documents: the base retriever fetches documents, then the LLM-backed
# extractor post-processes each one.
try:
    llm = OpenAI(temperature=0)
    compressor = LLMChainExtractor.from_llm(llm)

    compression_retriever = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=retriever
    )

    # get_relevant_documents is deprecated; retrievers are queried through invoke,
    # which takes the query string and returns the same list of documents.
    compressed_docs = compression_retriever.invoke(
        "How do solar panels work?"
    )

    print("\nContextual Compression Results:")
    for i, doc in enumerate(compressed_docs):
        print(f"{i+1}. {doc.page_content}")
except Exception as e:
    # Deliberate best-effort: report the failure instead of aborting the notebook.
    print(f"Error running contextual compression: {e}")
    print("To run this section, ensure you have a valid OpenAI API key configured")

print("\nNotebook execution complete!")

# Note: To use cloud-based vector stores like Pinecone, Weaviate, or Milvus,
# you would need to set up accounts and install additional packages.
# Examples for these implementations are excluded from this notebook for simplicity.