In [13]:
!pip install langchain-openai langchain-core langchain-community openai tiktoken



In [19]:
from langchain_openai import AzureOpenAIEmbeddings, AzureOpenAI  # Changed to AzureOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document

# Setup embedding
embedding = AzureOpenAIEmbeddings(
    azure_endpoint="Type your Azure Endpoint",
    openai_api_key="Type your Azure Open API Key",
    azure_deployment="text-embedding-ada-002",
    openai_api_version="2024-02-15-preview"
)

# Setup Azure OpenAI with the correct model type
llm = AzureOpenAI(  # Changed to non-chat model
    azure_endpoint="Type your Azure Endpoint",
    openai_api_key="Type your Azure OpenAPI Key",
    azure_deployment="gpt-35-turbo-instruct",
    openai_api_version="2024-02-15-preview",
    temperature=0
)

In [20]:
# Create documents with metadata
texts = [
    Document(
        page_content="The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).",
        metadata={"source": "text1", "page": 1}
    ),
    Document(
        page_content="A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.",
        metadata={"source": "text2", "page": 1}
    ),
    Document(
        page_content="A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.",
        metadata={"source": "text3", "page": 1}
    )
]

# Create vector store
vectordb = FAISS.from_documents(texts, embedding)
print(f"Number of vectors: {vectordb.index.ntotal}")

Number of vectors: 3


In [21]:
# Test different search methods
question = "Tell me about all-white mushrooms with large fruiting bodies"

# Regular search
print("Regular Search Results:")
regular_results = vectordb.similarity_search(question, k=2)
for doc in regular_results:
    print("\nRegular:", doc.page_content)

# MMR search
print("\nMMR Search Results:")
mmr_results = vectordb.max_marginal_relevance_search(question, k=2, fetch_k=3)
for doc in mmr_results:
    print("\nMMR:", doc.page_content)

Regular Search Results:

Regular: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.

Regular: The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).

MMR Search Results:

MMR: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.

MMR: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.


In [22]:
# Search with metadata filter
question = "Tell me about poisonous mushrooms"
print("\nSearching with metadata filter for 'text3':")
docs = vectordb.similarity_search(
    question,
    k=2,
    filter={"source": "text3"}
)

print("\nResults with metadata filter:")
for doc in docs:
    print("\nContent:", doc.page_content)
    print("Metadata:", doc.metadata)

# Try another metadata filter
print("\nSearching with metadata filter for page 1:")
docs_page = vectordb.similarity_search(
    question,
    k=2,
    filter={"page": 1}
)

print("\nResults with page filter:")
for doc in docs_page:
    print("\nContent:", doc.page_content)
    print("Metadata:", doc.metadata)


Searching with metadata filter for 'text3':

Results with metadata filter:

Content: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.
Metadata: {'source': 'text3', 'page': 1}

Searching with metadata filter for page 1:

Results with page filter:

Content: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.
Metadata: {'source': 'text3', 'page': 1}

Content: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.
Metadata: {'source': 'text2', 'page': 1}


In [23]:
# Let's create a simpler version of document compression
def compress_docs(docs, question):
    compressed = []
    for doc in docs:
        # Get relevant sentences containing key terms from the question
        content = doc.page_content
        if any(term.lower() in content.lower() for term in question.split()):
            compressed.append(doc)
    return compressed

try:
    # Get base results
    base_retriever = vectordb.as_retriever()
    base_results = base_retriever.get_relevant_documents("What are the characteristics of Death Cap?")
    
    print("\nOriginal Results:")
    for doc in base_results:
        print(f"\nDocument: {doc.page_content}")
    
    # Compress results
    compressed_results = compress_docs(base_results, "What are the characteristics of Death Cap?")
    
    print("\nCompressed Results:")
    for doc in compressed_results:
        print(f"\nCompressed Document: {doc.page_content}")

except Exception as e:
    print(f"Error details: {str(e)}")


Original Results:

Document: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.

Document: The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).

Document: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.

Compressed Results:

Compressed Document: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.

Compressed Document: The Amanita phalloides has a large and imposing epigeous (aboveground) fruiting body (basidiocarp).

Compressed Document: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.


In [24]:
try:
    # Get base results with MMR
    base_results = vectordb.max_marginal_relevance_search(
        "Tell me about mushroom characteristics",
        k=2,
        fetch_k=3
    )
    
    print("\nOriginal MMR Results:")
    for doc in base_results:
        print(f"\nDocument: {doc.page_content}")
    
    # Apply compression
    compressed_results = compress_docs(base_results, "Tell me about mushroom characteristics")
    
    print("\nCompressed MMR Results:")
    for doc in compressed_results:
        print(f"\nCompressed Document: {doc.page_content}")

except Exception as e:
    print(f"Error in combined approach: {str(e)}")


Original MMR Results:

Document: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.

Document: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.

Compressed MMR Results:

Compressed Document: A mushroom with a large fruiting body is the Amanita phalloides. Some varieties are all-white.

Compressed Document: A. phalloides, a.k.a Death Cap, is one of the most poisonous of all known mushrooms.
