This notebook implements two methods for semantic search:

**OpenAI API**:
Uses OpenAI's embedding generation to calculate semantic similarity. This approach offers high accuracy but is subject to usage limits (API key restrictions and quota).

**Sentence Transformers**:
Uses the sentence-transformers library to compute embeddings locally. This is a cost-effective alternative that needs no external embedding API (Pinecone is still used to store and query the vectors), making it suitable for unrestricted or large-scale usage.

Because the OpenAI API usage limit has been reached, the Sentence Transformers cell is the one that was actually run in this notebook; the OpenAI cell is included but was not executed. The pre-trained all-MiniLM-L6-v2 model (384-dimensional embeddings) is used to embed the document chunks for retrieval.

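Both methods rank and retrieve document chunks by semantic similarity, and they share the same pipeline (Markdown chunking, a cosine-metric Pinecone index, and the same namespace and metadata layout) so that the two approaches stay consistent. The Sentence Transformers cell returns the retrieved context directly, whereas the OpenAI cell passes it to a chat model to generate an answer.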

**Sentence Transformers**

In [17]:
# Install required libraries
!pip install \
    "pinecone-client" \
    "langchain" \
    "langchain-text-splitters" \
    "sentence-transformers" \
    "langchain-pinecone"

# Set environment variables
import os

# Keep a key that is already exported in the environment; only fall back to
# the placeholder below (replace it with a real key) when nothing is set.
# Exporting PINECONE_API_KEY outside the notebook avoids storing the secret here.
os.environ.setdefault("PINECONE_API_KEY", "replace your key")

# Business data: a small Markdown product manual that serves as the knowledge base
business_document = """
# Company XYZ Product Manual

## Introduction
Welcome to Company XYZ! This document provides an overview of our flagship product, the XYZ3000, and its features.

## Product Overview
The XYZ3000 is a state-of-the-art device designed to streamline your workflow. Key features include:
- Advanced AI integration for predictive analytics.
- Cloud-based synchronization for seamless data access.
- Intuitive user interface for ease of use.

## Getting Started
To set up the XYZ3000, follow these steps:
1. Unbox the device and ensure all components are present.
2. Connect the device to a power source and turn it on.
3. Follow the on-screen instructions to complete the setup.

## Troubleshooting
If you encounter issues, try the following:
- Issue: Device not turning on.
  Solution: Ensure the power cable is securely connected.
- Issue: Cloud sync not working.
  Solution: Check your internet connection and restart the device.
"""

# Chunk the manual at each "##" heading; strip_headers=False keeps the heading
# line inside the chunk text.
from langchain_text_splitters import MarkdownHeaderTextSplitter

headers_to_split_on = [
    ("##", "Header 2"),
]
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    strip_headers=False,
)
document_chunks = markdown_splitter.split_text(business_document)

# Initialize Sentence Transformers embeddings
from sentence_transformers import SentenceTransformer
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Ask the model for its output size instead of hard-coding it, so the index
# dimension always matches whichever model is loaded above
# (384 for all-MiniLM-L6-v2).
embedding_dimension = embedding_model.get_sentence_embedding_dimension()

# Initialize Pinecone and delete existing index (if any)
from pinecone import Pinecone, ServerlessSpec
import time
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
index_name = "business-qa-index"
# Start from a clean index: one left over from an earlier run may hold stale
# vectors or have been created with a different dimension.
if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)
    print(f"Deleted index: {index_name}")

# Create a new Pinecone index sized to the embedding model
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    print(f"Created new index: {index_name}")

# Wait for the index to be ready
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

# Embed and upsert chunks
chunk_texts = [chunk.page_content for chunk in document_chunks]
document_embeddings = embedding_model.encode(chunk_texts)
index = pc.Index(index_name)
namespace = "business-docs"

# Build every record first and send them in one request instead of issuing one
# upsert call per chunk. Each embedding row is converted to a plain list of
# floats so the payload does not depend on numpy types.
# NOTE(review): for much larger documents, split `vectors` into smaller batches.
vectors = [
    {
        "id": f"chunk-{i}",
        "values": embedding.tolist(),
        "metadata": {"text": text},
    }
    for i, (text, embedding) in enumerate(zip(chunk_texts, document_embeddings))
]
if vectors:  # nothing to send when the document produced no chunks
    index.upsert(vectors=vectors, namespace=namespace)

# Pause before querying; freshly upserted vectors may not be searchable immediately.
time.sleep(5)

# Initialize the retriever
from langchain_pinecone import PineconeVectorStore  # Updated import

# Define a custom Embeddings class for Sentence Transformers
from langchain_core.embeddings import Embeddings

class SentenceTransformerEmbeddings(Embeddings):
    """Adapter exposing a sentence-transformers model through LangChain's ``Embeddings`` interface."""

    def __init__(self, model):
        # The wrapped model must provide ``encode``; its result must provide ``tolist``.
        self.model = model

    def embed_documents(self, texts):
        """Encode a batch of texts and return the result converted with ``tolist()``."""
        encoded_batch = self.model.encode(texts)
        return encoded_batch.tolist()

    def embed_query(self, text):
        """Encode a single query string and return the result converted with ``tolist()``."""
        encoded_query = self.model.encode(text)
        return encoded_query.tolist()

# Wrap the local model so LangChain components can call it
embeddings = SentenceTransformerEmbeddings(model=embedding_model)

# Vector store over the populated index; "text" is the metadata field that
# holds each chunk's content
vectorstore = PineconeVectorStore(
    index=index,
    embedding=embeddings,
    text_key="text",
    namespace=namespace,
)

# Retriever limited to the single best match
retriever = vectorstore.as_retriever(search_kwargs={"k": 1})

# Define a simple QA function
def ask_question(query, *, top_k=1):
    """Retrieve the stored chunk(s) most similar to ``query`` and print them.

    Args:
        query: The question text to embed and search for.
        top_k: Number of nearest chunks to request from the index.
            Defaults to 1, the previously hard-coded value.

    Returns:
        The text of the matched chunks joined by blank lines, or
        "No relevant documents found." when the index returns no matches.
    """
    # Generate query embedding with the same model used for the documents
    query_embedding = embeddings.embed_query(query)

    # Query Pinecone; include_metadata=True is required to get the chunk text back
    query_response = index.query(
        vector=query_embedding,
        top_k=top_k,
        include_metadata=True,
        namespace=namespace,
    )

    # Combine the retrieved documents into a single context
    matches = query_response["matches"]
    if matches:
        context = "\n\n".join(match["metadata"]["text"] for match in matches)
    else:
        context = "No relevant documents found."

    # Print the question and context
    print(f"Question: {query}")
    print(f"Context:\n{context}\n")

    # Return the context (you can replace this with a custom LLM if needed)
    return context

# Interactive loop for user input.
# The try/finally guarantees the index is deleted even when the loop is left
# through an interrupt, end-of-input, or an error raised while answering.
try:
    while True:
        # Get user input; surrounding whitespace is ignored so " exit " also quits
        query = input("Enter your question (or type 'exit' to quit): ").strip()

        # Exit the loop if the user types 'exit'
        if query.lower() == "exit":
            print("Exiting...")
            break

        # Nothing to search for; prompt again
        if not query:
            continue

        # Get and display the answer
        ask_question(query)
except (EOFError, KeyboardInterrupt):
    # Treat Ctrl-C / closed input as a normal request to quit
    print("Exiting...")
finally:
    # Clean up
    pc.delete_index(index_name)

Created new index: business-qa-index
Enter your question (or type 'exit' to quit): how to troubleshoot?
Question: how to troubleshoot?
Context:
## Troubleshooting
If you encounter issues, try the following:
- Issue: Device not turning on.
Solution: Ensure the power cable is securely connected.
- Issue: Cloud sync not working.
Solution: Check your internet connection and restart the device.

Enter your question (or type 'exit' to quit): exit
Exiting...


**OpenAI API**

In [None]:
# Install required libraries
!pip install \
    "pinecone-client" \
    "langchain" \
    "langchain-openai" \
    "langchain-text-splitters"

# Set environment variables
import os

# Keep keys that are already exported in the environment; only fall back to
# the placeholders below (replace them with real keys) when nothing is set.
# Exporting the keys outside the notebook avoids storing the secrets here.
os.environ.setdefault("PINECONE_API_KEY", "your key")
os.environ.setdefault("OPENAI_API_KEY", "your key")

# Business data: a small Markdown product manual that serves as the knowledge base
business_document = """
# Company XYZ Product Manual

## Introduction
Welcome to Company XYZ! This document provides an overview of our flagship product, the XYZ3000, and its features.

## Product Overview
The XYZ3000 is a state-of-the-art device designed to streamline your workflow. Key features include:
- Advanced AI integration for predictive analytics.
- Cloud-based synchronization for seamless data access.
- Intuitive user interface for ease of use.

## Getting Started
To set up the XYZ3000, follow these steps:
1. Unbox the device and ensure all components are present.
2. Connect the device to a power source and turn it on.
3. Follow the on-screen instructions to complete the setup.

## Troubleshooting
If you encounter issues, try the following:
- Issue: Device not turning on.
  Solution: Ensure the power cable is securely connected.
- Issue: Cloud sync not working.
  Solution: Check your internet connection and restart the device.
"""

# Chunk the manual at each "##" heading; strip_headers=False keeps the heading
# line inside the chunk text.
from langchain_text_splitters import MarkdownHeaderTextSplitter

headers_to_split_on = [
    ("##", "Header 2"),
]
markdown_splitter = MarkdownHeaderTextSplitter(
    headers_to_split_on=headers_to_split_on,
    strip_headers=False,
)
document_chunks = markdown_splitter.split_text(business_document)

# Initialize OpenAI embeddings
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings(openai_api_key=os.environ.get("OPENAI_API_KEY"))

# Create a Pinecone index
from pinecone import Pinecone, ServerlessSpec
import time
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
cloud = os.environ.get('PINECONE_CLOUD') or 'aws'
region = os.environ.get('PINECONE_REGION') or 'us-east-1'
spec = ServerlessSpec(cloud=cloud, region=region)
index_name = "business-qa-index"
embedding_dimension = 1536  # OpenAI embeddings have 1536 dimensions

# An index with this name may be left over from a run that used a different
# embedding size (the Sentence Transformers variant creates it with 384
# dimensions). Reusing it would make every upsert below fail, so drop it.
if index_name in pc.list_indexes().names():
    if pc.describe_index(index_name).dimension != embedding_dimension:
        pc.delete_index(index_name)
        print(f"Deleted index with mismatched dimension: {index_name}")

if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=embedding_dimension,
        metric="cosine",
        spec=spec,
    )

# Wait for readiness whether the index was just created or is being reused
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

# Embed and upsert chunks
chunk_texts = [chunk.page_content for chunk in document_chunks]
document_embeddings = embeddings.embed_documents(chunk_texts)
index = pc.Index(index_name)
namespace = "business-docs"

# Build every record first and send them in one request instead of issuing one
# upsert call per chunk.
# NOTE(review): for much larger documents, split `vectors` into smaller batches.
vectors = [
    {
        "id": f"chunk-{i}",
        "values": embedding,
        "metadata": {"text": text},
    }
    for i, (text, embedding) in enumerate(zip(chunk_texts, document_embeddings))
]
if vectors:  # nothing to send when the document produced no chunks
    index.upsert(vectors=vectors, namespace=namespace)

# Pause before querying; freshly upserted vectors may not be searchable immediately.
time.sleep(5)

# Initialize the chatbot
from langchain_openai import ChatOpenAI
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain import hub

# Import the vector store under its own name. Importing it as `Pinecone` would
# rebind the name of the Pinecone client class imported from the `pinecone`
# package. Prefer the maintained langchain-pinecone integration and fall back
# to the legacy import path when that package is not installed.
try:
    from langchain_pinecone import PineconeVectorStore
except ImportError:
    from langchain.vectorstores import Pinecone as PineconeVectorStore

# Prompt template for retrieval-augmented chat, fetched from the LangChain hub
retrieval_qa_chat_prompt = hub.pull("langchain-ai/retrieval-qa-chat")

# "text" is the metadata field that holds each chunk's content
vectorstore = PineconeVectorStore(index, embeddings, "text", namespace=namespace)
retriever = vectorstore.as_retriever()

# temperature=0.0 asks the model for the least random output
llm = ChatOpenAI(openai_api_key=os.environ.get('OPENAI_API_KEY'), model_name='gpt-4', temperature=0.0)

# Chain: retrieve chunks -> insert them into the prompt -> generate the answer
combine_docs_chain = create_stuff_documents_chain(llm, retrieval_qa_chat_prompt)
retrieval_chain = create_retrieval_chain(retriever, combine_docs_chain)

# Ask questions
query1 = "What are the key features of the XYZ3000?"
query2 = "How do I troubleshoot if the device is not turning on?"

# The try/finally guarantees the index is deleted even when a chain invocation
# fails (for example on an API quota or network error), so it is not left behind.
try:
    answer1 = retrieval_chain.invoke({"input": query1})
    print("Query 1:", query1)
    print("\nAnswer with knowledge:\n\n", answer1['answer'])
    print("\nContext used:\n\n", answer1['context'])

    answer2 = retrieval_chain.invoke({"input": query2})
    print("\nQuery 2:", query2)
    print("\nAnswer with knowledge:\n\n", answer2['answer'])
    print("\nContext used:\n\n", answer2['context'])
finally:
    # Clean up
    pc.delete_index(index_name)