In [None]:
# Required Library Installation
# Install LangChain-Pinecone and LangChain-Google-GenAI
%pip install -qU langchain-pinecone langchain-google-genai

In [2]:
# Import Libraries
from pinecone import Pinecone, ServerlessSpec
from google.colab import userdata
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore
from langchain_core.documents import Document
from uuid import uuid4
import os
import time

In [3]:
# Initialize Pinecone
pinecone_api_key = userdata.get('Pinecone')  # Retrieve Pinecone API key from user data
pc = Pinecone(api_key=pinecone_api_key)

In [6]:
# Delete Existing Index (if applicable)
pc.delete_index(index_name)  # Delete the index if it exists
print(f"Index '{index_name}' deleted successfully.")

Index 'online-rag-project' deleted successfully.


In [7]:
index_name = "online-rag-project"  # Specify the index name

# Create New Index
pc.create_index(
    name=index_name,
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)


In [8]:
# Connect to the Newly Created Index
index = pc.Index(index_name)

In [9]:
# Initialize Google Generative AI Embeddings
os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')  # Set the Google API key as an environment variable
embeddings = GoogleGenerativeAIEmbeddings(model='models/embedding-001')

In [10]:
# Embed a Sample Query
vector = embeddings.embed_query('muhammad Junaid')


In [11]:
# Initialize Pinecone Vector Store
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [12]:
# Add Documents to the Vector Store
# Define Sample Documents
document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)


In [13]:
# Generate Unique IDs for the Documents
documents = [document_1, document_2, document_3]
uuids = [str(uuid4()) for _ in range(len(documents))]

In [14]:
# Add Documents to the Vector Store
vector_store.add_documents(documents=documents, ids=uuids)

['26bc3679-f9d8-4211-8599-94022ce072b4',
 'a159482c-d5bf-4fab-a553-2b14376f2b83',
 '77baecd2-ba55-44a2-b7d4-ba1bef97f4b1']

In [15]:
# Retrieve Documents Using Similarity Search
# Search by Content and Filter by Metadata
results = vector_store.similarity_search(
    'LangeChain provide abstractions to make working with LLMs easy',
    k=2,
    filter={'source': 'tweet'}
)

In [16]:
# Print Retrieved Results
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]
* I had chocolate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]


In [17]:
# Retrieve Documents Using Similarity Search with Scores
results_with_scores = vector_store.similarity_search_with_score(
    "will it be hot tomorrow"
)

In [18]:
print(results_with_scores)

[(Document(id='a159482c-d5bf-4fab-a553-2b14376f2b83', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.'), 0.675364912), (Document(id='26bc3679-f9d8-4211-8599-94022ce072b4', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'), 0.493864655), (Document(id='77baecd2-ba55-44a2-b7d4-ba1bef97f4b1', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'), 0.479618132)]


In [19]:
# Print Retrieved Results with Scores
for res, score in results_with_scores:
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.675] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit. [{'source': 'news'}]
* [SIM=0.494] I had chocolate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]
* [SIM=0.480] Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [21]:
# Query Example for Non-Matching Case
results_with_scores = vector_store.similarity_search_with_score(
    "Who won 2024 Presidential Election in USA?"
)
for res, score in results_with_scores:
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.510] I had chocolate chip pancakes and scrambled eggs for breakfast this morning. [{'source': 'tweet'}]
* [SIM=0.492] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit. [{'source': 'news'}]
* [SIM=0.480] Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [24]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Initialize Google Generative AI LLM
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

In [25]:
# Define a Function to Answer User Queries
def answer_to_user(query: str) -> str:
    """
    Answers the user query by retrieving relevant documents and using LLM for generation.

    Args:
        query (str): User's query.

    Returns:
        str: Final answer generated by the LLM.
    """
    # Perform Vector Search
    vector_results = vector_store.similarity_search(query, k=2)
    print(len(vector_results))

    # Generate Answer Using LLM
    final_answer = llm.invoke(f"ANSWER THIS USER QUERY: {query}, Here are some reference to answer: {vector_results}")

    return final_answer

In [27]:
# Example Query
answer = answer_to_user("LangeChain provide abstractions to make working with LLMs easy")


2


In [28]:
answer.content

"LangChain simplifies working with Large Language Models (LLMs).  The provided documents don't offer details on *how* LangChain achieves this simplification, but one document mentions someone is using LangChain in a new project.  The other document is irrelevant to the query.\n"