In [16]:
from dotenv import load_dotenv
load_dotenv()

True

In [17]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings= HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [18]:
embedded_content = embeddings.embed_query("Hi, How are you")

In [19]:
len(embedded_content)

384

In [20]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embedded_content = embeddings.embed_query("Hi, How are you")

In [21]:
len(embedded_content)

768

In [29]:
import os
pc_api_key=os.getenv("PINECONE_API_KEY")


In [30]:
from pinecone import Pinecone
from pinecone import ServerlessSpec # Managed by the cloud provider
pc = Pinecone(api_key=pc_api_key)
pc_api_key=os.getenv("PINECONE_API_KEY")
index_name="agentic-ai-task"



In [31]:
# Creating a index
if not pc.has_index(index_name):
    pc.create_index(
    name=index_name, 
    dimension=768,
    metric="cosine",
    spec= ServerlessSpec(cloud="aws", region="us-east-1")

)


In [32]:
# loading the index
index = pc.Index(index_name )

In [33]:
from langchain_pinecone import PineconeVectorStore
vectorDB = PineconeVectorStore(index=index, embedding=embeddings,pinecone_api_key=pc_api_key)

In [34]:
# will give empty list, as no data is stored in it yet
vectorDB.similarity_search("What is langchain")

[Document(id='4cd54661-628c-4f71-8ef6-844e7c89d227', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!'),
 Document(id='674aa7d9-2819-4010-b805-bf2eae126c96', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!'),
 Document(id='ee48222e-0e6d-4b8b-8449-507b6d8aa6d1', metadata={'source': 'news'}, page_content='I had choclate chip pancakes and eggs for the breakfast this morning'),
 Document(id='d7cee374-dee7-4e9a-949f-2f1119913d52', metadata={'source': 'news'}, page_content='I had choclate chip pancakes and eggs for the breakfast this morning')]

In [35]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had choclate chip pancakes and eggs for the breakfast this morning",
    metadata={"source":"news"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy iand overcast, with a high of 62 degress",
    metadata={"source":"news"},
)

document_3 = Document(
    page_content="Bulding an exciting new project with Langchain - come check it out!",
    metadata={"source":"tweet"},
)
document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source":"news"},
)

documents = [ document_1, document_2, document_3, document_4]


In [36]:
uuids = [str(uuid4()) for _ in range(len(documents))]
uuids

['8a12a030-2aef-4d7a-a297-d97efb53b4f2',
 'e65ac0d1-401a-4c20-9386-c3b8bfceeec8',
 '51d5a23e-99b4-4f54-b2e1-73361d1ed4f5',
 '9ed91d23-8119-41bf-938b-e1ce8b5b3f6f']

In [37]:
vectorDB.add_documents(documents=documents, ids=uuids)

['8a12a030-2aef-4d7a-a297-d97efb53b4f2',
 'e65ac0d1-401a-4c20-9386-c3b8bfceeec8',
 '51d5a23e-99b4-4f54-b2e1-73361d1ed4f5',
 '9ed91d23-8119-41bf-938b-e1ce8b5b3f6f']

In [38]:
vectorDB.similarity_search("What is langchain", k=1)

[Document(id='4cd54661-628c-4f71-8ef6-844e7c89d227', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!')]

In [39]:
retriver = vectorDB.as_retriever(search_type="similarity_score_threshold",
                                 search_kwargs={"k":3, "score_threshold": 0.8}
                                 )

In [40]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [41]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [42]:
import pprint

pprint.pprint(prompt)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])


In [43]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [44]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [45]:
rag_chain = (
    {"context": retriver | format_docs, "question": RunnablePassthrough()} 
    | prompt 
    | model
    | StrOutputParser()
)

In [46]:
rag_chain.invoke("tell me about the pancakes")

"This morning's breakfast included chocolate chip pancakes and eggs.  The pancakes contained chocolate chips.  No further details about the pancakes are provided."