In [17]:
from dotenv import load_dotenv
load_dotenv()

True

In [18]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings= HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
embedded_content = embeddings.embed_query("Hi, How are you")

In [20]:
len(embedded_content)

384

In [21]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embedded_content = embeddings.embed_query("Hi, How are you")

In [22]:
len(embedded_content)

768

In [23]:
import os


In [35]:
from pinecone import Pinecone
from pinecone import ServerlessSpec # Managed by the cloud provider
pc = Pinecone(api_key=pc_api_key)
pc_api_key=os.getenv("PINECONE_API_KEY")
index_name="agentic-ai-task"



In [None]:
# Creating a index
if not pc.has_index(index_name):
    pc.create_index(
    name=index_name, 
    dimension=768,
    metric="cosine",
    spec= ServerlessSpec(cloud="aws", region="us-east-1")

)


In [37]:
# loading the index
index = pc.Index(index_name )

In [39]:
from langchain_pinecone import PineconeVectorStore
vectorDB = PineconeVectorStore(index=index, embedding=embeddings,pinecone_api_key=pc_api_key)

In [47]:
# will give empty list, as no data is stored in it yet
vectorDB.similarity_search("What is langchain")

[Document(id='674aa7d9-2819-4010-b805-bf2eae126c96', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!'),
 Document(id='d7cee374-dee7-4e9a-949f-2f1119913d52', metadata={'source': 'news'}, page_content='I had choclate chip pancakes and eggs for the breakfast this morning'),
 Document(id='efcbfdb2-dea2-4e91-b373-ea89ebe320f5', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(id='bf65f69a-6378-4a43-9374-b61eb3ee5999', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy iand overcast, with a high of 62 degress')]

In [48]:
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had choclate chip pancakes and eggs for the breakfast this morning",
    metadata={"source":"news"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy iand overcast, with a high of 62 degress",
    metadata={"source":"news"},
)

document_3 = Document(
    page_content="Bulding an exciting new project with Langchain - come check it out!",
    metadata={"source":"tweet"},
)
document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source":"news"},
)

documents = [ document_1, document_2, document_3, document_4]


In [49]:
uuids = [str(uuid4()) for _ in range(len(documents))]
uuids

['ee48222e-0e6d-4b8b-8449-507b6d8aa6d1',
 '8ab44ba6-d246-4bab-bcf6-e81a5e872753',
 '4cd54661-628c-4f71-8ef6-844e7c89d227',
 'bf53b4ec-8a50-4069-90b3-18bd0b1896d3']

In [50]:
vectorDB.add_documents(documents=documents, ids=uuids)

['ee48222e-0e6d-4b8b-8449-507b6d8aa6d1',
 '8ab44ba6-d246-4bab-bcf6-e81a5e872753',
 '4cd54661-628c-4f71-8ef6-844e7c89d227',
 'bf53b4ec-8a50-4069-90b3-18bd0b1896d3']

In [54]:
vectorDB.similarity_search("What is langchain", k=1)

[Document(id='4cd54661-628c-4f71-8ef6-844e7c89d227', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!')]

In [70]:
retriver = vectorDB.as_retriever(search_type="similarity_score_threshold",
                                 search_kwargs={"k":3, "score_threshold": 0.8}
                                 )

In [71]:
retriver.invoke("langchain")

[Document(id='674aa7d9-2819-4010-b805-bf2eae126c96', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!'),
 Document(id='4cd54661-628c-4f71-8ef6-844e7c89d227', metadata={'source': 'tweet'}, page_content='Bulding an exciting new project with Langchain - come check it out!')]

In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [2]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [3]:
import pprint

pprint.pprint(prompt)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])
