### Initializing API keys

In [3]:
import os
from dotenv import load_dotenv

load_dotenv()

os.environ["HUGGING_FACE_API"] = os.getenv("HUGGING_FACE_API")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")


### HuggingFace Embeddings

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
res = embeddings.embed_query("hello AI")
len(res)


  from .autonotebook import tqdm as notebook_tqdm


384

### Google Gemini Embeddings

In [2]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings=GoogleGenerativeAIEmbeddings(model="models/embedding-001")
res = embeddings.embed_query("hello AI")
len(res)

768

### Initializing Pinecone

In [4]:
from pinecone import Pinecone, ServerlessSpec
pc = Pinecone()

In [7]:
pc.has_index("trading-bot")

False

### Creating Index in Pinecone

In [8]:
index_name="agenticbatch2"
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws",region="us-east-1") 
    )


In [9]:
index = pc.Index(index_name)

  from .autonotebook import tqdm as notebook_tqdm


In [11]:
from langchain_pinecone import PineconeVectorStore

vectore_store = PineconeVectorStore(index=index, embedding=embeddings)

In [12]:
results = vectore_store.similarity_search("what is langchain?")
results

[]

In [13]:
from uuid import uuid4
from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},#additional info
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [14]:
len(documents)

10

In [15]:
range(len(documents))

range(0, 10)

In [16]:
for _ in range(len(documents)):
    print(_)
    print(str(uuid4()))

0
c3791bff-c473-4882-a825-69e7c8211d8b
1
feaff9c3-6599-4b7c-8eaa-71d2634bee48
2
631587b0-4073-4579-88d7-202784687f3d
3
b71c8abd-9086-41a9-89f5-9759e569a1b6
4
754d9de0-6ed8-4ce6-964e-861d1117cb55
5
31b7a551-9554-4ff0-a623-e9d4987a8873
6
5d26560d-2e7d-42ef-884b-104569b5cb99
7
ef543525-4bf3-49e8-8bed-bf466b012ddd
8
a6b9eeee-9457-4d25-a9e0-fda0b25bb0de
9
e28ea3cc-f61c-4d49-942c-dae3c90fd334


In [21]:
uuids = [str(uuid4()) for _ in range(len(documents))]

In [22]:
uuids

['f7ad5a1b-d764-46e3-b759-a2f283f9baa2',
 'a6a61b8a-3357-4f1f-bd6e-7e0abcb8de48',
 '0e9f3b46-9a61-425c-8f55-e177aa6a8fe6',
 '29d00734-1ff6-407d-9022-d0860f71fc3f',
 '5022ec38-da15-43f1-b9fc-17c2f9ba51f9',
 'ad8836ca-781c-4182-881d-04fdc8d5718a',
 'fc88130b-cbe5-43aa-b487-59e5440bdeaa',
 '4d8a9dad-de98-4ae9-b7d3-fa5804d75703',
 '4370f839-d841-473e-a2d2-158702c740c0',
 '019e2ba7-b9ff-4230-a04e-945c91f131ba']

### Adding documents in vectorDB

In [23]:
vectore_store.add_documents(documents=documents, ids=uuids)

['f7ad5a1b-d764-46e3-b759-a2f283f9baa2',
 'a6a61b8a-3357-4f1f-bd6e-7e0abcb8de48',
 '0e9f3b46-9a61-425c-8f55-e177aa6a8fe6',
 '29d00734-1ff6-407d-9022-d0860f71fc3f',
 '5022ec38-da15-43f1-b9fc-17c2f9ba51f9',
 'ad8836ca-781c-4182-881d-04fdc8d5718a',
 'fc88130b-cbe5-43aa-b487-59e5440bdeaa',
 '4d8a9dad-de98-4ae9-b7d3-fa5804d75703',
 '4370f839-d841-473e-a2d2-158702c740c0',
 '019e2ba7-b9ff-4230-a04e-945c91f131ba']

### using similarity search to find relevant docs

In [25]:
results = vectore_store.similarity_search("what langchain provides to us?",k=2)
results

[Document(id='0e9f3b46-9a61-425c-8f55-e177aa6a8fe6', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='4d8a9dad-de98-4ae9-b7d3-fa5804d75703', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!')]

In [26]:
results = vectore_store.similarity_search("what langchain provides to us?",filter={"source": "tweet"})
results

[Document(id='0e9f3b46-9a61-425c-8f55-e177aa6a8fe6', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='4d8a9dad-de98-4ae9-b7d3-fa5804d75703', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='5022ec38-da15-43f1-b9fc-17c2f9ba51f9', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(id='f7ad5a1b-d764-46e3-b759-a2f283f9baa2', metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.')]

### Vector Store as retriever

In [None]:
retriver = vectore_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7}    ## 0.7 is 70% match
)
retriver.invoke("langchain")

[Document(id='0e9f3b46-9a61-425c-8f55-e177aa6a8fe6', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(id='4d8a9dad-de98-4ae9-b7d3-fa5804d75703', metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic applications!'),
 Document(id='019e2ba7-b9ff-4230-a04e-945c91f131ba', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='5022ec38-da15-43f1-b9fc-17c2f9ba51f9', metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again.")]

### Simple RAG pipeline

In [28]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model="gemini-1.5-flash")

In [29]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")



In [30]:
import pprint
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [31]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriver | format_docs , "question": RunnablePassthrough()}
    |prompt
    |model
    |StrOutputParser()

)

In [34]:
rag_chain.invoke("what is langgraph?")

"LangGraph is a framework for creating stateful, agentic applications.  It's presented as a strong alternative to LangChain.  No further details are provided in the given context."