In [None]:
 # Install/upgrade (-U) the LangChain integrations for Pinecone and Google Generative AI, quietly (-q).
 # NOTE(review): versions are not pinned, so a fresh run may pull newer releases than the ones used here.
 %pip install -qU langchain-pinecone langchain-google-genai

In [None]:
from google.colab import userdata
from pinecone import Pinecone, ServerlessSpec



# Pull the Pinecone key from Colab's secret store instead of hard-coding it in the notebook.
pinecone_api_key = userdata.get("PINECONE_API_KEY")

# Client object used below to create and open indexes.
pc = Pinecone(api_key=pinecone_api_key)

In [None]:
index_name = "rag-project23"  # change if desired

# Create the index only when it is missing: create_index fails if the name is
# already taken, which would break a plain re-run of this notebook.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the length of the vectors produced by the embedding model
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# Handle used for all subsequent reads and writes against this index.
index = pc.Index(index_name)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
# Publish the Google key (read from Colab's secret store) as an environment variable;
# presumably the langchain-google-genai classes pick it up from there — no key is passed explicitly below.
os.environ.update(GOOGLE_API_KEY=userdata.get("GOOGLE_API_KEY"))

# Embedding model used for both indexing documents and embedding queries.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


In [None]:
# Smoke-test the embedding model on one sentence and show the first five components.
vector = embeddings.embed_query("My name is Hamza Sheikh")
vector[:5]

[0.05423726886510849,
 -0.03553210571408272,
 -0.04950312152504921,
 -0.026713678613305092,
 0.050312358886003494]

In [None]:
from langchain_pinecone import PineconeVectorStore
# Wrap the Pinecone index in a LangChain vector store that uses `embeddings` for documents and queries.
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [None]:
from langchain_core.documents import Document
# A single example document: free text plus a "source" tag stored as metadata.
document_1 = Document(
    metadata={"source": "tweet"},
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
)

In [None]:
# Display the document to inspect its fields.
document_1


Document(metadata={'source': 'tweet'}, page_content='I had chocalate chip pancakes and scrambled eggs for breakfast this morning.')

In [None]:
# Data Save

from uuid import uuid4

from langchain_core.documents import Document

# Sample corpus, written as (text, source) pairs and converted to Documents in one pass.
# Each document carries its origin ("tweet", "news" or "website") under the "source" metadata key.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in [
        ("I had chocalate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
        ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
        ("Building an exciting new project with LangChain - come check it out!", "tweet"),
        ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
        ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
        ("Is the new iPhone worth the price? Read this review to find out.", "website"),
        ("The top 10 soccer players in the world right now.", "website"),
        ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
        ("The stock market is down 500 points today due to fears of a recession.", "news"),
        ("I have a bad feeling I am going to get deleted :(", "tweet"),
    ]
]

# Keep the individual names bound as well, in the same order as the list.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents


In [None]:
# Confirm how many documents were assembled.
len(documents)

10

In [None]:
from uuid import NAMESPACE_URL, uuid5

# Derive each ID from the document's source and text, so re-running this cell
# writes to the same IDs instead of inserting a second copy of every document
# under fresh random IDs.
uuids = [
    str(uuid5(NAMESPACE_URL, f"{doc.metadata.get('source', '')}|{doc.page_content}"))
    for doc in documents
]
# The data will be sent in the form of embeddings through this method.
vector_store.add_documents(documents=documents, ids=uuids)

['066b7475-62c1-471c-b4d4-c3a935071d52',
 'a35b89e6-1be9-49a5-9c42-5a9098bf856b',
 '14366144-74af-4cdb-ab47-91204aeed6fb',
 '91ce9ac5-5aac-4d91-a244-3e9ca305220b',
 'dcf61182-e625-47a8-9d44-92caf20b4a31',
 'bb6d77f3-9b6c-40ce-9219-8d8aceda47f5',
 '710f62ca-2618-4478-99a0-2f6bf992d319',
 '4d17f185-2c1c-4796-9050-eb2d7ad41ddd',
 '9b961e07-92a0-40b2-8db4-f36a7f4f9e98',
 '5e04814c-f548-4ce6-be01-fba62387940c']

In [None]:
# Generate a unique random ID (shows what a single uuid4() call returns).

from uuid import uuid4
uuid4()

UUID('a390545a-71a0-454f-8abb-4b3b582aba57')

In [None]:
# Data Retrieve: fetch the single closest match among documents tagged as tweets.
results = vector_store.similarity_search(
    "langchain provides a abstraction to make working with llms easy",
    filter={"source": "tweet"},
    k=1,  # return only the top result
)
for res in results:
    print(f"*{res.page_content} [{res.metadata}]")

*LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]


In [None]:
# Scored retrieval: the closest "news" document to the question, with its similarity score.
results = vector_store.similarity_search_with_score(
    "will it be cold tomorrow?",
    k=1,
    filter={"source": "news"},
)
for res, score in results:
    # ".3f" prints three decimals; the former "3f" set a minimum width of 3, not a precision.
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.682070] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used to phrase the final answers.
# max_tokens and timeout are passed as None, i.e. no explicit value is chosen here.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)

In [None]:
def ans_to_user(query: str, k: int = 2):
    """Answer ``query`` with the LLM, using the closest stored documents as references.

    Args:
        query: The user's natural-language question.
        k: How many nearest documents to retrieve as reference material.
            Defaults to 2, the value this function always used before.

    Returns:
        Whatever ``llm.invoke`` returns for the assembled prompt (the notebook
        reads its ``.content`` attribute).
    """
    # Vector search: each hit is a (document, similarity score) pair.
    hits = vector_store.similarity_search_with_score(query, k=k)

    # Hand the model only the document text and its source tag, one reference per
    # line, rather than the raw Python repr of the (Document, score) tuples.
    references = "\n".join(
        f"- {doc.page_content} (source: {doc.metadata.get('source', 'unknown')})"
        for doc, _score in hits
    ) or "(no matching references found)"

    # Keep the query and the references in clearly separated sections so the
    # model cannot confuse one for the other.
    prompt = (
        "Answer the user query using the references below.\n\n"
        f"User query: {query}\n\n"
        f"References:\n{references}"
    )
    return llm.invoke(prompt)

In [None]:
# Run the retrieval + LLM pipeline on a sample question.
answer = ans_to_user("What is the weather forecast for tomorrow?")


In [None]:
# Show the text of the reply held in `answer`.
answer.content

'The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.\n'