<a href="https://colab.research.google.com/github/IqraZainab23/AI-201/blob/main/Project_02_RAG_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU langchain-pinecone langchain-google-genai

In [None]:
from google.colab import userdata

from pinecone import Pinecone, ServerlessSpec


pinecone_api_key = userdata.get('PINECONE_API_KEY')

pc = Pinecone(api_key=pinecone_api_key)

<!----- Embeddings ->  Embeddings Models ->
Openai -> textsmall
Google ->

In [None]:
pc.delete_index(index_name)

In [None]:
import time

index_name = "online-rag-project"  # change if desired
pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index(index_name)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


In [None]:
vector = embeddings.embed_query("We are building a RAG text")

In [None]:
vector[:5]

[0.03927880525588989,
 -0.05155408754944801,
 -0.06100920960307121,
 -0.004878396634012461,
 0.003232848597690463]

In [None]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [None]:
# Data Save

from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]


In [None]:
len(documents)

10

In [None]:
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)


['d415f84a-af2e-4083-a244-2f5b53ec877a',
 '4e578f11-ac36-47ce-9fcb-cd87185834f3',
 'ed7fdcc4-f869-4e7f-8cc5-77af85f3114e',
 '7eb05c89-d51e-4eaf-8102-ea7ea122deeb',
 'ad01fc9c-ab26-4a01-8f58-34c782155d8a',
 '0ef75a2d-46f5-4595-9c29-ac3bdfcbd6e2',
 'de275e5a-88ab-45e5-ab46-3a945a5c428b',
 '48df154a-58a3-4a72-96ca-92c99a034905',
 'a349e2ad-eb83-4df4-8606-bc5417d13efe',
 '5c3f92de-3a67-4aed-a15a-1dba2aaffd1c']

In [None]:
#Data Reterivel

results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]
* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]


In [None]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?",
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.668031] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]
* [SIM=0.668031] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]
* [SIM=0.668031] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]
* [SIM=0.577411] I have a bad feeling I am going to get deleted :( [{'source': 'tweet'}]


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [None]:
def answer_to_user(query: str):

    #Vector search
    vector_results = vector_store.similarity_search(query, k=2)
    print(len(vector_results))

    #todo:Pass to model Vector Results + User Query
    final_answer = llm.invoke(f"Answer this query {query}, Here are some reference to answer{vector_results}")
    return final_answer

In [None]:
answer = answer_to_user("LangChain provides abstractions to make working with LLMs easy",)

2


In [None]:
answer.content

"The provided text focuses on LangGraph, not LangChain.  Therefore, it doesn't answer the query about LangChain's abstractions for working with LLMs.  The documents only state that LangGraph is a good framework for building certain types of applications.  No information about LangChain is present."