<a href="https://colab.research.google.com/github/ArfaShoaib/PIAIC-PROJECTS/blob/main/RAG_Project2ipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [33]:
%pip install -qU langchain-pinecone langchain-google-genai

In [34]:

from google.colab import userdata
userdata.get('PINECONE_API_KEY')
from pinecone import Pinecone, ServerlessSpec



pinecone_api_key = userdata.get('PINECONE_API_KEY')

pc = Pinecone(api_key=pinecone_api_key)

In [36]:
import time

index_name = "rag-project"  # change if desired

# Connect to the existing index if it exists, otherwise create it
try:
    index = pc.Index(index_name)  # Attempt to connect to the existing index
    print(f"Connected to existing index: {index_name}")
except:
    pc.create_index(  # Create a new index if it doesn't exist
            name=index_name,
            dimension=768,
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
     )
    index = pc.Index(index_name)
    print(f"Created new index: {index_name}")

Connected to existing index: rag-project


In [37]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os
from google.colab import userdata



os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')


embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


In [38]:
vector = embeddings.embed_query("hello, world!")
vector[:5]

[0.05168594419956207,
 -0.030764883384108543,
 -0.03062233328819275,
 -0.02802734263241291,
 0.01813093200325966]

In [39]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [40]:
#Data Save
from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]


In [41]:
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['e91180af-3c5d-4234-a158-21c34c05dc13',
 '0e6c16bc-3d16-4722-a837-44499e799b70',
 '2326c101-4ce3-4568-9315-c66f6f43916b',
 'be6f48d1-5285-4225-8f8e-e7647c5b627e',
 'db878ffe-6a19-438e-9305-58b5ded8b145',
 '47420564-3eb3-49c7-a078-e9c612969260',
 'b65b520a-2965-4f2b-a94c-05fe3c6a124c',
 '01aaada1-f54d-4ebb-81df-5bc9693d528f',
 'a46be66c-0490-4b4a-a189-1d7cecc738c2',
 '9f47f7e5-d344-496e-8485-a79aa9401f69']

In [42]:
#Data Reterive
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

* LangGraph is the best framework for building stateful, agentic applications! [{'source': 'tweet'}]
* Building an exciting new project with LangChain - come check it out! [{'source': 'tweet'}]


In [43]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?", k=1, filter={"source": "news"}
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.668031] The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees. [{'source': 'news'}]


In [44]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
    # other params...
)

In [45]:
def answers(query:str):
  #Vector Search
  vector_results = vector_store.similarity_search(query , k=2)
  print(len(vector_results))

  #PassnTo Model Vector Result + User Query
  final_answer =llm.invoke(f"answer this user query: {query}, here are some refrences {vector_results}")


  return final_answer

In [46]:
answers("LangChain provides abstractions to make working with LLMs easy")

2


AIMessage(content="The provided text mentions LangChain's ease of use with LLMs (Large Language Models), but the references are actually about LangGraph, a different framework for building stateful, agentic applications.  The references don't support the claim about LangChain.  Therefore, the query cannot be answered using the provided information.  To answer the query about LangChain's ease of use, different references focusing on LangChain would be needed.\n", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-97b0b78e-36d9-4211-930e-cbd3e7b18d91-0', usage_metadata={'input_tokens': 149, 'output_tokens': 91, 'total_tokens': 240, 'input_token_details': {'cache_read': 0}})