<a href="https://colab.research.google.com/github/EshaAmjad26/Projects-on-Colab/blob/main/Project_2_Rag_Using_Lang_Chain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Project 2: RAG Using LangChain

In [None]:
!pip install -qU langchain langchain-pinecone langchain-google-genai

In [None]:
from google.colab import userdata
import os
# Copy the Gemini key from Colab's secret store ('Gemini-API-Key') into this
# process's environment. The later cells in this notebook still pass the key
# explicitly (google_api_key= / api_key=) when they build their clients.
os.environ["GEMINI_API_KEY"] = userdata.get('Gemini-API-Key')

In [None]:
from pinecone import Pinecone, ServerlessSpec
# Read the Pinecone key from Colab secrets and build the client (`pcone`) that
# later cells use to create and open the index.
pinecone_api_key = userdata.get('PINECONE_API_KEY')
pcone = Pinecone(api_key = pinecone_api_key)

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
api_key = userdata.get('Gemini-API-Key')
# Embedding model; it is handed to the vector store in a later cell.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001",google_api_key=api_key)
# Smoke test: embed a short string and display the first five components.
vector = embedding_model.embed_query("hello world")
vector[:5]

[0.04909781739115715,
 -0.044328317046165466,
 -0.025365281850099564,
 -0.030721040442585945,
 0.019068587571382523]

In [None]:
import time  # stdlib; only needed here for the readiness poll

index_name = 'online-rag-project-creating'

# Create the serverless index only when it does not exist yet, so that re-running
# this cell (or the whole notebook) does not fail because the index is already there.
if index_name not in pcone.list_indexes().names():
    pcone.create_index(
        name=index_name,
        # Expected to equal the length of the vectors produced by embedding_model
        # (check with len(vector)) -- TODO confirm if the embedding model changes.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Wait until Pinecone reports the new index as ready before it is used.
    while not pcone.describe_index(index_name).status["ready"]:
        time.sleep(1)

# Handle to the index; consumed by the vector store cell.
index = pcone.Index(index_name)

In [None]:
from langchain_pinecone import PineconeVectorStore
# Wrap the Pinecone index in a LangChain vector store that uses embedding_model.
vector_store = PineconeVectorStore(index= index , embedding = embedding_model)


In [None]:
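# Optional cleanup: uncomment to delete an index that is no longer needed.
# Note: "online-rag-project" is not the same name as `index_name`
# ('online-rag-project-creating') used when the index was created above.
#pcone.delete_index("online-rag-project")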


In [None]:

from langchain_core.documents import Document

# Sample corpus, one tuple per document:
# (page_content, title, category, published_date).
# NOTE(review): the third passage describes Gemini as "also known as PaLM 2" --
# confirm that wording is intended; it is reproduced here unchanged.
_corpus = [
    (
        "LangChain is a framework for developing applications powered by language models. It simplifies integration with LLMs like OpenAI's GPT models.",
        "LangChain Overview",
        "AI Frameworks",
        "2025-01-01",
    ),
    (
        "Pinecone is a vector database that provides fast and scalable storage for embeddings, enabling efficient similarity searches.",
        "Pinecone Introduction",
        "Databases",
        "2024-12-15",
    ),
    (
        "Google Gemini, also known as PaLM 2, is a powerful LLM by Google that excels in multilingual understanding and reasoning tasks.",
        "Google Gemini Overview",
        "Language Models",
        "2025-01-05",
    ),
]

# `document` is the list uploaded to the vector store in the next cell.
document = [
    Document(
        page_content=text,
        metadata={
            "title": title,
            "category": category,
            "published_date": published_date,
        },
    )
    for text, title, category, published_date in _corpus
]

# Keep the individual names available as well.
document1, document2, document3 = document



In [None]:
# One 1-based ID ("doc_1", "doc_2", ...) per entry in `document`.
doc_id = [f"doc_{position}" for position in range(1, len(document) + 1)]

print(f"Number of documents: {len(document)}")
print(f"Generated IDs: {doc_id}")

# Final expression of the cell, so its return value is shown as the cell output.
vector_store.add_documents(documents=document, ids=doc_id)

Number of documents: 3
Generated IDs: ['doc_1', 'doc_2', 'doc_3']


['doc_1', 'doc_2', 'doc_3']

In [None]:
# Retrieve the two stored documents closest to the question.
result = vector_store.similarity_search("What is LangChain?", k=2)

# Plain-text passages, in ranking order; reused later as prompt context.
result_list = [hit.page_content for hit in result]

for res in result:
    print(f"*{res.page_content} [{res.metadata}]")

*LangChain is a framework for developing applications powered by language models. It simplifies integration with LLMs like OpenAI's GPT models. [{'category': 'AI Frameworks', 'published_date': '2025-01-01', 'title': 'LangChain Overview'}]
*Pinecone is a vector database that provides fast and scalable storage for embeddings, enabling efficient similarity searches. [{'category': 'Databases', 'published_date': '2024-12-15', 'title': 'Pinecone Introduction'}]


In [None]:
# Same query as the previous cell, but each hit is returned together with its score.
results = vector_store.similarity_search_with_score("What is LangChain?", k=2)
for res, score in results:
 # NOTE(review): `{score:3f}` sets a minimum field width of 3 with the default
 # six decimals; use `.3f` if three decimal places were intended.
 print(f"*[SIM : {score:3f}] {res.page_content} [{res.metadata}]")

*[SIM : 0.768002] LangChain is a framework for developing applications powered by language models. It simplifies integration with LLMs like OpenAI's GPT models. [{'category': 'AI Frameworks', 'published_date': '2025-01-01', 'title': 'LangChain Overview'}]
*[SIM : 0.561469] Pinecone is a vector database that provides fast and scalable storage for embeddings, enabling efficient similarity searches. [{'category': 'Databases', 'published_date': '2024-12-15', 'title': 'Pinecone Introduction'}]


In [None]:
import langchain_google_genai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that the chain below sends the filled-in prompt to.
model: ChatGoogleGenerativeAI = ChatGoogleGenerativeAI(
    model='gemini-2.0-flash-exp',
    api_key=userdata.get('Gemini-API-Key'),
)

In [None]:
from langchain_core.prompts import PromptTemplate

# LLMChain is not used below (the chain is composed with the `|` operator).
# The legacy `langchain.chains` module is not shipped by every langchain release,
# and the install cell pulls the latest version, so the import is guarded instead
# of being allowed to abort this cell.
try:
    from langchain.chains import LLMChain
except ImportError:
    LLMChain = None

# Prompt with two placeholders: {question} and {result} (the retrieved context).
template = """Question : {question},
The answer is may be in the following context : {result}
Give me the summarized response from this context
"""

prompt = PromptTemplate.from_template(template)
# Create the chain (method 1): the formatted prompt is piped into the chat model.
chain = prompt | model


In [None]:
# Values for the prompt placeholders: the question plus the first retrieved passage.
# NOTE(review): the name `input` shadows Python's built-in input(); it is kept
# because the next cell passes this variable to chain.invoke.
input = {'question': 'What is langchain',  'result': result_list[0]}

In [None]:
# Run the prompt -> model chain with the prepared placeholder values.
response = chain.invoke(input)

In [None]:
# Show the text of the model's reply as the cell output.
response.content

"LangChain is a framework designed to make it easier to build applications that use language models (LLMs) like OpenAI's GPT models."