<a href="https://colab.research.google.com/github/KingAbdulRehman/LangChain/blob/main/Rag_LangChain/RAG_LangChain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**NOTE:** Before running this project, you need to set up just two things:


1.   GOOGLE_API_KEY
2.   PINECONE_API_KEY

In the example below I add my own data from text files, so you need to add those files yourself before running it; please see the [file_code](https://colab.research.google.com/drive/1-y_g1qUj2JcxWD9A0xv1KJo6MpglAevM#scrollTo=_fzxtErPw1Hh&line=12&uniqifier=1) cell below.

# Install And Import Packages

In [None]:
!pip install langchain langchain-google-genai pinecone-client langchain-pinecone langchain-community

In [None]:
from langchain_google_genai import GoogleGenerativeAI, ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
import langchain
import os
from google.colab import userdata
from pinecone import Pinecone, ServerlessSpec
from langchain_pinecone import PineconeVectorStore
from langchain_community.document_loaders import TextLoader
from tqdm import tqdm
from uuid import uuid4
from langchain_core.documents import Document
from langchain.chains import RetrievalQA

# Setup Model and Key

In [None]:
# Export the API keys into the environment.
# Any key that is already set is left alone; missing ones are read from the
# Colab secrets store via userdata.get().
for secret_name in ("GOOGLE_API_KEY", "PINECONE_API_KEY"):
  if secret_name in os.environ:
    continue
  os.environ[secret_name] = userdata.get(secret_name)

In [None]:
# Configure the Google Gemini chat model through LangChain
llm = ChatGoogleGenerativeAI(
  model="gemini-2.0-flash-exp",
  # sampling controls
  temperature=0.9,
  top_p=0.1,
  top_k=10,
  # response size and request handling
  max_tokens=500,
  timeout=None,
  max_retries=5,
)

In [None]:
# Smoke test: send one short prompt and print the text of the model's reply
reply = llm.invoke("hello, short reply")
print(reply.content)

Okay.



# Setup Pinecone and Create New Index

In [None]:
# Connect to Pinecone using the API key exported in the environment.
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

index_name = "try-rag"
# list_indexes() yields index descriptions, not plain name strings, so testing
# the string against it directly never matches and create_index() would be
# re-attempted on every re-run. Compare against .names() so the index is only
# created when it is really missing.
if index_name not in pc.list_indexes().names():
  pc.create_index(
    name=index_name,
    # assumed to equal the length of the embedding vectors stored in this
    # index — TODO confirm against the embedding model's output size
    dimension=768,
    metric='cosine',
    spec=ServerlessSpec(cloud='aws', region='us-east-1')
  )

# Handle used for all reads and writes against the index.
index = pc.Index(index_name)

# Setup Embedding and Vector

In [None]:
# Embedding model used both to index the documents and to embed the queries.
embedding_model = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")

In [None]:
# Smoke-test the embedding model: embed a short query and print the first
# three components of the returned vector.
vector = embedding_model.embed_query("test")
print(vector[:3])

[0.026467779651284218, 0.019067756831645966, -0.053323060274124146]


In [None]:
# Set up the LangChain vector store on top of the Pinecone index.
# `index` is the Index handle opened earlier, so it must be passed as `index=`;
# `index_name=` expects the index's name as a string, not the Index object.
vector_store = PineconeVectorStore(
    index=index,
    embedding=embedding_model
)

In [None]:
# Load the three sample text files and add them to the vector store.
# The files are /content/example.txt, /content/example1.txt and
# /content/example2.txt (the first one carries no numeric suffix).
source_paths = [
  f"/content/example{'' if n == 0 else str(n)}.txt"
  for n in range(3)
]

documents = []
for path in source_paths:
  # Only the first Document returned by the loader is kept.
  loaded = TextLoader(path).load()[0]
  documents.append(
      Document(page_content=loaded.page_content, metadata=loaded.metadata)
  )

# One fresh random id per document. Because the ids are regenerated on every
# run, re-running this cell adds the same texts again under new ids.
uuids = [str(uuid4()) for _ in documents]

vector_store.add_documents(documents=documents, ids=uuids)

['59ed7150-031b-4c64-a3da-e2b99aafeaeb',
 'a6aff73a-a7b1-42b0-87c8-f8017c9f25c5',
 '73179398-6c0f-4217-b151-bd741ebdc4d6']

In [None]:
# Example of removing a stored vector by id (only needed for the delete case).
# NOTE(review): this hard-coded id is not among the ids shown in the
# add_documents output of this notebook — presumably left over from an earlier
# run; replace it with a real id before relying on this cell.
vector_store.delete(ids=['8f334f3b-5420-4770-8a46-35b505c3dc73'])

# Retrieval

In [None]:
# Fetch the three stored texts most similar to the query and print each one.
results = vector_store.similarity_search("in 1900", k=3)

for match in results:
  print(match.page_content)

My Name is Abdul Rehman, and i born in karachi pakistan, in 1900
Abdul Rehman has two brothers
Abdul Samad has two brothers as well, he also born in karachi pakistan


In [None]:
# Build a retriever that returns at most two hits and filters them by score.
# NOTE(review): a score_threshold of 1 looks like the very top of the
# relevance range — confirm this is the intended cut-off.
retrieval_options = {"k": 2, "score_threshold": 1}
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs=retrieval_options,
)
retriever.invoke("How is Abdul Rehman")

[Document(id='a6aff73a-a7b1-42b0-87c8-f8017c9f25c5', metadata={'source': '/content/example1.txt'}, page_content='Abdul Rehman has two brothers'),
 Document(id='59ed7150-031b-4c64-a3da-e2b99aafeaeb', metadata={'source': '/content/example.txt'}, page_content='My Name is Abdul Rehman, and i born in karachi pakistan, in 1900')]

# Gen AI with Retriever

In [None]:
# Combine the chat model and the retriever into a question-answering chain,
# then ask it a question. The result is kept in `ai_result` for the next cell.
question = "How is Abdul Rehman"

llm_with_rag = RetrievalQA.from_chain_type(
  retriever=retriever,
  chain_type="stuff",
  llm=llm,
)

ai_result = llm_with_rag.invoke(question)

{'query': 'How is Abdul Rehman', 'result': 'Abdul Rehman was born in 1900 and has two brothers.\n'}


In [None]:
# Show only the generated answer, stored under the 'result' key of the chain output.
print(ai_result['result'])

Abdul Rehman was born in 1900 and has two brothers.

