In [23]:
from pinecone import Pinecone, ServerlessSpec
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import google.generativeai as genai
import os
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pinecone import ServerlessSpec, Pinecone
from langchain_pinecone import PineconeVectorStore

from dotenv import load_dotenv
load_dotenv()

True

In [62]:
# Load credentials, connect to Pinecone and make sure the "chatbot" index exists.
print("🔄 Loading environment variables...")
GEMINI_API = os.getenv("GEMINI_API")
# The Pinecone key is read from the environment variable named PINECONE_API.
PINECONE_API_KEY = os.getenv("PINECONE_API")

if not PINECONE_API_KEY:
    # Name the variable that is actually read so the message points at the right setting.
    raise ValueError("❌ PINECONE_API is not set. Please check your environment variables.")

# Initialize Pinecone
print("✅ Pinecone API Key loaded successfully!")
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create or connect to an index
index_name = "chatbot"
print(f"🔍 Checking if Pinecone index '{index_name}' exists...")
# Fetch the index list once and reuse it for both the log line and the check.
existing_indexes = pc.list_indexes()
print(existing_indexes)

# Compare against the names of ALL indexes. Looking only at the first entry
# breaks when the project has no indexes (IndexError) or when "chatbot" is not
# listed first, and `in` on a single name string is a substring test.
existing_index_names = [index_info["name"] for index_info in existing_indexes]

if index_name not in existing_index_names:
    print(f"🆕 Creating a new Pinecone index: {index_name}")
    pc.create_index(
        name=index_name,
        dimension=768,  # must equal the vector size of the embedding model used for upload
        metric="cosine",  # Replace with your model metric
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
else:
    print(f"✅ Pinecone index '{index_name}' already exists!")

index = pc.Index(index_name)

🔄 Loading environment variables...
✅ Pinecone API Key loaded successfully!
🔍 Checking if Pinecone index 'chatbot' exists...
[{
    "name": "chatbot",
    "dimension": 768,
    "metric": "cosine",
    "host": "chatbot-momcfao.svc.aped-4627-b74a.pinecone.io",
    "spec": {
        "serverless": {
            "cloud": "aws",
            "region": "us-east-1"
        }
    },
    "status": {
        "ready": true,
        "state": "Ready"
    },
    "deletion_protection": "disabled"
}]
✅ Pinecone index 'chatbot' already exists!


In [12]:
# Rebuild the Pinecone client from the already-loaded API key.
pc = Pinecone(
    api_key=PINECONE_API_KEY,
)

In [20]:
# Confirm the key is loaded without echoing the secret: cell outputs are saved
# inside the notebook file and travel with it when it is shared or committed.
print(f"{PINECONE_API_KEY[:5]}[REDACTED]" if PINECONE_API_KEY else "PINECONE_API_KEY is not set")

pcsk_[REDACTED]


In [28]:

# Load text files
# The directory can be overridden with the SCRAPED_CITY_DATA_DIR environment
# variable so the notebook also runs on other machines; when it is unset or
# empty, the original location is used.
folder_path = (
    os.getenv("SCRAPED_CITY_DATA_DIR")
    or "/home/shtlp_0101/Documents/Project-Based_on_RAG-LLM_model/scraped_city_data"
)
print(f"📂 Loading text files from: {folder_path}")
text_documents = []



# Same package the notebook already imports TextLoader from; the
# `langchain.document_loaders` path is the deprecated location of these loaders.
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader


# Load the scraped text files from a directory
def load_txt_file(data):
    """Load the ``*.txt`` files found in a directory as LangChain documents.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for the files matching
        ``*.txt``, each file being read through ``TextLoader``.
    """
    loader = DirectoryLoader(data, glob="*.txt", loader_cls=TextLoader)
    return loader.load()


extracted_data = load_txt_file(folder_path)
 
# Split the data into text chunks
def text_split(extracted_data, chunk_size=500, chunk_overlap=50):
    """Split loaded documents into overlapping chunks.

    Args:
        extracted_data: Documents to split (here, the output of ``load_txt_file``).
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value this notebook has always used.
        chunk_overlap: ``chunk_overlap`` passed to the splitter. Defaults to 50.

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)


text_chunks = text_split(extracted_data)
# Factory for the Google Generative AI embedding client
def embeddings():
    """Build a ``GoogleGenerativeAIEmbeddings`` client for ``models/embedding-001``.

    The API key comes from the module-level ``GEMINI_API`` value.

    Returns:
        The newly constructed embeddings client.
    """
    embedding_client = GoogleGenerativeAIEmbeddings(
        google_api_key=GEMINI_API,
        model="models/embedding-001",
    )
    return embedding_client
 


📂 Loading text files from: /home/shtlp_0101/Documents/Project-Based_on_RAG-LLM_model/scraped_city_data


In [29]:
import hashlib

from langchain_pinecone import PineconeVectorStore

os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY  # Explicitly set the API key
# Embedding client used both for the upload and for query-time retrieval.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=os.getenv("GEMINI_API"))

# Give every chunk a deterministic id derived from its source and text.
# Without explicit ids each run stores the same chunks again under new ids,
# so re-running this cell keeps adding duplicate vectors to the index. With
# stable ids a re-run overwrites the existing vectors instead.
# Chunks with the same source and text share one id and are uploaded once.
unique_chunks = {}
for chunk in text_chunks:
    chunk_key = f"{chunk.metadata.get('source', '')}\x00{chunk.page_content}"
    chunk_id = hashlib.sha256(chunk_key.encode("utf-8")).hexdigest()
    unique_chunks.setdefault(chunk_id, chunk)

# Upload documents to Pinecone
print("📤 Uploading documents to Pinecone...")
docsearch = PineconeVectorStore.from_documents(
    documents=list(unique_chunks.values()),
    index_name=index_name,
    embedding=embeddings,  # the client instance itself, not a call
    ids=list(unique_chunks.keys()),
)
print("✅ Documents stored in Pinecone!")

# Initialize retriever: similarity search returning the 3 closest chunks.
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 3})
print("🔍 Retriever initialized successfully!")


📤 Uploading documents to Pinecone...
✅ Documents stored in Pinecone!
🔍 Retriever initialized successfully!


In [51]:
# OpenAI key from the environment; None when OPENAI_API is not defined.
OPENAI_API = os.environ.get("OPENAI_API")

In [52]:
from langchain_openai import OpenAI

# OpenAI completion model: temperature 0.4, at most 500 generated tokens.
# NOTE(review): `llm` is reassigned to a ChatGoogleGenerativeAI model in the next cell.
llm = OpenAI(
    api_key=OPENAI_API,
    temperature=0.4,
    max_tokens=500,
)

In [56]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model, authenticated with the GEMINI_API value loaded earlier.
# This rebinds `llm`, so later cells use this model.
llm = ChatGoogleGenerativeAI(api_key=GEMINI_API, model="gemini-2.0-flash")

In [57]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate


# System instructions for the answering model. The four sentences are joined
# with single spaces and followed by a blank line and the {context} placeholder.
system_prompt = " ".join(
    [
        "You are an assistant for question-answering tasks.",
        "Use the following pieces of retrieved context to answer the question.",
        "If you don't know the answer, say that you don't know.",
        "Use three sentences maximum and keep the answer concise.",
    ]
) + "\n\n{context}"


# Two-message chat template: the system instructions first, then the user's
# question inserted through the {input} placeholder.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{input}")]
)

In [58]:
# First build the chain that answers from documents using `llm` and `prompt`,
# then wrap it with the retriever to get the full RAG chain.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [59]:
# Ask about Delhi landmarks and show only the generated answer text.
response = rag_chain.invoke(
    {"input": "famous places in delhi?"}
)
print(response["answer"])

Delhi is a city with a rich history and many famous places. It has ancient roots dating back to the Mahabharata and has been shaped by empires like the Delhi Sultanate and the Mughals. Some of its famous places include Chandni Chowk, Amrit Udyan (formerly known as the Mughal Gardens), and Gurudwara Bangla Sahib.


In [49]:
# Ask what Chandni Chowk is known for and show only the generated answer text.
response = rag_chain.invoke(
    {"input": "Speciality of chandni chowk?"}
)
print(response["answer"])

Chandni Chowk is the bustling heart of Old Delhi, weaving together centuries of history, culture, and commerce. This vibrant market has narrow lanes and chaotic energy. It was built in the 17th century by Mughal Emperor Shah Jahan.


In [50]:
# Follow-up question about food.
# NOTE(review): the payload carries only this question, with no earlier turns,
# so "there" is ambiguous for the chain. Consider naming the place explicitly.
response = rag_chain.invoke(
    {"input": "what are the best food items there?"}
)
print(response["answer"])

Amritsar's bazaars offer a sensory feast of sizzling street food that will captivate food lovers. The holy 'Mahaprasad', also known as Abhada, is a culinary discovery made in traditional earthen vessels. The delectable cuisine tantalises taste buds with its unique flavours.
