In [99]:
import langchain
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,TextLoader
from langchain.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores import Chroma
import os
from dotenv import load_dotenv

In [51]:
pdf_doc=PyPDFLoader(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\AVIJIT.pdf")
text_doc=TextLoader(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\text.txt")


In [92]:
def add_knowledge(filepath):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=100,chunk_overlap=10)
    if(filepath.lower().endswith('.pdf')):
        pdf=PyPDFLoader(filepath)
        pdf_load=pdf.load()
        file=text_splitter.split_documents(pdf_load)
        print("PDF IS ADDED")
        return file

    elif(filepath.lower().endswith('.txt')):
        txt=TextLoader(filepath)
        txt_load=txt.load()
        file=text_splitter.split_documents(txt_load)
        print("TEXT IS ADDED")
        return file


In [77]:
pdf=add_knowledge(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\AVIJIT.pdf")

PDF IS ADDED


In [31]:
load_dotenv()
client=ChatGoogleGenerativeAI(
    model="gemma-3-27b-it",
    google_api_key=os.getenv("GEMINI_KEY"),
    temperature=0.9
)

In [35]:
embeddings=GoogleGenerativeAIEmbeddings(
    model='models/embedding-001',
    google_api_key=os.getenv("GEMINI_KEY")
)

In [80]:
database1=Chroma.from_documents(
    pdf,
    embedding=embeddings,
    persist_directory=r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\database1",
)
database1.persist()

  database1.persist()


In [94]:
def add_knowledge_to_database1(database1,file):
    existing_docs=database1.get(include=['documents'])['documents']
    file=add_knowledge(file)
    new_docs=[doc for doc in file if doc.page_content not  in existing_docs ]
    if new_docs:
        database1.add_documents(new_docs)
        database1.persist()
        print(f"database1 is update with {file}")
    else:
        print(f"database1 is not updated")

add_knowledge_to_database1(database1,r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\text.txt")

TEXT IS ADDED
database1 is update with [Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='North 24 Parganas (abv. 24 PGS (N)) or sometimes North Twenty Four Parganas is a district in'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='in southern West Bengal, of eastern India. North 24 Parganas extends in the tropical zone from'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content="zone from latitude 22° 11′ 6″ north to 23° 15′ 2″ north and from longitude 88º20' east to 89º5'"), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content="to 89º5' east. Barasat is the district headquarters of North 24 Parganas. North 24 Parganas is West"), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content="is West Bengal's most populou

In [109]:
promppt=PromptTemplate.from_template(
"""
you are RAG who can only answer from this {context} else you will say i dont have any information 
you only answer this question - {input} just strictly answer or say i dont have any information and nothing else prove yourself as best rag bot
"""
)

In [110]:
retreiver=database1.as_retriever()
chain=create_stuff_documents_chain(llm=client,prompt=promppt)

In [111]:
retreiver_chain=create_retrieval_chain(retreiver,chain)

In [112]:
retreiver_chain.invoke({"input":"who is avijit bhadra"})['answer']

'Avijit Bhadra is a second-year B.Tech student in Computer Science and Engineering (CSE) at Narula.'