In [16]:
import langchain
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain_google_genai import ChatGoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader,TextLoader
from langchain.prompts import PromptTemplate
from langchain.chains import create_retrieval_chain,ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.vectorstores import Chroma
import os
from dotenv import load_dotenv

In [17]:
pdf_doc=PyPDFLoader(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\AVIJIT.pdf")
text_doc=TextLoader(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\text.txt")


In [18]:
def add_knowledge(filepath):
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=100,chunk_overlap=10)
    if(filepath.lower().endswith('.pdf')):
        pdf=PyPDFLoader(filepath)
        pdf_load=pdf.load()
        file=text_splitter.split_documents(pdf_load)
        print("PDF IS ADDED")
        return file

    elif(filepath.lower().endswith('.txt')):
        txt=TextLoader(filepath)
        txt_load=txt.load()
        file=text_splitter.split_documents(txt_load)
        print("TEXT IS ADDED")
        return file


In [19]:
pdf=add_knowledge(r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\AVIJIT.pdf")

PDF IS ADDED


In [26]:
load_dotenv()
key=os.getenv("GEMINI_KEY")

In [81]:
load_dotenv()
client=ChatGoogleGenerativeAI(
    model="gemma-3-27b-it",
    google_api_key=key,
    temperature=0.5
)

In [82]:
embeddings=GoogleGenerativeAIEmbeddings(
    model='models/embedding-001',
    google_api_key=os.getenv("GEMINI_KEY")
)

In [76]:
database3=Chroma.from_documents(
    pdf,
    embedding=embeddings,
    persist_directory=r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\database3",
)
database3.persist()

In [83]:
def add_knowledge_to_database1(database1,file):
    existing_docs=database1.get(include=['documents'])['documents']
    file=add_knowledge(file)
    new_docs=[doc for doc in file if doc.page_content not  in existing_docs ]
    if new_docs:
        database1.add_documents(new_docs)
        database1.persist()
        print(f"database3 is update with {file}")
    else:
        print(f"database3 is not updated")

add_knowledge_to_database1(database3,r"C:\Users\Avijit\Desktop\RAG_BASEDBOT\pdftext\text.txt")

TEXT IS ADDED
database3 is update with [Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='The meeting was held to resolve this longstanding issue through dialogue, in which the chief'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='the chief secretaries of Chhattisgarh and Odisha, along with the secretaries of their respective'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='water resource departments, took part.'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='Both states acknowledged that the dispute is old and complex but emphasised that it is necessary to'), Document(metadata={'source': 'C:\\Users\\Avijit\\Desktop\\RAG_BASEDBOT\\pdftext\\text.txt'}, page_content='to work out a solution amicably for the benefit of the people, the officials said.')

In [32]:
def reset_database(database1):
    """
    Clears all documents from the database and persists the empty state.
    """
    # Retrieve all existing documents
    existing_docs = database1.get(include=['documents'])['documents']
    
    if existing_docs:
        # Assuming your database object has a method to delete documents
        database1.delete_documents([doc.id for doc in existing_docs])
        database1.persist()
        print("Database has been reset.")
    else:
        print("Database is already empty.")


In [84]:
prompt = PromptTemplate.from_template(
"""
You are a RAG-based assistant. The following context is part of your knowledge base:
<context>{context}</context>

Answer the question below using the context first. 
If the answer is not in the context, you may use your general knowledge. 
Provide only the answer—no extra explanations.

Question: {input}
"""
)

In [85]:
retreiver=database3.as_retriever()
chain=create_stuff_documents_chain(llm=client,prompt=prompt)

In [86]:

retrieval_chain = create_retrieval_chain(
    retreiver,chain
)


In [87]:
retrieval_chain.invoke({"input":"who is avijit and what is his college name"})['answer']

'Avijit Bhadra is a gym enthusiast with interests in software development, full-stack web development, and AI/ML. He studies CSE at the College of Technology, affiliated with MAKAUT.'