In [31]:
!pip install langchain



In [32]:
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_community.document_loaders import OnlinePDFLoader

In [33]:
local_path = ["consti.pdf"]
all_docs = []

if local_path:
    for path in local_path:
        loader = UnstructuredPDFLoader(file_path=path)
        data = loader.load()
        all_docs.extend(data)

    print(f"Total documents loaded: {len(all_docs)}")
else:
    print("PDF file not uploaded")

Total documents loaded: 1


In [34]:
!ollama pull nomic-embed-text

[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ´ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â § [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‡ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â � [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ‹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ™ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ´ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â ¦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest â § [K[?25h

In [35]:
!ollama list

NAME                        ID              SIZE      MODIFIED               
nomic-embed-text:latest     0a109f422b47    274 MB    Less than a second ago    
mxbai-embed-large:latest    468836162de7    669 MB    About an hour ago         
qwen3:1.7b                  8f68893c685c    1.4 GB    2 weeks ago               
llama3.2:latest             a80c4f17acd5    2.0 GB    5 weeks ago               


In [36]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma

In [37]:
#Split and chunk
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 5000, chunk_overlap = 750)
chunks = text_splitter.split_documents(all_docs)

In [38]:
#Add to vector database
vector_db = Chroma.from_documents(
    documents = chunks,
    embedding = OllamaEmbeddings(model = "mxbai-embed-large", show_progress = True),
    collection_name = "local-rag"
)

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████| 204/204 [07:48<00:00,  2.30s/it]


In [39]:
!pip install -U langchain-ollama



In [40]:
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [41]:
#LLM from Ollama
local_model = "llama3.2"
llm = ChatOllama(model=local_model)

In [42]:
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI assistant who only answers based on the given context. Help me resolve my doubts from the Indian Constitution, give an answer in no more than 100 characters: 
    Original question: {question}"""
)

In [43]:
retriever = MultiQueryRetriever.from_llm(
    vector_db.as_retriever(),
    llm,
    prompt=QUERY_PROMPT
)

#RAG prompt
template = """
You are a helping assistant for the government of India. For the given user input help me find relevant things from it through the context provided and 
also **keep the answer short**. Also tell the page number from where it can be found:


Context:
{context}

Question: {question}
"""


prompt = ChatPromptTemplate.from_template(template)

In [44]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [45]:
retriever

MultiQueryRetriever(retriever=VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x000001BC42E5E780>, search_kwargs={}), llm_chain=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI assistant who only answers based on the given context. Help me resolve my doubts from the Indian Constitution, give an answer in no more than 100 characters: \n    Original question: {question}')
| ChatOllama(model='llama3.2')
| LineListOutputParser())

In [46]:
prompt = "What is the Preamble of the Constitution, and what are its key words (Sovereign, Socialist, Secular…)?"

In [47]:
back_prompt = "Explain your reasoning and tell section or page number where it can be found"

In [53]:
text = chain.invoke(prompt + back_prompt).replace("\\n", " ")
text

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.09s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.14s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.28s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.18s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.22s/it]
OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00,  2.29s/it]


'The Preamble of the Indian Constitution is:\n\n"We, the people of India, having solemnly resolved to constitute India into a Sovereign, Socialist, Secular, Democratic Republic and to secure to all its citizens:\n\nJustice, social, economic and political;\nLiberty of thought, expression, belief, faith and worship;\nEquality of status and of opportunity;\nand to promote among them all\nFraternity assuring the dignity of the individual in social justice and liberty;\n\nIn our Constituent Assembly this twenty-sixth day of November, 1949, do hereby adopt, enact and give to ourselves this Constitution."\n\nThe key words mentioned in the Preamble are:\n\n1. **Sovereign**: This refers to India\'s independence and self-governance. It signifies that the country is no longer a part of any empire or dominion.\n2. **Socialist**: This indicates that the economy will be organized on socialist principles, aiming to provide equal opportunities for all citizens and promoting social welfare.\n3. **Secul

In [50]:
!pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Collecting click<8.2,>=7.1 (from gtts)
  Downloading click-8.1.8-py3-none-any.whl.metadata (2.3 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Downloading click-8.1.8-py3-none-any.whl (98 kB)
Installing collected packages: click, gtts

  Attempting uninstall: click

    Found existing installation: click 8.2.1

    Uninstalling click-8.2.1:

      Successfully uninstalled click-8.2.1

   ---------------------------------------- 2/2 [gtts]

Successfully installed click-8.1.8 gtts-2.5.4


In [59]:
from gtts import gTTS

tts = gTTS(text, lang = "en",tld = "co.in")
tts.save("output.mp3")