 ## Importing Libraries

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import Ollama
from langchain.chains import RetrievalQA



import numpy as np
import pandas as np
from typing import List

  from .autonotebook import tqdm as notebook_tqdm


## Create txt files


In [3]:
sample_docs =[
    """ 
    Speech

    The real remedy is to destroy the belief in the sanctity of the shastras. How do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible? You must take a stand against the scriptures. Either you must stop the practice of caste or you must stop believing in the shastras. You cannot have both. The problem of caste is not a problem of social reform. It is a problem of overthrowing the authority of the shastras. So long as people believe in the sanctity of the shastras, they will never be able to get rid of caste. The work of social reform is like the work of a gardener who is constantly pruning the leaves and branches of a tree without ever attacking the roots. The real enemy is the belief in the shastras.
    """
]

sample_docs

[' \n    Speech\n\n    The real remedy is to destroy the belief in the sanctity of the shastras. How do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible? You must take a stand against the scriptures. Either you must stop the practice of caste or you must stop believing in the shastras. You cannot have both. The problem of caste is not a problem of social reform. It is a problem of overthrowing the authority of the shastras. So long as people believe in the sanctity of the shastras, they will never be able to get rid of caste. The work of social reform is like the work of a gardener who is constantly pruning the leaves and branches of a tree without ever attacking the roots. The real enemy is the belief in the shastras.\n    ']

In [4]:
import os

os.makedirs("text_files",exist_ok=True)


In [5]:
import tempfile
temp_dir = tempfile.mkdtemp()

for i,doc in enumerate(sample_docs):
    with open(f"text_files/doc_{i}.txt","w") as f:
        f.write(doc)


## Document Loading


In [6]:
loader = DirectoryLoader(
    "text_files",
    glob= "*.txt",
    loader_cls= TextLoader,
    loader_kwargs= {'encoding':'utf-8'}
)

documents = loader.load()

In [7]:
documents

[Document(metadata={'source': 'text_files\\doc_0.txt'}, page_content=' \n    Speech\n\n    The real remedy is to destroy the belief in the sanctity of the shastras. How do you expect to succeed if you allow the shastras to continue to be held as sacred and infallible? You must take a stand against the scriptures. Either you must stop the practice of caste or you must stop believing in the shastras. You cannot have both. The problem of caste is not a problem of social reform. It is a problem of overthrowing the authority of the shastras. So long as people believe in the sanctity of the shastras, they will never be able to get rid of caste. The work of social reform is like the work of a gardener who is constantly pruning the leaves and branches of a tree without ever attacking the roots. The real enemy is the belief in the shastras.\n    ')]

## Text splitter

In [8]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 50,
    chunk_overlap = 10,
    separators= [" "],
    length_function = len
)

In [9]:
chunks = text_splitter.split_documents(documents)

In [10]:
print(len(chunks))

19


## Intializing Vector store

In [11]:
pd = "./chroma_db"

vector_store = Chroma.from_documents(
    documents= chunks,
    embedding= HuggingFaceEmbeddings(model='sentence-transformers/all-MiniLM-L6-v2'),
    persist_directory= pd,
    collection_name="rag-collection"
)

In [12]:
vector_store

<langchain_community.vectorstores.chroma.Chroma at 0x193e62d3b60>

## Intializing LLM

In [None]:

llm = Ollama(model="mistral")


  llm = Ollama(model="mistral")


In [13]:
retriever=vector_store.as_retriever(
    search_kwarg={"k":3} 
)
retriever

VectorStoreRetriever(tags=['Chroma', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x00000193E62D3B60>, search_kwargs={})

## Retrieval From Vector Store

In [20]:

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff"
)


In [16]:
def ask(query):
    return qa_chain.run(query)


In [18]:
ask("What is the speech trying to say?")


' The speeches seem to suggest a call for revolution or upheaval, as they repeatedly mention "the real remedy is to destroy" and "overthrowing the authority." However, without more context about the specific authority or issue being addressed, it\'s difficult to definitively determine the exact subject matter of these speeches.'

In [19]:
ask("Explain the problem of caste according to the text.")


" According to the provided context, the problem of caste is not something that can be completely eradicated or solved once and for all. It's a persistent social issue in some societies where people are divided into rigid groups based on birth, with certain privileges and restrictions associated with each group. The work of addressing caste involves ongoing efforts to challenge discrimination, promote equality, and create societal change. However, the text does not provide specific details about the nature or origins of this problem, so a more comprehensive understanding would require additional context."