In [1]:
from langchain_community.document_loaders import PyPDFDirectoryLoader

from dotenv import load_dotenv

load_dotenv()

loader = PyPDFDirectoryLoader('data/')

documents = loader.load()

In [2]:
print(documents)
# print(documents[0].page_content)

[Document(page_content='Hostel Policy  \n1. Objective  \na. Hostels are run with the primary objective of providing students a “home away from home”. The \nhostel atmosphere must instill self -confidence and discipline in the minds of students, and provide \nscope for developing ideals of a harmonious communal living. The hostel must also enable \nstudents to share the joys of camaraderie, fellowship and professional fraternity. Self -help and a \nspirit of accommodation for the common good are some of the virtues expected from the \nhostellers.  \nb. The guiding principle towards the formulation of the rules and regulations for hostellers is to \nensure a safe, secure, comfortable and pleasant stay and to create an environment, which is \nconducive to learning. To this end, all the activities are geared towar ds your development and \nprogress. We expect our students to develop a sense of belonging towards SRM property.  \n \n2. Definition  \na. These rules will be termed as Hostel Oc

In [3]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=100,
    chunk_overlap=10,
    length_function=len,
    is_separator_regex=False,
)

In [4]:
chunks = text_splitter.split_documents(documents)
chunks
# chunks[7]


[Document(page_content='Hostel Policy  \n1. Objective  \na. Hostels are run with the primary objective of providing students a “home away from home”. The \nhostel atmosphere must instill self -confidence and discipline in the minds of students, and provide \nscope for developing ideals of a harmonious communal living. The hostel must also enable \nstudents to share the joys of camaraderie, fellowship and professional fraternity. Self -help and a \nspirit of accommodation for the common good are some of the virtues expected from the \nhostellers.  \nb. The guiding principle towards the formulation of the rules and regulations for hostellers is to \nensure a safe, secure, comfortable and pleasant stay and to create an environment, which is \nconducive to learning. To this end, all the activities are geared towar ds your development and \nprogress. We expect our students to develop a sense of belonging towards SRM property.  \n \n2. Definition  \na. These rules will be termed as Hostel Oc

In [5]:
from langchain_community.embeddings import OllamaEmbeddings

from langchain_community.vectorstores import Chroma
# from langchain_community.llms import Ollama

# llm = Ollama(model="llama3")

index = Chroma.from_documents(chunks, OllamaEmbeddings(model="nomic-embed-text:latest ",show_progress=True))


OllamaEmbeddings: 100%|██████████| 6/6 [00:40<00:00,  6.76s/it]


In [6]:
retriever = index.as_retriever()

In [8]:
retriever.get_relevant_documents("home away from home”")

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.91s/it]


[Document(page_content='1 \n i. The support staff provided a t hostels is to ensure students’ stay is as comfortable as possible. Any \ncomplaints or feedback regarding the support staff, must be dealt only with the Hostel Office, \nrather than dealing directly with the concerned staff.  \n4. Timings  \na. Students should adhere to the timings. For student safety and to maintain the decorum of the \nUniversity, the Boys and Girls Hostel attendance will be taken at 9.00  pm. The timings may be \nchanged on particular days of events and students should be in their hostels thereafter.  \nb. If any student plans to stay away from the hostel after 9.00  pm, they have to plan in advance and \ninform the Deputy Warden  in writing. Verbal messages to the Warden will not suffice. Information \nsheets must contain the place where they have planned to go as well as the contact telephone \nnumber. Permission from their parents in writing or by way of written letter or email is mandatory \nfor stay

In [9]:
from operator import itemgetter

template = """

Answer the following question:
Question: {question}

Answer the question based only on the following context:
{context}

"""

In [10]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama

prompt = ChatPromptTemplate.from_template(template)
model = Ollama(model="ollama3")

In [11]:
rag_chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question")
    }
    | prompt
    | model
    | StrOutputParser()
)

In [12]:
rag_chain.invoke({"question": "list out some of the rules related to timings of hostel policy"})

OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.29s/it]


'Here is a sample translation of the Hostel Occupancy Rules and Regulations document:\n\n**Hostel Occupancy Rules and Regulations**\n\n**Objective**\n\nThe objective of these rules and regulations for hostels is to ensure safe, secure, comfortable, and conducive accommodation for students at hostels.\n\n**Hostelling Facilities**\n\nHostels are facilities that provide housing and amenities to individuals or families who are visiting a particular location for study purposes.\n\nSome of the key features of hostels include:\n\n1. Common living facilities: Hostels typically have common living facilities such as dormitory rooms, lounges, and communal kitchens.\n\n2. Amenities: Hostels often offer a range of amenities to meet the needs of their residents. These amenities may include private bedrooms with lockouts, shared bathrooms with sinks, showers, and hair dryers, laundry facilities with machines for washing clothes, internet access, Wi-Fi hotspots, and other amenities as may be required 