In [20]:
from langchain_community.llms import Ollama
import bs4
from langchain import hub
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter



In [21]:
# Create the LLM handle used as the generation step of the RAG chain, selecting
# the "llama3" model.
# NOTE(review): presumably needs a reachable Ollama server with "llama3"
# already pulled — confirm for the target environment.
llm = Ollama(model = "llama3")
# Bare last expression so the notebook displays the object's repr.
llm

Ollama(model='llama3')

In [22]:
# Configure a loader for the NTB "Ilam" page. The SoupStrainer passed as
# `parse_only` asks BeautifulSoup to keep only elements carrying the CSS class
# "tm-page"; everything else on the page is skipped while parsing.
loader = WebBaseLoader(
    web_path=("https://ntb.gov.np/ilam/",),
    bs_kwargs={"parse_only": bs4.SoupStrainer(class_="tm-page")},
)

In [23]:
# Fetch the configured page and turn it into a list of Document objects.
docs =  loader.load()
# NOTE(review): this prints the entire document list; the recorded output is
# mostly whitespace and navigation text. Consider previewing a short slice of
# `page_content` instead of dumping everything.
print(docs)

[Document(page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nEnglish\n\n\nSpanish\n\n\nJapanese\n\n\nRussian\n\n\nChinese\n\n\nFrench\n\n\nThai\n\n\nKorean\n\n\nGerman\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPlaces to go\n\n\n\n\n\n\n\nHighlights\n\n\n\n\r\n                                                                                    Kathmandu\r\n                                                                                \n\n\n\n\n\r\n                                                                                    Pokhara\r\n                                                                                \n\n\n\n\n\r\n                                                                                    Everest\r\n                                                                                \n\n\n\n\n\r\n                                                                                    Janakpur\r\n                                                   

In [24]:
# Split the loaded documents into chunks, with an overlap between neighbouring
# chunks so text cut at a boundary still appears intact in one of them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)
# Embed every chunk with the "llama3" Ollama embedding model and index the
# resulting vectors in a Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OllamaEmbeddings(model="llama3"),
)

In [26]:
# Wrap the vector store in a retriever so it can be used as a chain step.
retriever = vectorstore.as_retriever()
# Pull the prompt published as "rlm/rag-prompt" from the LangChain hub.
# NOTE(review): presumably a network call on every run — confirm whether this
# should be cached or pinned.
prompt = hub.pull("rlm/rag-prompt")

In [27]:
def format_docs(docs):
    """Concatenate the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line (two newline characters). An empty iterable yields ``""``.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

In [28]:
# Assemble the RAG pipeline; stages run left to right on the input question.
rag_chain = (
    {
        # Retrieve documents for the question and flatten them into one string.
        "context": retriever | format_docs,
        # Forward the incoming question unchanged.
        "question": RunnablePassthrough(),
    }
    | prompt  # fill the prompt template with context and question
    | llm  # generate the answer
    | StrOutputParser()  # reduce the model output to a plain string
)

In [32]:
# Ask a question that the indexed page does not cover; the recorded answer
# shows the chain replying that the context does not mention the subject.
# NOTE(review): "Tendulakr" looks like a misspelling of "Tendulkar" — left
# unchanged because the recorded output echoes this exact spelling.
rag_chain.invoke("who is sachin Tendulakr?")

"I don't know. The context doesn't mention Sachin Tendulakr at all."