# Libraries

In [1]:
# Create a virtual environment so that the libraries are contained within this workspace.
# Terminal command to create the virtual environment: python -m venv .venv
# Terminal command to activate it (Windows): .venv\Scripts\activate

In [2]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.document_loaders.pdf import PyMuPDFLoader
from langchain.prompts import PromptTemplate
#from langchain_community.vectorstores import FAISS
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_text_splitters import CharacterTextSplitter
#from langchain_text_splitters import TokenTextSplitter
from operator import itemgetter

# Hello World

In [3]:
# Sanity check that the kernel executes Python.
print("Hello RAG")

Hello RAG


# Loading the LLM Model

In [4]:
# Ollama model tag used to build the LLM below.
# llama3 is slow to compute on modest hardware, so expect long waits.
Model = "llama3"
model = Ollama(model=Model)

In [None]:
# Quick smoke test: send a plain greeting straight to the model (no prompt template yet).
model.invoke("Hi there")

# Prepare the template for the prompts

In [5]:
# Prompt text with two placeholders: {context} and {question}.
# The model is told to fall back to a fixed reply when it cannot answer.
template = """
Answer the question based on the context below. If you can't 
answer the question, reply "Oof that's a tough one, i don't really know this"

Context : {context}

Question : {question}

"""

promt = PromptTemplate.from_template(template)

# Render the template with dummy values to check that both placeholders are filled in.
print(
    promt.format(
        context="Here is some context",
        question="Here is the question",
    )
)


Answer the question based on the context below. If you can't 
answer the question, reply "Oof that's a tough one, i don't really know this"

Context : Here is some context

Question : Here is the question




In [1]:
def format_docs(docs):
    """Join the text of several documents into one string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line (two newlines). An empty iterable yields an empty string.
    """
    page_texts = [doc.page_content for doc in docs]
    return "\n\n".join(page_texts)

In [6]:
# Compose the prompt template and the model with LangChain's `|` operator,
# so invoking the chain formats the prompt and then passes it to the model.
chain = promt | model

In [None]:
# Negative check: the context only mentions Jack, so the question about Mark
# cannot be answered from it.
chain.invoke(
    {"context": "Jack has two apples", "question": "How many apples does mark have?"}
)

In [None]:
# Positive check: here the context does contain the information the question asks for.
chain.invoke(
    {"context": "Mary gave jack 2 apples", "question": "How many apples does jack have?"}
)

# Get the data ready

In [7]:
# Load every file matching "*.pdf" in the "data" directory, using PyMuPDFLoader for each one.
loader = DirectoryLoader(
    "data",
    glob="*.pdf",
    loader_cls=PyMuPDFLoader,
)
documents = loader.load()

In [8]:
# Split the loaded documents into chunks with a target size of 1000 characters and no overlap.
# NOTE(review): CharacterTextSplitter splits on a single separator, so chunks may end up
# larger than chunk_size when the text lacks that separator — confirm against the docs.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=0,
)
docs = text_splitter.split_documents(documents)

# Creating and testing the vectorstore

In [9]:
# Embed the chunks with Ollama (same model tag as the LLM) and keep them in an
# in-memory vector store.
vectorstore = DocArrayInMemorySearch.from_documents(
    docs,
    OllamaEmbeddings(model=Model),
)



In [10]:
# Expose the vector store as a retriever, then query it directly to inspect which
# chunks come back for a sample question before wiring it into the chain.
# NOTE(review): the first hit in the recorded output is about z-index rather than DNS —
# retrieval quality may be worth checking.
retriever = vectorstore.as_retriever()
retriever.invoke("What are the steps in dns lookup?")

[Document(page_content='Layers are specified by the z-index CSS property. It represents the third dimension of the box: its position along the "z axis".\nThe boxes are divided into stacks (called stacking contexts). In each stack the back elements will be painted first and the\nforward elements on top, closer to the user. In case of overlap the foremost element will hide the former element. The stacks are\nordered according to the z-index property. Boxes with "z-index" property form a local stack.\nTrivia\nThe birth of the web\nTim Berners-Lee, a British scientist at CERN, invented the World Wide Web (WWW) in 1989. The web was originally conceived and\ndeveloped to meet the demand for automatic information-sharing between scientists in universities and institutes around the\nworld.\nThe first website at CERN - and in the world - was dedicated to the World Wide Web project itself and was hosted on Berners-\nLee\'s NeXT computer. The website described the basic features of the web; how t

# Sending the context and question to the LLM

In [11]:
# Full RAG chain. The input is a dict with a "question" key.
#   - "context": the question is sent to the retriever and the returned documents
#     are merged into one string by format_docs.
#   - "question": the question is passed through unchanged.
# Both values fill the prompt template, which is then sent to the model.
chain = (
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
    }
    | promt
    | model
)

In [12]:
# Ask the RAG chain a question; only "question" is supplied, the context comes from the retriever.
chain.invoke({"question" : "What is the steps involved in dns lookup?"})

"According to the provided context, the steps involved in DNS (Domain Name System) lookup are:\n\n1. **Browser cache**: The browser checks its cache for the desired DNS record.\n2. **OS cache**: If the browser cache doesn't contain the desired record, the browser makes a system call (gethostbyname in Windows) to check the OS's cache.\n3. **Router cache**: The request continues on to your router, which typically has its own DNS cache.\n4. **ISP DNS cache**: The next place checked is the cache of the ISP's DNS server.\n5. **Recursive search**: If the ISP's DNS server doesn't have the desired record in its cache, it begins a recursive search from the root nameserver, through the .com top-level namespace, to the target domain's nameserver.\n\nThese steps are illustrated by the provided diagram, which shows the recursive search process."

It works! The chain retrieves context from the PDFs and the model answers the question from it.

In [13]:
# Second end-to-end query through the RAG chain, on a different topic.
chain.invoke({"question" : "What is the browser's high level structure?"})

"Based on the context, the browser's high-level structure consists of:\n\n1. UI Backend: Used for drawing basic widgets like combo boxes and windows.\n2. JavaScript Engine: Interpreter used to parse and execute JavaScript code.\n\nAdditionally, the text describes the browser's internal structures, including:\n\n3. DOM Tree: A tree of DOM element and attribute nodes that represents the HTML document.\n4. Render Tree: A tree of visual elements in the order they will be displayed, which enables painting the contents in their correct order.\n\nThese structures enable the browser to parse and render HTML documents, execute JavaScript code, and display the resulting content on the screen."