# RAG using an open-source local model

_(Using a local model to improve latency)_

In [4]:
import os
from dotenv import load_dotenv

# Load environment variables from the .env file at the relative path '../.env'.
# The call is the cell's last expression, so its return value is displayed.
# NOTE(review): presumably this supplies the NVIDIA API credentials needed by the
# ChatNVIDIA / NVIDIAEmbeddings clients created below — confirm against the .env file.
load_dotenv('../.env')

True

_Components_

In [5]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain_chroma import Chroma

# Chat model, created through the NVIDIA AI endpoints client.
model_id = "meta/llama3-70b-instruct"
llm = ChatNVIDIA(
    model=model_id,
)

# Embedding model handed to the vector store below.
embeddings = NVIDIAEmbeddings(
    model="NV-Embed-QA",
)

# Chroma vector store using the NVIDIA embeddings; no persistence
# directory is passed here.
vector_store = Chroma(
    embedding_function=embeddings,
)

In [6]:
## 😆testing
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage, ToolMessage

# Smoke test for the chat model: one system instruction plus one human turn.
messages = [
    SystemMessage("You are a helpful comedic assistant that answers only using puns."),
    HumanMessage("Hi AI, how are you today?"),
]

# Keep the reply in `response` and leave it as the last expression so the cell displays it.
response = llm.invoke(messages)
response

AIMessage(content='I\'m "punderful" today, thanks for asking! I\'m "egg-static" to be of assistance. Don\'t worry, I won\'t "leaf" you hanging – I\'m "rooting" for our conversation to be a-maize-ing!', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'I\'m "punderful" today, thanks for asking! I\'m "egg-static" to be of assistance. Don\'t worry, I won\'t "leaf" you hanging – I\'m "rooting" for our conversation to be a-maize-ing!', 'token_usage': {'prompt_tokens': 36, 'total_tokens': 91, 'completion_tokens': 55}, 'finish_reason': 'stop', 'model_name': 'meta/llama3-70b-instruct'}, id='run-2acc1113-e878-43a8-beb2-796da779ae86-0', usage_metadata={'input_tokens': 36, 'output_tokens': 55, 'total_tokens': 91}, role='assistant')

In [7]:
# The app flow works like this:
# 1. It receives a question from the user.
# 2. It creates a search query from that question (QueryGenerator).
# 3. Using that query, it retrieves the relevant documents and creates the final response.

In [8]:
# Sample user turns for testing the app flow.
um1, um2, um3 = (
    HumanMessage(text)
    for text in ('hello chat', 'what is self control?', 'how to use it?')
)

## 1. Query Generator

In [9]:
# Seed conversation for the query generator: a single system prompt instructing the
# model to reply only in the form `query : []` (an empty array when no relevant query
# exists, otherwise at most one query) for the last question in the chat.
# The prompt text, including its indentation, is sent to the model verbatim.
QueryMessages = [
    SystemMessage(content='''You are a query generator. You will be provided with the chat of the human.
                  According to that chat you return a query for searching in the vector database.
                  The query should be for finding relevant context for the last question.
                  Just return the query only in the format "query : []"
                  If you dont think a relevant query exists return an empty array otherwise return at max 1 query.
                  '''),
]

In [10]:
# Add the first user turn to the shared prompt list only if it is not already
# there, so re-running this cell does not stack duplicate copies of the message.
if um1 not in QueryMessages:
    QueryMessages.append(um1)

# Last expression: the model's reply is displayed as the cell output.
llm.invoke(QueryMessages)

AIMessage(content='query : []', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'query : []', 'token_usage': {'prompt_tokens': 99, 'total_tokens': 103, 'completion_tokens': 4}, 'finish_reason': 'stop', 'model_name': 'meta/llama3-70b-instruct'}, id='run-219ff442-da65-484b-9913-07739f757eb3-0', usage_metadata={'input_tokens': 99, 'output_tokens': 4, 'total_tokens': 103}, role='assistant')

In [11]:
# Add the second user turn to the shared prompt list only if it is not already
# there, so re-running this cell does not stack duplicate copies of the message.
if um2 not in QueryMessages:
    QueryMessages.append(um2)

# Last expression: the model's reply is displayed as the cell output.
llm.invoke(QueryMessages)

AIMessage(content='query : ["definition of self control"]', additional_kwargs={}, response_metadata={'role': 'assistant', 'content': 'query : ["definition of self control"]', 'token_usage': {'prompt_tokens': 109, 'total_tokens': 118, 'completion_tokens': 9}, 'finish_reason': 'stop', 'model_name': 'meta/llama3-70b-instruct'}, id='run-b8f006ce-d5e8-4d6b-98e4-4796db59ffab-0', usage_metadata={'input_tokens': 109, 'output_tokens': 9, 'total_tokens': 118}, role='assistant')

In [None]:
# Add the third user turn to the shared prompt list only if it is not already
# there, so re-running this cell does not stack duplicate copies of the message.
if um3 not in QueryMessages:
    QueryMessages.append(um3)

# Last expression: the model's reply is displayed as the cell output.
llm.invoke(QueryMessages)

'query : ["techniques for self control"]'

In [13]:
# Display the accumulated prompt list: the system message plus the human turns appended so far.
QueryMessages

[SystemMessage(content='You are a query generator. You will be provided with the chat of the human.\n                  According to that chat you return a query for searching in the vector database.\n                  The query should be for finding relevant context for the last question.\n                  Just return the query only in the format "query : []"\n                  If you dont think a relevant query exists return an empty array otherwise return at max 1 query.\n                  ', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='hello chat', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='what is self control?', additional_kwargs={}, response_metadata={}),
 HumanMessage(content='how to use it?', additional_kwargs={}, response_metadata={})]

In [19]:
def query_generator(QueryMsgs):
    """Send the messages to the chat model and return the reply's text.

    Args:
        QueryMsgs: Messages passed unchanged to ``llm.invoke``.

    Returns:
        The ``content`` attribute of the object returned by ``llm.invoke``.
    """
    reply = llm.invoke(QueryMsgs)
    return reply.content
    

## 2. Retriever

In [14]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# PDF to index.
file_path = "../docs/self-mastery.pdf"

# Load the PDF into documents.
loader = PyPDFLoader(file_path)
document = loader.load()

# Split the loaded documents using chunk_size=1000 and chunk_overlap=100.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
chunked_documents = text_splitter.split_documents(document)

In [15]:
# Number of chunks produced by the text splitter.
len(chunked_documents)

441

In [16]:
# Index the chunks and keep the returned ids under a descriptive name.
# Avoid binding the result to `_`: in IPython that name holds the previous
# cell's output, and assigning to it stops it from being updated.
# NOTE(review): no explicit ids are passed, so re-running this cell presumably
# inserts the same chunks again under new ids — confirm before re-executing.
document_ids = vector_store.add_documents(chunked_documents)

# Show the first three ids as a quick sanity check.
document_ids[:3]

['7bfac579-6199-4010-b12c-d91e3c1c2e79',
 'ef42cf2f-650b-4992-ad54-c0337bdeea26',
 'bde03a4c-a17f-40b1-9e25-ad6190889dd7']

In [17]:
def retriever(queries):
    """Run a similarity search against the vector store for each query.

    Args:
        queries: An iterable of query strings. A single bare string is
            treated as one query instead of being iterated character by
            character.

    Returns:
        A list with one entry per query, in input order. Each entry is the
        value returned by ``vector_store.similarity_search`` for that query,
        so the overall result is nested (one inner result per query).
    """
    if isinstance(queries, str):
        # Iterating a str yields single characters, which would trigger one
        # search per character; wrap it so it is searched as a whole.
        queries = [queries]

    return [vector_store.similarity_search(query) for query in queries]

In [21]:
# Try the retriever with a single query; the result holds one inner list of documents per query.
retriever(['self discipline'])

[[Document(metadata={'page': 66, 'source': '../docs/self-mastery.pdf'}, page_content='thinking God is going to control your life and harm you. He loves you! Once you \nsurrender, you will be forgiven and blessed beyond imagination. Your addiction will \nDISSAPEAR into the DUST!'),
  Document(metadata={'page': 101, 'source': '../docs/self-mastery.pdf'}, page_content='5. Watch a Wim Hoff video. \n6. Get out and be cold. \n7. The key is to humbly submit to God’s grace with the help of the pain. \n8. The idea is not to punish yourself but to grow through discipline, self-\ncare, and love for your life.'),
  Document(metadata={'page': 102, 'source': '../docs/self-mastery.pdf'}, page_content='and drink, but  putting off all impurities, like gossiping,  lust, backbiting, foul \nlanguage, and all other sins and immoralities. It is a full cleanse of the being  – \nthe best medicine: \n"Fasting is the first principle of medicine. Fast and see                                    \nthe strength of 

## 3. Final Generator

In [22]:
# TODO: Create the final generator using the two components above (query generator and retriever). Not implemented here; moving on to build the final app for now.