## A Simple RAG using Llama3.1

In [1]:
import langchain

### 3 steps to build RAG
1. Data Ingestion
2. Data Retrieval
3. Data Generation

In [2]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA

In [3]:
#Load Document using PyPDFLoader document loader
loader = PyPDFLoader("Finetuning_LLMs.pdf")
documents = loader.load()

In [4]:
#Splitting the data into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30, separator = "\n")
docs = text_splitter.split_documents(documents=documents)

In [5]:
pip install -U langchain-huggingface

Note: you may need to restart the kernel to use updated packages.


In [6]:
#from langchain.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
embedding_model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': False}

embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)


  from tqdm.autonotebook import tqdm, trange


### FAISS vector Database for Storing Embeddings

In [7]:
#loading the data and corresponding embedding into FAISS
vectorstore = FAISS.from_documents(docs, embeddings)

In [8]:
# Persist the vectors locally on disc
vectorstore.save_local("faiss_index_")


In [9]:
#Load from local storage
persisted_vectorstore = FAISS.load_local("faiss_index_", embeddings, allow_dangerous_deserialization=True)

In [10]:
persisted_vectorstore

<langchain_community.vectorstores.faiss.FAISS at 0x249805acdf0>

In [11]:
##Create a retriever on the top of a database
retriever = persisted_vectorstore.as_retriever()

# Start loading your data to LLM

In [14]:
from langchain_community.llms import Ollama

In [15]:
#initialize an instance of the Ollama model 
llm = Ollama(model='llama3')


In [16]:
from langchain_ollama import ChatOllama


In [17]:
#invoke the model to generate response

chatllm = ChatOllama(
    model="llama3.1",
    temperature=0,
    # other params...
)

In [18]:
from langchain_core.messages import AIMessage


In [19]:

messages = [
    (
        "system",
        "You are a helpful assistant that translates English to Bengali. Translate the user sentence.",
    ),
    ("human", "I love my India"),
]
ai_msg = chatllm.invoke(messages)
ai_msg

AIMessage(content='আমি ভারতকে প্রিয় করি।', response_metadata={'model': 'llama3.1', 'created_at': '2024-08-31T11:49:04.7545599Z', 'message': {'role': 'assistant', 'content': ''}, 'done_reason': 'stop', 'done': True, 'total_duration': 32498320200, 'load_duration': 9197091000, 'prompt_eval_count': 36, 'prompt_eval_duration': 8287526000, 'eval_count': 28, 'eval_duration': 15004624000}, id='run-16a29a41-8a39-47ad-a7c9-f484a01f136c-0', usage_metadata={'input_tokens': 36, 'output_tokens': 28, 'total_tokens': 64})

In [21]:
response = llm.invoke("Who is Prime Minister of India")
print(response)


As of my knowledge cutoff, the current Prime Minister of India is Narendra Damodardas Modi. He has been serving as the 14th and current Prime Minister of India since May 26, 2014.


In [22]:
#RetrievalQA chain for orchestration
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [24]:
#Test with single query
myquery = "What is the document all about?"
my_result = qa.invoke(myquery)
print(my_result)

{'query': 'What is the document all about?', 'result': "The document is about fine-tuning Large Language Models (LLMs) for specific tasks, domains, or datasets, including when and why it's necessary, as well as alternatives to fine-tuning."}


In [25]:
#Test with single query
myquery = "When finetuning is required?"
my_result = qa.invoke(myquery)
print(my_result)

{'query': 'When finetuning is required?', 'result': 'According to the text, fine-tuning is required when:\n\n1. The Large Language Model (LLM) was pre-trained on general data but your task is domain-specific (e.g., medical, legal, financial text).\n2. You need the LLM to perform a specific task like sentiment analysis, summarization, question answering, or translation.\n3. Your application involves a particular language style, dialect, or jargon that is not well-represented in the pre-trained model.\n\nThese are the three scenarios where fine-tuning an LLM is required.'}


In [23]:
while True:
  query = input("Enter your query: ")
  if query == "exit":
    break
  #result = qa.run(query)
  result = qa.invoke(query)
  print(result)

{'query': 'write the summary in 50 words', 'result': 'Fine-tuning a Large Language Model (LLM) is required when adapting a pre-trained model to a specific task, domain, or dataset. This process improves performance, generalization, and control by customizing the model for niche applications, updating knowledge, aligning with metrics or constraints, and leveraging existing knowledge in a more efficient manner.'}
