In [2]:
from langchain_community.document_loaders import PyPDFLoader
# Point a PDF loader at the LoRA review paper (path is relative to the
# notebook's working directory) and read it into `docs`.
loaderpdf = PyPDFLoader(
    'A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf'
)
docs = loaderpdf.load()

In [3]:
# Display the loaded documents; as the cell's last expression the whole list
# (metadata plus full page text) is rendered as output.
# NOTE(review): this dump is very long — consider `len(docs)` or `docs[:1]`.
docs

[Document(metadata={'producer': 'pdfTeX-1.40.26', 'creator': 'TeX', 'creationdate': '2024-09-10T21:50:42+00:00', 'moddate': '2024-09-10T21:50:42+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) kpathsea version 6.4.0', 'trapped': '/False', 'source': 'A_Comprehensive_Review_of_Low_Rank_Adaptation_in_Large_Language_Models_for_Efficient_Parameter_Tuning-1.pdf', 'total_pages': 11, 'page': 0, 'page_label': '1'}, page_content='A Comprehensive Review of Low-Rank\nAdaptation in Large Language Models for\nEfficient Parameter Tuning\nSeptember 10, 2024\nAbstract\nNatural Language Processing (NLP) often involves pre-training large\nmodels on extensive datasets and then adapting them for specific tasks\nthrough fine-tuning. However, as these models grow larger, like GPT-3\nwith 175 billion parameters, fully fine-tuning them becomes computa-\ntionally expensive. We propose a novel method called LoRA (Low-Rank\nAdaptation) that significantly reduces the ove

In [7]:
import os
from dotenv import load_dotenv
# Load settings from a local .env file into the process environment
# (python-dotenv) so the API keys read below are available via os.getenv.
load_dotenv()

# The original cell also evaluated os.getenv('HF_TOKEN') and discarded the
# result: it stored nothing and displayed nothing, so it has been removed.
# HF_TOKEN, if defined in .env, is already in os.environ after load_dotenv().

# API key for the Groq chat model; None when GROQ_API_KEY is not set.
groq_api_key = os.getenv("GROQ_API_KEY")

In [6]:
from langchain_huggingface import HuggingFaceEmbeddings
# Embedding model later handed to the FAISS vector store to vectorise chunks.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

In [8]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
# Splitter configuration: chunks of at most 1000 units with 30 units of
# overlap between neighbours (presumably characters — confirm against the
# splitter's default length function).
text_spliter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=30,
)

# Cut the loaded PDF documents into chunks.
split = text_spliter.split_documents(docs)

# Embed the chunks with `embeddings` and index them in a FAISS vector store.
vectorstore = FAISS.from_documents(
    documents=split,
    embedding=embeddings,
)

In [12]:
# Expose the FAISS store through the retriever interface (no custom search
# arguments are passed), then display the resulting object.
retriever = vectorstore.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000023781783FE0>, search_kwargs={})

In [9]:
from langchain_groq import ChatGroq
# Groq-hosted chat model used for the smoke test and as the generator in the
# RAG chain; authenticated with the key read from the environment.
# NOTE(review): confirm 'Llama3-8b-8192' is still a model id Groq serves.
llm = ChatGroq(
    model_name='Llama3-8b-8192',
    groq_api_key=groq_api_key,
)

In [13]:
# Smoke test: send a trivial prompt and display the text of the model's reply.
llm.invoke('hi').content

"Hi! It's nice to meet you. Is there something I can help you with or would you like to chat?"

In [23]:
## Prompt Template
from langchain_core.prompts import ChatPromptTemplate
# System message for the Q/A chain. The pieces are joined by implicit string
# concatenation, so every fragment must carry its own trailing separator;
# the original had none after "chatbot", which produced "chatbotanswer".
# `{context}` is a template placeholder and must appear exactly once.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "act as a Q/A chatbot. "
    "answer concise and detailed."
    "\n\n"
    "{context}"
)

# Two-message chat prompt: the system instructions (which contain the
# {context} placeholder) followed by the user's question in {input}.
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

In [24]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering step: combines the chat model with the prompt template.
question_answer_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: the retriever feeds the answering step above.
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

In [25]:
# Run the RAG chain on a single question; the result is indexed by key
# afterwards (e.g. 'answer').
response = rag_chain.invoke(
    {"input": "What this paper is talking about?"}
)

In [27]:
# Display the generated answer text from the chain's response.
response['answer']

'This paper appears to be discussing a method for adapting pre-trained neural networks to new tasks in a more efficient and computationally feasible way. The authors propose a reparameterization of the weight updates in the network, which allows them to reduce the number of trainable parameters and make the adaptation process more efficient.\n\nSpecifically, the paper introduces a low-rank approach for representing the updates to the weight matrices in the network, which allows the authors to reduce the number of trainable parameters to as little as 0.01% of the original size. This is achieved by expressing the updates as a low-rank decomposition of the original weight matrix, and representing the trainable parameters as a small set of matrices and vectors.\n\nThe paper also discusses the benefits of this approach, including reduced computational requirements and memory usage, and presents experiments using pre-trained language models as an example.'