In [6]:
!pip install -qU langchain-text-splitters langchain-core langchain-community transformers huggingface-hub torch scikit-learn pypdf sentence-transformers

In [10]:
!pip install --upgrade transformers torch

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_community.embeddings import HuggingFaceEmbeddings
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_core.runnables import RunnablePassthrough
from huggingface_hub import login
import torch

class RAGPipeline:
    def __init__(self, pdf_path: str, hf_token: str):
        self.pdf_path = pdf_path
        self.hf_token = hf_token
        self._load_components()
        
    def _load_components(self):
        """Carrega todos os componentes necessários"""
        # 1. Carregar e dividir o PDF
        self._load_and_split_documents()
        
        # 2. Configurar embeddings e vetorstore
        self._setup_embeddings()
        
        # 3. Carregar o modelo Llama 3
        self._load_llama_model()
        
        # 4. Configurar a cadeia RAG
        self._setup_rag_chain()
    
    def _load_and_split_documents(self):
        """Carrega e pré-processa os documentos"""
        loader = PyPDFLoader(self.pdf_path)
        documents = loader.load()
        
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,  # Aumentado para melhor contexto
            chunk_overlap=200,  # Adicionado overlap para manter contexto
            length_function=len,
            is_separator_regex=False,
        )
        self.all_splits = text_splitter.split_documents(documents)
    
    def _setup_embeddings(self):
        """Configura os embeddings e o vetorstore"""
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2",
            model_kwargs={"device": "cuda"},
            encode_kwargs={"normalize_embeddings": True}
        )
        
        self.vectorstore = SKLearnVectorStore.from_documents(
            documents=self.all_splits,
            embedding=self.embeddings,
        )
        
        self.retriever = self.vectorstore.as_retriever(
            search_type="mmr",  # Maximum Marginal Relevance para melhor diversidade
            search_kwargs={
                "k": 5,
                "score_threshold": 0.5,
                "fetch_k": 20  # Busca mais documentos inicialmente para melhor seleção
            }
        )
    
    def _load_llama_model(self):
        """Carrega o modelo Llama 3"""
        login(token=self.hf_token)
        model_name = 'meta-llama/Llama-3.2-3B'
        # model_name = 'meta-llama/Llama-3-1B', #"meta-llama/Meta-Llama-3-8B"
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, token=self.hf_token)
        
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            token=self.hf_token,
            torch_dtype=torch.float16, #if torch.cuda.is_available() else torch.float32,
            #device_map="auto",
            #low_cpu_mem_usage=True
        )
        
        # Configurar pipeline otimizado
        self.pipeline = pipeline(
            "text-generation",
            model=self.model,
            tokenizer=self.tokenizer,
            max_new_tokens=300,
            temperature=0,
            top_p=0.9,
            do_sample=True,
        )
    
    def _setup_rag_chain(self):
        """Configura a cadeia RAG completa"""
        # Template otimizado para Llama 3
        prompt_template = """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
        You are an expert assistant in the analysis of scientific articles.
        Respond based on the documents provided, keeping the information accurate.
        
        Context: {context}<|eot_id|><|start_header_id|>user<|end_header_id|>
        Question: {question}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
        Response:"""
        
        self.prompt = ChatPromptTemplate.from_template(prompt_template)
        
        self.rag_chain = (
            {"context": self.retriever, "question": RunnablePassthrough()}
            | self.prompt
            | self.pipeline
            | StrOutputParser()
        )
    
    def query(self, question: str) -> str:
        """Executa uma consulta RAG completa"""
        try:
            # Busca e formata os documentos relevantes
            docs = self.retriever.invoke(question)
            context = "\n\n".join([f"Documento {i+1}:\n{doc.page_content}" 
                                 for i, doc in enumerate(docs)])
            
            # Gera a resposta
            response = self.pipeline(
                self.prompt.format(context=context, question=question),
                max_new_tokens=300,
                temperature=0.1
            )[0]['generated_text']
            
            # Extrai apenas a parte da resposta após o último prompt
            return response.split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
        except Exception as e:
            return f"Erro ao processar a pergunta: {str(e)}"

In [2]:
# Uso do pipeline
if __name__ == "__main__":
    # Configuração
    PDF_PATH = "Name_of_the_paper"
    HF_TOKEN = "HF_TOKEN"
    
    # Inicialização
    rag_app = RAGPipeline(PDF_PATH, HF_TOKEN)

  self.embeddings = HuggingFaceEmbeddings(


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


In [4]:
# Exemplo de consulta
question = "What deposition method was used by the author to make the films for the work?"
answer = rag_app.query(question)
print("\n" + "="*50)
print(f"Pergunta: {question}")
print("-"*50)
print(f"Resposta: {answer}")
print("="*50 + "\n")

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



Pergunta: What deposition method was used by the author to make the films for the work?
--------------------------------------------------
Resposta: Response: Chemical vapour deposition (CVD) is the deposition method used by the author to make the films for the work. CVD is a process that involves the deposition of a material onto a surface using a vaporized precursor. The precursor is typically a gas or liquid that is heated to a high temperature, where it decomposes and forms a thin film on the surface. CVD is a versatile technique that can be used to deposit a wide range of materials, including semiconductors, metals, and insulators. It is commonly used in the semiconductor industry to produce thin films for electronic devices, such as transistors and solar cells. CVD is also used in the production of coatings for optical and mechanical applications. The process is typically carried out in a vacuum chamber, where the precursor is introduced into the chamber and allowed to react wit