<a href="https://colab.research.google.com/github/Matheusbcy/-Data-Science-IA-/blob/main/RAG_%2B_Llama3_%2B_PDF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -q transformers einops accelerate bitsandbytes
!pip install -q langchain langchain_community langchain-huggingface langchainhub langchain_chroma

In [None]:
import torch
import os
import getpass
import bs4

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_core.prompts import (ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder)
from langchain_core.messages import SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.document_loaders import WebBaseLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.schema.runnable import RunnableLambda

In [None]:
# Prompt for the Hugging Face access token with getpass (the typed value is
# not echoed) and export it as HF_TOKEN for this session.
# NOTE(review): presumably read by the Hugging Face libraries when the model
# is downloaded below - confirm.
os.environ.update(HF_TOKEN=getpass.getpass("Digite seu token Hugging Face: "))

In [None]:
# Checkpoint used for generation.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the quantized model and its matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Text-generation pipeline: low-temperature sampling, a mild repetition
# penalty, at most 500 new tokens, and only the generated continuation is
# returned (return_full_text=False).
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.1,
    max_new_tokens=500,
    return_full_text=False,
)

# Expose the pipeline as a LangChain LLM so it can be composed in a chain.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
# Prompt template for Llama 3: a fixed instruction followed by the retrieved
# context, the user's question, and an open "Resposta:" slot for the model.
# Placeholders: {contexto} and {pergunta}.
template_rag = (
    "Você é um assistente virtual treinado para responder perguntas "
    "com base no contexto fornecido.\n"
    "\n"
    "Contexto:\n"
    "{contexto}\n"
    "\n"
    "Pergunta:\n"
    "{pergunta}\n"
    "\n"
    "Resposta:"
)

In [None]:
# Text loader (PDF here; a web loader is also imported at the top of the file).
# Reads the PDF at file_path through PyPDFLoader and keeps the loaded
# documents in `docs` for the splitting step.
file_path = "/content/Perfil_Profissional_e_Pessoal_Matheus.pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

In [None]:
# Split the loaded documents into overlapping chunks (size 1000, overlap 200);
# add_start_index=True asks the splitter to record where each chunk starts.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    add_start_index=True,
)
splits = text_splitter.split_documents(docs)

In [None]:
# Embedding model used to vectorize the chunks (and, later, the queries).
# NOTE(review): this sentence-transformers model may be English-oriented while
# the prompt and questions are in Portuguese - confirm retrieval quality.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

In [None]:
# Store the chunks in the Chroma vector database, embedded with hf_embeddings.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=hf_embeddings,
)

In [None]:
# Retriever over the vector store: similarity search configured with k = 6.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

In [None]:
# LangChain prompt built from template_rag; it expects the two variables
# "contexto" (retrieved text) and "pergunta" (the user's question).
prompt_rag = PromptTemplate(
    template=template_rag,
    input_variables=["contexto", "pergunta"],
)

In [None]:
# Merge the retrieved documents into a single string.
def format_docs(docs):
    """Join the ``page_content`` of every item in *docs* with blank lines.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        A single string with the contents separated by two newlines;
        an empty string when *docs* yields nothing.
    """
    contents = [item.page_content for item in docs]
    separator = "\n\n"
    return separator.join(contents)

In [None]:
# Build the RAG chain. The input question is fanned out into two prompt
# variables: "contexto" (retriever results merged by format_docs) and
# "pergunta" (the question passed through unchanged). The filled prompt goes
# to the LLM and the output is parsed into a plain string.
chain_rag = (
    {
        "contexto": retriever | RunnableLambda(format_docs),
        "pergunta": RunnablePassthrough(),
    }
    | prompt_rag
    | llm
    | StrOutputParser()
)

In [None]:
# Test using RAG: send one question through the chain. The call is left as the
# cell's final bare expression so the notebook displays the returned answer.

chain_rag.invoke("Qual Curso Matheus está fazendo no momento?")