In [9]:
!pip install langchain bitsandbytes accelerate langchain_community PyPDF opendatasets sentence-transformers --quiet

In [10]:
!pip install faiss-cpu



In [2]:
from langchain.document_loaders import PyPDFLoader, DirectoryLoader

from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import FAISS

from langchain import HuggingFaceHub
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.chains import RetrievalQA

import os
import torch
import warnings
warnings.filterwarnings("ignore")

In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

device

'cuda'

In [4]:
import requests

url = "https://kvongcmehsanalibrary.wordpress.com/wp-content/uploads/2021/07/harrypotter.pdf"
pdf_path = "harrypotter.pdf"

response = requests.get(url)
with open(pdf_path, "wb") as f:
    f.write(response.content)

print("PDF descargado correctamente.")

PDF descargado correctamente.


In [5]:
pdf_loader = PyPDFLoader(pdf_path)

pages = pdf_loader.load_and_split()  # separamos por páginas

len(pages)

3604

In [6]:
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Paso 1: Limpiar las páginas como antes
clean_pages = []
for page in pages:
    text = page.page_content
    clean_text = text.replace('\t', ' ')
    clean_text = clean_text.replace('\n', ' ')
    clean_text = ' '.join(clean_text.split())
    clean_pages.append(Document(page_content=clean_text))
# Ver ejemplo de la primera página ya limpia
print(clean_pages[0])

page_content='CONTENTS Harry Potter and the Sorcerer’s Stone Harry Potter and the Chamber of Secrets Harry Potter and the Prisoner of Azkaban Harry Potter and the Goblet of Fire Harry Potter and the Order of the Phoenix Harry Potter and the Half-Blood Prince Harry Potter and the Deathly Hallows'


In [7]:
# Antes de pasar a la base de datos vectorial, dividimos el documento en fragmentos
# "pequeños" o "chunks" en inglés. Esto para procesar y analizar mejor los documentos:

text_splitter = RecursiveCharacterTextSplitter(
                               chunk_size = 512,   # estos valores se pueden ajustar
                               chunk_overlap  = 32,
                               length_function = len,    # divide los chunks contando caracteres
                               )

chunks = text_splitter.split_documents(clean_pages)   # llevamos a cabo el proceso de fragmentación en chunks

In [8]:
len(chunks)

14622

In [9]:
chunks[12000].page_content

'an involuntary movement; for a fraction of a second, it seemed he expected to receive Voldemort’s wand in exchange for his own. The gesture was not missed by Voldemort, whose eyes widened maliciously. “Give you my wand, Lucius? My wand?” Some of the throng sniggered. “I have given you your liberty, Lucius, is that not enough for you? But I have noticed that you and your family seem less than happy of late. . . . What is it about my presence in your home that displeases you, Lucius?” “Nothing — nothing, my'

## EMBEDINGS

In [10]:
Embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2",
                                   model_kwargs={'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
                                   )

## VECTOR DATABASE

In [11]:
# Obtenemos los vectores embebidos de cada chunk:

store = FAISS.from_texts([str(chunk) for chunk in chunks], Embeddings)

In [12]:
# veamos uno de los vectores embebidos:

print(len(store.embeddings.embed_query(chunks[0].page_content)))

print(store.embeddings.embed_query(chunks[0].page_content))

384
[-0.043303921818733215, 0.04168776422739029, -0.04757193103432655, -0.000875918020028621, -0.075721874833107, 0.062160514295101166, 0.00960005447268486, -0.005526233464479446, -0.030632710084319115, 0.0017973687499761581, -0.053954366594552994, 0.024333883076906204, -0.029157670214772224, -0.005653927102684975, -0.026795687153935432, -0.0697607547044754, 0.01364040281623602, 0.0425613597035408, 0.01729034073650837, -0.04206441342830658, 0.01768612489104271, -0.011606981046497822, 0.03144080564379692, -0.004370811861008406, -0.0014239456504583359, 0.01688431203365326, -0.0387699156999588, -0.004028604365885258, -0.07019447535276413, -0.09112319350242615, -0.07281717658042908, -0.02180464006960392, -0.07362427562475204, -0.03071080707013607, -0.05967538058757782, -0.04950997605919838, 0.05095882713794708, 0.028488650918006897, 0.0773603692650795, 0.008546852506697178, 0.013358998112380505, 0.024391883984208107, -0.0680253729224205, 0.1288420855998993, 0.005871805362403393, 0.01283539

In [13]:
question = """
¿who is dumbledore?
"""

docs = store.similarity_search(question, k = 3)

In [14]:
docs[2].page_content

"page_content='Then she said, “He’s a funny boy.” “Yes,” said Dumbledore. “I thought he might be.” “He was a funny baby too. He hardly ever cried, you know. And then, when he got a little older, he was . . . odd.” “Odd in what way?” asked Dumbledore gently. “Well, he —” But Mrs. Cole pulled up short, and there was nothing blurry or vague about the inquisitorial glance she shot Dumbledore over her gin glass. “He’s definitely got a place at your school, you say?” “Definitely,” said Dumbledore. “And nothing I say can change'"

## RAG

In [15]:
from huggingface_hub import login
from google.colab import userdata


In [28]:
login(userdata.get('HF_TOKEN_TEC'))

In [16]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [33]:
# de nuevo los parámetros de la configuración de cuantización 4-bit:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [34]:
# cargamos el tokenizador y el modelo, en este caso de Mistral:

tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3",     # tarda unos 9 mins con gpu
                                             quantization_config = bnb_config
                                             )

config.json:   0%|          | 0.00/601 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.55G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [17]:

# Load model directly
#from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

#tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
#model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")

In [35]:
# Creamos el Pipeline de HF:

pipe = pipeline("text-generation",   # tipo de modelo utilizado
                model=model,    # debemos cargar el modelo cuantizado y no el original
                tokenizer=tokenizer,    # los tokens embebidos
                max_new_tokens=128      # puedes ajustar la salida al tamaño deseado de tokens
                )


# Y ahora lo trasladamos al Pipeline de LangChain:
hf = HuggingFacePipeline(pipeline=pipe)

Device set to use cuda:0


In [36]:
# usamos el template de Langchain para definir el prompt:
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""Using the following context information provide the response to the question.

Context:
{context}

Question:
{question}

Response:
"""
)

In [37]:
# Como lo estamos haciendo un problema tipo Q&A, usamos entonces el método que
# tiene LangChain para estos caso, RetrievalQAs:

qa_chain = RetrievalQA.from_chain_type(llm=hf,   # Este ya es todo el modelo cuantizado y preparado en formato LangChain
                                       retriever = store.as_retriever(search_kwargs={'k': 3}),   # Este es el espacio de búsqueda, la base de datos vectorial
                                       return_source_documents=True,   # para que también devuelva de dónde obtiene la información.
                                       chain_type_kwargs={'prompt': prompt}
                                       )

In [43]:
pregunta = "¿Who is albus dumbledore?"

respuesta = qa_chain({"query": pregunta})

print(respuesta["result"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Using the following context information provide the response to the question.

Context:
page_content='Nothing like this man had ever been seen on Privet Drive. He was tall, thin, and very old, judging by the silver of his hair and beard, which were both long enough to tuck into his belt. He was wearing long robes, a purple cloak that swept the ground, and high-heeled, buckled boots. His blue eyes were light, bright, and sparkling behind half-moon spectacles and his nose was very long and crooked, as though it had been broken at least twice. This man’s name was Albus Dumbledore. Albus Dumbledore didn’t seem'

page_content='jackets and had identical, shoulder-length hairstyles. Albus looked several years older, but otherwise the two boys looked very alike, for this was before Albus’s nose had been broken and before he started wearing glasses. The family looked quite happy and normal, smiling serenely up out of the newspaper. Baby Ariana’s arm waved vaguely out of her shawl. Harry looked 

In [44]:
import ipywidgets as widgets
from IPython.display import display

input_box = widgets.Text(
    value='',
    placeholder='Ask a question about Harry Potter...',
    description='You:',
    disabled=False,
    layout=widgets.Layout(width='100%')
)

send_button = widgets.Button(
    description="Send",
    button_style='primary'
)

output_area = widgets.Output()

def on_send_clicked(b):
    with output_area:
        user_input = input_box.value
        if not user_input.strip():
            print("Please enter a question.")
            return

        print(f"You: {user_input}")
        response = qa_chain({"query": user_input})
        response_string = response["result"]
        print(f"Bot: {response_string}\n")
        input_box.value = ""

send_button.on_click(on_send_clicked)

display(input_box, send_button, output_area)

Text(value='', description='You:', layout=Layout(width='100%'), placeholder='Ask a question about Harry Potter…

Button(button_style='primary', description='Send', style=ButtonStyle())

Output()