# FINE TUNING DE UN MODELO LLM

* "Train the retriever to improve what is retrieved — better inputs to the generator."

* "Train the generator to improve how it responds — better outputs based on those inputs."

1. Preparar dataset de QA (preguntas y respuestas)

In [None]:
import json
from pathlib import Path

# File locations used by the fine-tuning workflow.

MANUAL_PATH = "manual_usuario_mi_coto_clean.txt"
QUESTIONS_PATH = "questions.json"
OUTPUT_PATH = "qa_pairs.jsonl"


# Load the questions. The encoding is explicit so accented Spanish characters
# decode the same way on every platform instead of using the locale default.
with open(QUESTIONS_PATH, "r", encoding="utf-8") as f:
    questions = json.load(f)  # List of questions

2. Generar pares de preguntas y respuestas con los cuales se entrenará el Retriever; esto le ayudará a identificar con mayor facilidad las respuestas en los diferentes chunks.

In [None]:
# Generate question/answer pairs by running every question through the QA chain.
# NOTE(review): `qa_chain` is not defined before this cell in the visible
# notebook; it must already exist in the session when this cell runs.

qa_pairs = []
for question in questions:
    result = qa_chain.run(question)
    qa_pairs.append({"question": question, "answer": result})

# Save the pairs as JSON Lines (one JSON object per line).
# UTF-8 with ensure_ascii=False stores accented characters as-is, so the file
# is readable and portable instead of depending on a legacy Mac codec.

with open(OUTPUT_PATH, "w", encoding="utf-8") as f:
    for pair in qa_pairs:
        json.dump(pair, f, ensure_ascii=False)
        f.write("\n")

# Notebook display of the generated pairs.
qa_pairs

3. Fine-tune Retriever: en esta parte se entrena el retriever con las diferentes formas de preguntar y de responder a los usuarios.


In [None]:
import os
# Set before the training libraries are imported; presumably turns off
# Weights & Biases reporting during training — TODO confirm.
os.environ["WANDB_DISABLED"] = "true"
from sentence_transformers import SentenceTransformer, losses, InputExample
from torch.utils.data import DataLoader


# Each element of qa_pairs is a dict with "question" and "answer" keys.
# Index the dict explicitly: tuple-unpacking a dict yields its KEYS, which
# would train on the literal strings "question" and "answer".
train_examples = [
    InputExample(texts=[pair["question"], pair["answer"]]) for pair in qa_pairs
]
model_retriever = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Fine-tune on the (question, answer) pairs for 3 epochs and save the result.
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)
train_loss = losses.MultipleNegativesRankingLoss(model_retriever)
model_retriever.fit(train_objectives=[(train_dataloader, train_loss)], epochs=3)
model_retriever.save("models/custom_retriever")

4. Fine tune Generator

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from torch.utils.data import DataLoader
import torch

# Build the seq2seq training texts from qa_pairs (must already be defined).
# NOTE(review): the answer is used both as the "Contexto" part of the input
# and as the target text — confirm this is intended.
input_texts = [f"Pregunta: {qa['question']} Contexto: {qa['answer']}" for qa in qa_pairs]
target_texts = [qa['answer'] for qa in qa_pairs]

# Load a pre-trained sequence-to-sequence model and its tokenizer.
model_generator = AutoModelForSeq2SeqLM.from_pretrained("t5-small")
tokenizer = AutoTokenizer.from_pretrained("t5-small")

# Tokenize the input and target texts into fixed-length tensors.
inputs = tokenizer(input_texts, padding="max_length", truncation=True, max_length=512, return_tensors="pt")
labels = tokenizer(target_texts, padding="max_length", truncation=True, max_length=128, return_tensors="pt")

# Padding positions must not contribute to the loss. The model's loss ignores
# label id -100, so replace the pad token id with -100 on a copy of the labels.
masked_label_ids = labels["input_ids"].clone()
masked_label_ids[masked_label_ids == tokenizer.pad_token_id] = -100

# Move the model to GPU if available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_generator = model_generator.to(device)

# Create a DataLoader yielding (input_ids, attention_mask, label_ids) batches.
dataset = list(zip(inputs["input_ids"], inputs["attention_mask"], masked_label_ids))
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)

# Optimizer
optimizer = torch.optim.AdamW(model_generator.parameters(), lr=5e-5)

# Training loop: 3 epochs, one optimizer step per batch.
model_generator.train()
for epoch in range(3):
    for batch in dataloader:
        input_ids, attention_mask, label_ids = [b.to(device) for b in batch]

        outputs = model_generator(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=label_ids
        )

        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()  # clear gradients before the next batch

    print(f"Epoch {epoch+1} completed")

# Save model and tokenizer
output_dir = "./my_finetuned_model"
model_generator.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)


5. Loading customized generator

In [None]:
# The generator was left in training mode by the training loop; switch to
# evaluation mode so dropout is off during generation.
model_generator.eval()

# Wrap the in-memory fine-tuned generator in a generation pipeline.
# Decoding is greedy (do_sample=False), so the sampling-only options
# temperature/top_p are omitted: they have no effect without sampling.
text_gen_pipeline = pipeline(
    "text2text-generation",  # For encoder-decoder models
    model=model_generator,
    tokenizer=tokenizer,
    max_length=829,
    do_sample=False,
    repetition_penalty=1.1,
)

# Wrap pipeline in LangChain LLM
fine_tuned_llm = HuggingFacePipeline(pipeline=text_gen_pipeline)

6. Loading customized retriever

In [None]:
# Reload the fine-tuned SentenceTransformer from the directory it was saved to.
model_retriever = SentenceTransformer('models/custom_retriever')

# LangChain embedding wrapper pointing at the same saved model directory.
embedding_function = HuggingFaceEmbeddings(model_name='models/custom_retriever')

# Re-wrap each chunk as a LangChain Document that carries only its text.
# `docs` is expected to come from the text splitter run earlier.
langchain_docs = list(
    map(lambda doc: Document(page_content=doc.page_content), docs)
)

# Index the documents in FAISS using the fine-tuned embeddings.
vectorstore = FAISS.from_documents(langchain_docs, embedding_function)

# Retriever that returns the 4 closest chunks per query.
fine_tuned_retriever = vectorstore.as_retriever(
    search_kwargs={"k": 4},
)

7. Testing the new RAG pipeline with the fine-tuned retriever and the fine-tuned generator

In [None]:
# Spanish prompt: answer using only the retrieved context, then the question.
prompt_template = PromptTemplate.from_template(
    "Responde en español basándote únicamente en el contexto proporcionado:\n"
    "\n"
    "{context}\n"
    "\n"
    "Pregunta: {question}\n"
    "Respuesta:"
)

# RAG chain: the fine-tuned retriever feeds its chunks ("stuff" strategy) into
# the fine-tuned generator; source documents are returned with the answer.
qa_chain = RetrievalQA.from_chain_type(
    retriever=fine_tuned_retriever,
    llm=fine_tuned_llm,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt_template},
)

In [None]:
# Sample user questions used to smoke-test the fine-tuned RAG chain.
questions = [
    "¿Cómo cambio mi contraseña?",
    "¿Dónde puedo actualizar mi teléfono?",
    "¿Dónde se sube la constancia fiscal?",
    "¿Dónde veo mi saldo o mis adeudos?",
    "¿Cómo informo que ya pagué?",
    "¿Dónde agrego mis datos fiscales?",
    "¿Cómo aparto el salón de eventos?",
    "¿Cómo puedo cancelar una reservación?",
    "Envié un mensaje y no me han respondido",
    "¿Dónde veo lo que escribió el administrador?",
    "¿Dónde están las actas de la asamblea?",
    "¿Dónde está el reglamento del condominio?",
]

# Ask each question and print it followed by the generated answer.
for q in questions:
    result = qa_chain({"query": q})
    print("\nPregunta: " + q)
    print("Respuesta:", result["result"])

    # The retrieved chunks are available in result["source_documents"]
    # if the context used for an answer needs to be inspected.