To run this, press "*Runtime*" and press "*Run all*" on a **free** Tesla T4 Google Colab instance!
<div class="align-center">
<a href="https://unsloth.ai/"><img src="https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png" width="115"></a>
<a href="https://discord.gg/unsloth"><img src="https://github.com/unslothai/unsloth/raw/main/images/Discord button.png" width="145"></a>
<a href="https://docs.unsloth.ai/"><img src="https://github.com/unslothai/unsloth/blob/main/images/documentation%20green%20button.png?raw=true" width="125"></a> Join Discord if you need help + ⭐ <i>Star us on <a href="https://github.com/unslothai/unsloth">GitHub</a> </i> ⭐
</div>

To install Unsloth on your local device, follow [our guide](https://docs.unsloth.ai/get-started/install-and-update). This notebook is licensed under [LGPL-3.0](https://github.com/unslothai/notebooks?tab=LGPL-3.0-1-ov-file#readme).


### Installation

In [None]:
%%capture
import os, re
# Install Unsloth and its dependencies. When no environment variable name contains
# "COLAB_", a plain `pip install unsloth` is used; otherwise the Colab branch below runs.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
     # Extract torch's "major.minor" version and choose the xformers pin paired with it
     # (2.9 -> 0.0.33.post1, 2.8 -> 0.0.32.post2, anything else -> 0.0.29.post3).
     import torch; v = re.match(r"[0-9]{1,}\.[0-9]{1,}", str(torch.__version__)).group(0)
     xformers = "xformers==" + ("0.0.33.post1" if v=="2.9" else "0.0.32.post2" if v=="2.8" else "0.0.29.post3")
     # NOTE(review): --no-deps presumably avoids replacing Colab's preinstalled packages — confirm.
     !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
     !pip install sentencepiece protobuf "datasets==4.3.0" "huggingface_hub>=0.34.0" hf_transfer
     !pip install --no-deps unsloth
# Version pins applied in both branches.
!pip install transformers==4.56.2
!pip install --no-deps trl==0.22.2

### Unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

# Loading configuration. These names are reused by later cells, so keep them stable.
max_seq_length = 2048   # forwarded to from_pretrained and to the trainer
dtype = None            # None lets the loader auto-detect the dtype
load_in_4bit = True     # request 4-bit loading

# Collect the loader arguments first so the call itself stays short.
load_kwargs = dict(
    model_name="princeton-nlp/gemma-2-9b-it-SimPO",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# Names of the projection modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

<a name="Data"></a>
### Data Prep
Carga de datasets: Rubuntu (Filtrado) + Capibara (Concatenado)

In [None]:
from datasets import load_dataset, concatenate_datasets

# Alpaca-style prompt template with three positional `{}` slots, filled in order
# (instruction, input, response) via `str.format`. The text is whitespace-sensitive.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence token string taken from the loaded tokenizer; it is appended to
# every formatted example in formatting_prompts_func.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of Alpaca-style rows into training strings.

    Args:
        examples: Batch mapping with parallel "instruction", "input" and
            "output" sequences (the shape passed by a batched `map`).

    Returns:
        A dict with a single key "text" holding one string per row: the
        `alpaca_prompt` template filled with the row's fields, followed by
        `EOS_TOKEN`.
    """
    texts = []
    rows = zip(examples["instruction"], examples["input"], examples["output"])
    for instruction, context, output in rows:
        # A missing input would otherwise be formatted as the literal text
        # "None" inside the prompt; use the same empty string that
        # standardize_columns uses when the column is absent.
        if context is None:
            context = ""
        texts.append(alpaca_prompt.format(instruction, context, output) + EOS_TOKEN)
    return {"text": texts}

# --- DATASET LOADING AND PROCESSING ---

def _has_output(row):
    """Return True when the row's output is neither None nor blank after stripping."""
    answer = row['output']
    if answer is None:
        return False
    return len(str(answer).strip()) > 0


# 1. Main dataset: Rubuntu Guarani-Jopara
print("Cargando dataset Rubuntu...")
dataset_main = load_dataset("rubuntu/dataset-guarani-jopara-v01", split="train")

# FILTER: keep only rows whose output is present and non-empty
dataset_main = dataset_main.filter(_has_output)
print(f"Dataset Rubuntu filtrado: {len(dataset_main)} filas.")

# 2. Secondary dataset: Capibara LLM (loaded as-is, no filtering)
print("Cargando dataset Capibara...")
dataset_secondary = load_dataset("Capibara-LLM/gn-multi-affective-alpaca", split="train")
print(f"Dataset Capibara cargado: {len(dataset_secondary)} filas.")

# 3. Column standardization
def standardize_columns(ds):
    """Return `ds` reduced to the columns instruction, input and output.

    If `ds` has no "input" column, one filled with empty strings is added
    first so the selection below always succeeds for that column.
    """
    wanted = ["instruction", "input", "output"]
    has_input = 'input' in ds.column_names
    if not has_input:
        blanks = ["" for _ in range(len(ds))]
        ds = ds.add_column("input", blanks)
    return ds.select_columns(wanted)

# Bring both datasets to the same three-column layout before merging
dataset_main, dataset_secondary = (
    standardize_columns(part) for part in (dataset_main, dataset_secondary)
)

# 4. Concatenate and shuffle (fixed seed)
dataset = concatenate_datasets([dataset_main, dataset_secondary]).shuffle(seed=3407)
print(f"Total combinado para entrenamiento: {len(dataset)} filas.")

# Apply the prompt format; adds the "text" column produced by formatting_prompts_func
dataset = dataset.map(formatting_prompts_func, batched=True)

<a name="Train"></a>
### Train the model

In [None]:
from trl import SFTConfig, SFTTrainer

# Training hyperparameters, built separately so they are easy to inspect and tweak.
training_args = SFTConfig(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 examples per device per update
    warmup_steps=5,
    # Caps this run at 60 steps.
    # NOTE(review): the original note suggested None for a full-dataset run; the
    # Transformers default for "no cap" is -1 with num_train_epochs setting the
    # length — confirm before changing.
    max_steps=60,
    learning_rate=2e-4,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.001,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Supervised fine-tuning trainer over the formatted "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    packing=False,
    args=training_args,
)

In [None]:
# Run training with the trainer built in the previous cell and keep the returned stats object.
trainer_stats = trainer.train()

<a name="Inference"></a>
### Inference

In [None]:
# Put the model into Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Fill the template with an instruction only; the response slot stays empty
# so the generated tokens continue from it.
prompt = alpaca_prompt.format(
    "Mba'éichapa reiko?",  # instruction
    "",  # input
    "",  # output
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
decoded = tokenizer.batch_decode(outputs)
print(decoded)

<a name="Save"></a>
### Saving, loading finetuned models
To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.

In [None]:
import os

# Local save directory and Hub repository for the LoRA adapters.
model_name_local = "gemma-2-9b-it-SimPO-Jopara"
repo_id_hub = "Capibara-LLM/gemma-2-9b-it-SimPO-Jopara"

# Sentinel meaning "no usable token". hf_token is set to this exact string when
# no token is found, and later cells read hf_token.
HF_TOKEN_PLACEHOLDER = "TU_TOKEN_HUGGINGFACE_AQUI"

# Look up the token in Colab secrets first. The import lives inside the try so
# this cell also runs outside Colab (the install cell supports that case);
# an ImportError is then handled like any other lookup failure.
try:
    from google.colab import userdata
    hf_token = userdata.get('HF_TOKEN')
except Exception as e:
    print(f"No se pudo cargar el token de secretos: {e}")
    hf_token = None

# Fall back to the HF_TOKEN environment variable when the secret is unavailable.
if not hf_token:
    hf_token = os.environ.get("HF_TOKEN")

if not hf_token:
    print("Advertencia: El secreto 'HF_TOKEN' no fue encontrado. Usando valor por defecto.")
    hf_token = HF_TOKEN_PLACEHOLDER

# The local save never needs a token.
print(f"Guardando localmente en: {model_name_local}")
model.save_pretrained(model_name_local)
tokenizer.save_pretrained(model_name_local)

# Upload only when a real token is configured; the "uploading" message is
# printed only when an upload actually happens.
if hf_token != HF_TOKEN_PLACEHOLDER:
    print(f"Subiendo a Hugging Face Hub: {repo_id_hub}")
    model.push_to_hub(repo_id_hub, token = hf_token)
    tokenizer.push_to_hub(repo_id_hub, token = hf_token)
else:
    print("Token no configurado correctamente, saltando subida al Hub.")

### GGUF / llama.cpp Conversion
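Optional: the first block below is enabled (`if True:`) and saves and uploads a q4_k_m GGUF; change it to `False` to skip it. To also upload several quantization formats, change the second block's `False` to `True`.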

In [None]:
# hf_token holds this placeholder string when no real token was found; pushing
# with it would fail, so uploads are skipped in that case (same rule as the
# adapter upload in the previous cell).
gguf_can_upload = hf_token != "TU_TOKEN_HUGGINGFACE_AQUI"

if True:
    # Save GGUF q4_k_m locally (no token needed)
    model.save_pretrained_gguf(model_name_local, tokenizer, quantization_method = "q4_k_m")

    # Upload GGUF q4_k_m to the Capibara-LLM repository
    if gguf_can_upload:
        model.push_to_hub_gguf(repo_id_hub, tokenizer, quantization_method = "q4_k_m", token = hf_token)
    else:
        print("Token no configurado correctamente, saltando subida al Hub.")

if False:
    # Upload several GGUF formats (q4_k_m, q8_0, q5_k_m)
    if gguf_can_upload:
        model.push_to_hub_gguf(
            repo_id_hub,
            tokenizer,
            quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
            token = hf_token,
        )
    else:
        print("Token no configurado correctamente, saltando subida al Hub.")