<a href="https://colab.research.google.com/github/BlitzKriegM/C-digos-Proyecto/blob/main/llama3_rac.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
# Environment setup cell: %%capture silences the output of everything below.
# torch is imported here (before the installs) only to query the GPU's
# CUDA compute capability, which selects the dependency set further down.
import torch
major_version, minor_version = torch.cuda.get_device_capability()

# Must be installed separately because Colab ships torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

if major_version >= 8:
    # Use this for newer GPUs such as Ampere and Hopper (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes

# NOTE(review): none of the installs above are version-pinned, and this final
# upgrade pulls whatever trl release is current — results may differ between runs.
!pip install --upgrade trl

# unsloth stays first, as in the original cell, ahead of the libraries it wraps.
from unsloth import FastLanguageModel
from datasets import load_dataset
from peft import PeftModel
from transformers import TrainingArguments, AutoModelForCausalLM, TrainerCallback, TrainerState, TrainerControl
from trl import SFTTrainer

In [None]:
class LoggingCallback(TrainerCallback):
    """Trainer callback that prints the most recent log record on every log event."""

    def on_log(self, args, state, control, logs=None, **kwargs):
        """Print the newest log entry together with the current global step.

        Args:
            args: Training arguments supplied by the trainer (unused).
            state: Trainer state; ``state.log_history`` and ``state.global_step``
                are read.
            control: Trainer control object (unused, left unmodified).
            logs: Optional log dict supplied by the caller. Used only as a
                fallback when ``state.log_history`` is empty.
            **kwargs: Any additional keyword arguments passed by the trainer.
        """
        # Prefer the recorded history entry (keeps the printed format of the
        # original implementation); avoid IndexError when nothing was recorded.
        latest = state.log_history[-1] if state.log_history else logs
        if latest is None:
            return
        print(f"Step {state.global_step}: {latest}")

# Loading settings: context length, compute dtype, and 4-bit loading flag.
max_seq_length = 2048
if torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16  # use bf16 when the GPU reports support for it
else:
    dtype = torch.float16
load_in_4bit = True

# Load the base model together with its tokenizer.
base_checkpoint = "unsloth/llama-3-8b-bnb-4bit"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_checkpoint,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Wrap the model with LoRA adapters (PEFT) on the attention and MLP projections.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

# Load and prepare the dataset.
dataset = load_dataset("somosnlp/Reglamento_Aeronautico_Colombiano_2024GemmaQA", split="train")
dataset = dataset.shuffle(seed=1234)

# Tokenize the "Text" column. Truncate to max_seq_length so no sample is
# longer than the context length the model was loaded with.
dataset = dataset.map(
    lambda samples: tokenizer(
        samples["Text"],
        truncation=True,
        max_length=max_seq_length,
    ),
    batched=True,
)

# Hold out 10% for evaluation; the seed makes the split reproducible across runs.
dataset = dataset.train_test_split(test_size=0.1, seed=1234)
train_data = dataset["train"]
test_data = dataset["test"]

NameError: name 'TrainerCallback' is not defined

In [None]:
# Training configuration. The bf16 capability is probed once and drives both
# mixed-precision flags (exactly one of fp16/bf16 ends up enabled).
bf16_available = torch.cuda.is_bf16_supported()

args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    report_to="none",
    # Optimisation schedule
    max_steps=1000,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    optim="adamw_8bit",
    weight_decay=0.01,
    # Batch sizing
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Small evaluation batch to limit memory use during evaluation
    per_device_eval_batch_size=1,
    # Number of evaluation prediction steps whose outputs are accumulated
    # before being moved off the accelerator
    eval_accumulation_steps=2,
    # Mixed precision
    bf16=bf16_available,
    fp16=not bf16_available,
    # Logging / evaluation / checkpoint cadence (in optimizer steps)
    logging_steps=10,
    evaluation_strategy="steps",
    eval_steps=100,
    save_steps=100,
    # Restore the checkpoint with the best eval_loss when training finishes
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

# Build the trainer from the model, tokenizer, datasets and training arguments.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    eval_dataset=test_data,
    args=args,
    dataset_num_proc=2,
    callbacks=[LoggingCallback],
)



In [None]:
# Start the training run.
trainer_stats = trainer.train()

# Save the fine-tuned model (LoRA adapter) to a local directory.
new_model = "alecrosales1/Llama3_rac"
trainer.model.save_pretrained(new_model)

# Load the base model.
# NOTE(review): the training model and trainer are still referenced at this
# point, so both models are held at once — watch GPU memory here.
model_id = "unsloth/llama-3-8b-bnb-4bit"
base_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0}
)

# Merge the LoRA weights into the base model.
# The adapter has to be attached to `base_model` through PeftModel; calling
# `base_model.from_pretrained(new_model)` only invokes the model class's own
# loader and never wraps `base_model` with the adapter.
merged_model = PeftModel.from_pretrained(base_model, new_model)
merged_model = merged_model.merge_and_unload()

# Save the merged model and the tokenizer side by side.
merged_model.save_pretrained("merged_model", safe_serialization=True)
tokenizer.save_pretrained("merged_model")

# Display the training statistics.
trainer_stats

Step,Training Loss,Validation Loss
100,0.4997,0.539858
200,0.5076,0.508099
300,0.5054,0.48849
400,0.4837,0.474602
500,0.4816,0.460746
600,0.4479,0.449369
700,0.4219,0.440194
800,0.4249,0.431984
900,0.4512,0.425376
1000,0.4175,0.422251


Step 10: {'loss': 1.6314, 'grad_norm': 2.1105234622955322, 'learning_rate': 0.00019899497487437187, 'epoch': 0.003631411711302769, 'step': 10}
Step 20: {'loss': 0.7188, 'grad_norm': 0.4010428786277771, 'learning_rate': 0.0001969849246231156, 'epoch': 0.007262823422605538, 'step': 20}
Step 30: {'loss': 0.6363, 'grad_norm': 0.433738648891449, 'learning_rate': 0.0001949748743718593, 'epoch': 0.010894235133908307, 'step': 30}
Step 40: {'loss': 0.5889, 'grad_norm': 0.3442542850971222, 'learning_rate': 0.000192964824120603, 'epoch': 0.014525646845211076, 'step': 40}
Step 50: {'loss': 0.5418, 'grad_norm': 0.3842291533946991, 'learning_rate': 0.00019095477386934674, 'epoch': 0.018157058556513846, 'step': 50}
Step 60: {'loss': 0.5564, 'grad_norm': 0.3663150370121002, 'learning_rate': 0.00018894472361809047, 'epoch': 0.021788470267816613, 'step': 60}
Step 70: {'loss': 0.5923, 'grad_norm': 0.361740380525589, 'learning_rate': 0.0001869346733668342, 'epoch': 0.02541988197911938, 'step': 70}
Step 80

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 

In [None]:
# Upload the merged model and its tokenizer to the same Hub repository.
# NOTE(review): the repository name mentions "Gemma" although this notebook
# fine-tunes a Llama 3 checkpoint — confirm the target repo is intended.
hub_repo_id = "alecrosales1/GemmaRac_Alec"
merged_model.push_to_hub(hub_repo_id, use_temp_dir=True)
tokenizer.push_to_hub(hub_repo_id, use_temp_dir=True)
