In [None]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling, pipeline
from peft import LoraConfig, get_peft_model

In [None]:
# -----------------------------
# 1. Model configuration
# -----------------------------

# Candidate base models
#MODEL_NAME = "meta-llama/Llama-3.1-8B"  # if it does not fit, switch to 3B
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
# NOTE(review): the directory name says "llama2 ... 3b" although the base model
# above is TinyLlama 1.1B. It is left unchanged because the inference cell of
# this notebook reads the saved model back from "<OUTPUT_DIR>/final".
OUTPUT_DIR = "models/llama2-gaming-3b-lora"

#MODEL_NAME = "distilgpt2"
#OUTPUT_DIR = "models/distilgpt2-gaming-lora"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Use the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load half-precision weights only when a CUDA device is present. The training
# arguments in this notebook enable fp16 only under the same condition, so on a
# CPU-only machine the weights and the training precision now agree (float32)
# instead of pairing float16 weights with a non-fp16 training run.
MODEL_DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32

print("Cargando modelo base...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    torch_dtype=MODEL_DTYPE,
)


In [None]:
# Load the training and validation datasets (JSON Lines files).

DATASET_PATH = "datasets/train_chats.jsonl"
VAL_PATH = "datasets/val_chats.jsonl"

# Both files are read with the "json" builder. A single data file is exposed
# under the "train" split, which is why the validation file also uses it.
train_ds, val_ds = (
    load_dataset("json", data_files=jsonl_path, split="train")
    for jsonl_path in (DATASET_PATH, VAL_PATH)
)

print(train_ds, val_ds)

In [None]:
# -----------------------------
# 2. Enable QLoRA / LoRA ----------> NOT NEEDED FOR DISTILGPT2 - ONLY LARGE MODELS (3B+)
# -----------------------------

# Adapter hyper-parameters, gathered in one place so they are easy to tweak.
lora_settings = dict(
    r=8,
    lora_alpha=32,
    # Adapters are attached only to the modules named q_proj and v_proj.
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
lora_config = LoraConfig(**lora_settings)

# Wrap the base model with the adapters and report the trainable parameters.
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell
# without reloading the base model wraps an already-wrapped model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
def format_chat(example):
    """Render one dataset row as a chat transcript and tokenize it.

    Uses the module-level ``tokenizer``.

    Args:
        example: Mapping with a ``"User"`` entry (the user message) and an
            ``"Assistant"`` entry (the reply to learn).

    Returns:
        The tokenizer output for the rendered transcript, with an added
        ``"labels"`` entry that is a copy of ``"input_ids"``.
    """
    system_turn = {
        "role": "system",
        "content": "You are an assistant that recommends video games based on the user's tastes.",
    }
    user_turn = {"role": "user", "content": example["User"]}
    assistant_turn = {"role": "assistant", "content": example["Assistant"]}

    # Render the three turns to plain text; no generation prompt is appended
    # because the assistant reply is already part of the transcript.
    rendered = tokenizer.apply_chat_template(
        [system_turn, user_turn, assistant_turn],
        tokenize=False,
        add_generation_prompt=False,
    )

    encoded = tokenizer(rendered, truncation=True)
    # Labels mirror the inputs token for token (independent copy).
    encoded["labels"] = encoded["input_ids"].copy()
    return encoded

def _tokenize_split(split):
    """Apply format_chat row by row and keep only the tokenizer's columns."""
    return split.map(
        format_chat,
        batched=False,
        remove_columns=split.column_names,
    )


train_tokenized = _tokenize_split(train_ds)
val_tokenized = _tokenize_split(val_ds)


# -----------------------------
# Subsampling for faster training
# -----------------------------

# Shuffle with a fixed seed and keep at most `max_train_samples` rows.
# NOTE(review): the whole training set is tokenized above before most of it may
# be discarded here; subsampling first would save work if nothing else needs
# the full `train_tokenized`.
max_train_samples = 50000

train_small = train_tokenized.shuffle(seed=42)

if len(train_small) > max_train_samples:
    train_small = train_small.select(range(max_train_samples))

print("Tamaño train_small:", len(train_small))

# Have both splits return torch tensors.
for _split in (train_small, val_tokenized):
    _split.set_format("torch")


In [None]:
# Quick environment check: CUDA availability, the CUDA version reported by
# torch, and the name of device 0 when one exists.
cuda_ok = torch.cuda.is_available()
print("CUDA disponible:", cuda_ok)
print(torch.version.cuda)
if cuda_ok:
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU")

In [None]:
import inspect

def build_training_args_safe(output_dir="./outputs", use_cuda=True):
    """Build a ``TrainingArguments`` from the options this environment accepts.

    The preferred options are filtered against the signature of
    ``TrainingArguments.__init__`` so that an option unknown to the installed
    version is dropped (and reported) instead of raising ``TypeError``.

    Args:
        output_dir: Directory passed through as ``output_dir``.
        use_cuda: Whether the caller wants GPU features. fp16 mixed precision
            is requested only when this is truthy AND ``torch`` reports that
            CUDA is available.

    Returns:
        A ``TrainingArguments`` instance built from the accepted options.
    """
    # Honour the caller's flag; previously it was accepted but ignored.
    enable_fp16 = bool(use_cuda) and bool(torch.cuda.is_available())

    # Options we normally use.
    base_kwargs = {
        "output_dir": output_dir,
        "overwrite_output_dir": True,
        "num_train_epochs": 3,
        "per_device_train_batch_size": 1,
        "gradient_accumulation_steps": 8,
        "warmup_steps": 100,
        "learning_rate": 2e-4,
        # NOTE(review): elsewhere in this notebook the base model is loaded
        # with torch_dtype=float16. Whether fp16 mixed precision on top of that
        # can raise "Attempting to unscale FP16 gradients" depends on the dtype
        # of the trainable parameters - confirm with the installed versions.
        "fp16": enable_fp16,
        "logging_steps": 10,
        "save_steps": 200,
        "save_total_limit": 2,
        # "evaluation_strategy": "no",
        # "evaluation_strategy": "steps",
        # "eval_steps": 500,
        "report_to": "none",
        # "load_best_model_at_end": True,
        # "metric_for_best_model": "loss",
        # "greater_is_better": False,
    }

    # Keep only the options that appear in the TrainingArguments signature.
    valid_keys = set(inspect.signature(TrainingArguments.__init__).parameters)
    filtered = {k: v for k, v in base_kwargs.items() if k in valid_keys}

    print("\n[DEBUG] Claves aceptadas por TrainingArguments en este entorno:")
    for k in sorted(filtered):
        print(" -", k)

    # Make dropped options visible so a silently missing setting is noticed.
    dropped = sorted(set(base_kwargs) - set(filtered))
    if dropped:
        print("[DEBUG] Claves descartadas (no soportadas en este entorno):")
        for k in dropped:
            print(" -", k)

    return TrainingArguments(**filtered)

# CUDA flag for the argument builder; False when `torch` is not a module-level
# name in this kernel.
use_cuda = 'torch' in globals() and torch.cuda.is_available()

# Build the training arguments through the version-tolerant helper.
training_args = build_training_args_safe(output_dir=OUTPUT_DIR, use_cuda=use_cuda)

train_dataset, eval_dataset = train_small, val_tokenized

# Assemble the Trainer from the pieces prepared in the previous cells.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)
trainer = Trainer(**trainer_kwargs)

print("\n[INFO] Trainer creado. Listo para entrenar.")



In [None]:
# -----------------------------
# 5. Training
# -----------------------------
# Start fine-tuning with the module-level `trainer`. Because this call is the
# last expression of the cell, its return value is what the notebook displays.
trainer.train()

In [None]:
# -----------------------------
# 6. Save the model
# -----------------------------
# The trained model and the tokenizer are written side by side into the
# "final" sub-directory of OUTPUT_DIR.
final_dir = f"{OUTPUT_DIR}/final"

print("\nGuardando modelo...")
for _artifact in (trainer.model, tokenizer):
    _artifact.save_pretrained(final_dir)

print("Fine-Tuning completado")

In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE_MODEL = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
MODEL_DIR = "../models/tinyllama-gaming-1b-lora/final"
FINAL_SAVE_DIR = "../models/FYG-model-final"
# NOTE(review): MODEL_DIR here differs from the "<OUTPUT_DIR>/final" directory
# written by the save cell of this notebook - confirm which adapter is meant.

# Load the tokenizer and the base model, then attach the saved adapter.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForCausalLM.from_pretrained(BASE_MODEL)

lora_model = PeftModel.from_pretrained(model, MODEL_DIR)

# Fold the LoRA weights into the base model and drop the adapter weights.
model = lora_model.merge_and_unload()

# Save the complete merged model, ready for Hugging Face. The tokenizer is
# saved last so that its return value stays the cell's displayed output.
model.save_pretrained(FINAL_SAVE_DIR)
tokenizer.save_pretrained(FINAL_SAVE_DIR)


('../models/FYG-model-final\\tokenizer_config.json',
 '../models/FYG-model-final\\special_tokens_map.json',
 '../models/FYG-model-final\\chat_template.jinja',
 '../models/FYG-model-final\\tokenizer.model',
 '../models/FYG-model-final\\added_tokens.json',
 '../models/FYG-model-final\\tokenizer.json')

In [None]:

# Smoke test of the fine-tuned model

MODEL_DIR = "models/llama2-gaming-3b-lora/final"  # path to the saved model

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForCausalLM.from_pretrained(MODEL_DIR)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

# One worked exchange given as context before the real question.
example = {
    "User": "I'm looking for a good RPG game.",
    "Assistant": "You might enjoy The Witcher 3 or Skyrim."
}

conversation = [
    {"role": "system", "content": "You are an assistant that recommends video games based on the user's tastes."},
    {"role": "user", "content": example["User"]},
    {"role": "assistant", "content": example["Assistant"]},
    # The question we actually want answered.
    {"role": "user", "content": "Can you recommend me an action game?"},
]

# Render the prompt with the tokenizer's chat template, i.e. the same way the
# training text was produced in format_chat. The previous hand-written
# "[SYSTEM]: / [USER]: / [ASSISTANT]:" markers never appeared in the training
# data. add_generation_prompt=True ends the prompt where the assistant's reply
# should begin.
prompt = tokenizer.apply_chat_template(
    conversation,
    tokenize=False,
    add_generation_prompt=True,
)

# Generation
output = pipe(prompt, max_new_tokens=150)
print(output[0]['generated_text'])