In [None]:
import os
import json
import torch
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments
)
from trl import SFTTrainer, SFTConfig

# 1. LOAD DATA
# The records are turned into a Dataset with Dataset.from_list, and
# format_instruction reads the "instruction" and "output" keys of each one.
data_path = "../data/fase3/finetuning_data.json"

if os.path.exists(data_path):
    with open(data_path, "r", encoding="utf-8") as f:
        data = json.load(f)
else:
    # No data file: fall back to a single in-memory sample so the rest of
    # the pipeline can still be exercised end to end.
    print(f"⚠️ Arquivo não encontrado: {data_path}")
    print("Usando dados de exemplo na memória para teste...")
    data = [
        {"instruction": "Teste", "output": "Isso é um teste."}
    ]

dataset = Dataset.from_list(data)

# Formatting function
def format_instruction(sample):
    """Build the single-string training text for one record.

    Args:
        sample: Mapping with an "instruction" entry (the prompt) and an
            "output" entry (the expected answer).

    Returns:
        A dict with one key, "text", holding the prompt and the answer
        laid out as a "### Human:" line followed by a "### Assistant:" line.

    Raises:
        KeyError: If "instruction" or "output" is missing from ``sample``.
    """
    prompt = sample["instruction"]
    answer = sample["output"]
    text = f"### Human: {prompt}\n### Assistant: {answer}"
    return {"text": text}

# Run format_instruction over every record; the dict it returns supplies
# the "text" field for each one.
print("Formatando dados...")
dataset_formatado = dataset.map(function=format_instruction)

# 2. LOAD MODEL (CPU version)
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
print(f"Carregando modelo {model_name}...")

# Load the matching tokenizer and reuse its EOS token as the padding token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

# Loading options that keep the weights on the CPU in full precision.
# NOTE(review): recent transformers releases warn that `torch_dtype` is
# deprecated in favour of `dtype`; the old name is kept here so the cell
# still runs on older releases -- confirm the minimum supported version.
cpu_load_kwargs = {
    "device_map": "cpu",           # force CPU usage
    "torch_dtype": torch.float32,  # normal precision (avoids FP16 errors)
}
model = AutoModelForCausalLM.from_pretrained(model_name, **cpu_load_kwargs)

# 3. CONFIGURE LoRA
# The rank is kept small (r=4) so the adapter is faster to train on the CPU.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
)

# Apply LoRA to the model manually before training; `model` is rebound to
# the value returned by get_peft_model.
model = get_peft_model(model, peft_config)
print("LoRA aplicado com sucesso.")

# 4. TRAINING CONFIGURATION (tuned for CPU)
sft_config = SFTConfig(
    # Where results are written.
    output_dir="./results",
    # Data handling: train on the "text" column, one example per sequence.
    dataset_text_field="text",
    packing=False,
    max_length=256,                 # reduced to save RAM
    # Batch size: smallest possible, with gradients accumulated over 4 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Schedule: only 10 steps, enough to prove the pipeline works.
    max_steps=10,
    learning_rate=2e-4,
    logging_steps=1,
    # Hardware: CPU only, no half precision.
    use_cpu=True,
    fp16=False,
)

# 5. TRAINER
# Pass the tokenizer created earlier in this cell (the one whose pad token
# was set to EOS) explicitly. Without `processing_class` the trainer has to
# resolve a tokenizer on its own and that pad-token setup is never used.
trainer = SFTTrainer(
    model=model,
    args=sft_config,
    train_dataset=dataset_formatado,
    processing_class=tokenizer,
)

# Training runs on the CPU (see sft_config), so this call can take a while.
print("🚀 Iniciando treinamento (Modo CPU)... Tenha paciência.")
trainer.train()

# 6. SAVE
print("Salvando modelo...")
final_path = "../models/medical_assistant_adapter"
# The model handed to the trainer was wrapped by get_peft_model, so this is
# expected to write the LoRA adapter rather than the full base weights
# (NOTE(review): confirm against the installed PEFT version).
trainer.model.save_pretrained(final_path)
print(f"✅ Sucesso! Modelo salvo em {final_path}")

Formatando dados...


Map:   0%|          | 0/3 [00:00<?, ? examples/s]

Carregando modelo TinyLlama/TinyLlama-1.1B-Chat-v1.0...


`torch_dtype` is deprecated! Use `dtype` instead!


LoRA aplicado com sucesso.


Adding EOS to train dataset:   0%|          | 0/3 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/3 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/3 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


🚀 Iniciando treinamento (Modo CPU)... Tenha paciência.
