In [1]:
!pip install codecarbon # reiniciar apos instalar este pacote se você estiver em um notebook



In [2]:
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
    BitsAndBytesConfig, Trainer, TrainingArguments,
    DataCollatorForLanguageModeling, EarlyStoppingCallback,
    DataCollatorForSeq2Seq
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

from peft import PeftModel
import pickle
import time
from codecarbon import EmissionsTracker # para calcular emissões de CO2

In [3]:
# ------------------
# Configurações
# ------------------
# MODEL_NAME = "meta-llama/Meta-Llama-3-8B"
MODEL_NAME = 'Qwen/Qwen3-14B-Base'
OUTPUT_DIR = "./qlora-aderencia-classes.finetuning"
SEED = 42
LABELS = ["BAIXA", "MÉDIA", "ALTA"]

torch.manual_seed(SEED)
os.environ["TOKENIZERS_PARALLELISM"] = "false"
torch.utils.checkpoint.use_reentrant = False

# ------------------
# Prompt
# ------------------
PROMPT_TMPL = """You are a thematic relevance evaluator.
Classify how related an academic work (title and abstract) is to a strategic theme.

TITLE: {title}
KEYWORDS: {keywords}
ABSTRACT: {abstract}

Answer with a number 0, 1, or 2 for RELEVANCE LEVEL (2-HIGH, 1-MEDIUM, 0-LOW) to the strategic theme: "{category}".

ANSWER: """


In [4]:
# ------------------
# Criação do Dataset
# ------------------
# ds = Dataset.from_pandas(dados_df[~dados_df.modelo_nivel.isnull()])
# splits = ds.train_test_split(test_size=0.2, seed=SEED)
# split_train = splits["train"]
# split_tmp = splits["test"]

# splits = split_tmp.train_test_split(test_size=0.1, seed=SEED)
# split_eval = splits["train"]
# split_test = splits["test"]

# with open("my_data.pickle", "wb") as file:
#     pickle.dump(split_train, file)
#     pickle.dump(split_eval, file)
#     pickle.dump(split_test, file)

In [5]:
with open("my_data.pickle", "rb") as file:
    split_train = pickle.load(file)
    split_eval = pickle.load(file)
    split_test = pickle.load(file)

In [6]:
print( len(split_train) , len(split_eval), len(split_test) )

33620 4203 4203


In [7]:


# ------------------
# Tokenizer
# ------------------
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token



# ------------------
# Função de preprocessamento
# ------------------
def preprocess(example, max_input_length=512):
    processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))
    gold = str(example["modelo_nivel"]).strip().upper()
    if gold == "MEDIA":
        gold = "MÉDIA"
    assert gold in LABELS
    gold_idx = LABELS.index(gold)  # 0,1,2

    # Cria prompt
    prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"],
        keywords=processed_key_words,
        category=example["tema"]
    )

    # Tokeniza prompt
    input_enc = tok(prompt, truncation=True, max_length=max_input_length, add_special_tokens=False)
    input_ids = input_enc["input_ids"]

    # Token do próximo token (classe)
    label_id = tok(str(gold_idx), add_special_tokens=False)["input_ids"][0]

    # labels: -100 no prompt, token da classe no final
    labels = [-100] * len(input_ids) + [label_id]
    input_ids_full = input_ids + [label_id]
    attention_mask = [1] * len(input_ids_full)

    return {
        "input_ids": input_ids_full,
        "attention_mask": attention_mask,
        "labels": labels
    }

# ------------------
# Criação do Dataset
# ------------------
train_ds = split_train.map(preprocess, batched=False, remove_columns=split_train.column_names)
eval_ds = split_eval.map(preprocess, batched=False, remove_columns=split_eval.column_names)

Map:   0%|          | 0/33620 [00:00<?, ? examples/s]

Map:   0%|          | 0/4203 [00:00<?, ? examples/s]

In [8]:


# ------------------
# Modelo base com quantização 4-bit
# ------------------
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)

raw_model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)
raw_model = prepare_model_for_kbit_training(raw_model)

print( 'Model loaded in ', raw_model.device)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Model loaded in  cuda:0


In [9]:
for name, module in raw_model.named_modules():
    print(name)



model
model.embed_tokens
model.layers
model.layers.0
model.layers.0.self_attn
model.layers.0.self_attn.q_proj
model.layers.0.self_attn.k_proj
model.layers.0.self_attn.v_proj
model.layers.0.self_attn.o_proj
model.layers.0.self_attn.q_norm
model.layers.0.self_attn.k_norm
model.layers.0.mlp
model.layers.0.mlp.gate_proj
model.layers.0.mlp.up_proj
model.layers.0.mlp.down_proj
model.layers.0.mlp.act_fn
model.layers.0.input_layernorm
model.layers.0.post_attention_layernorm
model.layers.1
model.layers.1.self_attn
model.layers.1.self_attn.q_proj
model.layers.1.self_attn.k_proj
model.layers.1.self_attn.v_proj
model.layers.1.self_attn.o_proj
model.layers.1.self_attn.q_norm
model.layers.1.self_attn.k_norm
model.layers.1.mlp
model.layers.1.mlp.gate_proj
model.layers.1.mlp.up_proj
model.layers.1.mlp.down_proj
model.layers.1.mlp.act_fn
model.layers.1.input_layernorm
model.layers.1.post_attention_layernorm
model.layers.2
model.layers.2.self_attn
model.layers.2.self_attn.q_proj
model.layers.2.self_att

In [10]:
# ------------------
# LoRA
# ------------------

# pegar nomes das últimas camadas
n_last = 4
num_total_layers = len(raw_model.model.layers)
target_modules = []
for i in range(num_total_layers - n_last, num_total_layers):
    for proj in ["q_proj", "k_proj", "v_proj", "o_proj"]:
        target_modules.append(f"model.layers.{i}.self_attn.{proj}")
    for proj in ["up_proj", "down_proj"]:  # se quiser também adaptar o MLP
        target_modules.append(f"model.layers.{i}.mlp.{proj}")
print( target_modules )

['model.layers.36.self_attn.q_proj', 'model.layers.36.self_attn.k_proj', 'model.layers.36.self_attn.v_proj', 'model.layers.36.self_attn.o_proj', 'model.layers.36.mlp.up_proj', 'model.layers.36.mlp.down_proj', 'model.layers.37.self_attn.q_proj', 'model.layers.37.self_attn.k_proj', 'model.layers.37.self_attn.v_proj', 'model.layers.37.self_attn.o_proj', 'model.layers.37.mlp.up_proj', 'model.layers.37.mlp.down_proj', 'model.layers.38.self_attn.q_proj', 'model.layers.38.self_attn.k_proj', 'model.layers.38.self_attn.v_proj', 'model.layers.38.self_attn.o_proj', 'model.layers.38.mlp.up_proj', 'model.layers.38.mlp.down_proj', 'model.layers.39.self_attn.q_proj', 'model.layers.39.self_attn.k_proj', 'model.layers.39.self_attn.v_proj', 'model.layers.39.self_attn.o_proj', 'model.layers.39.mlp.up_proj', 'model.layers.39.mlp.down_proj']


In [11]:

lora_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=target_modules
)
model = get_peft_model(raw_model, lora_config)

In [12]:
import torch

if torch.cuda.is_available():
    print("Dispositivo:", torch.cuda.get_device_name(0))
    print("Suporta bf16?", torch.cuda.is_bf16_supported())
else:
    print("Nenhuma GPU disponível")


Dispositivo: NVIDIA RTX A5000
Suporta bf16? True


In [None]:

def train_all():

    # ------------------
    # Data collator para Causal LM
    # ------------------
    collator = DataCollatorForSeq2Seq(tokenizer=tok, padding=True)

    # ------------------
    # TrainingArguments
    # ------------------
    args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=10,
        per_device_train_batch_size=3,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=10,
        learning_rate=2e-4,
        warmup_ratio=0.03,
        logging_steps=20,
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        bf16=torch.cuda.is_available(),
        optim="paged_adamw_32bit",
        seed=SEED,
        report_to="none",
        dataloader_num_workers=4,
        load_best_model_at_end=True,
        metric_for_best_model="loss",
        greater_is_better=False
    )

    # ------------------
    # Trainer
    # ------------------
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tok,
        data_collator=collator,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
    )

    # ------------------
    # Treino
    # ------------------
    start_time = time.time()
    tracker = EmissionsTracker( output_file='fine_tuning_emissions_trainer.csv', log_level='warning' )
    tracker.start()

    trainer.train()

    emissions: float = tracker.stop()
    print("\n\nTotal de emissões (detalhes em emissions.csv): ",emissions)


    end_time = time.time()
    elapsed_time = end_time - start_time
    print(f"\nTempo total de execução: {elapsed_time:.2f} segundos")


    trainer.save_model(OUTPUT_DIR)
    tok.save_pretrained(OUTPUT_DIR)

    print("Treino finalizado. Modelo salvo em", OUTPUT_DIR)


# train_all()

  trainer = Trainer(
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,1.0013,0.999635
2,0.9805,0.956015
3,0.8889,0.945894
4,0.8066,0.999856
5,0.7584,1.104736


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)




Total de emissões (detalhes em emissions.csv):  1.500121978759484

Tempo total de execução: 198521.63 segundos
Treino finalizado. Modelo salvo em ./qlora-aderencia-classes.finetuning


In [None]:
del model
del raw_model
# del trainer
torch.cuda.empty_cache()

In [8]:
checkpoint_dir= os.path.join(OUTPUT_DIR, "checkpoint-3363")  # ajuste conforme necessário
# ------------------
# Modelo base com quantização 4-bit
# ------------------
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

# Aplicar o LoRA / PEFT checkpoint
model = PeftModel.from_pretrained(
    model,
    checkpoint_dir,
    device_map="auto",           # mantém a mesma distribuição
)

tok = AutoTokenizer.from_pretrained(checkpoint_dir)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [9]:
model.device

device(type='cuda', index=0)

In [10]:
# ------------------
# Predict (next token)
# ------------------
import torch
import torch.nn.functional as F

# tokens de interesse
target_tokens = [tok.encode("0", add_special_tokens=False)[0], tok.encode("1", add_special_tokens=False)[0], tok.encode("2", add_special_tokens=False)[0]]

def predict_class(prompt_text):
    # tokeniza
    tokenized = tok(prompt_text, return_tensors="pt")
    input_ids = tokenized.input_ids.cuda()
    attention_mask = tokenized.attention_mask.cuda()

    # forward no modelo (sem generate)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # logits do último token
        last_token_logits = outputs.logits[0, -1, :]
        # softmax para probabilidades
        probs = F.softmax(last_token_logits, dim=-1)

    # pegar probabilidades apenas para os tokens 0,1,2
    target_probs = {tok.decode([t]): probs[t].item() for t in target_tokens}

    # escolher token mais provável
    pred_token = max(target_probs, key=target_probs.get)

    return pred_token, target_probs


# Exemplo
example = split_test[0]
processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))

    # Cria prompt
example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"],
        keywords=processed_key_words,
        category=example["tema"]
    )
print("Predicted class:", predict_class(example_prompt))

Predicted class: ('2', {'0': 0.1593017578125, '1': 0.29296875, '2': 0.54736328125})


In [11]:
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tqdm import tqdm
# from codecarbon import EmissionsTracker # para calcular emissões de CO2
tracker = EmissionsTracker( output_file='fine_tuning_emissions.csv' )
tracker.start()

y_true = []
y_pred = []

# .select(range(30))

start_time = time.time()

for example in tqdm(split_test, desc="Inferindo"):  # percorre todo o dataset de teste
    # ---------------------
    # Prepara prompt
    processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))
    example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"],
        keywords=processed_key_words,
        category=example["tema"]
    )

    # Predição
    pred = predict_class(example_prompt)
    y_pred.append(["0","1","2"].index(pred[0]))

    # Rótulo real
    gold = str(example["modelo_nivel"]).strip().upper()
    if gold == "MEDIA":
        gold = "MÉDIA"
    assert gold in LABELS
    y_true.append(LABELS.index(gold))

# Marca o tempo final
end_time = time.time()

# Tempo total em segundos
elapsed_time = end_time - start_time
print(f"\nTempo total de execução: {elapsed_time:.2f} segundos")


emissions: float = tracker.stop()
print("\n\nTotal de emissões (detalhes em emissions.csv): ",emissions)

# ---------------------
# Métricas
acc = accuracy_score(y_true, y_pred)
f1_macro = f1_score(y_true, y_pred, average="macro")
f1_weighted = f1_score(y_true, y_pred, average="weighted")

print("Acurácia:", acc)
print("F1-macro:", f1_macro)
print("F1-weighted:", f1_weighted)

# Relatório detalhado (por classe)
print("\nRelatório de classificação:")
print(classification_report(y_true, y_pred, target_names=LABELS))


[codecarbon INFO @ 18:04:46] [setup] RAM Tracking...
[codecarbon INFO @ 18:04:46] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 18:04:47] CPU Model on constant consumption mode: Intel(R) Core(TM) i9-14900KF
[codecarbon INFO @ 18:04:47] [setup] GPU Tracking...
[codecarbon INFO @ 18:04:47] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 18:04:47] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: cpu_load
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 18:04:47] >>> Tracker's metadata:
[codecarbon INFO @ 18:04:47]   Platform system: Linux-6.9.3-76060903-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 18:04:47]   Python version: 3.11.5
[codecarbon INFO @ 18:04:47]   CodeCarbon version: 3.0.4
[codecarbon INFO @ 18:04:47]   Available RAM : 125.634 GB
[codecarbo


Tempo total de execução: 2577.49 segundos


[codecarbon INFO @ 18:47:48] Delta energy consumed for CPU with cpu_load : 0.000042 kWh, power : 12.836077423261543 W
[codecarbon INFO @ 18:47:48] Energy consumed for All CPU : 0.008892 kWh
[codecarbon INFO @ 18:47:48] Energy consumed for all GPUs : 0.163869 kWh. Total GPU Power : 223.67624861519107 W
[codecarbon INFO @ 18:47:48] 0.199061 kWh of electricity used since the beginning.




Total de emissões (detalhes em emissions.csv):  0.019577232647124677
Acurácia: 0.5886271710682845
F1-macro: 0.5021037127021436
F1-weighted: 0.5361640064250016

Relatório de classificação:
              precision    recall  f1-score   support

       BAIXA       0.62      0.88      0.73      2056
       MÉDIA       0.40      0.14      0.20      1284
        ALTA       0.57      0.58      0.57       863

    accuracy                           0.59      4203
   macro avg       0.53      0.53      0.50      4203
weighted avg       0.54      0.59      0.54      4203

