In [1]:
!pip install codecarbon # reiniciar apos instalar este pacote se você estiver em um notebook



In [2]:
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
)

import pickle
import numpy as np

In [3]:
# ------------------
# Configurações
# ------------------
# MODEL_NAME = "meta-llama/Meta-Llama-3-8B"
MODEL_NAME = 'Qwen/Qwen3-14B-Base'
SEED = 42
LABELS = ["BAIXA", "MÉDIA", "ALTA"]

torch.manual_seed(SEED)
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# torch.utils.checkpoint.use_reentrant = False

# ------------------
# Prompt
# ------------------
PROMPT_TMPL = """You are a thematic relevance evaluator.
Classify how related an academic work (title and abstract) is to a strategic theme.

TITLE: {title}
KEYWORDS: {keywords}
ABSTRACT: {abstract}

Answer with a number 0, 1, or 2 for RELEVANCE LEVEL (2-HIGH, 1-MEDIUM, 0-LOW) to the strategic theme: "{category}".

GENERAL CRITERIA:
- HIGH: the topic is central to the research; strong semantic coherence.
- MEDIUM: partial/indirect or secondary relation to the topic.
- LOW: weak or tangential relation; the topic is not the main focus of the work.

ANSWER: """


In [4]:
with open("my_data.pickle", "rb") as file:
    split_train = pickle.load(file)
    split_eval = pickle.load(file)
    split_test = pickle.load(file)

In [5]:
print( len(split_train) , len(split_eval), len(split_test) )

33620 4203 4203


In [6]:


# ------------------
# Tokenizer
# ------------------
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token


In [7]:
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
memory_before = torch.cuda.memory_allocated()
print(f"Memória usada antes de carregar modelo: {memory_before/1024**2:.2f} MB")

Memória usada antes de carregar modelo: 0.00 MB


In [8]:

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    # quantization_config=bnb_config
)
# model = prepare_model_for_kbit_training(raw_model)

print( 'Model loaded in ', model.device)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Model loaded in  cuda:0


In [9]:
# Após um batch ou epoch
memory_after = torch.cuda.memory_allocated()
print(f"Memória atualmente alocada: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
print(f"Máxima memória usada até agora: {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")
print(f"Memória usada pelo modelo: {(memory_after-memory_before)/1024**2:.2f} MB")

Memória atualmente alocada: 18087.98 MB
Máxima memória usada até agora: 18087.98 MB
Memória usada pelo modelo: 18087.98 MB


In [10]:
import torch

if torch.cuda.is_available():
    print("Dispositivo:", torch.cuda.get_device_name(0))
    print("Suporta bf16?", torch.cuda.is_bf16_supported())
else:
    print("Nenhuma GPU disponível")


Dispositivo: NVIDIA RTX A5000
Suporta bf16? True


In [11]:
from collections import Counter

# pega todos os rótulos
labels = split_test["modelo_nivel"]

# conta quantos de cada
contagem = Counter(labels)

# imprime
for k, v in contagem.items():
    print(f"{k}: {v}")

class_counts = [contagem[x.replace('É','E')] for x in LABELS]
print("Contagem de rótulos:", class_counts)

MEDIA: 1284
BAIXA: 2056
ALTA: 863
Contagem de rótulos: [2056, 1284, 863]


In [12]:
model.device

device(type='cuda', index=0)

In [13]:
# ------------------
# Predict (next token)
# ------------------
import torch
import torch.nn.functional as F

# tokens de interesse
target_tokens = [tok.encode("0", add_special_tokens=False)[0], tok.encode("1", add_special_tokens=False)[0], tok.encode("2", add_special_tokens=False)[0]]

def predict_class(prompt_text):
    # tokeniza
    tokenized = tok(prompt_text, return_tensors="pt")
    input_ids = tokenized.input_ids.cuda()
    attention_mask = tokenized.attention_mask.cuda()

    # forward no modelo (sem generate)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # logits do último token
        last_token_logits = outputs.logits[0, -1, :]
        # softmax para probabilidades
        probs = F.softmax(last_token_logits, dim=-1)

    # pegar probabilidades apenas para os tokens 0,1,2
    target_probs = {tok.decode([t]): probs[t].item() for t in target_tokens}

    # escolher token mais provável
    pred_token = max(target_probs, key=target_probs.get)

    return pred_token, target_probs


# Exemplo
example = split_test[0]
processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))

    # Cria prompt
example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"],
        keywords=processed_key_words,
        category=example["tema"]
    )
print("Predicted class:", predict_class(example_prompt))

Predicted class: ('1', {'0': 0.054772380739450455, '1': 0.680815577507019, '2': 0.22283849120140076})


In [14]:
np.mean([len(x) for x in split_test['descricao_abstract']])
len([len(x) for x in split_test['descricao_abstract'] if len(x) > 5000])

23

In [15]:
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tqdm import tqdm
from codecarbon import EmissionsTracker # para calcular emissões de CO2
import time
tracker = EmissionsTracker( output_file='sem_fine_tuning_emissions.csv' )
tracker.start()

y_true = []
y_pred = []

start_time = time.time()

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
memory_before = torch.cuda.memory_allocated()
print(f"Memória usada antes: {torch.cuda.memory_allocated()/1024**2:.2f} MB")


max_description=5000

# .select(range(30))

for example in tqdm(split_test.select(range(100)), desc="Inferindo"):  # percorre todo o dataset de teste
    # ---------------------
    # Prepara prompt
    processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))
    example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"][:max_description],
        keywords=processed_key_words,
        category=example["tema"]
    )

    # Predição
    pred = predict_class(example_prompt)
    y_pred.append(["0","1","2"].index(pred[0]))

    # Rótulo real
    gold = str(example["modelo_nivel"]).strip().upper()
    if gold == "MEDIA":
        gold = "MÉDIA"
    assert gold in LABELS
    y_true.append(LABELS.index(gold))

memory_after = torch.cuda.memory_allocated()
print(f"Memória atualmente alocada: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
print(f"Máxima memória usada até agora: {torch.cuda.max_memory_allocated()/1024**2:.2f} MB")
print(f"Memória usada no treinamento: {(memory_after-memory_before)/1024**2:.2f} MB")

# Marca o tempo final
end_time = time.time()

# Tempo total em segundos
elapsed_time = end_time - start_time
print(f"\nTempo total de execução: {elapsed_time:.2f} segundos")


emissions: float = tracker.stop()
print("\n\nTotal de emissões (detalhes em emissions.csv): ",emissions)

# ---------------------
# Métricas
acc = accuracy_score(y_true, y_pred)
f1_macro = f1_score(y_true, y_pred, average="macro")
f1_weighted = f1_score(y_true, y_pred, average="weighted")

print("Acurácia:", acc)
print("F1-macro:", f1_macro)
print("F1-weighted:", f1_weighted)

# Relatório detalhado (por classe)
print("\nRelatório de classificação:")
print(classification_report(y_true, y_pred, target_names=LABELS))


[codecarbon INFO @ 14:53:56] [setup] RAM Tracking...
[codecarbon INFO @ 14:53:56] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 14:53:57] CPU Model on constant consumption mode: Intel(R) Core(TM) i9-14900KF
[codecarbon INFO @ 14:53:57] [setup] GPU Tracking...
[codecarbon INFO @ 14:53:57] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 14:53:57] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: cpu_load
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 14:53:57] >>> Tracker's metadata:
[codecarbon INFO @ 14:53:57]   Platform system: Linux-6.14.0-29-generic-x86_64-with-glibc2.39
[codecarbon INFO @ 14:53:57]   Python version: 3.11.5
[codecarbon INFO @ 14:53:57]   CodeCarbon version: 3.0.4
[codecarbon INFO @ 14:53:57]   Available RAM : 125.634 GB
[codecarbon INF

Memória usada antes: 18096.11 MB


Inferindo:   2%|▏         | 2/100 [00:12<09:45,  5.97s/it][codecarbon INFO @ 14:54:16] Energy consumed for RAM : 0.000164 kWh. RAM Power : 38.0 W
[codecarbon INFO @ 14:54:16] Delta energy consumed for CPU with cpu_load : 0.000055 kWh, power : 12.812110952960007 W
[codecarbon INFO @ 14:54:16] Energy consumed for All CPU : 0.000055 kWh
[codecarbon INFO @ 14:54:16] Energy consumed for all GPUs : 0.000669 kWh. Total GPU Power : 150.43673090483685 W
[codecarbon INFO @ 14:54:16] 0.000888 kWh of electricity used since the beginning.
Inferindo:   4%|▍         | 4/100 [00:24<09:55,  6.20s/it][codecarbon INFO @ 14:54:31] Energy consumed for RAM : 0.000317 kWh. RAM Power : 38.0 W
[codecarbon INFO @ 14:54:31] Delta energy consumed for CPU with cpu_load : 0.000052 kWh, power : 12.817127676800002 W
[codecarbon INFO @ 14:54:31] Energy consumed for All CPU : 0.000107 kWh
[codecarbon INFO @ 14:54:31] Energy consumed for all GPUs : 0.001314 kWh. Total GPU Power : 154.98197573858445 W
[codecarbon INFO @ 

Memória atualmente alocada: 18096.11 MB
Máxima memória usada até agora: 22534.59 MB
Memória usada no treinamento: 0.00 MB

Tempo total de execução: 622.28 segundos


[codecarbon INFO @ 15:04:23] Delta energy consumed for CPU with cpu_load : 0.000024 kWh, power : 12.8068105808 W
[codecarbon INFO @ 15:04:23] Energy consumed for All CPU : 0.002143 kWh
[codecarbon INFO @ 15:04:23] Energy consumed for all GPUs : 0.028143 kWh. Total GPU Power : 157.5355158694394 W
[codecarbon INFO @ 15:04:23] 0.036641 kWh of electricity used since the beginning.




Total de emissões (detalhes em emissions.csv):  0.003603576517339924
Acurácia: 0.38
F1-macro: 0.30620155038759694
F1-weighted: 0.2819767441860465

Relatório de classificação:
              precision    recall  f1-score   support

       BAIXA       0.00      0.00      0.00        41
       MÉDIA       0.40      0.67      0.50        43
        ALTA       0.33      0.56      0.42        16

    accuracy                           0.38       100
   macro avg       0.24      0.41      0.31       100
weighted avg       0.22      0.38      0.28       100



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
