In [None]:
!pip install codecarbon # reiniciar apos instalar este pacote se você estiver em um notebook

In [1]:
import os
import pandas as pd
import torch
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM,
)

import pickle
import numpy as np

In [2]:
# ------------------
# Configurações
# ------------------
# MODEL_NAME = "meta-llama/Meta-Llama-3-8B"
MODEL_NAME = 'Qwen/Qwen3-14B-Base'
SEED = 42
LABELS = ["BAIXA", "MÉDIA", "ALTA"]

torch.manual_seed(SEED)
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# torch.utils.checkpoint.use_reentrant = False

# ------------------
# Prompt
# ------------------
PROMPT_TMPL = """You are a thematic relevance evaluator.
Classify how related an academic work (title and abstract) is to a strategic theme.

TITLE: {title}
KEYWORDS: {keywords}
ABSTRACT: {abstract}

Answer with a number 0, 1, or 2 for RELEVANCE LEVEL (2-HIGH, 1-MEDIUM, 0-LOW) to the strategic theme: "{category}".

GENERAL CRITERIA:
- HIGH: the topic is central to the research; strong semantic coherence.
- MEDIUM: partial/indirect or secondary relation to the topic.
- LOW: weak or tangential relation; the topic is not the main focus of the work.

ANSWER: """


In [3]:
with open("my_data.pickle", "rb") as file:
    split_train = pickle.load(file)
    split_eval = pickle.load(file)
    split_test = pickle.load(file)

In [None]:
print( len(split_train) , len(split_eval), len(split_test) )

In [4]:


# ------------------
# Tokenizer
# ------------------
tok = AutoTokenizer.from_pretrained(MODEL_NAME)
if tok.pad_token is None:
    tok.pad_token = tok.eos_token


In [5]:

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    trust_remote_code=True,
    # quantization_config=bnb_config
)
# model = prepare_model_for_kbit_training(raw_model)

print( 'Model loaded in ', model.device)

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.


Model loaded in  cuda:0


In [6]:
import torch

if torch.cuda.is_available():
    print("Dispositivo:", torch.cuda.get_device_name(0))
    print("Suporta bf16?", torch.cuda.is_bf16_supported())
else:
    print("Nenhuma GPU disponível")


Dispositivo: NVIDIA RTX A5000
Suporta bf16? True


In [7]:
from collections import Counter

# pega todos os rótulos
labels = split_test["modelo_nivel"]

# conta quantos de cada
contagem = Counter(labels)

# imprime
for k, v in contagem.items():
    print(f"{k}: {v}")

class_counts = [contagem[x.replace('É','E')] for x in LABELS]
print("Contagem de rótulos:", class_counts)

MEDIA: 1284
BAIXA: 2056
ALTA: 863
Contagem de rótulos: [2056, 1284, 863]


In [8]:
model.device

device(type='cuda', index=0)

In [9]:
# ------------------
# Predict (next token)
# ------------------
import torch
import torch.nn.functional as F

# tokens de interesse
target_tokens = [tok.encode("0", add_special_tokens=False)[0], tok.encode("1", add_special_tokens=False)[0], tok.encode("2", add_special_tokens=False)[0]]

def predict_class(prompt_text):
    # tokeniza
    tokenized = tok(prompt_text, return_tensors="pt")
    input_ids = tokenized.input_ids.cuda()
    attention_mask = tokenized.attention_mask.cuda()

    # forward no modelo (sem generate)
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        # logits do último token
        last_token_logits = outputs.logits[0, -1, :]
        # softmax para probabilidades
        probs = F.softmax(last_token_logits, dim=-1)

    # pegar probabilidades apenas para os tokens 0,1,2
    target_probs = {tok.decode([t]): probs[t].item() for t in target_tokens}

    # escolher token mais provável
    pred_token = max(target_probs, key=target_probs.get)

    return pred_token, target_probs


# Exemplo
example = split_test[0]
processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))

    # Cria prompt
example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"],
        keywords=processed_key_words,
        category=example["tema"]
    )
print("Predicted class:", predict_class(example_prompt))

Predicted class: ('1', {'0': 0.054772380739450455, '1': 0.680815577507019, '2': 0.22283849120140076})


In [10]:
np.mean([len(x) for x in split_test['descricao_abstract']])
len([len(x) for x in split_test['descricao_abstract'] if len(x) > 5000])

23

In [11]:
from sklearn.metrics import accuracy_score, f1_score, classification_report
from tqdm import tqdm
from codecarbon import EmissionsTracker # para calcular emissões de CO2
import time
tracker = EmissionsTracker( output_file='sem_fine_tuning_emissions.csv' )
tracker.start()

y_true = []
y_pred = []

start_time = time.time()

max_description=5000

# .select(range(30))

for example in tqdm(split_test, desc="Inferindo"):  # percorre todo o dataset de teste
    # ---------------------
    # Prepara prompt
    processed_key_words = '\n- '.join(example["descricao_keyword"].split(';'))
    example_prompt = PROMPT_TMPL.format(
        title=example["nome_producao"],
        abstract=example["descricao_abstract"][:max_description],
        keywords=processed_key_words,
        category=example["tema"]
    )

    # Predição
    pred = predict_class(example_prompt)
    y_pred.append(["0","1","2"].index(pred[0]))

    # Rótulo real
    gold = str(example["modelo_nivel"]).strip().upper()
    if gold == "MEDIA":
        gold = "MÉDIA"
    assert gold in LABELS
    y_true.append(LABELS.index(gold))

# Marca o tempo final
end_time = time.time()

# Tempo total em segundos
elapsed_time = end_time - start_time
print(f"\nTempo total de execução: {elapsed_time:.2f} segundos")


emissions: float = tracker.stop()
print("\n\nTotal de emissões (detalhes em emissions.csv): ",emissions)

# ---------------------
# Métricas
acc = accuracy_score(y_true, y_pred)
f1_macro = f1_score(y_true, y_pred, average="macro")
f1_weighted = f1_score(y_true, y_pred, average="weighted")

print("Acurácia:", acc)
print("F1-macro:", f1_macro)
print("F1-weighted:", f1_weighted)

# Relatório detalhado (por classe)
print("\nRelatório de classificação:")
print(classification_report(y_true, y_pred, target_names=LABELS))


[codecarbon INFO @ 14:19:19] [setup] RAM Tracking...
[codecarbon INFO @ 14:19:19] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at /sys/class/powercap/intel-rapl/subsystem to measure CPU

[codecarbon INFO @ 14:19:20] CPU Model on constant consumption mode: Intel(R) Core(TM) i9-14900KF
[codecarbon INFO @ 14:19:20] [setup] GPU Tracking...
[codecarbon INFO @ 14:19:20] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 14:19:20] The below tracking methods have been set up:
                RAM Tracking Method: RAM power estimation model
                CPU Tracking Method: cpu_load
                GPU Tracking Method: pynvml
            
[codecarbon INFO @ 14:19:20] >>> Tracker's metadata:
[codecarbon INFO @ 14:19:20]   Platform system: Linux-6.9.3-76060903-generic-x86_64-with-glibc2.35
[codecarbon INFO @ 14:19:20]   Python version: 3.11.5
[codecarbon INFO @ 14:19:20]   CodeCarbon version: 3.0.4
[codecarbon INFO @ 14:19:20]   Available RAM : 125.634 GB
[codecarbo


Tempo total de execução: 25839.47 segundos


[codecarbon INFO @ 21:30:03] Delta energy consumed for CPU with cpu_load : 0.000029 kWh, power : 12.841690315519998 W
[codecarbon INFO @ 21:30:03] Energy consumed for All CPU : 0.089078 kWh
[codecarbon INFO @ 21:30:03] Energy consumed for all GPUs : 1.169009 kWh. Total GPU Power : 151.70730116699372 W
[codecarbon INFO @ 21:30:03] 1.521649 kWh of electricity used since the beginning.




Total de emissões (detalhes em emissions.csv):  0.14965111853738577
Acurácia: 0.3497501784439686
F1-macro: 0.32646197652542824
F1-weighted: 0.25467004429838097

Relatório de classificação:
              precision    recall  f1-score   support

       BAIXA       0.98      0.02      0.04      2056
       MÉDIA       0.30      0.68      0.42      1284
        ALTA       0.44      0.64      0.52       863

    accuracy                           0.35      4203
   macro avg       0.57      0.45      0.33      4203
weighted avg       0.66      0.35      0.25      4203

