In [2]:
# Sample product review used as the input text for this notebook.
# It deliberately interleaves opinionated statements ("absolutely stunning",
# "I love", "In my opinion") with factual specifications (weight, display
# size, battery capacity, benchmark score).

text = """
The new smartphone model is absolutely stunning and feels like holding the future in your hand. It weighs 175 grams and has a 6.5-inch OLED display. 
I love how smooth the user interface is, making every interaction a delight. The battery capacity is 4,000 mAh, which typically lasts a full day under normal use. In my opinion, the camera produces the most vibrant photos I’ve ever seen on a phone. Official benchmarks show it scores 750,000 points in the standard performance test.
"""

In [3]:
# Block 1: imports and configuration
from pathlib import Path

import nltk
import pandas as pd
from nltk.tokenize import sent_tokenize
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification, pipeline

# Add the per-user NLTK data directory to the search path. It is derived from
# the current user's home directory instead of a hard-coded absolute path, and
# guarded so that re-running this cell does not append duplicate entries.
NLTK_DATA_DIR = str(Path.home() / "nltk_data")
if NLTK_DATA_DIR not in nltk.data.path:
    nltk.data.path.append(NLTK_DATA_DIR)

# Punkt resources needed by sent_tokenize (the download is skipped by NLTK
# when the package is already up to date).
nltk.download('punkt')
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt to /home/dods/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/dods/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
  from .autonotebook import tqdm as notebook_tqdm
  _torch_pytree._register_pytree_node(
2025-05-18 08:06:45.641328: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-18 08:06:45.730216: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-18 08:06:46.141898: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-05-18 08:06:46.14

In [4]:
# Block 2 (fixed): load the checkpoint with TFAutoModelForSequenceClassification
# so that the pipeline runs on the TensorFlow model.

model_name = "cffl/bert-base-styleclassification-subjective-neutral"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# from_pt=True: initialise the TF model from the checkpoint's PyTorch weights.
model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    from_pt=True,
)

# Text-classification pipeline shared by the cells below; top_k=None is
# forwarded so that a score is returned for each label rather than only one.
classifier = pipeline(
    task="text-classification",
    framework="tf",
    tokenizer=tokenizer,
    model=model,
    top_k=None,
)


2025-05-18 08:06:56.049002: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-05-18 08:06:56.049657: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2256] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertForSequenceClassification: ['bert.embeddings.position_ids']
- This IS expected if you are initializing TFBertForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPr

In [5]:
def split_text_to_df(text: str) -> pd.DataFrame:
    """Split ``text`` into English sentences, one per DataFrame row.

    Parameters
    ----------
    text : str
        Raw text to segment with NLTK's ``sent_tokenize``.

    Returns
    -------
    pd.DataFrame
        A frame with a single ``'sentence'`` column. Each sentence has its
        surrounding whitespace removed, and sentences that are empty after
        stripping are dropped.
    """
    # The tokenizer keeps whitespace that surrounds the input (e.g. the newline
    # right after an opening triple quote), so strip every sentence.
    stripped = (s.strip() for s in sent_tokenize(text, language='english'))
    sentences = [s for s in stripped if s]
    return pd.DataFrame({'sentence': sentences})

In [6]:
# Block: detect_bias reports the exact label string returned by the classifier
def detect_bias(df: pd.DataFrame, clf=None) -> pd.DataFrame:
    """Attach the classifier's top prediction to every sentence in ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a ``'sentence'`` column holding one text per row.
        It is not modified.
    clf : callable, optional
        Invoked as ``clf(sentence, top_k=None)`` and expected to return a list
        of ``{'label': ..., 'score': ...}`` dicts. When omitted, the
        notebook-level ``classifier`` pipeline is used.

    Returns
    -------
    pd.DataFrame
        A new frame with the columns of ``df`` plus:

        * ``label`` - the highest-scoring label, exactly as returned.
        * ``bias_score`` - the score of that winning label. This is the
          confidence in ``label`` whichever label won; it is NOT the
          probability that the sentence is subjective.
    """
    if clf is None:
        clf = classifier  # pipeline built earlier in the notebook

    labels = []
    bias_scores = []
    for sent in df['sentence']:
        scores = clf(sent, top_k=None)                  # [{'label':..., 'score':...}, ...]
        best = max(scores, key=lambda x: x['score'])    # keep the most confident class
        labels.append(best['label'])                    # label string kept verbatim
        bias_scores.append(best['score'])               # score of that class

    # assign() builds a new frame, so re-running the cell never stacks changes
    # onto the caller's DataFrame.
    return df.assign(label=labels, bias_score=bias_scores)


In [7]:
# Run the steps end to end on the sample text: one row per sentence first,
# then the classifier's label and score for each row.
df_sent = split_text_to_df(text)
df_result = detect_bias(df_sent)
# Bare last expression so the notebook renders the resulting table.
df_result

Unnamed: 0,sentence,label,bias_score
0,\nThe new smartphone model is absolutely stunn...,SUBJECTIVE,0.990301
1,It weighs 175 grams and has a 6.5-inch OLED di...,NEUTRAL,0.971791
2,"I love how smooth the user interface is, makin...",SUBJECTIVE,0.973334
3,"The battery capacity is 4,000 mAh, which typic...",NEUTRAL,0.861991
4,"In my opinion, the camera produces the most vi...",SUBJECTIVE,0.538275
5,"Official benchmarks show it scores 750,000 poi...",NEUTRAL,0.801163


'It weighs 175 grams and has a 6.5-inch OLED display.'