# Subtask 3: Manifestation Identification
Multi-label classification to classify how polarization is expressed, with multiple possible labels including Vilification, Extreme Language, Stereotype, Invalidation, Lack of Empathy, and Dehumanization.

## Imports

In [1]:
import pandas as pd

from sklearn.metrics import recall_score, precision_score, f1_score
import numpy as np

import torch

from sklearn.metrics import f1_score

from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset

In [2]:
import random
import numpy as np
import torch

SEED = 42

# Python
random.seed(SEED)

# Numpy
np.random.seed(SEED)

# PyTorch (CPU)
torch.manual_seed(SEED)

# PyTorch (GPU)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # if you have multiple GPUs

# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
import wandb

# Disable wandb logging for this script
wandb.init(mode="disabled")

## Data Import

In [4]:
# Load the training and validation data for subtask 1

train = pd.read_csv('train/eng.csv')
train_plus = pd.read_csv('eng_BT_full.csv')

N = int(len(train) * 0.0)   # last 20%  (you can change this)


train.head()

Unnamed: 0,id,text,stereotype,vilification,dehumanization,extreme_language,lack_of_empathy,invalidation
0,eng_973938b90b0ff5d87d35a582f83f5c89,is defending imperialism in the dnd chat,0,0,0,0,0,0
1,eng_07dfd4600426caca6e2c5883fcbea9ea,Still playing with this. I am now following Ra...,0,0,0,0,0,0
2,eng_f14519ff2302b6cd47712073f13bc461,.senate.gov Theres 3 groups out there Republic...,0,0,0,0,0,0
3,eng_e48b7e7542faafa544ac57b64bc80daf,"""ABC MD, David Anderson, said the additional f...",0,0,0,0,0,0
4,eng_7c581fb77bce8033aeba3d6dbd6273eb,"""bad people"" I have some conservative values s...",0,0,0,0,0,0


In [5]:
labels = train.columns[2:]

In [6]:
labels

Index(['stereotype', 'vilification', 'dehumanization', 'extreme_language',
       'lack_of_empathy', 'invalidation'],
      dtype='object')

# Dataset
-  Create a pytorch class for handling data
-  Wrapping the raw texts and labels into a format that Huggingface’s Trainer can use for training and evaluation

In [7]:
# Fix the dataset class by inheriting from torch.utils.data.Dataset
class PolarizationDataset(torch.utils.data.Dataset):
  def __init__(self,texts,labels,tokenizer,max_length =128):
    self.texts=texts
    self.labels=labels
    self.tokenizer= tokenizer
    self.max_length = max_length # Store max_length

  def __len__(self):
    return len(self.texts)

  def __getitem__(self, idx):
        text = self.texts[idx]
        label = self.labels[idx]
        encoding = self.tokenizer(text, truncation=True, padding=False, max_length=self.max_length, return_tensors='pt')

        # Ensure consistent tensor conversion for all items
        item = {key: encoding[key].squeeze() for key in encoding.keys()}
        # CHANGE THIS LINE: Use torch.float instead of torch.long for multi-label classification
        item['labels'] = torch.tensor(label, dtype=torch.float)
        return item

Now, we'll tokenize the text data and create the datasets using `bert-base-uncased` as the tokenizer.

In [8]:
# Load the tokenizer
MODEL_NAME = 'distilroberta-base'

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

## Base-line from here

In [9]:
# Load the tokenizer
MODEL_NAME = 'distilroberta-base'
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

## Data processing

In [10]:
# import matplotlib.pyplot as plt

# for column in train[labels]:
#     counts = train[column].value_counts()

#     plt.figure()
#     plt.pie(
#         counts,
#         labels=counts.index.astype(str),
#         autopct="%1.1f%%"
#     )
#     plt.title(f"Distribution of values in '{column}'")
#     plt.show()


In [11]:
# Add the labels to the translated
train_plus = train_plus.drop(columns=["text"])
train_plus[['stereotype', 'vilification', 'dehumanization', 'extreme_language',
       'lack_of_empathy', 'invalidation']] = train[['stereotype', 'vilification', 'dehumanization', 'extreme_language',
       'lack_of_empathy', 'invalidation']].values

train_plus.head()

Unnamed: 0,id,political,racial/ethnic,religious,gender/sexual,other,back_translated,stereotype,vilification,dehumanization,extreme_language,lack_of_empathy,invalidation
0,eng_973938b90b0ff5d87d35a582f83f5c89,0,0,0,0,0,defends imperialism in the dnd cat,0,0,0,0,0,0
1,eng_07dfd4600426caca6e2c5883fcbea9ea,0,0,0,0,0,I'm Rachel Maddie from msnbc.,0,0,0,0,0,0
2,eng_f14519ff2302b6cd47712073f13bc461,0,0,0,0,0,".senate.gov Theres 3 groups there Republicans,...",0,0,0,0,0,0
3,eng_e48b7e7542faafa544ac57b64bc80daf,0,0,0,0,0,"""Continuing investment in ABC is essential to ...",0,0,0,0,0,0
4,eng_7c581fb77bce8033aeba3d6dbd6273eb,0,0,0,0,0,I have conservative values and that makes me o...,0,0,0,0,0,0


In [12]:
train_plus = train_plus.drop(columns=['id', 'political', 'religious', 'gender/sexual', 'other'])
train_plus.head()

Unnamed: 0,racial/ethnic,back_translated,stereotype,vilification,dehumanization,extreme_language,lack_of_empathy,invalidation
0,0,defends imperialism in the dnd cat,0,0,0,0,0,0
1,0,I'm Rachel Maddie from msnbc.,0,0,0,0,0,0
2,0,".senate.gov Theres 3 groups there Republicans,...",0,0,0,0,0,0
3,0,"""Continuing investment in ABC is essential to ...",0,0,0,0,0,0
4,0,I have conservative values and that makes me o...,0,0,0,0,0,0


In [13]:
train.shape

(3222, 8)

In [14]:
val = train.tail(300).reset_index(drop=True)
# train = train.iloc[:-N].reset_index(drop=True)

# Sample some from the translation
back_translated = train_plus[['back_translated', 'stereotype', 'vilification', 'dehumanization', 'extreme_language',
       'lack_of_empathy', 'invalidation']]

sample_size = 1000
back_translated_sampled = back_translated.sample(n=sample_size)
back_translated_sampled = back_translated_sampled.rename(columns={"back_translated": "text"})

train_final = pd.concat([train, back_translated_sampled], axis=0).reset_index(drop=True)

### The shape of the definitive training dataset

In [15]:
train.shape

(3222, 8)

In [16]:
train_final.shape

(4222, 8)

In [17]:
# Create train and Test dataset for multilabel
train_dataset = PolarizationDataset(
    train_final['text'].tolist(),
    train_final[labels].values.tolist(),
    tokenizer
)
val_set = PolarizationDataset(
    val['text'].tolist(),
    val[labels].values.tolist(),
    tokenizer
)

In [18]:
# Load the model
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=6,
    problem_type="multi_label_classification",
    # local_files_only=True, # Only if the model is downloaded locally
) # use 6 labels

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at distilroberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Load into the GPU for speed
model.to('cuda')

RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-5): 6 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
           

In [20]:
# Define training arguments
training_args = TrainingArguments(
    output_dir=f"./",
    num_train_epochs=4,
    learning_rate=2e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="no",
    logging_steps=100,
    disable_tqdm=False
)


def compute_metrics_multilabel(p):
    # Sigmoid the predictions to get probabilities
    probs = torch.sigmoid(torch.from_numpy(p.predictions)).numpy()  # shape: (batch_size, num_labels)

    # Define custom thresholds for each label
    # thresholds = [0.357, 0.087, 0.035, 0.022, 0.039]  # adjust per label
    thresholds = [0.3, 0.3, 0.25, 0.25, 0.25, 0.25]

    # Convert probabilities to predicted labels using thresholds
    preds = np.zeros_like(probs, dtype=int)
    for i, thr in enumerate(thresholds):
        preds[:, i] = (probs[:, i] > thr).astype(int)

    # Compute macro F1 score
    return {'f1_macro': f1_score(p.label_ids, preds, average='macro')}

In [21]:
# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_set,
    compute_metrics=compute_metrics_multilabel,  # Use the new metrics function
    data_collator=DataCollatorWithPadding(tokenizer)
)

# Train the model
trainer.train()

# Evaluate the model on the validation set
eval_results = trainer.evaluate()
print(f"Macro F1 score on validation set for Subtask 3: {eval_results['eval_f1_macro']}")

Epoch,Training Loss,Validation Loss,F1 Macro
1,0.4675,0.31666,0.237897
2,0.3678,0.348443,0.246378
3,0.3628,0.366359,0.266551
4,0.3183,0.310997,0.283614


Macro F1 score on validation set for Subtask 3: 0.28361395699496184


In [22]:
eval_results

{'eval_loss': 0.3109973669052124,
 'eval_f1_macro': 0.28361395699496184,
 'eval_runtime': 0.8414,
 'eval_samples_per_second': 356.546,
 'eval_steps_per_second': 45.162,
 'epoch': 4.0}

## Process the output dataset into a .csv

In [23]:
# Load the dev data (only has 'id' and 'text' columns)
dev = pd.read_csv('dev/eng.csv')

# Create a dataset for dev (without labels)
class PolarizationDatasetTest(torch.utils.data.Dataset):
    def __init__(self, texts, tokenizer, max_length=128):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        encoding = self.tokenizer(text, truncation=True, padding=False,
                                   max_length=self.max_length, return_tensors='pt')
        item = {key: encoding[key].squeeze() for key in encoding.keys()}
        return item

# Create the test dataset
test_dataset = PolarizationDatasetTest(dev['text'].tolist(), tokenizer)

# Get predictions
predictions = trainer.predict(test_dataset)

# Apply sigmoid and convert to numpy
probs = (torch.from_numpy(predictions.predictions)).numpy()

thresholds = [0.3, 0.3, 0.25, 0.15, 0.15, 0.15]

# Create prediction array (numpy)
preds = np.zeros_like(probs, dtype=int)

# Apply per-label thresholds
for i, thr in enumerate(thresholds):
    preds[:, i] = (probs[:, i] > thr).astype(int)

# Create submission dataframe
submission = pd.DataFrame({
    'id': dev['id'],
    'stereotype': preds[:, 0],
    'vilification': preds[:, 1],
    'dehumanization': preds[:, 2],
    'extreme_language': preds[:, 3],
    'lack_of_empathy': preds[:, 4],
    'invalidation': preds[:, 5],
})


# Save to CSV
submission.to_csv('pred_eng.csv', index=False)
print("Predictions saved to predictions.csv")
print(submission.head())

Predictions saved to predictions.csv
                                     id  stereotype  vilification  \
0  eng_f66ca14d60851371f9720aaf4ccd9b58           0             0   
1  eng_3a489aa7fed9726aa8d3d4fe74c57efb           0             0   
2  eng_95770ff547ea5e48b0be00f385986483           0             0   
3  eng_2048ae6f9aa261c48e6d777bcc5b38bf           0             0   
4  eng_07781aa88e61e7c0a996abd1e5ea3a20           0             0   

   dehumanization  extreme_language  lack_of_empathy  invalidation  
0               0                 0                0             0  
1               0                 0                0             0  
2               0                 0                0             0  
3               0                 0                0             0  
4               0                 0                0             0  


In [24]:
labels

Index(['stereotype', 'vilification', 'dehumanization', 'extreme_language',
       'lack_of_empathy', 'invalidation'],
      dtype='object')