In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.auto import tqdm

import torch
import torch.nn as nn
from transformers import AutoModel
from logs import log

In [None]:
import wandb

# Run wandb in "disabled" mode so this notebook does not log to it.
wandb.init(mode="disabled")

# CONFIG
# Sizes of the two multi-label output heads used by the model and the metrics.
NUM_TYPES, NUM_MANIFESTATIONS = 5, 6

# Data / run selection.
datasets_merge = False
lang = "eng"
trial_id = "000002"

# Candidate encoder checkpoints; the third entry is the one used for this run.
model_names = [
    "bert-base-uncased",
    "UBC-NLP/MARBERTv2",
    "microsoft/deberta-v3-base",
]
model_name = model_names[2]

In [None]:
# Read the training CSV of each of the three subtasks for the configured language.
train_1, train_2, train_3 = (
    pd.read_csv(f"./dev_phase/subtask{task_no}/train/{lang}.csv")
    for task_no in (1, 2, 3)
)

# Dev CSV of subtask 1 for the same language.
dev_df = pd.read_csv(f"./dev_phase/subtask1/dev/{lang}.csv")

In [None]:
import pandas as pd

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np

import torch

from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset
from tqdm.auto import tqdm

In [None]:
class PolarizationDataset(torch.utils.data.Dataset):
    """Map-style dataset that tokenizes one text per item and attaches its labels.

    Args:
        texts: Indexable collection of input strings.
        labels: Indexable collection of per-sample labels (same length as
            ``texts``); each entry is converted to a float tensor.
        tokenizer: Callable invoked as
            ``tokenizer(text, truncation=True, padding=False, max_length=...,
            return_tensors='pt')`` that returns a mapping of tensors with a
            leading batch axis of size 1.
        max_length: Truncation length passed to the tokenizer.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        """Return the number of samples."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized fields of sample ``idx`` plus a float ``labels`` tensor."""
        text = self.texts[idx]
        label = self.labels[idx]
        # No padding here: items keep their own length and are padded per batch
        # by the data collator.
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding=False,
            max_length=self.max_length,
            return_tensors='pt',
        )

        # Drop ONLY the leading batch axis. A bare ``squeeze()`` would also
        # collapse a sequence of length 1 into a 0-d tensor, which cannot be
        # padded/batched with the other samples.
        item = {key: value.squeeze(0) for key, value in encoding.items()}
        item['labels'] = torch.tensor(label, dtype=torch.float)
        return item

In [None]:
from sklearn.model_selection import train_test_split

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)


def get_label_columns(df):
    """Return every column of ``df`` except 'id' and 'text', in frame order."""
    return [col for col in df.columns if col not in ('id', 'text')]


def make_split_datasets(frame, label_columns, train_idx, val_idx):
    """Build ``(train, val)`` PolarizationDataset objects from positional row indices.

    A frame without label columns gets an empty label list per row.
    """
    texts = frame['text'].tolist()
    if label_columns:
        labels = frame[label_columns].values.tolist()
    else:
        labels = [[] for _ in texts]
    return (
        PolarizationDataset([texts[i] for i in train_idx], [labels[i] for i in train_idx], tokenizer),
        PolarizationDataset([texts[i] for i in val_idx], [labels[i] for i in val_idx], tokenizer),
    )


# Split positional indices once and reuse them so every dataset shares the same split.
n_samples = len(train_1)
indices = np.arange(n_samples)
train_indices, val_indices = train_test_split(
    indices,
    test_size=0.2,
    random_state=42
)

# The Trainer cells further down use `train_dataset` / `val_dataset`
# unconditionally, with all three subtasks' labels in a single vector, so the
# merged datasets are always built (previously they only existed when
# `datasets_merge` was True, leaving those names undefined otherwise).
merged = train_1.merge(train_2, on=['id', 'text'], how='outer', suffixes=('_1', '_2'))
merged = merged.merge(train_3, on=['id', 'text'], how='outer', suffixes=('', '_3'))
merged_label_columns = get_label_columns(merged)

expected_label_width = 1 + NUM_TYPES + NUM_MANIFESTATIONS
if len(merged_label_columns) != expected_label_width:
    raise ValueError(
        f"Merged frame has {len(merged_label_columns)} label columns, "
        f"expected {expected_label_width} (1 + NUM_TYPES + NUM_MANIFESTATIONS)."
    )

# An outer merge adds rows / NaN labels when the subtask files do not contain
# exactly the same (id, text) pairs; NaN targets would turn the BCE loss into NaN.
if len(merged) != n_samples or merged[merged_label_columns].isna().any().any():
    raise ValueError(
        "Subtask training files are not aligned on ('id', 'text'): "
        "the merged frame has extra rows or missing labels."
    )

train_dataset, val_dataset = make_split_datasets(
    merged, merged_label_columns, train_indices, val_indices
)

# Per-task datasets (one entry per subtask) are only built when merging is disabled.
train_datasets = []
val_datasets = []
if not datasets_merge:
    for train in (train_1, train_2, train_3):
        task_train, task_val = make_split_datasets(
            train, get_label_columns(train), train_indices, val_indices
        )
        train_datasets.append(task_train)
        val_datasets.append(task_val)



In [7]:
class GatedMTLModel(nn.Module):
    """Shared-encoder multi-task classifier whose first head gates the other two.

    The first-token hidden state of the encoder feeds a single-logit head
    (``head1``). The sigmoid of that logit scales the same hidden state before it
    is passed to the ``num_types``-wide head (``head2``) and the
    ``num_manifestations``-wide head (``head3``).

    Args:
        model_name: Checkpoint name/path given to ``AutoModel.from_pretrained``.
        num_types: Output width of ``head2``.
        num_manifestations: Output width of ``head3``.
    """

    def __init__(self, model_name, num_types, num_manifestations):
        super().__init__()
        self.encoder = AutoModel.from_pretrained(model_name)
        hidden_size = self.encoder.config.hidden_size

        self.num_types = num_types
        self.num_manifestations = num_manifestations

        self.head1 = nn.Linear(hidden_size, 1)
        self.head2 = nn.Linear(hidden_size, num_types)
        self.head3 = nn.Linear(hidden_size, num_manifestations)

    def forward(self, input_ids, attention_mask=None, token_type_ids=None, labels=None):
        """Run the encoder and the three heads.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Optional attention mask passed to the encoder.
            token_type_ids: Optional segment ids; forwarded only when given.
            labels: Optional targets laid out along the last axis as
                ``[head1 (1) | head2 (num_types) | head3 (num_manifestations)]``.

        Returns:
            dict with ``loss`` (mean of the three BCE-with-logits losses, or
            ``None`` without labels), ``logits`` (the three heads concatenated
            in the label layout) and the per-head logits.

        Raises:
            ValueError: If ``labels`` does not have the expected last-axis width.
        """
        encoder_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}
        # Some encoders do not declare a `token_type_ids` parameter, so only
        # forward it when the caller actually supplied one.
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.encoder(**encoder_inputs)

        # Hidden state of the first token of every sequence.
        H = outputs.last_hidden_state[:, 0, :]

        logits1 = self.head1(H)
        # Scale the representation by the head-1 probability before heads 2 and 3.
        gate = torch.sigmoid(logits1)
        H_gated = H * gate

        logits2 = self.head2(H_gated)
        logits3 = self.head3(H_gated)

        logits = torch.cat([logits1, logits2, logits3], dim=-1)

        loss = None
        if labels is not None:
            labels = labels.float()
            expected_width = 1 + self.num_types + self.num_manifestations
            if labels.shape[-1] != expected_width:
                raise ValueError(
                    f"labels has width {labels.shape[-1]}, expected {expected_width} "
                    "(1 + num_types + num_manifestations)."
                )
            loss_fct = nn.BCEWithLogitsLoss()

            types_end = 1 + self.num_types
            y1_true = labels[:, :1]
            y2_true = labels[:, 1:types_end]
            y3_true = labels[:, types_end:]

            loss1 = loss_fct(logits1, y1_true)
            loss2 = loss_fct(logits2, y2_true)
            loss3 = loss_fct(logits3, y3_true)

            # Unweighted average of the three task losses.
            loss = (loss1 + loss2 + loss3) / 3.0

        return {
            "loss": loss,
            "logits": logits,
            "polarization_logits": logits1,
            "types_logits": logits2,
            "manifestations_logits": logits3,
        }

In [8]:
# Instantiate the gated multi-task model with the configured encoder and head sizes.
model = GatedMTLModel(
    model_name=model_name,
    num_types=NUM_TYPES,
    num_manifestations=NUM_MANIFESTATIONS,
)

def compute_metrics(eval_pred):
    """Compute per-subtask accuracy / F1 from concatenated logits and labels.

    Column 0 is treated as subtask 1, the next ``NUM_TYPES`` columns as
    subtask 2 and all remaining columns as subtask 3. Logits are passed through
    a sigmoid and thresholded at 0.5.
    """
    raw_logits, gold = eval_pred.predictions, eval_pred.label_ids

    # When predictions arrive as a tuple, the first element is used.
    if isinstance(raw_logits, tuple):
        raw_logits = raw_logits[0]

    predicted = ((1 / (1 + np.exp(-raw_logits))) >= 0.5).astype(int)
    gold = gold.astype(int)

    type_cols = slice(1, 1 + NUM_TYPES)
    manifestation_cols = slice(1 + NUM_TYPES, None)

    metrics = {"subtask_1/accuracy": accuracy_score(gold[:, 0], predicted[:, 0])}
    for average in ("binary", "macro", "micro"):
        metrics[f"subtask_1/f1_{average}"] = f1_score(
            gold[:, 0], predicted[:, 0], average=average, zero_division=0
        )

    for task, cols in (("subtask_2", type_cols), ("subtask_3", manifestation_cols)):
        for average in ("macro", "micro"):
            metrics[f"{task}/f1_{average}"] = f1_score(
                gold[:, cols], predicted[:, cols], average=average, zero_division=0
            )

    return metrics

# Define training arguments
# Single source of truth for the train batch size: it sets the per-device batch
# and the logging cadence (about once per epoch on a single device).
train_batch_size = 64

training_args = TrainingArguments(
        output_dir="./",
        num_train_epochs=10,
        learning_rate=2e-5,
        per_device_train_batch_size=train_batch_size,
        per_device_eval_batch_size=8,
        eval_strategy="epoch",
        save_strategy="no",
        # Clamp to >= 1: a training set smaller than one batch would otherwise
        # give logging_steps=0, which is rejected for step-based logging.
        logging_steps=max(1, len(train_dataset) // train_batch_size),
        disable_tqdm=False
    )

pytorch_model.bin:   0%|          | 0.00/371M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/371M [00:00<?, ?B/s]

In [9]:
# Build the Trainer: model, arguments, train/validation data, metric callback,
# and a collator that pads each batch dynamically.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
    data_collator=DataCollatorWithPadding(tokenizer),
)

# Fine-tune, then score the final weights on the validation split.
trainer.train()
eval_results = trainer.evaluate()

# Metrics to report, in display order.
reported_keys = (
    "eval_subtask_1/accuracy",
    "eval_subtask_1/f1_binary",
    "eval_subtask_1/f1_macro",
    "eval_subtask_1/f1_micro",
    "eval_subtask_2/f1_macro",
    "eval_subtask_2/f1_micro",
    "eval_subtask_3/f1_macro",
    "eval_subtask_3/f1_micro",
)
print(
    "Validation Results:",
    *(
        f"\n{key[len('eval_'):].replace('/', ' ')}: {eval_results[key]:.4f}"
        for key in reported_keys
    ),
)

Epoch,Training Loss,Validation Loss,Subtask 1/accuracy,Subtask 1/f1 Binary,Subtask 1/f1 Macro,Subtask 1/f1 Micro,Subtask 2/f1 Macro,Subtask 2/f1 Micro,Subtask 3/f1 Macro,Subtask 3/f1 Micro
1,0.5806,0.463416,0.779845,0.710204,0.766352,0.779845,0.0,0.0,0.0,0.0
2,0.4486,0.431845,0.787597,0.72211,0.775107,0.787597,0.14,0.547486,0.188813,0.369892
3,0.3952,0.460585,0.747287,0.675944,0.734414,0.747287,0.137313,0.549488,0.241772,0.423529
4,0.3746,0.481673,0.753488,0.681363,0.740176,0.753488,0.137313,0.549488,0.247989,0.424188
5,0.3469,0.419,0.793798,0.671605,0.760661,0.793798,0.132283,0.506024,0.34925,0.462701
6,0.3282,0.42745,0.792248,0.665,0.757219,0.792248,0.132984,0.509018,0.35612,0.462523
7,0.3177,0.434216,0.782946,0.663462,0.751639,0.782946,0.131633,0.506876,0.346226,0.459165
8,0.306,0.466363,0.778295,0.685714,0.757228,0.778295,0.135198,0.531136,0.372141,0.468156
9,0.2999,0.470431,0.775194,0.679912,0.753337,0.775194,0.134906,0.528651,0.354561,0.448306
10,0.2967,0.478152,0.775194,0.681319,0.753833,0.775194,0.140043,0.526316,0.354242,0.444255


Validation Results: 
subtask_1 accuracy: 0.7752 
subtask_1 f1_binary: 0.6813 
subtask_1 f1_macro: 0.7538 
subtask_1 f1_micro: 0.7752 
subtask_2 f1_macro: 0.1400 
subtask_2 f1_micro: 0.5263 
subtask_3 f1_macro: 0.3542 
subtask_3 f1_micro: 0.4443


# Log Metrics

In [10]:
# Log the experiment results - one `log` call per subtask
import json

# Settings that describe this run.
experiment_metadata = {
    "approach": "MTL_Gated",
    f"model_{lang}": model_name,
    "learning_rate": training_args.learning_rate,
    "num_train_epochs": training_args.num_train_epochs,
    "per_device_train_batch_size": training_args.per_device_train_batch_size,
    "per_device_eval_batch_size": training_args.per_device_eval_batch_size,
    "num_types": NUM_TYPES,
    "num_manifestations": NUM_MANIFESTATIONS,
    "datasets_merge": datasets_merge,
}

# Evaluation bookkeeping fields copied into every subtask record.
_SHARED_RESULT_KEYS = (
    "eval_runtime",
    "eval_samples_per_second",
    "eval_steps_per_second",
    "epoch",
)


def _subtask_results(subtask, metric_names):
    """Collect the loss, the given metrics of ``subtask`` and the shared fields."""
    record = {"eval_loss": eval_results.get("eval_loss")}
    for metric in metric_names:
        record[f"eval_{metric}"] = eval_results.get(f"eval_{subtask}/{metric}")
    for shared_key in _SHARED_RESULT_KEYS:
        record[shared_key] = eval_results.get(shared_key)
    return record


subtask_1_results = _subtask_results(
    "subtask_1", ("accuracy", "f1_binary", "f1_macro", "f1_micro")
)
subtask_2_results = _subtask_results("subtask_2", ("f1_macro", "f1_micro"))
subtask_3_results = _subtask_results("subtask_3", ("f1_macro", "f1_micro"))

# Reuse metadata already stored for this trial (if any) so it is updated
# rather than replaced. A missing or unparsable logs.json means "no metadata".
existing_metadata = {}
try:
    with open("logs.json", "r", encoding="utf-8") as log_file:
        logs = json.load(log_file)
except (FileNotFoundError, json.JSONDecodeError):
    pass
else:
    # A single top-level object is treated as a one-element list of trials.
    if isinstance(logs, dict):
        logs = [logs]
    existing_metadata = next(
        (
            trial["metadata"].copy()
            for trial in logs
            if trial.get("trial_id") == trial_id and "metadata" in trial
        ),
        {},
    )

# Overlay this run's settings on top of whatever was stored before.
merged_metadata = {**existing_metadata}
for meta_key in (
    f"model_{lang}",
    "approach",
    "learning_rate",
    "num_train_epochs",
    "per_device_train_batch_size",
    "per_device_eval_batch_size",
    "num_types",
    "num_manifestations",
    "datasets_merge",
):
    merged_metadata[meta_key] = experiment_metadata[meta_key]

# Metadata is passed with subtask_1 only; subtask_2 and subtask_3 pass None
# under the same trial_id so they do not overwrite it.
for subtask_name, subtask_results, subtask_metadata in (
    ("subtask_1", subtask_1_results, merged_metadata),
    ("subtask_2", subtask_2_results, None),
    ("subtask_3", subtask_3_results, None),
):
    log(
        subtask_name=subtask_name,
        language=lang,
        eval_results=subtask_results,
        metadata=subtask_metadata,
        trial_id=trial_id
    )

print(f"\nâœ“ Experiment results logged to logs.json (trial_id: {trial_id})")
for subtask_name in ("subtask_1", "subtask_2", "subtask_3"):
    print(f"  - {subtask_name}: {lang}")


âœ“ Experiment results logged to logs.json (trial_id: 000002)
  - subtask_1: eng
  - subtask_2: eng
  - subtask_3: eng


# Predict on the dev set

In [11]:
import os

# Load the dev file of every subtask. Texts are taken from the subtask-1 file and
# the predictions are written back to all three files by row position.
dev_1 = pd.read_csv(f"./dev_phase/subtask1/dev/{lang}.csv")
dev_2 = pd.read_csv(f"./dev_phase/subtask2/dev/{lang}.csv")
dev_3 = pd.read_csv(f"./dev_phase/subtask3/dev/{lang}.csv")

# Row-position attribution is only valid when the three files list the same ids
# in the same order; fail loudly instead of writing mis-attributed predictions.
for subtask_label, dev_frame in (("subtask2", dev_2), ("subtask3", dev_3)):
    if dev_frame['id'].tolist() != dev_1['id'].tolist():
        raise ValueError(
            f"{subtask_label} dev ids do not match subtask1 dev ids row by row."
        )

# Output columns are named after the label columns present in the dev files.
type_cols = [col for col in dev_2.columns if col not in ['id', 'text']]
manifest_cols = [col for col in dev_3.columns if col not in ['id', 'text']]
if len(type_cols) != NUM_TYPES or len(manifest_cols) != NUM_MANIFESTATIONS:
    raise ValueError(
        f"Expected {NUM_TYPES} type and {NUM_MANIFESTATIONS} manifestation columns, "
        f"found {len(type_cols)} and {len(manifest_cols)}."
    )

# Placeholder all-zero labels: the dataset requires a label per item. Their width
# is derived from the head sizes instead of being hard-coded.
dev_texts = dev_1['text'].tolist()
num_labels = 1 + NUM_TYPES + NUM_MANIFESTATIONS
dummy_labels = [[0] * num_labels for _ in dev_texts]
dev_dataset = PolarizationDataset(dev_texts, dummy_labels, tokenizer)

# Predict
predictions = trainer.predict(dev_dataset)
logits = predictions.predictions
# When predictions arrive as a tuple, the first element is used.
if isinstance(logits, tuple):
    logits = logits[0]
probs = 1 / (1 + np.exp(-logits))
preds = (probs >= 0.5).astype(int)

# Extract predictions for each subtask
polarization_preds = preds[:, 0]
types_preds = preds[:, 1:1 + NUM_TYPES]
manifestations_preds = preds[:, 1 + NUM_TYPES:num_labels]

# Create output DataFrames (the 'text' column is not part of the saved files)
output_1 = dev_1[['id']].copy()
output_1['polarization'] = polarization_preds

output_2 = dev_2[['id']].copy()
for i, col in enumerate(type_cols):
    output_2[col] = types_preds[:, i]

output_3 = dev_3[['id']].copy()
for i, col in enumerate(manifest_cols):
    output_3[col] = manifestations_preds[:, i]

# Save each subtask's predictions under ./results/<trial_id>/subtask_<k>/
# (makedirs also creates the parent directories).
for subtask_dir, output_frame in (
    ("subtask_1", output_1),
    ("subtask_2", output_2),
    ("subtask_3", output_3),
):
    os.makedirs(f"./results/{trial_id}/{subtask_dir}", exist_ok=True)
    output_frame.to_csv(f"./results/{trial_id}/{subtask_dir}/pred_{lang}.csv", index=False)

print("Predictions saved for all 3 dev sets")

Predictions saved for all 3 dev sets
