In [None]:
from transformers import Trainer, TrainingArguments
import numpy as np

class CustomTrainer(Trainer):
    """Trainer subclass that replaces the default loss with ``combined_loss``.

    The loss mixes binary cross-entropy with the local (present/absent) and
    global correlation terms defined elsewhere in this notebook.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the model on a batch and score its logits with ``combined_loss``.

        Args:
            model: Called as ``model(**inputs)``; the result must expose ``.logits``.
            inputs: Batch dict. Its ``"labels"`` entry is popped before the forward pass.
            return_outputs: When true, return ``(loss, outputs)`` instead of the loss alone.
            num_items_in_batch: Accepted and ignored, so the override keeps working
                with Trainer versions that pass this keyword to ``compute_loss``.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        device = logits.device

        # Assume the number of emotions n equals len(emotion_labels)
        n = len(emotion_labels)

        # Define your correlation matrix c.
        # Still a random placeholder, but it is sampled once and cached on the
        # trainer: re-sampling it on every step would give the correlation
        # terms a different target each time.
        cached = getattr(self, "_correlation_matrix", None)
        if cached is None or cached.shape[0] != n:
            c = np.random.rand(n, n)  # Using a random matrix for example
            c = (c + c.T) / 2  # Symmetrizing the correlation matrix
            c = np.clip(c, 0, 1)  # Clipping values to [0, 1]
            cached = torch.tensor(c, dtype=torch.float32)
            self._correlation_matrix = cached
        c = cached.to(device)

        # Extract present and absent emotions from labels.
        # NOTE(review): only the first sample of the batch (labels[0]) is
        # inspected here -- confirm that this is intended.
        first_labels = labels[0].tolist()  # one host transfer instead of n
        P = [i for i in range(n) if first_labels[i] == 1]  # Present emotions
        N = [i for i in range(n) if first_labels[i] == 0]  # Absent emotions

        # Index tensors live on the same device as the logits they index.
        P = torch.tensor(P, dtype=torch.long, device=device)
        N = torch.tensor(N, dtype=torch.long, device=device)

        # Compute combined loss.
        # NOTE(review): logits are passed as-is while c, P and N are sized by
        # the number of emotions; combined_loss indexes its `r` argument by
        # row with those indices -- verify the shapes line up.
        loss = combined_loss(
            labels.float(), logits, c, P, N,
            similarity_function, distance_function,
            alpha=0.5, beta=0.5,
        )
        return (loss, outputs) if return_outputs else loss

In [None]:
# Define training arguments
# NOTE(review): this cell uses model, tokenizer, train_dataset, dev_dataset and
# compute_metrics, none of which are defined above it in this notebook.
training_args = TrainingArguments(
    # Output location and checkpoint retention
    output_dir='./results',
    save_total_limit=1,
    # Evaluation / logging cadence and best-model selection
    evaluation_strategy="steps",
    logging_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
    # Optimisation hyperparameters
    learning_rate=1e-5,
    weight_decay=0.01,
    num_train_epochs=20,
    # Per-device batch sizes
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

# Wire the model, data and metrics into the custom-loss trainer
trainer = CustomTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    compute_metrics=compute_metrics,
)

trainer.train()

In [None]:
def similarity_function(x, y):
    """Return the cosine similarity of ``x`` and ``y`` taken along dimension 0."""
    cosine = F.cosine_similarity(x, y, dim=0)
    return cosine

def distance_function(x, y):
    """Return the negated cosine similarity of ``x`` and ``y`` along dimension 0.

    The result lies in [-1, 1]; identical directions give -1.
    """
    cosine = F.cosine_similarity(x, y, dim=0)
    return -cosine

In [None]:
import torch.nn.functional as F

def global_loss(h, c):
    """Mean squared gap between pairwise cosine similarities of ``h`` and ``c``.

    For every ordered pair ``i != j`` of rows of ``h``, the cosine similarity
    of ``h[i]`` and ``h[j]`` (along dimension 0) is compared with ``c[i, j]``;
    the squared differences are averaged over the ``n * (n - 1)`` pairs.

    Args:
        h: Tensor whose first dimension enumerates the items being compared.
        c: Target matrix indexed as ``c[i, j]`` with ``i, j < h.shape[0]``.

    Returns:
        The averaged squared difference as a tensor; a zero tensor when ``h``
        has fewer than two rows.
    """
    n = h.shape[0]
    if n < 2:
        # No off-diagonal pairs exist; dividing by n * (n - 1) == 0 would
        # raise ZeroDivisionError (e.g. on a final batch holding one row).
        return h.new_zeros(())

    total = sum(
        (F.cosine_similarity(h[i], h[j], dim=0) - c[i, j]) ** 2
        for i in range(n)
        for j in range(n)
        if i != j
    )
    return total / (n * (n - 1))

def local_loss_present_and_absent(P, N, r, S, D, f, f0):
    """Local loss over the present (``P``) and absent (``N``) index groups.

    Args:
        P: Indices of the present group.
        N: Indices of the absent group.
        r: Container indexed by those indices (``r[i]``).
        S: Callable ``S(a, b)`` used for absent/present (inter-group) pairs.
        D: Callable ``D(a, b)`` used for pairs inside one group.
        f: Weights indexed as ``f[i, j]`` for inter-group pairs.
        f0: Weights indexed as ``f0[i, j]`` for intra-group pairs.

    Returns:
        ``inter + intra_absent + intra_present``, each term normalised by its
        own pair count (or left as-is when the group has no pairs).
    """
    n_present, n_absent = len(P), len(N)

    # Compute inter-group loss: every absent index against every present index.
    inter_loss = 0
    for i in N:
        for j in P:
            inter_loss += f[i, j] * S(r[i], r[j])
    inter_loss = inter_loss / max(n_absent * n_present, 1)

    # Compute intra-group loss.
    # Each group is accumulated and normalised separately. Previously the
    # present-group sum was added onto the already-normalised absent-group
    # term and the total was divided again, so the absent term was divided by
    # both pair counts.
    # Note: each unordered pair is visited in both orders while the divisor
    # counts unordered pairs; that scaling is kept unchanged.
    intra_absent = 0
    for i in N:
        for j in N:
            if i != j:
                intra_absent += f0[i, j] * D(r[i], r[j])
    intra_absent = intra_absent / max(n_absent * (n_absent - 1) / 2, 1)

    intra_present = 0
    for i in P:
        for j in P:
            if i != j:
                # Present-group representations are negated before D is applied.
                intra_present += f0[i, j] * D(-r[i], -r[j])
    intra_present = intra_present / max(n_present * (n_present - 1) / 2, 1)

    return inter_loss + intra_absent + intra_present

def combined_loss(y, r, c, P, N, S, D, alpha, beta):
    """Blend binary cross-entropy with the local and global correlation terms.

    Computes ``(1 - alpha) * BCE(r, y) + alpha * local + beta * global`` where
    ``local`` comes from ``local_loss_present_and_absent`` and ``global`` from
    ``global_loss``.

    NOTE(review): ``r`` is used both as BCE logits (same shape as ``y``) and as
    the row-indexed representations for the local/global terms -- confirm the
    caller's shapes satisfy both uses.
    """
    inter_weights = 1 - c  # Decreasing weight for inter-group
    intra_weights = c      # Increasing weight for intra-group

    local_term = local_loss_present_and_absent(P, N, r, S, D, inter_weights, intra_weights)
    global_term = global_loss(r, c)
    bce_term = F.binary_cross_entropy_with_logits(r, y)

    return (1 - alpha) * bce_term + alpha * local_term + beta * global_term

In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch.nn.functional as F

# Step 1: Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('cardiffnlp/twitter-xlm-roberta-base-sentiment')
# The checkpoint is a sentiment classifier, so its classification head will
# generally not have len(emotion_labels) outputs. ignore_mismatched_sizes lets
# from_pretrained re-initialise the mismatched head instead of raising a
# size-mismatch error while loading the weights.
model = AutoModelForSequenceClassification.from_pretrained(
    'cardiffnlp/twitter-xlm-roberta-base-sentiment',
    problem_type="multi_label_classification",
    num_labels=len(emotion_labels),  # Assume emotion_labels is defined
    ignore_mismatched_sizes=True,
)

# Step 2: Define the dataset class
class EmotionDataset(Dataset):
    """Dataset pairing raw texts with label rows, tokenised on access.

    Each item is a dict with ``input_ids``, ``attention_mask`` and a float
    ``labels`` tensor.
    """

    def __init__(self, texts, labels, tokenizer, max_length=128):
        """Store the texts, labels, tokenizer and the padding/truncation length."""
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_length = tokenizer, max_length

    def __len__(self):
        """Number of samples, taken from the text collection."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Tokenise sample ``idx`` (padded/truncated to ``max_length``) and return tensors."""
        text, label = self.texts[idx], self.labels[idx]

        encoded = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
        )
        sample = {key: torch.tensor(encoded[key]) for key in ('input_ids', 'attention_mask')}
        sample['labels'] = torch.tensor(label, dtype=torch.float)
        return sample

# Step 3: Load your dataset
# Assume `train_df` and `dev_df` are preloaded DataFrames containing 'text' and emotion columns
# Texts become plain lists; missing emotion values are filled with 0 before
# the label columns are taken as an array.
train_texts, dev_texts = (frame['text'].tolist() for frame in (train_df, dev_df))
train_labels, dev_labels = (
    frame[emotion_labels].fillna(0).values for frame in (train_df, dev_df)
)

train_dataset, dev_dataset = (
    EmotionDataset(texts, labels, tokenizer)
    for texts, labels in ((train_texts, train_labels), (dev_texts, dev_labels))
)

# Step 4: Define the similarity and distance functions
def similarity_function(x, y):
    """Return the cosine similarity of ``x`` and ``y`` taken along dimension 0."""
    cosine = F.cosine_similarity(x, y, dim=0)
    return cosine

def distance_function(x, y):
    """Return the negated cosine similarity of ``x`` and ``y`` along dimension 0.

    The result lies in [-1, 1]; identical directions give -1.
    """
    cosine = F.cosine_similarity(x, y, dim=0)
    return -cosine

# Step 5: Define the loss functions
def global_loss(h, c):
    """Mean squared gap between pairwise cosine similarities of ``h`` and ``c``.

    For every ordered pair ``i != j`` of rows of ``h``, the cosine similarity
    of ``h[i]`` and ``h[j]`` (along dimension 0) is compared with ``c[i, j]``;
    the squared differences are averaged over the ``n * (n - 1)`` pairs.

    Args:
        h: Tensor whose first dimension enumerates the items being compared.
        c: Target matrix indexed as ``c[i, j]`` with ``i, j < h.shape[0]``.

    Returns:
        The averaged squared difference as a tensor; a zero tensor when ``h``
        has fewer than two rows.
    """
    n = h.shape[0]
    if n < 2:
        # No off-diagonal pairs exist; dividing by n * (n - 1) == 0 would
        # raise ZeroDivisionError (e.g. on a final batch holding one row).
        return h.new_zeros(())

    total = sum(
        (F.cosine_similarity(h[i], h[j], dim=0) - c[i, j]) ** 2
        for i in range(n)
        for j in range(n)
        if i != j
    )
    return total / (n * (n - 1))

def local_loss_present_and_absent(P, N, r, S, D, f, f0):
    """Local loss over the present (``P``) and absent (``N``) index groups.

    Args:
        P: Indices of the present group.
        N: Indices of the absent group.
        r: Container indexed by those indices (``r[i]``).
        S: Callable ``S(a, b)`` used for absent/present (inter-group) pairs.
        D: Callable ``D(a, b)`` used for pairs inside one group.
        f: Weights indexed as ``f[i, j]`` for inter-group pairs.
        f0: Weights indexed as ``f0[i, j]`` for intra-group pairs.

    Returns:
        ``inter + intra_absent + intra_present``, each term normalised by its
        own pair count (or left as-is when the group has no pairs).
    """
    n_present, n_absent = len(P), len(N)

    # Inter-group term: every absent index against every present index.
    inter_loss = 0
    for i in N:
        for j in P:
            inter_loss += f[i, j] * S(r[i], r[j])
    inter_loss = inter_loss / max(n_absent * n_present, 1)

    # Intra-group terms.
    # Each group is accumulated and normalised separately. Previously the
    # present-group sum was added onto the already-normalised absent-group
    # term and the total was divided again, so the absent term was divided by
    # both pair counts.
    # Note: each unordered pair is visited in both orders while the divisor
    # counts unordered pairs; that scaling is kept unchanged.
    intra_absent = 0
    for i in N:
        for j in N:
            if i != j:
                intra_absent += f0[i, j] * D(r[i], r[j])
    intra_absent = intra_absent / max(n_absent * (n_absent - 1) / 2, 1)

    intra_present = 0
    for i in P:
        for j in P:
            if i != j:
                # Present-group representations are negated before D is applied.
                intra_present += f0[i, j] * D(-r[i], -r[j])
    intra_present = intra_present / max(n_present * (n_present - 1) / 2, 1)

    return inter_loss + intra_absent + intra_present

def combined_loss(y, r, c, P, N, S, D, alpha, beta):
    """Blend binary cross-entropy with the local and global correlation terms.

    Computes ``(1 - alpha) * BCE(r, y) + alpha * local + beta * global`` where
    ``local`` comes from ``local_loss_present_and_absent`` and ``global`` from
    ``global_loss``.

    NOTE(review): ``r`` is used both as BCE logits (same shape as ``y``) and as
    the row-indexed representations for the local/global terms -- confirm the
    caller's shapes satisfy both uses.
    """
    inter_weights = 1 - c  # Decreasing weight for inter-group
    intra_weights = c      # Increasing weight for intra-group

    local_term = local_loss_present_and_absent(P, N, r, S, D, inter_weights, intra_weights)
    global_term = global_loss(r, c)
    bce_term = F.binary_cross_entropy_with_logits(r, y)

    return (1 - alpha) * bce_term + alpha * local_term + beta * global_term

# Step 6: Define the CustomTrainer
class CustomTrainer(Trainer):
    """Trainer subclass that replaces the default loss with ``combined_loss``.

    The loss mixes binary cross-entropy with the local (present/absent) and
    global correlation terms defined earlier in this cell.
    """

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Run the model on a batch and score its logits with ``combined_loss``.

        Args:
            model: Called as ``model(**inputs)``; the result must expose ``.logits``.
            inputs: Batch dict. Its ``"labels"`` entry is popped before the forward pass.
            return_outputs: When true, return ``(loss, outputs)`` instead of the loss alone.
            num_items_in_batch: Accepted and ignored, so the override keeps working
                with Trainer versions that pass this keyword to ``compute_loss``.

        Returns:
            The loss, or ``(loss, outputs)`` when ``return_outputs`` is true.
        """
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        device = logits.device

        n = len(emotion_labels)

        # Random placeholder correlation matrix (symmetrised, clipped to
        # [0, 1]). It is sampled once and cached on the trainer: re-sampling
        # it on every step would give the correlation terms a different
        # target each time.
        cached = getattr(self, "_correlation_matrix", None)
        if cached is None or cached.shape[0] != n:
            c = np.random.rand(n, n)
            c = (c + c.T) / 2
            c = np.clip(c, 0, 1)
            cached = torch.tensor(c, dtype=torch.float32)
            self._correlation_matrix = cached
        c = cached.to(device)

        # Present / absent emotion indices.
        # NOTE(review): only the first sample of the batch (labels[0]) is
        # inspected here -- confirm that this is intended.
        first_labels = labels[0].tolist()  # one host transfer instead of n
        P = [i for i in range(n) if first_labels[i] == 1]
        N = [i for i in range(n) if first_labels[i] == 0]

        # Index tensors live on the same device as the logits they index.
        P = torch.tensor(P, dtype=torch.long, device=device)
        N = torch.tensor(N, dtype=torch.long, device=device)

        # NOTE(review): logits are passed as-is while c, P and N are sized by
        # the number of emotions; combined_loss indexes its `r` argument by
        # row with those indices -- verify the shapes line up.
        loss = combined_loss(
            labels.float(), logits, c, P, N,
            similarity_function, distance_function,
            alpha=0.5, beta=0.5,
        )
        return (loss, outputs) if return_outputs else loss

# Step 7: Define training arguments
training_args = TrainingArguments(
    # Output location and checkpoint retention
    output_dir='./results',
    save_total_limit=1,
    # Evaluation / logging cadence and best-model selection
    evaluation_strategy="steps",
    logging_strategy="epoch",
    metric_for_best_model="f1",
    load_best_model_at_end=True,
    # Optimisation hyperparameters
    learning_rate=1e-5,
    weight_decay=0.01,
    num_train_epochs=20,
    # Per-device batch sizes
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

# Step 8: Create trainer and start training
trainer = CustomTrainer(
    args=training_args,
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=dev_dataset,
    compute_metrics=compute_metrics,  # Assume compute_metrics is defined
)

trainer.train()