In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.DataFrame(data = {"lbl":[[1], [1,0], [0], [4], [3], [3,0]], 
                          "text": ['Đây là bệnh về da',
                                   'Da thâm nổi mụm và tiền ung thư',
                                   'Nhóm bệnh ung thư về sắc tố',
                                   'Cơ xương có thể không phát triển',
                                   'Gan nhiễm mỡ và có dấu hiệu xơ gan',
                                   'Xơ gan giai đoạn tiền phát nguy cơ gây ung thư gan']})
df

Unnamed: 0,lbl,text
0,[1],Đây là bệnh về da
1,"[1, 0]",Da thâm nổi mụm và tiền ung thư
2,[0],Nhóm bệnh ung thư về sắc tố
3,[4],Cơ xương có thể không phát triển
4,[3],Gan nhiễm mỡ và có dấu hiệu xơ gan
5,"[3, 0]",Xơ gan giai đoạn tiền phát nguy cơ gây ung thư...


In [3]:
def binary_cvt(labels, max_val=4):
    zeros_arr = [0]*(max_val+1)
    for label in labels:
        zeros_arr[label] = 1
    return zeros_arr


df['binary_lbl'] = df.lbl.apply(lambda x: binary_cvt(x))
df.head()

Unnamed: 0,lbl,text,binary_lbl
0,[1],Đây là bệnh về da,"[0, 1, 0, 0, 0]"
1,"[1, 0]",Da thâm nổi mụm và tiền ung thư,"[1, 1, 0, 0, 0]"
2,[0],Nhóm bệnh ung thư về sắc tố,"[1, 0, 0, 0, 0]"
3,[4],Cơ xương có thể không phát triển,"[0, 0, 0, 0, 1]"
4,[3],Gan nhiễm mỡ và có dấu hiệu xơ gan,"[0, 0, 0, 1, 0]"


In [14]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLossMultiLabel(nn.Module):
    ''' 
    Cross Entropy Loss for Multi-Label Classification
    
    This class defines the Cross Entropy Loss for addressing multi-label classification tasks.
    It uses PyTorch's built-in CrossEntropyLoss, adjusted for multi-label.

    Attributes:
        loss_fn (torch.nn.CrossEntropyLoss): PyTorch Cross Entropy Loss instance.
    '''

    def __init__(self):
        super(CrossEntropyLossMultiLabel, self).__init__()

    def forward(self, logits, labels):
        ''' 
        Forward pass for Cross Entropy Loss.

        Args:
            logits (torch.Tensor): Logits predicted by the model.
            labels (torch.Tensor): True labels.

        Returns:
            torch.Tensor: Computed Cross Entropy Loss.
        '''
        # Apply sigmoid activation to logits for multi-label classification
        logits_sigmoid = torch.sigmoid(logits)

        # Flatten the logits and labels for multi-label loss calculation
        logits_flat = logits_sigmoid.view(-1)
        labels_flat = labels.view(-1)

        # Binary cross entropy loss
        loss = F.binary_cross_entropy(logits_flat, labels_flat)

        return loss
    
class FocalLossMultiLabel(nn.Module):
    ''' 
    Focal Loss for Multi-Label Classification
    
    This class defines the Focal Loss for addressing class imbalance in multi-label classification tasks.
    It introduces a modulating factor (gamma) to down-weight easy samples.

    Attributes:
        gamma (float): Modulating factor for Focal Loss.
    '''

    def __init__(self, gamma=2.0):
        super(FocalLossMultiLabel, self).__init__()
        self.gamma = gamma

    def forward(self, outputs, labels):
        ''' 
        Forward pass for Focal Loss.

        Args:
            outputs (torch.Tensor): Raw outputs from the model.
            labels (torch.Tensor): True labels.

        Returns:
            torch.Tensor: Computed Focal Loss.
        '''
        ce_loss = F.binary_cross_entropy_with_logits(outputs, labels, reduction='none')
        pt = torch.exp(-ce_loss)
        loss = (1 - pt) ** self.gamma * ce_loss
        return loss.mean()
    
class FocalLossWithBatchNormL2MultiLabel(nn.Module):
    ''' 
    Focal Loss with BatchNorm L2 Penalty for Multi-Label Classification
    
    This class defines Focal Loss with an additional BatchNorm L2 penalty for multi-label classification.
    It helps prevent overfitting by penalizing large weights in BatchNorm layers.

    Attributes:
        gamma (float): Modulating factor for Focal Loss.
        beta (float): Coefficient for BatchNorm L2 penalty.
    '''

    def __init__(self, gamma=2.0, beta=1e-4):
        super(FocalLossWithBatchNormL2MultiLabel, self).__init__()
        self.gamma = gamma
        self.beta = beta

    def forward(self, outputs, labels):
        ''' 
        Forward pass for Focal Loss with BatchNorm L2 Penalty.

        Args:
            outputs (torch.Tensor): Raw outputs from the model.
            labels (torch.Tensor): True labels.

        Returns:
            torch.Tensor: Computed Focal Loss with BatchNorm L2 Penalty.
        '''
        ce_loss = F.binary_cross_entropy_with_logits(outputs, labels, reduction='none')
        pt = torch.exp(-ce_loss)
        loss = (1 - pt) ** self.gamma * ce_loss
        return loss.mean() + self.beta * self.batch_norm_l2_penalty()

    def batch_norm_l2_penalty(self):
        ''' 
        Compute BatchNorm L2 Penalty.

        Returns:
            torch.Tensor: L2 penalty for BatchNorm layers.
        '''
        l2_penalty = torch.tensor(0.0, requires_grad=True)
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                l2_penalty += (module.weight ** 2).sum()
        return l2_penalty
    
class LabelSmoothingLossMultiLabel(nn.Module):
    ''' 
    Label Smoothing Loss for Multi-Label Classification
    
    This class defines the Label Smoothing Loss for addressing multi-label classification tasks.
    It mitigates overconfidence in the model predictions by introducing label smoothing.

    Attributes:
        smoothing (float): Smoothing factor for label smoothing.
    '''

    def __init__(self, smoothing=0.1):
        super(LabelSmoothingLossMultiLabel, self).__init__()
        self.smoothing = smoothing

    def forward(self, outputs, labels):
        ''' 
        Forward pass for Label Smoothing Loss.

        Args:
            outputs (torch.Tensor): Logits predicted by the model.
            labels (torch.Tensor): True labels.

        Returns:
            torch.Tensor: Computed Label Smoothing Loss.
        '''
        sigmoid_outputs = torch.sigmoid(outputs)

        smooth_labels = (1.0 - self.smoothing) * labels + self.smoothing / 2.0
        log_probs = torch.log(sigmoid_outputs)

        loss = -torch.sum(smooth_labels * log_probs + (1.0 - smooth_labels) * torch.log(1.0 - sigmoid_outputs))
        return loss / outputs.size(0)  # Normalize by batch size

In [4]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from torch.utils.data import Dataset, DataLoader
from torch.nn import BCEWithLogitsLoss  # For multi-label classification


2024-02-21 10:56:52.045606: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-02-21 10:56:52.045669: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [15]:
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from torch.utils.data import DataLoader, Dataset
import torch.nn as nn
import torch.optim as optim

# Chuẩn bị dữ liệu
texts = df.text.tolist()

labels = torch.tensor(df.binary_lbl.tolist())

# Token hóa văn bản
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenized_texts = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")

# Tạo dataset
class CustomDataset(Dataset):
    def __init__(self, tokenized_texts, labels):
        self.tokenized_texts = tokenized_texts
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return {'input_ids': self.tokenized_texts['input_ids'][idx], 'attention_mask': self.tokenized_texts['attention_mask'][idx], 'labels': self.labels[idx]}

dataset = CustomDataset(tokenized_texts, labels)

# Tạo DataLoader
batch_size = 2
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Tạo mô hình BERT
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=5)

# Hàm mất mát BCEWithLogitsLoss
# criterion = nn.BCEWithLogitsLoss()
criterion = LabelSmoothingLossMultiLabel()
# criterion = nn.CrossEntropyLoss()

# Tối ưu hóa
optimizer = optim.AdamW(model.parameters(), lr=1e-5)

# Quá trình huấn luyện
num_epochs = 10
for epoch in range(num_epochs):
    for batch in dataloader:
        inputs = batch['input_ids']
        attention_mask = batch['attention_mask']
        labels = batch['labels']

        # Tính logits từ mô hình
        outputs = model(inputs, attention_mask=attention_mask)
        logits = outputs.logits

        # Tính toán loss
        loss = criterion(logits, labels.float())
        print(loss)
        # Backpropagation và tối ưu hóa
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

# Đánh giá mô hình (tương tự trong quá trình huấn luyện)
# ...


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly i

tensor(3.5259, grad_fn=<DivBackward0>)
tensor(3.6424, grad_fn=<DivBackward0>)
tensor(3.2496, grad_fn=<DivBackward0>)
tensor(3.0011, grad_fn=<DivBackward0>)
tensor(3.4814, grad_fn=<DivBackward0>)
tensor(3.2179, grad_fn=<DivBackward0>)
tensor(3.1448, grad_fn=<DivBackward0>)
tensor(3.0556, grad_fn=<DivBackward0>)
tensor(2.9386, grad_fn=<DivBackward0>)
tensor(2.9808, grad_fn=<DivBackward0>)
tensor(2.8337, grad_fn=<DivBackward0>)
tensor(2.7732, grad_fn=<DivBackward0>)
tensor(2.7289, grad_fn=<DivBackward0>)
tensor(2.8315, grad_fn=<DivBackward0>)


KeyboardInterrupt: 