In [1]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Collecting pyarrow>=21.0.0 (from datasets>=2.0.0->evaluate)
  Downloading pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl (47.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m38.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyarrow, evaluate
  Attempting uninstall: pyarrow
    Found existing installation: pyarrow 19.0.1
    Uninstalling pyarrow-19.0.1:
      Successfully uninstalled pyarrow-19.0.1
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency confl

In [2]:
import torch.nn as nn

class SupConLoss(nn.Module):
    """Supervised Contrastive Learning Loss"""

    def __init__(self, temperature=0.07, contrast_mode='all', base_temperature=0.07):
        super(SupConLoss, self).__init__()
        self.temperature = temperature
        self.contrast_mode = contrast_mode
        self.base_temperature = base_temperature

    def forward(self, features, labels=None):
        """
        Args:
            features: [bsz, hidden_dim]
            labels: [bsz]
        """
        device = features.device
        features = nn.functional.normalize(features, dim=1)

        batch_size = features.shape[0]
        if batch_size < 2:
            return torch.tensor(0.0, device=device)

        if labels is not None:
            labels = labels.contiguous().view(-1, 1)
            mask = torch.eq(labels, labels.T).float().to(device)
        else:
            mask = torch.eye(batch_size, dtype=torch.float32).to(device)

        # Compute similarity
        anchor_dot_contrast = torch.div(
            torch.matmul(features, features.T),
            self.temperature
        )

        logits_max, _ = torch.max(anchor_dot_contrast, dim=1, keepdim=True)
        logits = anchor_dot_contrast - logits_max.detach()

        # Mask out self-contrast
        logits_mask = torch.scatter(
            torch.ones_like(mask),
            1,
            torch.arange(batch_size).view(-1, 1).to(device),
            0
        )
        mask = mask * logits_mask

        # Compute loss
        exp_logits = torch.exp(logits) * logits_mask
        log_prob = logits - torch.log(exp_logits.sum(1, keepdim=True) + 1e-12)

        mask_sum = torch.clamp(mask.sum(1), min=1.0)
        mean_log_prob_pos = (mask * log_prob).sum(1) / mask_sum

        loss = -(self.temperature / self.base_temperature) * mean_log_prob_pos
        return loss.mean()

In [3]:
import os
import torch
import torch.nn as nn
import sys
import logging
import evaluate

import pandas as pd
import numpy as np

import gc
from typing import Optional, Tuple, Union
from dataclasses import dataclass

from transformers import AutoTokenizer, AutoModel, AutoConfig, DataCollatorWithPadding
from transformers import Trainer, TrainingArguments, training_args
from transformers import BertPreTrainedModel, BertModel, PreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
from transformers import AutoModelForSequenceClassification

from datasets import Dataset
from sklearn.model_selection import train_test_split

2025-11-02 10:44:48.646728: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762080288.821092      20 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762080288.872503      20 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
train = pd.read_csv("/kaggle/input/corpus-imdb/labeledTrainData.tsv", header=0, delimiter="\t", quoting=3)
test = pd.read_csv("/kaggle/input/corpus-imdb/testData.tsv", header=0, delimiter="\t", quoting=3)

In [5]:
program = os.path.basename(sys.argv[0])
logger = logging.getLogger(program)

logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)
logger.info(r"running %s" % ''.join(sys.argv))

In [6]:
train, val = train_test_split(train, test_size=.2)

train_dict = {'label': train["sentiment"].values, 'text': train['review'].values}
val_dict = {'label': val["sentiment"].values, 'text': val['review'].values}
test_dict = {"text": test['review'].values}

train_dataset = Dataset.from_dict(train_dict)
val_dataset = Dataset.from_dict(val_dict)
test_dataset = Dataset.from_dict(test_dict)

In [7]:
class BertWithSCL(PreTrainedModel):
    def __init__(self, config, alpha=0.2):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.config = config
        self.alpha = alpha

        # 加载 BERT backbone
        self.bert = AutoModel.from_config(config)

        # Classifier
        classifier_dropout = (
            config.classifier_dropout
            if hasattr(config, 'classifier_dropout') and config.classifier_dropout is not None
            else config.hidden_dropout_prob
        )
        self.dropout = nn.Dropout(classifier_dropout)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

        # SCL loss
        self.scl_loss_fct = SupConLoss()

        # Initialize weights
        self.post_init()

    def forward(
            self,
            input_ids: Optional[torch.Tensor] = None,
            attention_mask: Optional[torch.Tensor] = None,
            token_type_ids: Optional[torch.Tensor] = None,
            position_ids: Optional[torch.Tensor] = None,
            head_mask: Optional[torch.Tensor] = None,
            inputs_embeds: Optional[torch.Tensor] = None,
            labels: Optional[torch.Tensor] = None,
            output_attentions: Optional[bool] = None,
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutput]:

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=self.training,
            return_dict=return_dict,
        )

        # 获取 [CLS] token 的表示
        pooled_output = outputs[1]  # pooler_output

        # Dropout + Classifier
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        # 计算 loss
        loss = None
        if labels is not None:
            # Cross-entropy loss
            loss_fct = nn.CrossEntropyLoss()
            ce_loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

            if self.training and labels.size(0) >= 2:
                
                last_hidden_state = outputs.hidden_states[-1]  # [bsz, seq_len, hidden_dim]
                cls_features = last_hidden_state[:, 0, :]  # [bsz, hidden_dim]

                scl_loss = self.scl_loss_fct(cls_features, labels)

                # Combined loss
                loss = ce_loss + self.alpha * scl_loss
            else:
                loss = ce_loss

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states if self.training else None,
            attentions=outputs.attentions,
        )

In [8]:
from peft import get_peft_model, LoraConfig, TaskType

model_name = '/kaggle/input/bert-base-uncased'
NUM_CLASSES = 2


config = AutoConfig.from_pretrained(model_name)
config.num_labels = NUM_CLASSES

model = BertWithSCL(config, alpha=0.2)

pretrained_bert = AutoModel.from_pretrained(model_name)
model.bert = pretrained_bert

tokenizer = AutoTokenizer.from_pretrained(model_name)


peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    inference_mode=False,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query", "key", "value", "dense"],  # BERT 的注意力层
)

model = get_peft_model(model, peft_config)
model.print_trainable_parameters()



trainable params: 1,340,930 || all params: 110,824,708 || trainable%: 1.2100


In [9]:
def preprocess_function(examples):
    return tokenizer(examples['text'], truncation=True)

train_dataset = train_dataset.map(preprocess_function, batched=True)
val_dataset = val_dataset.map(preprocess_function, batched=True)
test_dataset = test_dataset.map(preprocess_function, batched=True)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

Map:   0%|          | 0/20000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Downloading builder script: 0.00B [00:00, ?B/s]

In [10]:
os.makedirs('/kaggle/working/checkpoint', exist_ok=True)
os.makedirs('/kaggle/working/logs', exist_ok=True)
training_args = TrainingArguments(
    output_dir='/kaggle/working/checkpoint',  # output directory
    num_train_epochs=3,  # total number of training epochs
    per_device_train_batch_size=2,  # batch size per device during training
    per_device_eval_batch_size=4,  # batch size for evaluation
    warmup_steps=500,  # number of warmup steps for learning rate scheduler
    weight_decay=0.01,  # strength of weight decay
    logging_dir='/kaggle/working/logs',  # directory for storing logs
    logging_steps=100,
    save_strategy="no",
    report_to="none",
    eval_strategy="epoch"
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    processing_class=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2081,0.299691,0.9214
2,0.1861,0.309314,0.925
3,0.2487,0.329688,0.9242




TrainOutput(global_step=15000, training_loss=0.2917224810918172, metrics={'train_runtime': 4463.3002, 'train_samples_per_second': 13.443, 'train_steps_per_second': 3.361, 'total_flos': 1.348184845109184e+16, 'train_loss': 0.2917224810918172, 'epoch': 3.0})

In [11]:
prediction_outputs = trainer.predict(test_dataset)
test_pred = np.argmax(prediction_outputs[0], axis=-1).flatten()
print(test_pred)

[1 0 1 ... 0 1 1]


In [12]:
result_output = pd.DataFrame(data={"id": test["id"], "sentiment": test_pred})
result_output.to_csv("/kaggle/working/bert_scl_lora.csv", index=False, quoting=3)
logging.info('result saved!')