In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "3"

import numpy as np
import torch

from transformers import AutoTokenizer, AutoModelForQuestionAnswering, TrainingArguments, Trainer
from torch.utils.data import Dataset
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training, AdaLoraConfig

import logging

from datasets import load_dataset

# Load the "rte" configuration of the "glue" dataset (all available splits).
raw_datasets = load_dataset("glue", "rte")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import AutoTokenizer, AutoModelForMaskedLM, AutoConfig
#from roberta import RobertaForSequenceClassification


# Checkpoint name used for the tokenizer here and for the model loaded later.
model_name = "microsoft/deberta-v3-base"

# Tokenizer that belongs to the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [3]:
from transformers import AutoTokenizer, DataCollatorWithPadding


# Pad on the left so that the prompt's trailing mask token keeps the same
# offset from the end of the sequence for every example in a padded batch.
tokenizer.padding_side = 'left'

# Truncate on the left for the same reason: every prompt ends with the mask
# token, and the default right-side truncation would silently cut it off for
# any prompt longer than max_length. Dropping the start of the premise keeps
# the question and the mask intact.
tokenizer.truncation_side = 'left'

# Literal mask token string that is appended to each prompt.
mask_token = tokenizer.mask_token

# Raw text columns removed from the dataset once they have been tokenized.
col_to_delete = ['sentence1','sentence2']

def preprocessing_function(examples):
    """Turn a batch of sentence pairs into tokenized cloze-style prompts.

    Args:
        examples: Batch mapping with parallel "sentence1" (premise) and
            "sentence2" (hypothesis) sequences.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the list of
        prompts (no padding, truncation enabled, max_length=512).
    """
    prompts = []
    sentence_pairs = zip(examples["sentence1"], examples["sentence2"])
    for premise, hypothesis in sentence_pairs:
        # Each prompt ends with the mask token the model is expected to fill.
        prompts.append(
            f"Premise: {premise} Hypothesis: {hypothesis} "
            f"Does the premise imply the hypothesis? Answer:{mask_token}"
        )
    # Padding is left to the data collator, which pads per batch.
    return tokenizer(prompts, padding=False, truncation=True, max_length=512)

# Build prompts and tokenize every split in batches, dropping the raw
# sentence columns afterwards.
tokenized_datasets = raw_datasets.map(
    preprocessing_function,
    batched=True,
    remove_columns=col_to_delete,
)
# Make the tokenized datasets return torch-formatted values when indexed.
tokenized_datasets.set_format("torch")

# Data collator for padding a batch of examples to the maximum length seen in the batch
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

Map: 100%|██████████| 277/277 [00:00<00:00, 10967.73 examples/s]


In [4]:
# Sanity check: decode one validation example back to text to inspect the prompt.
# Index the row first and the column second, so only that single example is
# loaded instead of the whole 'input_ids' column.
tokenizer.decode(tokenized_datasets['validation'][10]['input_ids'])

'[CLS] Premise: The international humanitarian aid organization, Doctors Without Borders/Medecins Sans Frontieres (MSF), continues to treat victims of violence in all locations where it is present in Darfur. Hypothesis: Doctors Without Borders is an international aid organization. Does the premise imply the hypothesis? Answer:[MASK][SEP]'

In [5]:
# Show the vocabulary id this tokenizer assigns to its mask token.
tokenizer.mask_token_id

128000

In [6]:
import torch
import torch.nn as nn
from transformers import RobertaForSequenceClassification
from transformers.activations import ACT2FN
import random
from modeling import MLMSequenceClassification

# Two-label classification config for the chosen checkpoint.
config = AutoConfig.from_pretrained(model_name, num_labels=2)
# Take the mask id from the tokenizer instead of hard-coding it. The previous
# literal 50264 is the <mask> id of the RoBERTa vocabulary, not of this
# tokenizer, so the config and the keyword argument below disagreed.
# NOTE(review): which of the two values MLMSequenceClassification reads is not
# visible in this notebook; keeping them identical removes the ambiguity.
config.mask_token_id = tokenizer.mask_token_id
model = MLMSequenceClassification.from_pretrained(model_name, config=config, mask_token_id=tokenizer.mask_token_id)


Some weights of DebertaV2ForMaskedLM were not initialized from the model checkpoint at microsoft/deberta-v3-base and are newly initialized: ['cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [7]:
import RoCoFT

# Apply RoCoFT to the model in place, using the 'column' method with rank 3.
# An explicit module list can be given instead of the defaults, e.g.
# targets=['key', 'value', 'dense', 'query'].
RoCoFT.PEFT(model, rank=3, method="column")

In [8]:
# Display the module tree; the repr below shows the attention projections and
# the attention output dense layer as `column()` modules.
model

MLMSequenceClassification(
  (transformer): DebertaV2ForMaskedLM(
    (deberta): DebertaV2Model(
      (embeddings): DebertaV2Embeddings(
        (word_embeddings): Embedding(128100, 768, padding_idx=0)
        (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): DebertaV2Encoder(
        (layer): ModuleList(
          (0-11): 12 x DebertaV2Layer(
            (attention): DebertaV2Attention(
              (self): DisentangledSelfAttention(
                (query_proj): column()
                (key_proj): column()
                (value_proj): column()
                (pos_dropout): Dropout(p=0.1, inplace=False)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): DebertaV2SelfOutput(
                (dense): column()
                (LayerNorm): LayerNorm((768,), eps=1e-07, elementwise_affine=True)
                (dropout): Dropout(p=0.1, inplace=False)


In [9]:
import evaluate
import numpy as np
from sklearn import metrics
import torch
import numpy as np

def compute_metrics(eval_pred):
    """Compute macro precision/recall/F1 and accuracy for an evaluation pass.

    Args:
        eval_pred: Pair ``(logits, labels)``; the predicted class is the
            argmax of ``logits`` over the last axis.

    Returns:
        dict with the keys "precision", "recall", "f1-score" (all
        macro-averaged) and "accuracy".
    """
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    # Scorers that share the macro-averaging call signature.
    macro_scorers = {
        "precision": metrics.precision_score,
        "recall": metrics.recall_score,
        "f1-score": metrics.f1_score,
    }
    results = {
        key: scorer(labels, predictions, average="macro")
        for key, scorer in macro_scorers.items()
    }
    results['accuracy'] = metrics.accuracy_score(labels, predictions)
    return results

In [10]:
from transformers import TrainingArguments, Trainer

import time
from transformers import Trainer, TrainingArguments
# Evaluate and checkpoint on the same cadence. load_best_model_at_end can only
# restore a checkpoint that was actually written; with the previous
# save_steps=10000000 (far beyond the total number of training steps) nothing
# was ever saved, so the flag had no effect and the last-step weights were kept.
EVAL_EVERY_N_STEPS = 100

training_args = TrainingArguments(
    output_dir='dir',
    learning_rate=2e-3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=20,
    weight_decay=0.20,
    evaluation_strategy="steps",
    eval_steps=EVAL_EVERY_N_STEPS,
    save_strategy="steps",
    # Must be a multiple of eval_steps when load_best_model_at_end is set.
    save_steps=EVAL_EVERY_N_STEPS,
    # Bounds how many checkpoints are kept on disk at once.
    save_total_limit=2,
    logging_steps=100,

    load_best_model_at_end=True,
    # Select the best checkpoint by the "accuracy" entry of compute_metrics
    # rather than by the default (evaluation loss).
    metric_for_best_model="accuracy",
    greater_is_better=True,
    lr_scheduler_type="cosine",  # You can choose from 'linear', 'cosine', 'cosine_with_restarts', 'polynomial', etc.
    warmup_steps=100,
)
# NOTE(review): checkpoints are now written to output_dir every
# EVAL_EVERY_N_STEPS steps; confirm the wrapped model saves and reloads cleanly.

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],

    data_collator=data_collator,
    compute_metrics=compute_metrics
)



[2025-05-05 18:03:14,119] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)


/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: cannot find -laio: No such file or directory
collect2: error: ld returned 1 exit status
/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::runtime_error::~runtime_error()@GLIBCXX_3.4'
/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `__gxx_personality_v0@CXXABI_1.3'
/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::ostream::tellp()@GLIBCXX_3.4'
/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::chrono::_V2::steady_clock::now()@GLIBCXX_3.4.19'
/home/guangyu/anaconda3/envs/MD/compiler_compat/ld: /usr/local/cuda/lib64/libcufile.so: undefined reference to `std::string::_M_replace_aux(unsigned long, unsigned long, unsigned long, char)@GLIBCXX_3.4'
/home/guangyu/anaconda3/envs/MD/comp

In [11]:
# Run training; the returned TrainOutput is displayed as the cell result.
trainer.train()

Got mask position:  tensor(-2, device='cuda:0')


Step,Training Loss,Validation Loss,Precision,Recall,F1-score,Accuracy
100,0.7122,0.697416,0.486162,0.498824,0.338145,0.472924
200,0.6789,0.661114,0.789683,0.59542,0.527091,0.617329
300,0.5464,0.452411,0.789335,0.783828,0.784805,0.787004
400,0.4111,0.489615,0.823436,0.824349,0.823022,0.823105
500,0.3887,0.420942,0.819895,0.817787,0.818451,0.819495
600,0.3033,0.597287,0.808276,0.809082,0.808415,0.808664
700,0.2557,0.501176,0.824235,0.815827,0.817205,0.819495
800,0.1811,0.72901,0.818071,0.808193,0.809572,0.812274
900,0.185,0.710939,0.829495,0.793213,0.7936,0.801444
1000,0.1621,0.610959,0.844995,0.833342,0.83506,0.837545


TrainOutput(global_step=3120, training_loss=0.15589709866505402, metrics={'train_runtime': 1049.9128, 'train_samples_per_second': 47.433, 'train_steps_per_second': 2.972, 'total_flos': 51959605101600.0, 'train_loss': 0.15589709866505402, 'epoch': 20.0})