# Starter Notebook

Install and import required libraries

In [1]:
# !pip install transformers datasets evaluate accelerate peft trl bitsandbytes
# !pip install nvidia-ml-py3

In [2]:
import os
import pandas as pd
import torch
import random
import re
import nltk
import pickle
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.char as nac

from transformers import (
    RobertaModel,
    RobertaTokenizer, 
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    RobertaForSequenceClassification,
    EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, ClassLabel

# Download required NLTK data
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
# 设置设备
device = torch.device('cuda:1' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f"使用设备: {device}")

使用设备: cuda:1


In [4]:
import wandb
wandb.login(key="2008ab8d896bfc68619ace7f820e0513468b9783", relogin=True)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/viewsetting/.netrc


True

In [5]:
# Get current wandb entity
current_entity = wandb.api.default_entity
print(f"Current wandb entity: {current_entity}")


Current wandb entity: jl10897-new-york-university


## Load Tokenizer and Preprocess Data

In [6]:
# base_model = 'roberta-base'
# # base_model = 'roberta-large'

# dataset = load_dataset('ag_news', split='train')
# tokenizer = RobertaTokenizer.from_pretrained(base_model)

# def clean_text(text):
#     text = re.sub(r"<.*?>", "", text)                 # 去除 HTML 标签
#     text = re.sub(r"http\S+|www\S+", "", text)        # 移除 URL
#     text = re.sub(r"[^A-Za-z0-9.,!?;:'\"()\[\]\s]", "", text)  # 去除特殊字符
#     text = re.sub(r"\s+", " ", text).strip()          # 去除多余空格
#     return text.lower()                               # 可选：统一小写
    
# def preprocess(examples):
#     cleaned_texts = [clean_text(t) for t in examples['text']]
#     return tokenizer(cleaned_texts, truncation=True, padding=True, max_length=256)

# tokenized_dataset = dataset.map(preprocess, batched=True,  remove_columns=["text"])
# tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

In [7]:
nltk.download('averaged_perceptron_tagger_eng', download_dir='/home/viewsetting/nltk_data')



[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [8]:
nltk.data.path.append('/home/viewsetting/nltk_data')

In [9]:
base_model = 'roberta-base'
# base_model = 'roberta-large'

dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

# Initialize augmenters
synonym_aug = naw.SynonymAug(aug_src='wordnet', aug_p=0.3)
delete_aug = naw.RandomWordAug(action='delete', aug_p=0.1)
swap_aug = naw.RandomWordAug(action='swap', aug_p=0.1)
typo_aug = nac.RandomCharAug(action='swap', aug_char_p=0.05, aug_word_p=0.1)

def clean_text(text):
    text = re.sub(r"<.*?>", "", text)                 # 去除 HTML 标签
    text = re.sub(r"http\S+|www\S+", "", text)        # 移除 URL
    text = re.sub(r"[^A-Za-z0-9.,!?;:'\"()\[\]\s]", "", text)  # 去除特殊字符
    text = re.sub(r"\s+", " ", text).strip()          # 去除多余空格
    return text.lower()                               # 统一小写

def simple_paraphrase(text):
    # Simple rule-based paraphrasing
    replacements = {
        'said': 'stated',
        'big': 'large',
        'small': 'tiny',
        'good': 'excellent',
        'bad': 'poor',
        'buy': 'purchase',
        'sell': 'trade',
        'make': 'create',
        'show': 'display',
        'start': 'begin'
    }
    words = text.split()
    for i, word in enumerate(words):
        if word.lower() in replacements and random.random() < 0.3:
            words[i] = replacements[word.lower()]
    return ' '.join(words)

def preprocess_with_augmentation(examples, augmentations=None):
    if augmentations is None:
        augmentations = {
            'synonym': False,
            'delete': False,
            'swap': False,
            'paraphrase': False,
            'noise': False
        }

    # Clean texts first
    cleaned_texts = [clean_text(t) for t in examples['text']]
    augmented_texts = []

    for text in cleaned_texts:
        aug_text = text
        if augmentations.get('synonym', False):
            aug_text = synonym_aug.augment(aug_text)[0]
        if augmentations.get('delete', False):
            aug_text = delete_aug.augment(aug_text)[0]
        if augmentations.get('swap', False):
            aug_text = swap_aug.augment(aug_text)[0]
        if augmentations.get('paraphrase', False):
            aug_text = simple_paraphrase(aug_text)
        if augmentations.get('noise', False):
            aug_text = typo_aug.augment(aug_text)[0]
        augmented_texts.append(aug_text)

    # Tokenize with same parameters as original
    return tokenizer(augmented_texts, truncation=True, padding=True, max_length=256)

# Example augmentation configuration
augmentation_config = {
    'synonym': True,
    'delete': True,
    'swap': True,
    'paraphrase': True,
    'noise': True
}

tokenized_dataset = dataset.map(
    lambda examples: preprocess_with_augmentation(examples, augmentation_config),
    batched=True,
    remove_columns=["text"]
)
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")



Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

In [10]:
# Extract the number of classess and their names
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Create an id2label mapping
# We will need this for our classifier.
id2label = {i: label for i, label in enumerate(class_names)}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


## Load Pre-trained Model
Set up config for pretrained model and download it from hugging face

In [11]:
model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label)
model

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

## Anything from here on can be modified

In [12]:
# Split the original training set
split_datasets = tokenized_dataset.train_test_split(test_size=640, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

## Setup LoRA Config
Setup PEFT config and get peft model for finetuning

In [13]:
# PEFT Config
# peft_config = LoraConfig(
#     r=2,
#     lora_alpha=4,
#     lora_dropout=0.05,
#     bias = 'none',
#     target_modules = ['query'],
#     task_type="SEQ_CLS",
# )
peft_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query", "value"],
    task_type="SEQ_CLS"
)

In [14]:
# !module load gcc
# !which gcc


In [15]:
# import os
# os.environ["CC"] = "/share/apps/NYUAD5/gcc/9.2.0/bin/gcc"


In [16]:
peft_model = get_peft_model(model, peft_config)
peft_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): RobertaForSequenceClassification(
      (roberta): RobertaModel(
        (embeddings): RobertaEmbeddings(
          (word_embeddings): Embedding(50265, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): RobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x RobertaLayer(
              (attention): RobertaAttention(
                (self): RobertaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): Module

In [17]:
# print("Trainable parameters:")
# for name, param in peft_model.named_parameters():
#     if param.requires_grad:
#         print(name)

In [18]:
print('PEFT Model')
peft_model.print_trainable_parameters()

PEFT Model
trainable params: 741,124 || all params: 125,389,832 || trainable%: 0.5911


## Training Setup

In [19]:
# To track evaluation accuracy during training
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    # Calculate accuracy
    accuracy = accuracy_score(labels, preds)
    return {
        'accuracy': accuracy
    }

In [23]:
import wandb
import seaborn as sns
import matplotlib.pyplot as plt
from typing import Dict
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    confusion_matrix,
    classification_report
)

def compute_metrics(pred):
    """
    Compute and log comprehensive evaluation metrics
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    probs = torch.nn.functional.softmax(torch.tensor(pred.predictions), dim=-1)
    
    # Basic metrics
    accuracy = accuracy_score(labels, preds)
    precision_macro = precision_score(labels, preds, average='macro')
    precision_micro = precision_score(labels, preds, average='micro')
    precision_per_class = precision_score(labels, preds, average=None)
    
    recall_macro = recall_score(labels, preds, average='macro')
    recall_micro = recall_score(labels, preds, average='micro')
    recall_per_class = recall_score(labels, preds, average=None)
    
    f1_macro = f1_score(labels, preds, average='macro')
    f1_micro = f1_score(labels, preds, average='micro')
    f1_per_class = f1_score(labels, preds, average=None)
    
    # Per-class metrics
    metrics_per_class = {}
    for i, class_name in enumerate(id2label.values()):
        metrics_per_class.update({
            f'precision_class_{class_name}': precision_per_class[i],
            f'recall_class_{class_name}': recall_per_class[i],
            f'f1_class_{class_name}': f1_per_class[i]
        })
    
    # Confusion matrix - only log to wandb
    cm = confusion_matrix(labels, preds)
    plt.figure(figsize=(10,8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=list(id2label.values()),
                yticklabels=list(id2label.values()))
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # Log confusion matrix to wandb
    wandb.log({'eval/confusion_matrix': wandb.Image(plt)})
    plt.close()
    
    # Confidence histograms - only log to wandb
    for i in range(len(id2label)):
        plt.figure(figsize=(8,6))
        class_probs = probs[:, i].numpy()
        plt.hist(class_probs, bins=50)
        plt.title(f'Confidence Distribution - Class {id2label[i]}')
        plt.xlabel('Confidence')
        plt.ylabel('Count')
        wandb.log({f'eval/confidence_dist_class_{id2label[i]}': wandb.Image(plt)})
        plt.close()

    # Log metrics to wandb
    wandb.log({
        'eval/accuracy': accuracy,
        'eval/precision_macro': precision_macro,
        'eval/precision_micro': precision_micro,
        'eval/recall_macro': recall_macro,
        'eval/recall_micro': recall_micro,
        'eval/f1_macro': f1_macro,
        'eval/f1_micro': f1_micro,
    })
    
    # Return only JSON-serializable metrics
    return {
        'accuracy': accuracy,
        'precision_macro': precision_macro,
        'precision_micro': precision_micro,
        'recall_macro': recall_macro,
        'recall_micro': recall_micro,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        **metrics_per_class
    }

class CustomTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.gradient_norm = 0.0

    def training_step(self, model, inputs):
        """Override training step to compute training metrics"""
        loss = super().training_step(model, inputs)
        
        # Compute training accuracy
        with torch.no_grad():
            outputs = model(**inputs)
            predictions = outputs.logits.argmax(-1)
            accuracy = (predictions == inputs['labels']).float().mean()
            
            # Log training metrics
            if self.state.global_step % self.args.logging_steps == 0:
                wandb.log({
                    'train/loss': loss.item(),
                    'train/accuracy': accuracy.item(),
                    'train/step': self.state.global_step
                })
        
        # Compute gradient norm
        if self.args.gradient_checkpointing:
            self.gradient_norm = torch.norm(
                torch.stack([
                    torch.norm(p.grad.detach())
                    for p in model.parameters()
                    if p.grad is not None
                ])
            ).item()
        
        return loss

    def log(self, logs: Dict[str, float]) -> None:
        """
        Enhanced logging with additional training metrics
        """
        if self.state.global_step % self.args.logging_steps == 0:
            # Log learning rates
            if hasattr(self.optimizer, "param_groups"):
                current_lr = self.optimizer.param_groups[0]['lr']
                wandb.log({
                    'train/learning_rate': current_lr,
                    'train/step': self.state.global_step
                })
            
            # Log gradient norm
            logs["train/gradient_norm"] = self.gradient_norm
            
            # Log batch size
            logs["train/batch_size"] = self.args.per_device_train_batch_size
            
            # Log memory usage if using GPU
            if torch.cuda.is_available():
                logs["system/gpu_memory_allocated"] = torch.cuda.memory_allocated() / 1024**2  # MB
                logs["system/gpu_memory_cached"] = torch.cuda.memory_reserved() / 1024**2  # MB
            
            # Log parameter statistics
            for name, param in self.model.named_parameters():
                if param.requires_grad:
                    logs[f"parameters/mean/{name}"] = param.data.mean().item()
                    logs[f"parameters/std/{name}"] = param.data.std().item()
                    if param.grad is not None:
                        logs[f"gradients/mean/{name}"] = param.grad.data.mean().item()
                        logs[f"gradients/std/{name}"] = param.grad.data.std().item()
            
            # Log all metrics to wandb
            wandb.log(logs)
        
        super().log(logs)

# Setup Training args
output_dir = "results"

training_args = TrainingArguments(
    output_dir=output_dir,
    report_to='wandb',
    eval_strategy='steps',
    logging_steps=100,
    eval_steps=200,
    save_steps=400,
    save_total_limit=2,

    learning_rate=1e-4,  # LoRA
    warmup_ratio=0.1,
    num_train_epochs=2,
    
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,

    optim="adamw_torch",  # BETTER THAN SGD
    weight_decay=0.01,

    gradient_checkpointing=False,
    dataloader_num_workers=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    run_name="lora_finetuning_run_3",
    
    logging_first_step=True, 
    logging_nan_inf_filter=False, 
    logging_strategy="steps",  
    label_names=["labels"]
)

def get_trainer(model):
    """
    Create a trainer instance with enhanced logging
    """
    # Initialize wandb with detailed config
    wandb.init(
        project="ag_news_classification",
        name=f"roberta_lora_{wandb.util.generate_id()}",
        config={
            "model_name": base_model,
            "lora_config": {
                "r": peft_config.r,
                "alpha": peft_config.lora_alpha,
                "dropout": peft_config.lora_dropout,
                "target_modules": peft_config.target_modules,
            },
            "training_config": {
                "learning_rate": training_args.learning_rate,
                "batch_size": training_args.per_device_train_batch_size,
                "epochs": training_args.num_train_epochs,
                "warmup_ratio": training_args.warmup_ratio,
                "weight_decay": training_args.weight_decay,
            },
            "augmentation_config": augmentation_config,
            "dataset": "ag_news",
            "train_size": len(train_dataset),
            "eval_size": len(eval_dataset),
        }
    )
    
    model.config.label2id = {label: i for i, label in enumerate(class_names)}
    model.config.id2label = {i: label for i, label in enumerate(class_names)}
    
    return CustomTrainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

### Start Training

In [24]:
!export CUDA_VISIBLE_DEVICES=3


In [25]:
peft_lora_finetuning_trainer = get_trainer(peft_model)

result = peft_lora_finetuning_trainer.train()

VBox(children=(Label(value='0.269 MB of 0.269 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁▁██
eval/f1_class_Business,▁█
eval/f1_class_Sci/Tech,▁█
eval/f1_class_Sports,▁█
eval/f1_class_World,▁█
eval/f1_macro,▁▁██
eval/f1_micro,▁▁██
eval/loss,█▁
eval/precision_class_Business,▁█
eval/precision_class_Sci/Tech,▁█

0,1
eval/accuracy,0.86562
eval/f1_class_Business,0.83612
eval/f1_class_Sci/Tech,0.85417
eval/f1_class_Sports,0.93333
eval/f1_class_World,0.83688
eval/f1_macro,0.86512
eval/f1_micro,0.86562
eval/loss,0.40115
eval/precision_class_Business,0.85616
eval/precision_class_Sci/Tech,0.82


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113322799145762, max=1.0…

    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


Step,Training Loss,Validation Loss,Accuracy,Precision Macro,Precision Micro,Recall Macro,Recall Micro,F1 Macro,F1 Micro,Precision Class World,Recall Class World,F1 Class World,Precision Class Sports,Recall Class Sports,F1 Class Sports,Precision Class Business,Recall Class Business,F1 Class Business,Precision Class Sci/tech,Recall Class Sci/tech,F1 Class Sci/tech,Norm,Size,Memory Allocated,Memory Cached,Model.model.roberta.encoder.layer.0.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.0.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.0.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.0.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.1.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.1.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.1.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.1.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.2.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.2.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.2.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.2.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.3.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.3.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.3.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.3.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.4.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.4.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.4.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.4.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.5.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.5.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.5.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.5.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.6.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.6.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.6.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.6.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.7.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.7.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.7.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.7.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.8.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.8.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.8.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.8.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.9.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.9.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.9.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.9.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.10.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.10.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.10.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.10.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.11.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.11.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.11.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.11.attention.self.value.lora B.default.weight,Model.model.classifier.modules To Save.default.dense.weight,Model.model.classifier.modules To Save.default.dense.bias,Model.model.classifier.modules To Save.default.out Proj.weight,Model.model.classifier.modules To Save.default.out Proj.bias
200,0.3803,0.389086,0.867188,0.868075,0.867188,0.866257,0.867188,0.866989,0.867188,0.857143,0.827586,0.842105,0.919255,0.936709,0.9279,0.85906,0.836601,0.847682,0.836842,0.86413,0.850267,0.0,32,509.042969,7180.0,0.022732,0.00381,0.022968,0.004046,0.022732,0.00423,0.022411,0.003581,0.023229,0.004236,0.021484,0.003044,0.021844,0.003486,0.021598,0.002978,0.021525,0.003617,0.021303,0.003295,0.022309,0.00441,0.021971,0.004136,0.025216,0.005818,0.021339,0.004492,0.02227,0.005032,0.02227,0.00479,0.022005,0.005459,0.021868,0.004601,0.022469,0.005004,0.022002,0.004942,0.022624,0.004984,0.022246,0.005184,0.022406,0.005274,0.022267,0.00516,0.020315,0.000737,0.021368,0.000714
400,0.3592,0.384672,0.870313,0.874226,0.870313,0.867611,0.870313,0.870042,0.870313,0.856115,0.82069,0.838028,0.941935,0.924051,0.932907,0.881119,0.823529,0.851351,0.817734,0.902174,0.857881,0.0,32,509.072266,7180.0,0.023772,0.004824,0.023088,0.004319,0.023585,0.005013,0.022591,0.003891,0.024088,0.005031,0.021761,0.003497,0.02272,0.004486,0.021773,0.003322,0.022068,0.004463,0.021439,0.003566,0.022901,0.005062,0.022052,0.004319,0.026182,0.006288,0.021392,0.004635,0.022463,0.005488,0.022254,0.004935,0.022372,0.005949,0.0217,0.004689,0.023673,0.005798,0.022132,0.005157,0.023914,0.005776,0.022337,0.00547,0.022665,0.005876,0.022375,0.005451,0.020384,0.000971,0.021627,0.000896
600,0.3681,0.378458,0.875,0.877177,0.875,0.872655,0.875,0.874461,0.875,0.857143,0.827586,0.842105,0.941558,0.917722,0.929487,0.871622,0.843137,0.857143,0.838384,0.902174,0.86911,0.0,32,509.072266,7180.0,0.024646,0.00561,0.023253,0.004576,0.024156,0.005519,0.022778,0.004115,0.024493,0.005412,0.021851,0.003697,0.02299,0.004868,0.021837,0.003438,0.022348,0.004919,0.021581,0.003748,0.023373,0.005468,0.022065,0.004349,0.026289,0.006473,0.021517,0.004815,0.022624,0.005788,0.022325,0.00511,0.022671,0.006257,0.021691,0.004861,0.024333,0.006294,0.022171,0.005292,0.025608,0.006625,0.022472,0.005753,0.023375,0.006608,0.022544,0.005764,0.020421,0.001072,0.021609,0.000927
800,0.3468,0.343803,0.875,0.876354,0.875,0.874195,0.875,0.874662,0.875,0.893939,0.813793,0.851986,0.908537,0.943038,0.925466,0.848101,0.875817,0.861736,0.854839,0.86413,0.859459,0.0,32,509.072266,7180.0,0.025332,0.00614,0.02351,0.004806,0.024375,0.005835,0.022927,0.004253,0.024973,0.005812,0.02209,0.003899,0.023452,0.005326,0.021994,0.003636,0.02278,0.005377,0.021678,0.00387,0.023643,0.005768,0.022125,0.004437,0.026932,0.006709,0.021585,0.004913,0.022668,0.005981,0.022293,0.005173,0.022842,0.006502,0.021653,0.004928,0.025691,0.007013,0.022174,0.005383,0.029223,0.007857,0.022418,0.005909,0.023863,0.007138,0.022512,0.005788,0.020465,0.001205,0.0218,0.001073
1000,0.3399,0.337962,0.876563,0.877765,0.876563,0.876198,0.876563,0.876824,0.876563,0.837838,0.855172,0.846416,0.942308,0.93038,0.936306,0.884354,0.849673,0.866667,0.846561,0.869565,0.857909,0.0,32,509.072266,7180.0,0.025729,0.006479,0.02359,0.004906,0.024598,0.005996,0.023078,0.00437,0.025074,0.006015,0.022169,0.003997,0.023987,0.005729,0.022104,0.003778,0.022918,0.005553,0.021744,0.003953,0.023862,0.005956,0.0221,0.004446,0.027078,0.006846,0.021684,0.005025,0.022825,0.006182,0.022374,0.005288,0.022949,0.00669,0.021675,0.005007,0.026447,0.007369,0.022211,0.005456,0.029919,0.008246,0.022604,0.006096,0.024491,0.00764,0.022625,0.005962,0.020498,0.001301,0.021911,0.001195
1200,0.3333,0.332009,0.879687,0.882326,0.879687,0.878838,0.879687,0.880294,0.879687,0.877698,0.841379,0.859155,0.947712,0.917722,0.932476,0.864516,0.875817,0.87013,0.839378,0.880435,0.859416,0.0,32,509.072266,7180.0,0.026164,0.006794,0.023756,0.005034,0.024879,0.006191,0.023238,0.004472,0.025434,0.006274,0.022355,0.00414,0.024228,0.005943,0.022187,0.003888,0.02319,0.005776,0.021817,0.004091,0.024157,0.006152,0.022147,0.004489,0.027315,0.006954,0.021777,0.005083,0.022973,0.006315,0.022402,0.005357,0.023257,0.006914,0.021633,0.005042,0.027255,0.007643,0.022243,0.005501,0.030547,0.008493,0.022575,0.006167,0.024929,0.007948,0.022702,0.006083,0.020522,0.001366,0.022021,0.001254
1400,0.3255,0.326597,0.884375,0.885964,0.884375,0.88469,0.884375,0.885244,0.884375,0.863946,0.875862,0.869863,0.948052,0.924051,0.935897,0.880795,0.869281,0.875,0.851064,0.869565,0.860215,0.0,32,509.072266,7180.0,0.026432,0.006978,0.023828,0.005086,0.024905,0.006238,0.023232,0.004486,0.02543,0.006322,0.02237,0.004155,0.024279,0.006009,0.022221,0.003931,0.023284,0.005872,0.02187,0.004145,0.024354,0.006301,0.022147,0.004494,0.027371,0.007004,0.021805,0.005108,0.023058,0.006452,0.022436,0.005407,0.023296,0.007003,0.02168,0.005106,0.027652,0.007822,0.022291,0.005543,0.030753,0.008615,0.022718,0.00627,0.025136,0.008101,0.022749,0.006163,0.020541,0.001402,0.022136,0.001274
1600,0.3183,0.321465,0.884375,0.886761,0.884375,0.884144,0.884375,0.885291,0.884375,0.873239,0.855172,0.864111,0.954248,0.924051,0.938907,0.876623,0.882353,0.879479,0.842932,0.875,0.858667,0.0,32,509.072266,7180.0,0.026632,0.007106,0.0239,0.005128,0.024995,0.006301,0.023279,0.004513,0.025456,0.006357,0.022422,0.004177,0.024337,0.006051,0.022287,0.003986,0.023356,0.005942,0.02191,0.004187,0.024467,0.00636,0.02218,0.004529,0.027564,0.007076,0.021852,0.005166,0.023104,0.006479,0.022445,0.005413,0.023331,0.007027,0.021671,0.005105,0.027976,0.007941,0.022321,0.005568,0.030956,0.008667,0.022746,0.006314,0.025211,0.00814,0.022737,0.006149,0.020549,0.001458,0.022164,0.001365
1800,0.3267,0.316439,0.889062,0.892092,0.889062,0.888534,0.889062,0.890078,0.889062,0.880282,0.862069,0.87108,0.954545,0.93038,0.942308,0.893333,0.875817,0.884488,0.840206,0.88587,0.862434,0.0,32,509.072266,7180.0,0.026752,0.007177,0.023941,0.005151,0.025055,0.006336,0.023311,0.004528,0.025518,0.006405,0.02245,0.004189,0.024393,0.006087,0.0223,0.003995,0.023399,0.005973,0.021927,0.004216,0.024487,0.006379,0.022197,0.004543,0.02755,0.007086,0.021869,0.005188,0.023121,0.006509,0.022465,0.005427,0.023339,0.007038,0.021686,0.005112,0.028066,0.007975,0.022328,0.005574,0.030983,0.008687,0.022784,0.006333,0.025306,0.008176,0.022754,0.006171,0.02055,0.001456,0.022165,0.001355


    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argumen

In [26]:
# 保存模型
peft_model_path = os.path.join(output_dir, "peft_model")
peft_model.save_pretrained(peft_model_path)
# 保存tokenizer
tokenizer.save_pretrained(peft_model_path)
print(f"Model saved to {peft_model_path}")


Model saved to results/peft_model


## Evaluate Finetuned Model


### Performing Inference on Custom Input
Uncomment following functions for running inference on custom inputs

In [27]:
# def classify(model, tokenizer, text):
#     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#     inputs = tokenizer(text, truncation=True, padding=True, return_tensors="pt").to(device)
#     output = model(**inputs)

#     prediction = output.logits.argmax(dim=-1).item()

#     print(f'\n Class: {prediction}, Label: {id2label[prediction]}, Text: {text}')
#     return id2label[prediction]

In [28]:
# classify( peft_model, tokenizer, "Kederis proclaims innocence Olympic champion Kostas Kederis today left hospital ahead of his date with IOC inquisitors claiming his ...")
# classify( peft_model, tokenizer, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - Short-sellers, Wall Street's dwindling\band of ultra-cynics, are seeing green again.")

### Run Inference on eval_dataset

In [29]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=32, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

In [30]:
# Check evaluation accuracy
_, _ = evaluate_model(peft_model, eval_dataset, True, 32, data_collator)

100%|██████████| 20/20 [00:04<00:00,  4.19it/s]

Evaluation Metric: {'accuracy': 0.88125}





### Run Inference on unlabelled dataset

In [31]:
#Load your unlabelled data
unlabelled_dataset = pd.read_pickle("/home/viewsetting/ssd_2T/test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess_with_augmentation, batched=True, remove_columns=["text"])
unlabelled_dataset

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 8000
})

In [32]:
# Run inference and save predictions
preds = evaluate_model(peft_model, test_dataset, False, 32, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(output_dir,"inference_output.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

100%|██████████| 250/250 [00:31<00:00,  7.94it/s]

Inference complete. Predictions saved to inference_output.csv



