# Starter Notebook

Install and import required libraries

In [1]:
# !pip install transformers datasets evaluate accelerate peft trl bitsandbytes
# !pip install nvidia-ml-py3

In [2]:
import os
import pandas as pd
import torch
import random
import re
import nltk
import pickle
import nlpaug.augmenter.word as naw
import nlpaug.augmenter.char as nac

from transformers import (
    RobertaModel,
    RobertaTokenizer, 
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
    RobertaForSequenceClassification,
    EarlyStoppingCallback
)
from peft import LoraConfig, get_peft_model, PeftModel
from datasets import load_dataset, Dataset, ClassLabel

# Download required NLTK data
nltk.download('wordnet')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [3]:
# 设置设备
device = torch.device('cuda:1' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
print(f"Using Device: {device}")

Using Device: cuda:1


In [4]:
import wandb
wandb.login(key="2008ab8d896bfc68619ace7f820e0513468b9783", relogin=True)


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/viewsetting/.netrc


True

In [5]:
# Get current wandb entity
current_entity = wandb.api.default_entity
print(f"Current wandb entity: {current_entity}")


Current wandb entity: jl10897-new-york-university


## Load Tokenizer and Preprocess Data

In [6]:
# base_model = 'roberta-base'
# # base_model = 'roberta-large'

# dataset = load_dataset('ag_news', split='train')
# tokenizer = RobertaTokenizer.from_pretrained(base_model)

# def clean_text(text):
#     text = re.sub(r"<.*?>", "", text)                 # 去除 HTML 标签
#     text = re.sub(r"http\S+|www\S+", "", text)        # 移除 URL
#     text = re.sub(r"[^A-Za-z0-9.,!?;:'\"()\[\]\s]", "", text)  # 去除特殊字符
#     text = re.sub(r"\s+", " ", text).strip()          # 去除多余空格
#     return text.lower()                               # 可选：统一小写
    
# def preprocess(examples):
#     cleaned_texts = [clean_text(t) for t in examples['text']]
#     return tokenizer(cleaned_texts, truncation=True, padding=True, max_length=256)

# tokenized_dataset = dataset.map(preprocess, batched=True,  remove_columns=["text"])
# tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

In [7]:
nltk.download('averaged_perceptron_tagger_eng', download_dir='/home/viewsetting/nltk_data')



[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/viewsetting/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [8]:
nltk.data.path.append('/home/viewsetting/nltk_data')

In [9]:
base_model = 'roberta-base'
# base_model = 'roberta-large'

dataset = load_dataset('ag_news', split='train')
tokenizer = RobertaTokenizer.from_pretrained(base_model)

# Initialize augmenters
synonym_aug = naw.SynonymAug(aug_src='wordnet', aug_p=0.3)
delete_aug = naw.RandomWordAug(action='delete', aug_p=0.1)
swap_aug = naw.RandomWordAug(action='swap', aug_p=0.1)
typo_aug = nac.RandomCharAug(action='swap', aug_char_p=0.05, aug_word_p=0.1)

def clean_text(text):
    text = re.sub(r"<.*?>", "", text)                 # 去除 HTML 标签
    text = re.sub(r"http\S+|www\S+", "", text)        # 移除 URL
    text = re.sub(r"[^A-Za-z0-9.,!?;:'\"()\[\]\s]", "", text)  # 去除特殊字符
    text = re.sub(r"\s+", " ", text).strip()          # 去除多余空格
    return text.lower()                               # 统一小写

def sentence_shuffle(text):
    # Split text into sentences using simple punctuation-based splitting
    sentences = re.split(r'(?<=[.!?])\s+', text.strip())
    if len(sentences) < 2:  # Skip if only one sentence
        return text
    # Randomly shuffle sentences
    random.shuffle(sentences)
    # Rejoin with spaces, ensuring proper spacing
    return ' '.join(s.strip() for s in sentences if s.strip())

def preprocess_with_augmentation(examples, augmentations=None):
    if augmentations is None:
        augmentations = {
            'synonym': False,
            'delete': False,
            'swap': False,
            'shuffle': False,
            'noise': False
        }

    # Clean texts first
    cleaned_texts = [clean_text(t) for t in examples['text']]
    augmented_texts = []

    for text in cleaned_texts:
        aug_text = text
        if augmentations.get('synonym', False):
            aug_text = synonym_aug.augment(aug_text)[0]
        if augmentations.get('delete', False):
            aug_text = delete_aug.augment(aug_text)[0]
        if augmentations.get('swap', False):
            aug_text = swap_aug.augment(aug_text)[0]
        if augmentations.get('shuffle', False):
            aug_text = sentence_shuffle(aug_text)
        if augmentations.get('noise', False):
            aug_text = typo_aug.augment(aug_text)[0]
        augmented_texts.append(aug_text)

    # Tokenize with same parameters as original
    return tokenizer(augmented_texts, truncation=True, padding=True, max_length=256)

# Example augmentation configuration
augmentation_config = {
    'synonym': True,
    'delete': True,
    'swap': True,
    'shuffle': True,  # Enable sentence shuffling
    'noise': True
}



tokenized_dataset = dataset.map(
    lambda examples: preprocess_with_augmentation(examples, augmentation_config),
    batched=True,
    remove_columns=["text"]
)
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")



Map:   0%|          | 0/120000 [00:00<?, ? examples/s]

In [10]:
# Extract the number of classess and their names
num_labels = dataset.features['label'].num_classes
class_names = dataset.features["label"].names
print(f"number of labels: {num_labels}")
print(f"the labels: {class_names}")

# Create an id2label mapping
# We will need this for our classifier.
id2label = {i: label for i, label in enumerate(class_names)}

data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="pt")


number of labels: 4
the labels: ['World', 'Sports', 'Business', 'Sci/Tech']


## Load Pre-trained Model
Set up config for pretrained model and download it from hugging face

In [11]:
model = RobertaForSequenceClassification.from_pretrained(
    base_model,
    id2label=id2label)
model

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
             

## Anything from here on can be modified

In [12]:
# Split the original training set
split_datasets = tokenized_dataset.train_test_split(test_size=640, seed=42)
train_dataset = split_datasets['train']
eval_dataset = split_datasets['test']

## Setup LoRA Config
Setup PEFT config and get peft model for finetuning

In [13]:
peft_config = LoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["query", "value"],
    task_type="SEQ_CLS"
)

In [14]:
peft_model = get_peft_model(model, peft_config)
peft_model

PeftModelForSequenceClassification(
  (base_model): LoraModel(
    (model): RobertaForSequenceClassification(
      (roberta): RobertaModel(
        (embeddings): RobertaEmbeddings(
          (word_embeddings): Embedding(50265, 768, padding_idx=1)
          (position_embeddings): Embedding(514, 768, padding_idx=1)
          (token_type_embeddings): Embedding(1, 768)
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (encoder): RobertaEncoder(
          (layer): ModuleList(
            (0-11): 12 x RobertaLayer(
              (attention): RobertaAttention(
                (self): RobertaSelfAttention(
                  (query): lora.Linear(
                    (base_layer): Linear(in_features=768, out_features=768, bias=True)
                    (lora_dropout): ModuleDict(
                      (default): Dropout(p=0.05, inplace=False)
                    )
                    (lora_A): Module

In [15]:
print('PEFT Model')
peft_model.print_trainable_parameters()

PEFT Model
trainable params: 741,124 || all params: 125,389,832 || trainable%: 0.5911


## Training Setup

In [19]:
import wandb
import seaborn as sns
import matplotlib.pyplot as plt
from typing import Dict
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    confusion_matrix,
    classification_report
)

def compute_metrics(pred):
    """
    Compute and log comprehensive evaluation metrics
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    probs = torch.nn.functional.softmax(torch.tensor(pred.predictions), dim=-1)
    
    # Basic metrics
    accuracy = accuracy_score(labels, preds)
    precision_macro = precision_score(labels, preds, average='macro')
    precision_micro = precision_score(labels, preds, average='micro')
    precision_per_class = precision_score(labels, preds, average=None)
    
    recall_macro = recall_score(labels, preds, average='macro')
    recall_micro = recall_score(labels, preds, average='micro')
    recall_per_class = recall_score(labels, preds, average=None)
    
    f1_macro = f1_score(labels, preds, average='macro')
    f1_micro = f1_score(labels, preds, average='micro')
    f1_per_class = f1_score(labels, preds, average=None)
    
    # Per-class metrics
    metrics_per_class = {}
    for i, class_name in enumerate(id2label.values()):
        metrics_per_class.update({
            f'precision_class_{class_name}': precision_per_class[i],
            f'recall_class_{class_name}': recall_per_class[i],
            f'f1_class_{class_name}': f1_per_class[i]
        })
    
    # Confusion matrix - only log to wandb
    cm = confusion_matrix(labels, preds)
    plt.figure(figsize=(10,8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=list(id2label.values()),
                yticklabels=list(id2label.values()))
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    
    # Log confusion matrix to wandb
    wandb.log({'eval/confusion_matrix': wandb.Image(plt)})
    plt.close()
    
    # Confidence histograms - only log to wandb
    for i in range(len(id2label)):
        plt.figure(figsize=(8,6))
        class_probs = probs[:, i].numpy()
        plt.hist(class_probs, bins=50)
        plt.title(f'Confidence Distribution - Class {id2label[i]}')
        plt.xlabel('Confidence')
        plt.ylabel('Count')
        wandb.log({f'eval/confidence_dist_class_{id2label[i]}': wandb.Image(plt)})
        plt.close()

    # Log metrics to wandb
    wandb.log({
        'eval/accuracy': accuracy,
        'eval/precision_macro': precision_macro,
        'eval/precision_micro': precision_micro,
        'eval/recall_macro': recall_macro,
        'eval/recall_micro': recall_micro,
        'eval/f1_macro': f1_macro,
        'eval/f1_micro': f1_micro,
    })
    
    # Return only JSON-serializable metrics
    return {
        'accuracy': accuracy,
        'precision_macro': precision_macro,
        'precision_micro': precision_micro,
        'recall_macro': recall_macro,
        'recall_micro': recall_micro,
        'f1_macro': f1_macro,
        'f1_micro': f1_micro,
        **metrics_per_class
    }

class CustomTrainer(Trainer):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.gradient_norm = 0.0

    def training_step(self, model, inputs):
        """Override training step to compute training metrics"""
        loss = super().training_step(model, inputs)
        
        # Compute training accuracy
        with torch.no_grad():
            outputs = model(**inputs)
            predictions = outputs.logits.argmax(-1)
            accuracy = (predictions == inputs['labels']).float().mean()
            
            # Log training metrics
            if self.state.global_step % self.args.logging_steps == 0:
                wandb.log({
                    'train/loss': loss.item(),
                    'train/accuracy': accuracy.item(),
                    'train/step': self.state.global_step
                })
        
        # Compute gradient norm
        if self.args.gradient_checkpointing:
            self.gradient_norm = torch.norm(
                torch.stack([
                    torch.norm(p.grad.detach())
                    for p in model.parameters()
                    if p.grad is not None
                ])
            ).item()
        
        return loss

    def log(self, logs: Dict[str, float]) -> None:
        """
        Enhanced logging with additional training metrics
        """
        if self.state.global_step % self.args.logging_steps == 0:
            # Log learning rates
            if hasattr(self.optimizer, "param_groups"):
                current_lr = self.optimizer.param_groups[0]['lr']
                wandb.log({
                    'train/learning_rate': current_lr,
                    'train/step': self.state.global_step
                })
            
            # Log gradient norm
            logs["train/gradient_norm"] = self.gradient_norm
            
            # Log batch size
            logs["train/batch_size"] = self.args.per_device_train_batch_size
            
            # Log memory usage if using GPU
            if torch.cuda.is_available():
                logs["system/gpu_memory_allocated"] = torch.cuda.memory_allocated() / 1024**2  # MB
                logs["system/gpu_memory_cached"] = torch.cuda.memory_reserved() / 1024**2  # MB
            
            # Log parameter statistics
            for name, param in self.model.named_parameters():
                if param.requires_grad:
                    logs[f"parameters/mean/{name}"] = param.data.mean().item()
                    logs[f"parameters/std/{name}"] = param.data.std().item()
                    if param.grad is not None:
                        logs[f"gradients/mean/{name}"] = param.grad.data.mean().item()
                        logs[f"gradients/std/{name}"] = param.grad.data.std().item()
            
            # Log all metrics to wandb
            wandb.log(logs)
        
        super().log(logs)

# Setup Training args
output_dir = "results"

training_args = TrainingArguments(
    output_dir=output_dir,
    report_to='wandb',
    eval_strategy='steps',
    logging_steps=100,
    eval_steps=200,
    save_steps=400,
    save_total_limit=2,

    learning_rate=1e-4,  # LoRA
    warmup_ratio=0.1,
    num_train_epochs=2,
    
    per_device_train_batch_size=32,
    per_device_eval_batch_size=16,

    optim="adamw_torch",  # BETTER THAN SGD
    weight_decay=0.01,

    gradient_checkpointing=False,
    dataloader_num_workers=2,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    run_name="lora_finetuning_run_1",
    
    logging_first_step=True, 
    logging_nan_inf_filter=False, 
    logging_strategy="steps",  
    label_names=["labels"]
)

def get_trainer(model):
    """
    Create a trainer instance with enhanced logging
    """
    # Initialize wandb with detailed config
    wandb.init(
        project="ag_news_classification",
        name=f"roberta_lora_{wandb.util.generate_id()}",
        config={
            "model_name": base_model,
            "lora_config": {
                "r": peft_config.r,
                "alpha": peft_config.lora_alpha,
                "dropout": peft_config.lora_dropout,
                "target_modules": peft_config.target_modules,
            },
            "training_config": {
                "learning_rate": training_args.learning_rate,
                "batch_size": training_args.per_device_train_batch_size,
                "epochs": training_args.num_train_epochs,
                "warmup_ratio": training_args.warmup_ratio,
                "weight_decay": training_args.weight_decay,
            },
            "augmentation_config": augmentation_config,
            "dataset": "ag_news",
            "train_size": len(train_dataset),
            "eval_size": len(eval_dataset),
        }
    )
    
    model.config.label2id = {label: i for i, label in enumerate(class_names)}
    model.config.id2label = {i: label for i, label in enumerate(class_names)}
    
    return CustomTrainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator,
        callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]
    )

### Start Training

In [20]:
!export CUDA_VISIBLE_DEVICES=3


In [21]:
peft_lora_finetuning_trainer = get_trainer(peft_model)

result = peft_lora_finetuning_trainer.train()

VBox(children=(Label(value='0.271 MB of 0.271 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/accuracy,▁▁██
eval/f1_class_Business,▁█
eval/f1_class_Sci/Tech,▁█
eval/f1_class_Sports,▁█
eval/f1_class_World,▁█
eval/f1_macro,▁▁██
eval/f1_micro,▁▁██
eval/loss,█▁
eval/precision_class_Business,▁█
eval/precision_class_Sci/Tech,█▁

0,1
eval/accuracy,0.86875
eval/f1_class_Business,0.8505
eval/f1_class_Sci/Tech,0.85195
eval/f1_class_Sports,0.93082
eval/f1_class_World,0.84058
eval/f1_macro,0.86846
eval/f1_micro,0.86875
eval/loss,0.41224
eval/precision_class_Business,0.86486
eval/precision_class_Sci/Tech,0.81592


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114514077134017, max=1.0…

    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


Step,Training Loss,Validation Loss,Accuracy,Precision Macro,Precision Micro,Recall Macro,Recall Micro,F1 Macro,F1 Micro,Precision Class World,Recall Class World,F1 Class World,Precision Class Sports,Recall Class Sports,F1 Class Sports,Precision Class Business,Recall Class Business,F1 Class Business,Precision Class Sci/tech,Recall Class Sci/tech,F1 Class Sci/tech,Norm,Size,Memory Allocated,Memory Cached,Model.model.roberta.encoder.layer.0.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.0.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.0.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.0.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.1.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.1.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.1.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.1.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.2.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.2.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.2.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.2.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.3.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.3.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.3.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.3.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.4.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.4.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.4.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.4.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.5.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.5.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.5.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.5.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.6.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.6.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.6.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.6.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.7.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.7.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.7.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.7.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.8.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.8.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.8.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.8.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.9.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.9.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.9.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.9.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.10.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.10.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.10.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.10.attention.self.value.lora B.default.weight,Model.model.roberta.encoder.layer.11.attention.self.query.lora A.default.weight,Model.model.roberta.encoder.layer.11.attention.self.query.lora B.default.weight,Model.model.roberta.encoder.layer.11.attention.self.value.lora A.default.weight,Model.model.roberta.encoder.layer.11.attention.self.value.lora B.default.weight,Model.model.classifier.modules To Save.default.dense.weight,Model.model.classifier.modules To Save.default.dense.bias,Model.model.classifier.modules To Save.default.out Proj.weight,Model.model.classifier.modules To Save.default.out Proj.bias
200,0.3766,0.400551,0.865625,0.866022,0.865625,0.865991,0.865625,0.865253,0.865625,0.874074,0.813793,0.842857,0.914634,0.949367,0.931677,0.806061,0.869281,0.836478,0.869318,0.831522,0.85,0.0,32,509.058594,7180.0,0.022759,0.003786,0.022423,0.00419,0.022531,0.004126,0.021982,0.003447,0.022723,0.003792,0.020947,0.002617,0.0212,0.00293,0.021318,0.002649,0.021352,0.003503,0.021495,0.003727,0.022184,0.004106,0.02163,0.004381,0.025257,0.005807,0.021708,0.004687,0.022085,0.004866,0.022372,0.004944,0.021889,0.005192,0.021938,0.004798,0.022689,0.005318,0.022232,0.004999,0.022864,0.005352,0.021942,0.004734,0.022139,0.004722,0.021899,0.004646,0.020327,0.000737,0.021308,0.000763
400,0.351,0.387417,0.878125,0.880441,0.878125,0.875772,0.878125,0.877421,0.878125,0.886364,0.806897,0.844765,0.930818,0.936709,0.933754,0.862745,0.862745,0.862745,0.841837,0.896739,0.868421,0.0,32,509.072266,7180.0,0.02406,0.004979,0.022885,0.004656,0.023371,0.005038,0.022119,0.003734,0.023852,0.004742,0.021151,0.003122,0.021919,0.004053,0.021508,0.003121,0.021794,0.004463,0.021691,0.004063,0.022874,0.004889,0.021727,0.004599,0.026179,0.006304,0.02172,0.004868,0.022544,0.005519,0.022324,0.00507,0.022304,0.005667,0.021856,0.004922,0.02364,0.006152,0.022289,0.005214,0.024172,0.006177,0.021896,0.004924,0.022438,0.005304,0.021947,0.004852,0.020405,0.000934,0.021633,0.000963
600,0.3672,0.365845,0.871875,0.873355,0.871875,0.870742,0.871875,0.871637,0.871875,0.88806,0.82069,0.853047,0.925466,0.943038,0.934169,0.833333,0.849673,0.841424,0.846561,0.869565,0.857909,0.0,32,509.072266,7180.0,0.024945,0.005692,0.023102,0.004925,0.023901,0.005417,0.022195,0.003949,0.024593,0.005285,0.0214,0.003521,0.02227,0.004589,0.021677,0.003372,0.022132,0.005113,0.021814,0.004217,0.02318,0.005264,0.021799,0.004727,0.026523,0.00654,0.021837,0.005009,0.022832,0.005864,0.022489,0.005271,0.022733,0.006188,0.021885,0.005027,0.024115,0.006558,0.022382,0.005376,0.025124,0.006715,0.022006,0.005111,0.023081,0.006127,0.022067,0.004983,0.020447,0.001145,0.0217,0.001223
800,0.351,0.363798,0.88125,0.885109,0.88125,0.878787,0.88125,0.880826,0.88125,0.907692,0.813793,0.858182,0.947368,0.911392,0.929032,0.833333,0.882353,0.857143,0.852041,0.907609,0.878947,0.0,32,509.072266,7180.0,0.025641,0.006207,0.023501,0.005229,0.024301,0.005725,0.022608,0.004218,0.02478,0.005602,0.02159,0.003744,0.022629,0.005022,0.021837,0.003597,0.022436,0.005594,0.021918,0.004341,0.023536,0.00564,0.021924,0.004843,0.026955,0.006783,0.022003,0.005185,0.023138,0.006198,0.022407,0.005293,0.023099,0.006513,0.021907,0.005125,0.024568,0.006946,0.022445,0.005495,0.027803,0.007689,0.022109,0.005238,0.023878,0.006779,0.022207,0.005177,0.020488,0.001283,0.02181,0.001302
1000,0.3489,0.349325,0.878125,0.878248,0.878125,0.877092,0.878125,0.87755,0.878125,0.863309,0.827586,0.84507,0.937107,0.943038,0.940063,0.851613,0.862745,0.857143,0.860963,0.875,0.867925,0.0,32,509.072266,7180.0,0.026065,0.006546,0.023528,0.005311,0.024582,0.005922,0.022643,0.004309,0.025055,0.005894,0.021727,0.003918,0.022709,0.005179,0.021961,0.003742,0.022612,0.005915,0.021977,0.004395,0.023612,0.005769,0.021971,0.004898,0.02716,0.006896,0.022118,0.005273,0.023355,0.006426,0.022482,0.005352,0.023336,0.006712,0.021963,0.005224,0.024916,0.007213,0.022437,0.005554,0.028402,0.007958,0.022149,0.005309,0.024539,0.007263,0.022247,0.005201,0.020514,0.001279,0.021878,0.00118
1200,0.3405,0.337507,0.89375,0.897216,0.89375,0.891363,0.89375,0.89329,0.89375,0.922481,0.82069,0.868613,0.949367,0.949367,0.949367,0.859873,0.882353,0.870968,0.857143,0.913043,0.884211,0.0,32,509.072266,7180.0,0.026401,0.006797,0.023651,0.005409,0.024798,0.00609,0.02279,0.004421,0.025204,0.006008,0.021846,0.004047,0.022917,0.005418,0.022075,0.003858,0.022757,0.006147,0.022064,0.004488,0.023831,0.005968,0.022035,0.004987,0.027399,0.007026,0.022172,0.005304,0.023427,0.006547,0.02248,0.005372,0.023433,0.00679,0.021979,0.005259,0.025322,0.007436,0.022525,0.005619,0.029213,0.00819,0.022211,0.005377,0.02491,0.007503,0.022329,0.005305,0.02053,0.001336,0.021888,0.00127
1400,0.3256,0.337656,0.8875,0.891273,0.8875,0.885705,0.8875,0.887513,0.8875,0.922481,0.82069,0.868613,0.943038,0.943038,0.943038,0.849057,0.882353,0.865385,0.850515,0.896739,0.873016,0.0,32,509.072266,7180.0,0.026713,0.007028,0.023661,0.005432,0.024861,0.006175,0.022732,0.004427,0.025359,0.006166,0.021906,0.004124,0.02299,0.005526,0.022146,0.003901,0.02289,0.006303,0.022132,0.004569,0.024034,0.006121,0.022092,0.005047,0.027365,0.007045,0.022227,0.005362,0.023483,0.00664,0.022504,0.005415,0.023553,0.006895,0.022021,0.005297,0.025632,0.00761,0.022552,0.005645,0.029321,0.008271,0.022302,0.005435,0.025111,0.007631,0.022368,0.005358,0.020549,0.001349,0.022024,0.001259
1600,0.3258,0.33644,0.892188,0.895815,0.892188,0.890697,0.892188,0.892318,0.892188,0.923077,0.827586,0.872727,0.955128,0.943038,0.949045,0.845679,0.895425,0.869841,0.859375,0.896739,0.87766,0.0,32,509.072266,7180.0,0.026889,0.007151,0.023704,0.005456,0.024902,0.006224,0.022713,0.004424,0.025405,0.006198,0.021946,0.004164,0.022989,0.005535,0.022205,0.003943,0.022966,0.006411,0.022196,0.004634,0.024098,0.006173,0.02212,0.005069,0.027492,0.00708,0.022248,0.005384,0.023549,0.006707,0.022522,0.005424,0.02366,0.006984,0.02203,0.005312,0.02595,0.007715,0.02255,0.005648,0.029355,0.008321,0.022277,0.005431,0.025343,0.007735,0.022376,0.005356,0.020554,0.001388,0.022035,0.001345
1800,0.3287,0.332486,0.889062,0.891895,0.889062,0.887704,0.889062,0.889095,0.889062,0.909091,0.827586,0.866426,0.955128,0.943038,0.949045,0.84472,0.888889,0.866242,0.858639,0.891304,0.874667,0.0,32,509.072266,7180.0,0.027002,0.007218,0.023743,0.005473,0.024927,0.006243,0.022729,0.004426,0.025503,0.00624,0.021962,0.00417,0.023021,0.00557,0.022223,0.003947,0.022993,0.006454,0.022223,0.004663,0.024144,0.006201,0.022135,0.005079,0.027508,0.007093,0.022266,0.005399,0.023574,0.006729,0.022522,0.005425,0.02367,0.007004,0.022041,0.005322,0.026055,0.007755,0.022572,0.00566,0.029493,0.00836,0.022288,0.005433,0.025451,0.007784,0.022393,0.005368,0.020557,0.001391,0.022049,0.001342


    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
    There is an imbalance between your GPUs. You may want to exclude GPU 2 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argumen

In [22]:
# Save Model
peft_model_path = os.path.join(output_dir, "peft_model_shuffle")
peft_model.save_pretrained(peft_model_path)
# Save Tokenizer
tokenizer.save_pretrained(peft_model_path)
print(f"Model saved to {peft_model_path}")


Model saved to results/peft_model_shuffle


## Evaluate Finetuned Model


### Run Inference on eval_dataset

In [23]:
from torch.utils.data import DataLoader
import evaluate
from tqdm import tqdm

def evaluate_model(inference_model, dataset, labelled=True, batch_size=32, data_collator=None):
    """
    Evaluate a PEFT model on a dataset.

    Args:
        inference_model: The model to evaluate.
        dataset: The dataset (Hugging Face Dataset) to run inference on.
        labelled (bool): If True, the dataset includes labels and metrics will be computed.
                         If False, only predictions will be returned.
        batch_size (int): Batch size for inference.
        data_collator: Function to collate batches. If None, the default collate_fn is used.

    Returns:
        If labelled is True, returns a tuple (metrics, predictions)
        If labelled is False, returns the predictions.
    """
    # Create the DataLoader
    eval_dataloader = DataLoader(dataset, batch_size=batch_size, collate_fn=data_collator)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    inference_model.to(device)
    inference_model.eval()

    all_predictions = []
    if labelled:
        metric = evaluate.load('accuracy')

    # Loop over the DataLoader
    for batch in tqdm(eval_dataloader):
        # Move each tensor in the batch to the device
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = inference_model(**batch)
        predictions = outputs.logits.argmax(dim=-1)
        all_predictions.append(predictions.cpu())

        if labelled:
            # Expecting that labels are provided under the "labels" key.
            references = batch["labels"]
            metric.add_batch(
                predictions=predictions.cpu().numpy(),
                references=references.cpu().numpy()
            )

    # Concatenate predictions from all batches
    all_predictions = torch.cat(all_predictions, dim=0)

    if labelled:
        eval_metric = metric.compute()
        print("Evaluation Metric:", eval_metric)
        return eval_metric, all_predictions
    else:
        return all_predictions

In [24]:
# Check evaluation accuracy
_, _ = evaluate_model(peft_model, eval_dataset, True, 32, data_collator)

100%|██████████| 20/20 [00:03<00:00,  5.01it/s]

Evaluation Metric: {'accuracy': 0.8890625}





### Run Inference on unlabelled dataset

In [26]:
#Load your unlabelled data
unlabelled_dataset = pd.read_pickle("/home/viewsetting/ssd_2T/test_unlabelled.pkl")
test_dataset = unlabelled_dataset.map(preprocess_with_augmentation, batched=True, remove_columns=["text"])
unlabelled_dataset

Map:   0%|          | 0/8000 [00:00<?, ? examples/s]

Dataset({
    features: ['text'],
    num_rows: 8000
})

In [27]:
# Run inference and save predictions
preds = evaluate_model(peft_model, test_dataset, False, 32, data_collator)
df_output = pd.DataFrame({
    'ID': range(len(preds)),
    'Label': preds.numpy()  # or preds.tolist()
})
df_output.to_csv(os.path.join(output_dir,"inference_output_shuffle.csv"), index=False)
print("Inference complete. Predictions saved to inference_output.csv")

100%|██████████| 250/250 [00:26<00:00,  9.50it/s]

Inference complete. Predictions saved to inference_output.csv



