In [1]:
!pip install datasets
!pip install --upgrade peft
import torch
import time
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

Collecting datasets
  Downloading datasets-3.0.2-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-3.0.2-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.7/472.7 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K   [90m

# TESTING BASE MODEL WITH NO OPTIMISATIONS

In [2]:
def measure_resources(model, tokenizer, dataset, training_args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Start memory tracking
    torch.cuda.reset_peak_memory_stats()
    start_time = time.time()

    # Training setup
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(tokenize_function, batched=True)
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['test'],
    )

    # Train the model
    trainer.train()

    # End memory and time tracking
    peak_memory = torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    end_time = time.time()
    training_time = end_time - start_time

    return peak_memory, training_time

In [3]:
# Load the dataset and tokenizer
dataset = load_dataset('imdb')  # You can use any other dataset
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")

# Training Arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",
    num_train_epochs=1,  # Set to a smaller number for testing purposes
)

# Measure fine-tuning without LoRA
model_base = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
peak_memory_base, training_time_base = measure_resources(model_base, tokenizer, dataset, training_args)
print(f"Without LoRA - Peak Memory: {peak_memory_base:.2f} MB, Training Time: {training_time_base:.2f} seconds")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss
1,0.2895,0.251532


Without LoRA - Peak Memory: 3041.00 MB, Training Time: 973.90 seconds


# TESTING BASE MODEL WITH LoRA

In [4]:
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # Sequence classification task
    r=16,  # Rank of the update matrices
    lora_alpha=32,  # Alpha scaling factor
    lora_dropout=0.1,  # Dropout rate for LoRA
    bias="none"  # Bias strategy for LoRA
)
model_lora = get_peft_model(model_base, lora_config)

peak_memory_lora, training_time_lora = measure_resources(model_lora, tokenizer, dataset, training_args)
print(f"With LoRA - Peak Memory: {peak_memory_lora:.2f} MB, Training Time: {training_time_lora:.2f} seconds")

# Compare Results
memory_savings = peak_memory_base - peak_memory_lora
time_savings = training_time_base - training_time_lora
print(f"Memory Saved: {memory_savings:.2f} MB")
print(f"Time Saved: {time_savings:.2f} seconds")

Epoch,Training Loss,Validation Loss
1,0.2041,0.260369


With LoRA - Peak Memory: 1761.55 MB, Training Time: 597.62 seconds
Memory Saved: 1279.45 MB
Time Saved: 376.28 seconds


# MIXED PRECISION TRAINING

In [5]:
# Training arguments with mixed precision
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",
    num_train_epochs=1,
    fp16=True  # Enable mixed precision
)

# Initialize and train model
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
peak_memory, training_time = measure_resources(model, tokenizer, dataset, training_args)
print(f"Baseline with Mixed Precision - Peak Memory: {peak_memory:.2f} MB, Training Time: {training_time:.2f} seconds")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss
1,0.2787,0.241352


Baseline with Mixed Precision - Peak Memory: 3220.12 MB, Training Time: 459.36 seconds


# PRUNING

In [10]:
import torch
import torch.nn.utils.prune as prune
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import time

# Define pruning function
def prune_model(model, amount=0.2):
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            prune.l1_unstructured(module, name='weight', amount=amount)
    return model

# Initialize and prune model, then move to GPU
model_base = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
model_pruned = prune_model(model_base)
model_pruned.to("cuda")  # Move pruned model to GPU

# Measure resource usage for GPU-based model
def measure_resources_gpu(model, tokenizer, dataset, training_args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    # Start memory tracking
    torch.cuda.reset_peak_memory_stats()
    start_time = time.time()

    # Tokenize and prepare dataset
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(tokenize_function, batched=True)
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # Adjust to_device function to handle only numerical data
    def to_device(batch):
        return {
            k: torch.tensor(v).to(device) if isinstance(v, list) and all(isinstance(i, (int, float)) for i in v)
            else v.to(device) if isinstance(v, torch.Tensor)
            else v  # Leave non-tensor, non-numeric lists (like strings) unchanged
            for k, v in batch.items()
        }

    tokenized_dataset = tokenized_dataset.map(to_device, batched=True)

    # Initialize Trainer with the GPU-based model
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['test'],
    )

    # Train the model on GPU
    trainer.train()

    # End memory and time tracking
    peak_memory = torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    end_time = time.time()
    training_time = end_time - start_time

    return peak_memory, training_time

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",
    num_train_epochs=1,
)

# Load tokenizer and dataset
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = load_dataset('imdb')

# Run the pruned model on GPU and measure resources
peak_memory, training_time = measure_resources_gpu(model_pruned, tokenizer, dataset, training_args)
print(f"Pruned Model on GPU - Peak Memory: {peak_memory:.2f} MB, Training Time: {training_time:.2f} seconds")

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/25000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


Epoch,Training Loss,Validation Loss
1,0.2804,0.247298


Pruned Model on GPU - Peak Memory: 5189.66 MB, Training Time: 582.02 seconds
