In [None]:
!pip install datasets
!pip install --upgrade peft
import torch
import time
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, TaskType

# TESTING BASE MODEL WITH NO OPTIMISATIONS

In [None]:
def measure_resources(model, tokenizer, dataset, training_args):
    """Fine-tune ``model`` with a ``Trainer`` and report what the run cost.

    Args:
        model: Model to train. It is moved to CUDA when available, else CPU.
        tokenizer: Callable applied to each batch's ``'text'`` field with
            truncation and padding to 128 tokens.
        dataset: Object offering ``map``/``rename_column``/``set_format`` and
            ``'train'``/``'test'`` splits with ``'text'`` and ``'label'`` columns.
        training_args: Passed unchanged to ``Trainer``.

    Returns:
        ``(peak_memory, training_time)``: peak CUDA memory allocated while
        ``trainer.train()`` ran, in MB (``0.0`` when CUDA is unavailable), and
        the wall-clock duration of ``trainer.train()`` in seconds.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)

    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    # Data preparation is deliberately done before the clock starts, so that
    # tokenisation cost is never reported as training time.
    tokenized_dataset = dataset.map(tokenize_function, batched=True)
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['test'],
    )

    # The CUDA memory counters only exist when a GPU is present.
    # NOTE: the peak counter is device-wide for this process, so tensors that
    # earlier runs left on the GPU are included in the reported peak.
    if use_cuda:
        torch.cuda.synchronize()  # finish pending GPU work before timing starts
        torch.cuda.reset_peak_memory_stats()
    start_time = time.perf_counter()

    trainer.train()

    if use_cuda:
        torch.cuda.synchronize()  # make sure queued kernels are included in the timing
        peak_memory = torch.cuda.max_memory_allocated() / (1024 ** 2)  # bytes -> MB
    else:
        peak_memory = 0.0
    training_time = time.perf_counter() - start_time

    return peak_memory, training_time

In [None]:
# Inputs for this experiment: the RoBERTa tokenizer and the IMDB dataset
# (any dataset with 'text' and 'label' columns fits the measuring function).
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = load_dataset('imdb')

# A single epoch keeps the run short; evaluation happens once per epoch.
training_args = TrainingArguments(
    num_train_epochs=1,
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    output_dir="./results",
)

# Baseline: fine-tune RoBERTa for 2-class classification without LoRA adapters.
model_base = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
peak_memory_base, training_time_base = measure_resources(
    model=model_base,
    tokenizer=tokenizer,
    dataset=dataset,
    training_args=training_args,
)
print(f"Without LoRA - Peak Memory: {peak_memory_base:.2f} MB, Training Time: {training_time_base:.2f} seconds")

# TESTING BASE MODEL WITH LoRA

In [None]:
# LoRA adapter configuration for the sequence-classification model.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # Sequence classification task
    r=16,  # Rank of the update matrices
    lora_alpha=32,  # Alpha scaling factor
    lora_dropout=0.1,  # Dropout rate for LoRA
    bias="none"  # Bias strategy for LoRA
)

# `model_base` has already been fine-tuned by the baseline run. Wrapping it
# would start the LoRA run from trained weights rather than from pretrained
# roberta-base, and get_peft_model would inject adapter layers into
# `model_base` itself. Park the finished baseline on the CPU (so its weights
# do not count toward the LoRA peak memory) and wrap a freshly loaded model.
model_base.to("cpu")
model_lora = get_peft_model(
    RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2),
    lora_config,
)

peak_memory_lora, training_time_lora = measure_resources(model_lora, tokenizer, dataset, training_args)
print(f"With LoRA - Peak Memory: {peak_memory_lora:.2f} MB, Training Time: {training_time_lora:.2f} seconds")

# Compare Results (positive numbers mean the LoRA run used less than the baseline)
memory_savings = peak_memory_base - peak_memory_lora
time_savings = training_time_base - training_time_lora
print(f"Memory Saved: {memory_savings:.2f} MB")
print(f"Time Saved: {time_savings:.2f} seconds")

# MIXED PRECISION TRAINING

In [None]:
# Training arguments with mixed precision (same settings as before plus fp16)
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=16,
    evaluation_strategy="epoch",
    num_train_epochs=1,
    fp16=True  # Enable mixed precision
)

# The peak-memory counter read by measure_resources is device-wide, so the
# model left on the GPU by the LoRA run would be counted in this run's peak.
# Move it to the CPU before loading the model under test.
model_lora.to("cpu")

# Initialize and train a fresh model
model = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
peak_memory, training_time = measure_resources(model, tokenizer, dataset, training_args)
print(f"Baseline with Mixed Precision - Peak Memory: {peak_memory:.2f} MB, Training Time: {training_time:.2f} seconds")

# PRUNING

In [None]:
import torch
import torch.nn.utils.prune as prune
from transformers import RobertaForSequenceClassification, RobertaTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
import time

# Define pruning function
def prune_model(model, amount=0.2):
    """Apply L1 unstructured pruning to the weight of every ``torch.nn.Linear`` in ``model``.

    ``amount`` is handed unchanged to ``prune.l1_unstructured`` for each
    linear layer. The model is modified in place and returned for convenience.
    """
    linear_layers = (
        layer for layer in model.modules() if isinstance(layer, torch.nn.Linear)
    )
    for layer in linear_layers:
        prune.l1_unstructured(layer, name='weight', amount=amount)
    return model

# Initialize and prune the model, then move it to the training device.
model_base = RobertaForSequenceClassification.from_pretrained("roberta-base", num_labels=2)
model_pruned = prune_model(model_base)  # pruned in place: same object as model_base
# Fall back to the CPU when no GPU is present instead of failing on a hard-coded "cuda".
model_pruned.to("cuda" if torch.cuda.is_available() else "cpu")

# Measure resource usage for GPU-based model
def measure_resources_gpu(model, tokenizer, dataset, training_args):
    """Train ``model`` with a ``Trainer`` and report peak memory and training time.

    Args:
        model: Model to train. It is moved to CUDA when available, else CPU.
        tokenizer: Callable applied to each batch's ``'text'`` field with
            truncation and padding to 128 tokens.
        dataset: Object offering ``map``/``rename_column``/``set_format`` and
            ``'train'``/``'test'`` splits with ``'text'`` and ``'label'`` columns.
        training_args: Passed unchanged to ``Trainer``.

    Returns:
        ``(peak_memory, training_time)``: peak CUDA memory allocated while
        ``trainer.train()`` ran, in MB (``0.0`` when CUDA is unavailable), and
        the wall-clock duration of ``trainer.train()`` in seconds.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    model.to(device)

    # Tokenize and prepare dataset (outside the timed region)
    def tokenize_function(examples):
        return tokenizer(examples['text'], truncation=True, padding='max_length', max_length=128)

    tokenized_dataset = dataset.map(tokenize_function, batched=True)
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
    tokenized_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

    # No extra pass that pushes the whole dataset to the device: Trainer places
    # each batch on the model's device itself, and the result of a dataset
    # .map() is written back into the dataset's storage, so such a pass only
    # added time and GPU allocations to the measurement.

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_dataset['train'],
        eval_dataset=tokenized_dataset['test'],
    )

    # The CUDA memory counters only exist when a GPU is present.
    if use_cuda:
        torch.cuda.synchronize()  # finish pending GPU work before timing starts
        torch.cuda.reset_peak_memory_stats()
    start_time = time.perf_counter()

    trainer.train()

    if use_cuda:
        torch.cuda.synchronize()  # make sure queued kernels are included in the timing
        peak_memory = torch.cuda.max_memory_allocated() / (1024 ** 2)  # bytes -> MB
    else:
        peak_memory = 0.0
    training_time = time.perf_counter() - start_time

    return peak_memory, training_time

# Reload the tokenizer and dataset so this cell has everything it needs.
tokenizer = RobertaTokenizer.from_pretrained("roberta-base")
dataset = load_dataset('imdb')

# One epoch, evaluated at the end of the epoch, batch size 16 per device.
training_args = TrainingArguments(
    num_train_epochs=1,
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    output_dir="./results",
)

# Train the pruned model and report what the run cost.
peak_memory, training_time = measure_resources_gpu(
    model=model_pruned,
    tokenizer=tokenizer,
    dataset=dataset,
    training_args=training_args,
)
print(f"Pruned Model on GPU - Peak Memory: {peak_memory:.2f} MB, Training Time: {training_time:.2f} seconds")