In [1]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.5.1-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.5.1-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.4/491.4 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl (1

In [None]:
!pip install hf_xet

In [None]:
!pip install peft

# Using BERT on MRPC Dataset:

**Diff-pruning on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Function to apply pruning to the model
def apply_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Don't prune bias terms
            weight = param.data
            num_elements = weight.numel()
            num_pruned = int(pruning_percentage * num_elements)
            flattened = weight.view(-1)
            _, indices = torch.topk(flattened.abs(), num_pruned, largest=False)
            flattened[indices] = 0
            weight.copy_(flattened.view(weight.size()))

# Load BERT tokenizer and MRPC dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "mrpc")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply pruning to the model
apply_pruning(model, pruning_percentage=0.2)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)  # Get the index of max logit
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-4,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()

print(f"Evaluation results: {results}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6745,0.624242,0.683824
2,0.6103,0.626149,0.683824
3,0.655,0.631133,0.683824
4,0.6474,0.628794,0.683824
5,0.6109,0.625536,0.683824


Evaluation results: {'eval_loss': 0.6242417097091675, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 2.7218, 'eval_samples_per_second': 149.902, 'eval_steps_per_second': 9.553, 'epoch': 5.0}


**Bitfit on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and MRPC dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "mrpc")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Function to freeze all layers except bias terms
def apply_bitfit(model):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Freeze all non-bias parameters
            param.requires_grad = False

# Apply BitFit
apply_bitfit(model)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)  # Get the index of max logit
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-4,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()

print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6115,0.584993,0.696078
2,0.5498,0.517563,0.740196
3,0.5291,0.502057,0.779412
4,0.5209,0.484994,0.786765
5,0.4934,0.481838,0.801471


Evaluation results: {'eval_loss': 0.48183783888816833, 'eval_accuracy': 0.8014705882352942, 'eval_runtime': 2.846, 'eval_samples_per_second': 143.36, 'eval_steps_per_second': 9.136, 'epoch': 5.0}


**LoRa on BERT**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Function to apply LoRA (Low-Rank Adaptation)
def apply_lora(model, rank=8):
    # Add low-rank adapters to each transformer layer (example for Bert model)
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            module.weight.requires_grad = False  # Freeze weights
            # Add a low-rank decomposition (A * B)
            adapter_a = torch.nn.Parameter(torch.randn(module.in_features, rank))
            adapter_b = torch.nn.Parameter(torch.randn(rank, module.out_features))
            module.register_parameter("adapter_a", adapter_a)
            module.register_parameter("adapter_b", adapter_b)

# Load BERT tokenizer and MRPC dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "mrpc")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply LoRA
apply_lora(model, rank=8)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)  # Get the index of max logit
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-4,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()

print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5841,0.543621,0.72549
2,0.379,0.489193,0.767157
3,0.2191,0.526095,0.740196
4,0.1543,0.571526,0.752451
5,0.1015,0.577739,0.75


Evaluation results: {'eval_loss': 0.48919257521629333, 'eval_accuracy': 0.7671568627450981, 'eval_runtime': 2.7949, 'eval_samples_per_second': 145.982, 'eval_steps_per_second': 9.303, 'epoch': 5.0}


**Full-Finetuning of BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and MRPC dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "mrpc")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)  # Get the index of max logit
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-4,
    load_best_model_at_end=True
)
# Initialize Trainer
trainer = Trainer(
    model=model,                          # The model to be trained
    args=training_args,                   # Training arguments
    train_dataset=tokenized_datasets['train'],         # Training dataset
    eval_dataset=tokenized_datasets['validation'],     # Validation dataset
    compute_metrics=compute_metrics      # Metrics for evaluation
)

# Train and evaluate the model
trainer.train()  # Start training
trainer.save_model('./final_model')  # Save the trained model
results = trainer.evaluate()  # Evaluate the model on validation set

# Print the evaluation results
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6701,0.626005,0.683824
2,0.6287,0.626237,0.683824
3,0.6446,0.6281,0.683824
4,0.6495,0.625117,0.683824
5,0.6224,0.624552,0.683824


Evaluation results: {'eval_loss': 0.624552309513092, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 2.709, 'eval_samples_per_second': 150.611, 'eval_steps_per_second': 9.598, 'epoch': 5.0}


# Using BERT on RTE_Dataset:

**Diff-Pruning:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Diff pruning is a parameter-efficient transfer learning method, applying sparsity.
# (This is a simplified conceptual demonstration; real diff pruning would require complex reparameterization and sparsity constraints.)

# Load BERT tokenizer and RTE dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "rte")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=256)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply a simplified conceptual form of diff pruning (sparse fine-tuning using a minimal set of weights)
def apply_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Don't prune bias terms
            weight = param.data
            num_elements = weight.numel()
            num_pruned = int(pruning_percentage * num_elements)
            flattened = weight.view(-1)
            _, indices = torch.topk(flattened.abs(), num_pruned, largest=False)
            flattened[indices] = 0
            weight.copy_(flattened.view(weight.size()))

apply_pruning(model)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7056,0.698885,0.472924
2,0.6925,0.695909,0.472924
3,0.6359,0.68725,0.548736


Evaluation results: {'eval_loss': 0.6872498989105225, 'eval_accuracy': 0.5487364620938628, 'eval_runtime': 3.9468, 'eval_samples_per_second': 70.183, 'eval_steps_per_second': 8.868, 'epoch': 3.0}


**BitFit on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and RTE dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "rte")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=256)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Function to freeze all layers except bias terms
def apply_bitfit(model):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Freeze all non-bias parameters
            param.requires_grad = False

apply_bitfit(model)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.68,0.692698,0.498195
2,0.6979,0.691547,0.505415
3,0.6954,0.691846,0.494585


Evaluation results: {'eval_loss': 0.6915468573570251, 'eval_accuracy': 0.5054151624548736, 'eval_runtime': 3.8898, 'eval_samples_per_second': 71.212, 'eval_steps_per_second': 8.998, 'epoch': 3.0}


**LoRa on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Function to apply LoRA (Low-Rank Adaptation)
def apply_lora(model, rank=8):
    # Add low-rank adapters to each transformer layer (example for Bert model)
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            module.weight.requires_grad = False  # Freeze weights
            # Add a low-rank decomposition (A * B)
            adapter_a = torch.nn.Parameter(torch.randn(module.in_features, rank))
            adapter_b = torch.nn.Parameter(torch.randn(rank, module.out_features))
            module.register_parameter("adapter_a", adapter_a)
            module.register_parameter("adapter_b", adapter_b)

# Load BERT tokenizer and RTE dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "rte")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=256)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply LoRA
apply_lora(model, rank=8)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6954,0.695964,0.509025
2,0.7045,0.693289,0.516245
3,0.6819,0.693342,0.501805


Evaluation results: {'eval_loss': 0.6932888627052307, 'eval_accuracy': 0.516245487364621, 'eval_runtime': 3.6242, 'eval_samples_per_second': 76.43, 'eval_steps_per_second': 4.967, 'epoch': 3.0}


**Full-Finetuning of BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and RTE dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "rte")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=256)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6969,0.690002,0.519856
2,0.5919,0.649232,0.610108
3,0.3043,0.803419,0.613718


Evaluation results: {'eval_loss': 0.6492316722869873, 'eval_accuracy': 0.6101083032490975, 'eval_runtime': 3.8708, 'eval_samples_per_second': 71.561, 'eval_steps_per_second': 9.042, 'epoch': 3.0}


# Using BERT on CONLL_2003 Dataset:

**Diff-Pruning on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, BertForTokenClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load the FAST tokenizer (note the use of AutoTokenizer and specifying use_fast=True)
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)


dataset = load_dataset("conll2003")

# Define a function to align labels with tokenized words
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding="max_length",
        max_length=128
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []

        for word_idx in word_ids:
            # Special tokens have a word id that is None. We set the label to -100
            # so they are automatically ignored in the loss function.
            if word_idx is None:
                label_ids.append(-100)
            # We set the label for the first token of each word.
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            # For the other tokens in a word, we set the label to -100
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx

        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Apply the tokenization with aligned labels
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=dataset["train"].column_names
)

# Checking the distribution of sequence lengths in the train set
import numpy as np
lengths = [len(input_ids) for input_ids in tokenized_datasets['train']['input_ids']]
print(f"Max length: {np.max(lengths)}, Min length: {np.min(lengths)}, Average length: {np.mean(lengths)}")

# Set format for PyTorch
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Initialize model
from transformers import BertForTokenClassification, AutoTokenizer

# Load the BERT model with PyTorch weights (the default should already be PyTorch weights)
model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=9)

# Apply a simplified conceptual form of diff pruning (sparse fine-tuning using a minimal set of weights)
def apply_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Don't prune bias terms
            weight = param.data
            num_elements = weight.numel()
            num_pruned = int(pruning_percentage * num_elements)
            flattened = weight.view(-1)
            _, indices = torch.topk(flattened.abs(), num_pruned, largest=False)
            flattened[indices] = 0
            weight.copy_(flattened.view(weight.size()))

apply_pruning(model)

# Define evaluation metrics - use a proper metric for token classification
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.tensor(predictions).argmax(dim=-1)

    # Remove ignored index (special tokens) and convert to 1D arrays for metric calculation
    true_predictions = [
        [p for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    true_labels = [
        [l for l in label if l != -100]
        for label in labels
    ]

    # Flatten the predictions and labels
    flattened_preds = [p for preds in true_predictions for p in preds]
    flattened_labels = [l for labs in true_labels for l in labs]

    return {
        'accuracy': accuracy_score(flattened_labels, flattened_preds)
    }

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")

Max length: 128, Min length: 128, Average length: 128.0


Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0886,0.07922,0.978575
2,0.025,0.078015,0.981419
3,0.014,0.075885,0.983562


Evaluation results: {'eval_loss': 0.0758848488330841, 'eval_accuracy': 0.983561537113141, 'eval_runtime': 24.4558, 'eval_samples_per_second': 132.893, 'eval_steps_per_second': 16.642, 'epoch': 3.0}


**BitFit on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, BertForTokenClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load the Fast tokenizer for word_ids() support
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)
dataset = load_dataset("conll2003")

# Define a function to align labels with tokenized words
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding="max_length",
        max_length=128
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []

        for word_idx in word_ids:
            # Special tokens have a word id that is None. We set the label to -100
            # so they are automatically ignored in the loss function.
            if word_idx is None:
                label_ids.append(-100)
            # We set the label for the first token of each word.
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            # For the other tokens in a word, we set the label to -100
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx

        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Apply the tokenization with aligned labels
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=dataset["train"].column_names
)

# Set format for PyTorch
tokenized_datasets.set_format(type='torch')

# Initialize model
model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=9)

# Function to freeze all layers except bias terms (BitFit)
def apply_bitfit(model):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Freeze all non-bias parameters
            param.requires_grad = False

apply_bitfit(model)

# Define evaluation metrics for token classification
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.tensor(predictions).argmax(dim=-1)

    # Remove ignored index (special tokens) and convert to 1D arrays for metric calculation
    true_predictions = [
        [p for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    true_labels = [
        [l for l in label if l != -100]
        for label in labels
    ]

    # Flatten the predictions and labels
    flattened_preds = [p for preds in true_predictions for p in preds]
    flattened_labels = [l for labs in true_labels for l in labs]

    return {
        'accuracy': accuracy_score(flattened_labels, flattened_preds)
    }

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/12.3k [00:00<?, ?B/s]

conll2003.py:   0%|          | 0.00/9.57k [00:00<?, ?B/s]

The repository for conll2003 contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/conll2003.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Downloading data:   0%|          | 0.00/983k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/14041 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/3250 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3453 [00:00<?, ? examples/s]

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/3453 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.5549,0.648933,0.832558
2,0.5622,0.573831,0.832558
3,0.5108,0.556677,0.832558


Evaluation results: {'eval_loss': 0.5566769242286682, 'eval_accuracy': 0.8325575054048263, 'eval_runtime': 25.8683, 'eval_samples_per_second': 125.636, 'eval_steps_per_second': 15.734, 'epoch': 3.0}


**LoRa on BERT:**

In [3]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, BertForTokenClassification, TrainingArguments, Trainer
from peft import get_peft_model, LoraConfig, TaskType
from sklearn.metrics import accuracy_score

# Load tokenizer and dataset
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased", use_fast=True)
dataset = load_dataset("conll2003")

# Tokenize and align labels
def tokenize_and_align_labels(examples):
    tokenized_inputs = tokenizer(
        examples["tokens"],
        truncation=True,
        is_split_into_words=True,
        padding="max_length",
        max_length=128,
    )

    labels = []
    for i, label in enumerate(examples["ner_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []

        for word_idx in word_ids:
            if word_idx is None:
                label_ids.append(-100)
            elif word_idx != previous_word_idx:
                label_ids.append(label[word_idx])
            else:
                label_ids.append(-100)
            previous_word_idx = word_idx

        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    return tokenized_inputs

# Process dataset
tokenized_datasets = dataset.map(
    tokenize_and_align_labels,
    batched=True,
    remove_columns=dataset["train"].column_names
)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Load base model
base_model = BertForTokenClassification.from_pretrained("bert-base-uncased", num_labels=9)

# Apply LoRA
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.TOKEN_CLS
)

model = get_peft_model(base_model, lora_config)

# Metrics
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.tensor(predictions).argmax(dim=-1)

    true_predictions = [
        [p for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    true_labels = [
        [l for l in label if l != -100]
        for label in labels
    ]

    flat_preds = [p for preds in true_predictions for p in preds]
    flat_labels = [l for labs in true_labels for l in labs]

    return {'accuracy': accuracy_score(flat_labels, flat_preds)}

# Training args
training_args = TrainingArguments(
    output_dir="./results_lora_conll",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=4e-5,
    load_best_model_at_end=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    compute_metrics=compute_metrics,
)

# Train and evaluate
trainer.train()
trainer.save_model("./final_model_lora_conll")
results = trainer.evaluate()
print("Evaluation results:", results)


Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModelForTokenClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1681,0.156779,0.959235
2,0.1047,0.10567,0.973414
3,0.0837,0.097583,0.974953


Evaluation results: {'eval_loss': 0.09758317470550537, 'eval_accuracy': 0.9749527686344779, 'eval_runtime': 25.7874, 'eval_samples_per_second': 126.031, 'eval_steps_per_second': 15.783, 'epoch': 3.0}


**Full-Finetuning of BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForTokenClassification, Trainer, TrainingArguments, DataCollatorForTokenClassification
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and CoNLL-2003 dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("conll2003")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['tokens'], padding='max_length', truncation=True, max_length=128, is_split_into_words=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Rename 'ner_tags' to 'labels' to align with Hugging Face expectations
def preprocess_labels(examples):
    examples['labels'] = examples['ner_tags']  # Renaming ner_tags to labels
    return examples

tokenized_datasets = tokenized_datasets.map(preprocess_labels, batched=True)

# Set format for PyTorch
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'labels'])

# Initialize model
model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=9)

# Define evaluation metrics for token classification
def compute_metrics(p):
    predictions, labels = p
    predictions = torch.tensor(predictions).argmax(dim=-1)  # Convert logits to predictions (class with highest probability)

    # Remove ignored index (-100) and convert to 1D arrays for metric calculation
    true_predictions = [
        [p for (p, l) in zip(pred, label) if l != -100]
        for pred, label in zip(predictions, labels)
    ]
    true_labels = [
        [l for l in label if l != -100]
        for label in labels
    ]

    # Flatten the predictions and labels
    flattened_preds = [p for preds in true_predictions for p in preds]
    flattened_labels = [l for labs in true_labels for l in labs]

    return {
        'accuracy': accuracy_score(flattened_labels, flattened_preds)
    }

# Use DataCollatorForTokenClassification for dynamic padding
data_collator = DataCollatorForTokenClassification(tokenizer)

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    load_best_model_at_end=True
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics,
    data_collator=data_collator  # Adding the data collator for padding
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1877,0.194317,0.925178
2,0.1218,0.154405,0.940657
3,0.1043,0.139462,0.949165


Evaluation results: {'eval_loss': 0.13946153223514557, 'eval_accuracy': 0.949164752151396, 'eval_runtime': 25.9998, 'eval_samples_per_second': 125.001, 'eval_steps_per_second': 15.654, 'epoch': 3.0}


# Using BERT on SSt2 Dataset:

**Diff-Pruning on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and SST-2 dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "sst2")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply a simplified conceptual form of diff pruning (sparse fine-tuning using a minimal set of weights)
def apply_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Don't prune bias terms
            weight = param.data
            num_elements = weight.numel()
            num_pruned = int(pruning_percentage * num_elements)
            flattened = weight.view(-1)
            _, indices = torch.topk(flattened.abs(), num_pruned, largest=False)
            flattened[indices] = 0
            weight.copy_(flattened.view(weight.size()))

apply_pruning(model)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2373,0.459286,0.840596


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2373,0.459286,0.840596
2,0.3412,0.582515,0.838303
3,0.1169,0.620396,0.84633


Evaluation results: {'eval_loss': 0.45928576588630676, 'eval_accuracy': 0.8405963302752294, 'eval_runtime': 5.8894, 'eval_samples_per_second': 148.062, 'eval_steps_per_second': 18.508, 'epoch': 3.0}


**BitFit on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and SST-2 dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "sst2")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Function to freeze all layers except bias terms (BitFit)
def apply_bitfit(model):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Freeze all non-bias parameters
            param.requires_grad = False

apply_bitfit(model)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m88rehaan88[0m ([33m88rehaan88-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.398,0.35818,0.857798
2,0.2594,0.320404,0.881881
3,0.2984,0.316987,0.876147


Evaluation results: {'eval_loss': 0.316987007856369, 'eval_accuracy': 0.8761467889908257, 'eval_runtime': 5.8486, 'eval_samples_per_second': 149.095, 'eval_steps_per_second': 18.637, 'epoch': 3.0}


**LoRa on BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Function to apply LoRA (Low-Rank Adaptation)
def apply_lora(model, rank=8):
    # Add low-rank adapters to each transformer layer (example for Bert model)
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Linear):
            module.weight.requires_grad = False  # Freeze weights
            # Add a low-rank decomposition (A * B)
            adapter_a = torch.nn.Parameter(torch.randn(module.in_features, rank))
            adapter_b = torch.nn.Parameter(torch.randn(rank, module.out_features))
            module.register_parameter("adapter_a", adapter_a)
            module.register_parameter("adapter_b", adapter_b)

# Load BERT tokenizer and SST-2 dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "sst2")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Apply LoRA
apply_lora(model, rank=8)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.1648,0.358867,0.869266
2,0.2097,0.393124,0.858945
3,0.1219,0.387853,0.87156


Evaluation results: {'eval_loss': 0.35886749625205994, 'eval_accuracy': 0.8692660550458715, 'eval_runtime': 6.0446, 'eval_samples_per_second': 144.261, 'eval_steps_per_second': 18.033, 'epoch': 3.0}


**Full-Finetuning of BERT:**

In [None]:
import torch
from datasets import load_dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load BERT tokenizer and SST-2 dataset
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
dataset = load_dataset("glue", "sst2")

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['sentence'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Initialize model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Define evaluation metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert logits to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

# Training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)

# Train and evaluate the model
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33m88rehaan88[0m ([33m88rehaan88-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.0787,0.450615,0.897936
2,0.2487,0.404286,0.90367
3,0.1157,0.446142,0.905963


Evaluation results: {'eval_loss': 0.40428608655929565, 'eval_accuracy': 0.9036697247706422, 'eval_runtime': 6.3993, 'eval_samples_per_second': 136.266, 'eval_steps_per_second': 17.033, 'epoch': 3.0}


# Using GPT-2 for MRPC Dataset:

**Diff-Pruning on GPT-2:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2', use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 needs a padding token set explicitly
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2, pad_token_id=tokenizer.eos_token_id)

# Tokenize the dataset (MRPC task)
dataset = load_dataset("glue", "mrpc")
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Apply Diff Pruning (simplified pruning by zeroing out some weights)
def apply_diff_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if 'bias' not in name:
            weight = param.data
            num_elements = weight.numel()
            num_pruned = int(pruning_percentage * num_elements)
            flattened = weight.view(-1)
            _, indices = torch.topk(flattened.abs(), num_pruned, largest=False)
            flattened[indices] = 0
            weight.copy_(flattened.view(weight.size()))

apply_diff_pruning(model)

# Define metrics
# Define metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert numpy array to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize and train
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Epoch,Training Loss,Validation Loss,Accuracy
1,0.7378,0.594218,0.70098
2,0.6518,0.579735,0.708333
3,0.5244,0.588914,0.708333


Evaluation results: {'eval_loss': 0.5797353386878967, 'eval_accuracy': 0.7083333333333334, 'eval_runtime': 3.1568, 'eval_samples_per_second': 129.243, 'eval_steps_per_second': 16.155, 'epoch': 3.0}


**BitFit on GPT-2:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2', use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 needs a padding token set explicitly
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2, pad_token_id=tokenizer.eos_token_id)

# Tokenize the dataset (MRPC task)
dataset = load_dataset("glue", "mrpc")
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Apply BitFit (freeze all parameters except bias terms)
def apply_bitfit(model):
    for name, param in model.named_parameters():
        if 'bias' not in name:  # Freeze all non-bias parameters
            param.requires_grad = False

apply_bitfit(model)

# Define metrics
# Define metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert numpy array to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}


training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize and train
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7714,0.644808,0.671569
2,0.6916,0.618649,0.678922
3,0.6429,0.611239,0.683824


Evaluation results: {'eval_loss': 0.6112387180328369, 'eval_accuracy': 0.6838235294117647, 'eval_runtime': 3.1395, 'eval_samples_per_second': 129.957, 'eval_steps_per_second': 16.245, 'epoch': 3.0}


**LoRa on GPT-2:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score
from peft import get_peft_model, LoraConfig, TaskType

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2', use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 needs a padding token set explicitly
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2, pad_token_id=tokenizer.eos_token_id)

# Tokenize the dataset (MRPC task)
dataset = load_dataset("glue", "mrpc")
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Define LoRA configuration
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,  # Sequence classification task
    inference_mode=False,
    r=8,                          # Rank of the update matrices
    lora_alpha=32,                # Alpha parameter for LoRA scaling
    lora_dropout=0.1,             # Dropout probability for LoRA layers
    target_modules=["attn.c_attn", "attn.c_proj"]  # Modules to apply LoRA to
)

# Apply LoRA using PEFT
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()  # This will show how many parameters are frozen vs trained

# Define metrics
# Define metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert numpy array to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize and train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 443,904 || all params: 124,885,248 || trainable%: 0.3554


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6891,0.600844,0.691176
2,0.6486,0.574495,0.698529
3,0.5632,0.569803,0.693627


Evaluation results: {'eval_loss': 0.5698032379150391, 'eval_accuracy': 0.6936274509803921, 'eval_runtime': 3.3067, 'eval_samples_per_second': 123.387, 'eval_steps_per_second': 15.423, 'epoch': 3.0}


**Full-Finetuning on GPT-2:**

In [None]:
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, GPT2ForSequenceClassification, Trainer, TrainingArguments
from sklearn.metrics import accuracy_score

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('gpt2', use_fast=True)
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 needs a padding token set explicitly
model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2, pad_token_id=tokenizer.eos_token_id)

# Tokenize the dataset (MRPC task)
dataset = load_dataset("glue", "mrpc")
def tokenize_function(examples):
    return tokenizer(examples['sentence1'], examples['sentence2'], padding='max_length', truncation=True, max_length=128)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Define metrics
def compute_metrics(p):
    logits, labels = p
    logits = torch.tensor(logits)  # Convert numpy array to tensor
    predictions = torch.argmax(logits, axis=-1)
    return {'accuracy': accuracy_score(labels, predictions)}

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    logging_dir='./logs',
    logging_steps=10,
    learning_rate=4e-5,
    save_strategy="epoch",    # Save after each epoch
    eval_strategy="epoch",    # Evaluate after each epoch
    load_best_model_at_end=True
)
# Initialize and train the model
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
    compute_metrics=compute_metrics
)
trainer.train()
trainer.save_model('./final_model')
results = trainer.evaluate()
print(f"Evaluation results: {results}")


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6794,0.515138,0.772059
2,0.4282,0.464433,0.786765
3,0.3483,0.616611,0.786765


Evaluation results: {'eval_loss': 0.464432954788208, 'eval_accuracy': 0.7867647058823529, 'eval_runtime': 3.117, 'eval_samples_per_second': 130.895, 'eval_steps_per_second': 16.362, 'epoch': 3.0}


# Using T5-Small on MRPC Dataset:





**Diff-Pruning on T5-Small:**

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score

# Load tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

# Load MRPC dataset
dataset = load_dataset("glue", "mrpc")

# Preprocessing function
def preprocess_function(examples):
    inputs = [
        f"paraphrase: {s1} </s> {s2}"
        for s1, s2 in zip(examples["sentence1"], examples["sentence2"])
    ]
    targets = [
        "equivalent" if label == 1 else "not_equivalent"
        for label in examples["label"]
    ]
    model_inputs = tokenizer(inputs, max_length=128, padding="max_length", truncation=True)
    labels = tokenizer(targets, max_length=8, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize and format dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
tokenized_datasets.set_format("torch")

# Apply Diff Pruning (zero out smallest weights)
def apply_diff_pruning(model, pruning_percentage=0.2):
    for name, param in model.named_parameters():
        if param.requires_grad and 'bias' not in name:
            weight = param.data
            flat = weight.view(-1)
            num_pruned = int(pruning_percentage * flat.numel())
            _, idx = torch.topk(flat.abs(), num_pruned, largest=False)
            flat[idx] = 0
            param.data = flat.view_as(weight)

apply_diff_pruning(model)

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir='./results_t5_small_mrpc',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=4e-4,
    logging_steps=50,
    logging_dir='./logs',
    predict_with_generate=True,
    generation_max_length=8,
    load_best_model_at_end=True,
)

# Compute metrics function
def compute_metrics(eval_preds):
    preds, labels = eval_preds

    # Decode predictions and labels
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # Convert to binary labels
    binary_preds = [1 if pred.strip() == "equivalent" else 0 for pred in decoded_preds]
    binary_labels = [1 if label.strip() == "equivalent" else 0 for label in decoded_labels]

    accuracy = accuracy_score(binary_labels, binary_preds)
    f1 = f1_score(binary_labels, binary_preds)

    return {"accuracy": accuracy, "f1": f1}

# Trainer setup
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train and evaluate
trainer.train()
trainer.save_model('./t5_small_mrpc_diffpruned')
results = trainer.evaluate()
print("Eval results:", results)


  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.073,0.086154,0.696078,0.818182
2,0.0631,0.049894,0.794118,0.853659
3,0.0321,0.053108,0.821078,0.871252


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


Eval results: {'eval_loss': 0.04989397153258324, 'eval_accuracy': 0.7941176470588235, 'eval_f1': 0.8536585365853658, 'eval_runtime': 5.4509, 'eval_samples_per_second': 74.85, 'eval_steps_per_second': 9.356, 'epoch': 3.0}


**BitFit on T5-Small:**

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score

# Load tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("t5-small")
model = T5ForConditionalGeneration.from_pretrained("t5-small")

#  Apply BitFit: Freeze all weights except biases
for name, param in model.named_parameters():
    if 'bias' in name:
        param.requires_grad = True
    else:
        param.requires_grad = False

# Load MRPC dataset
dataset = load_dataset("glue", "mrpc")

# Preprocessing function
def preprocess_function(examples):
    inputs = [
        f"paraphrase: {s1} </s> {s2}"
        for s1, s2 in zip(examples["sentence1"], examples["sentence2"])
    ]
    targets = [
        "equivalent" if label == 1 else "not_equivalent"
        for label in examples["label"]
    ]
    model_inputs = tokenizer(inputs, max_length=128, padding="max_length", truncation=True)
    labels = tokenizer(targets, max_length=8, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize and format dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
tokenized_datasets.set_format("torch")

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir='./results_t5_small_mrpc_bitfit',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=4e-4,
    logging_steps=50,
    logging_dir='./logs',
    predict_with_generate=True,
    generation_max_length=8,
    load_best_model_at_end=True,
)

# Compute metrics function
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    binary_preds = [1 if pred.strip() == "equivalent" else 0 for pred in decoded_preds]
    binary_labels = [1 if label.strip() == "equivalent" else 0 for label in decoded_labels]

    accuracy = accuracy_score(binary_labels, binary_preds)
    f1 = f1_score(binary_labels, binary_preds)

    return {"accuracy": accuracy, "f1": f1}

# Trainer setup
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train and evaluate
trainer.train()
trainer.save_model('./t5_small_mrpc_bitfit')
results = trainer.evaluate()
print("Eval results:", results)


  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,12.7127,12.732745,0.316176,0.0
2,12.4854,12.652284,0.316176,0.0
3,12.6814,12.618643,0.316176,0.0


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


Eval results: {'eval_loss': 12.618642807006836, 'eval_accuracy': 0.3161764705882353, 'eval_f1': 0.0, 'eval_runtime': 6.3197, 'eval_samples_per_second': 64.56, 'eval_steps_per_second': 8.07, 'epoch': 3.0}


**LoRa on T5-Small:**

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)
from sklearn.metrics import accuracy_score, f1_score
from peft import get_peft_model, LoraConfig, TaskType

# Load tokenizer and base model
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

#  Apply LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q", "v"],  # T5 uses 'q' and 'v' in attention modules
    lora_dropout=0.1,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM
)

# Convert model to a PEFT (LoRA-applied) model
model = get_peft_model(model, lora_config)

# Load MRPC dataset
dataset = load_dataset("glue", "mrpc")

# Preprocessing function
def preprocess_function(examples):
    inputs = [
        f"paraphrase: {s1} </s> {s2}"
        for s1, s2 in zip(examples["sentence1"], examples["sentence2"])
    ]
    targets = [
        "equivalent" if label == 1 else "not_equivalent"
        for label in examples["label"]
    ]
    model_inputs = tokenizer(inputs, max_length=128, padding="max_length", truncation=True)
    labels = tokenizer(targets, max_length=8, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize and format dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
tokenized_datasets.set_format("torch")

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir='./results_t5_small_mrpc_lora',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=4e-4,
    logging_steps=50,
    logging_dir='./logs',
    predict_with_generate=True,
    generation_max_length=8,
    load_best_model_at_end=True,
)

# Compute metrics function
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    binary_preds = [1 if pred.strip() == "equivalent" else 0 for pred in decoded_preds]
    binary_labels = [1 if label.strip() == "equivalent" else 0 for label in decoded_labels]

    accuracy = accuracy_score(binary_labels, binary_preds)
    f1 = f1_score(binary_labels, binary_preds)

    return {"accuracy": accuracy, "f1": f1}

# Trainer setup
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train and evaluate
trainer.train()
trainer.save_model('./t5_small_mrpc_lora')
results = trainer.evaluate()
print("Eval results (LoRA):", results)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

  trainer = Seq2SeqTrainer(
No label_names provided for model class `PeftModelForSeq2SeqLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkhanz23[0m ([33mkhanz23-cardiff-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.075,0.053421,0.794118,0.857143
2,0.0653,0.046233,0.833333,0.878571
3,0.0542,0.045961,0.833333,0.879433


Eval results (LoRA): {'eval_loss': 0.045960698276758194, 'eval_accuracy': 0.8333333333333334, 'eval_f1': 0.8794326241134752, 'eval_runtime': 7.6639, 'eval_samples_per_second': 53.237, 'eval_steps_per_second': 6.655, 'epoch': 3.0}


**Full-Finetuning on T5-Small:**

In [None]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    T5Tokenizer,
    T5ForConditionalGeneration,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments
)
from sklearn.metrics import accuracy_score, f1_score

# Load tokenizer and model
model_name = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Load MRPC dataset
dataset = load_dataset("glue", "mrpc")

# Preprocessing function
def preprocess_function(examples):
    inputs = [
        f"paraphrase: {s1} </s> {s2}"
        for s1, s2 in zip(examples["sentence1"], examples["sentence2"])
    ]
    targets = [
        "equivalent" if label == 1 else "not_equivalent"
        for label in examples["label"]
    ]
    model_inputs = tokenizer(inputs, max_length=128, padding="max_length", truncation=True)
    labels = tokenizer(targets, max_length=8, padding="max_length", truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Tokenize and format dataset
tokenized_datasets = dataset.map(preprocess_function, batched=True, remove_columns=dataset["train"].column_names)
tokenized_datasets.set_format("torch")

# Training arguments
training_args = Seq2SeqTrainingArguments(
    output_dir='./results_t5_small_mrpc_full',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=4e-4,
    logging_steps=50,
    logging_dir='./logs',
    predict_with_generate=True,
    generation_max_length=8,
    load_best_model_at_end=True,
)

# Compute metrics function
def compute_metrics(eval_preds):
    preds, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    binary_preds = [1 if pred.strip() == "equivalent" else 0 for pred in decoded_preds]
    binary_labels = [1 if label.strip() == "equivalent" else 0 for label in decoded_labels]

    accuracy = accuracy_score(binary_labels, binary_preds)
    f1 = f1_score(binary_labels, binary_preds)

    return {"accuracy": accuracy, "f1": f1}

# Trainer setup
trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# Train and evaluate
trainer.train()
trainer.save_model('./t5_small_mrpc_full_finetuned')
results = trainer.evaluate()
print("Eval results (Full Fine-Tuning):", results)


  trainer = Seq2SeqTrainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.0639,0.083625,0.745098,0.8429
2,0.043,0.034859,0.875,0.908438
3,0.0205,0.05114,0.852941,0.89547


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight', 'lm_head.weight'].


Eval results (Full Fine-Tuning): {'eval_loss': 0.03485921397805214, 'eval_accuracy': 0.875, 'eval_f1': 0.9084380610412927, 'eval_runtime': 5.7376, 'eval_samples_per_second': 71.11, 'eval_steps_per_second': 8.889, 'epoch': 3.0}
