# RoBERTa Testing Notebook

In [12]:
%load_ext autoreload
%autoreload 2
# Autoreload mode 2 re-imports edited modules before each cell executes; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To turn autoreload off again, run %autoreload 0

In [13]:
import os
import sys

# Hugging Face settings must be in the environment *before* anything below
# imports `datasets`/`tokenizers`: `datasets` resolves HF_DATASETS_CACHE when
# it is first imported, so assigning it afterwards leaves the default cache
# location in effect.
os.environ["HF_DATASETS_CACHE"] = "./cache"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Make the parent directory importable; the project modules imported below are
# presumably located there.
sys.path.append(os.path.abspath('..'))

from constants import (
    TARGET_SPARSITY_LOW, TARGET_SPARSITY_MID, TARGET_SPARSITY_HIGH,
    BATCH_SIZE_CNN, BATCH_SIZE_VIT, BATCH_SIZE_LLM,
    EPOCHS_SMALL_MODEL, EPOCHS_LARGE_MODEL, EPOCHS_VIT
)
from utils import get_device, get_num_workers, load_weights, print_statistics
from unstructured_pruning import check_model_sparsity, check_sparsity_distribution
from trainer import TrainingArguments, Trainer
from bacp import BaCPTrainingArguments, BaCPTrainer

from datasets.utils.logging import disable_progress_bar

# Turn off the `datasets` progress bars for the rest of the session.
disable_progress_bar()


In [14]:
# Ask the project helpers for the compute device and the worker count, then
# report both.
DEVICE, NUM_WORKERS = get_device(), get_num_workers()
print(f"Using device: {DEVICE}")
print(f"Using {NUM_WORKERS} workers")

Using device: cuda
Using 288 workers


In [24]:
# Experiment selection shared by the cells below.
# TRAIN: cells guarded by `if TRAIN:` call .train() only when this is True;
# they call .evaluate() either way.
TRAIN = True
# MODEL_NAME / MODEL_TASK are passed to every trainer and are also used to
# build the checkpoint paths under ./research/.
MODEL_TASK = "sst2"
MODEL_NAME = "roberta-base"

## Baseline Accuracies

In [16]:
# Baseline configuration: no pruning arguments are passed.
baseline_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 2e-5),
    "scheduler_type": 'linear_with_warmup',
    "epochs": 5,
    "learning_type": "baseline",
    "db": False,
}
training_args = TrainingArguments(**baseline_kwargs)
trainer = Trainer(training_args=training_args)

# Training is optional (controlled by TRAIN); evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Initialized models
[TRAINER] Optimizer type w/ learning rate: (adamw, 2e-05)
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Linear scheduler initialized with warmup steps: 526 and total steps: 5260
[TRAINER] Pruning not initialized
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_baseline.pt
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_baseline.pt
[TRAINER] Weights loaded successfully
[TRAINER] Model Sparsity: 0.0


  return forward_call(*args, **kwargs)
                                                           


TRAINING STATISTICS SUMMARY

Performance Metrics:
------------------------------
  Accuracy:     94.83%

Model Information:
------------------------------
  Total Parameters:     124,647,170
  Trainable Parameters: 124,647,170
  Model Sparsity:       0.0000 (0.00%)

Training Configuration:
------------------------------
  Model:                roberta-base
  Task:                 sst2
  Learning Type:        baseline
  Batch Size:           64
  Learning Rate:        2e-05
  Optimizer:            adamw
  Epochs:               5

System Information:
------------------------------
  Device:               cuda
  Mixed Precision:      True
  Workers:              24





## Pruning Accuracies

### Magnitude Prune

In [17]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Magnitude pruning at TARGET_SPARSITY_LOW with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Training is optional (controlled by TRAIN); evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Initialized models
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_baseline.pt
[TRAINER] Weights loaded
[TRAINER] Optimizer type w/ learning rate: (adamw, 5e-05)
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] No scheduler initialized
[TRAINER] Pruning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Target sparsity: 0.95
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_pruning.pt
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_pruning.pt
[ERROR] Could not load weights. Path does not exist: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_pruning.pt


Exception: Error loading weights: {path}

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Magnitude pruning at TARGET_SPARSITY_MID with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Follow the notebook-wide TRAIN switch like the other experiment cells, so
# the training branch is reachable; evaluation runs either way.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Magnitude pruning at TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Follow the notebook-wide TRAIN switch like the other experiment cells, so
# the training branch is reachable; evaluation runs either way.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### SNIP-it Prune

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# SNIP pruning at TARGET_SPARSITY_LOW with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Follow the notebook-wide TRAIN switch like the other experiment cells, so
# the training branch is reachable; evaluation runs either way.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# SNIP pruning at TARGET_SPARSITY_MID with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Follow the notebook-wide TRAIN switch like the other experiment cells, so
# the training branch is reachable; evaluation runs either way.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# SNIP pruning at TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Follow the notebook-wide TRAIN switch like the other experiment cells, so
# the training branch is reachable; evaluation runs either way.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### WandA Prune

In [0]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Wanda pruning at TARGET_SPARSITY_LOW with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Training is optional (controlled by TRAIN); evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Wanda pruning at TARGET_SPARSITY_MID with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Training is optional (controlled by TRAIN); evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [None]:
# Baseline checkpoint handed to the trainer as the starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Wanda pruning at TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 5e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
    "db": False,
}
training_args = TrainingArguments(**pruning_kwargs)
trainer = Trainer(training_args)

# Training is optional (controlled by TRAIN); evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

## BaCP Accuracies

### Magnitude Pruning

In [18]:
# Baseline checkpoint handed to the BaCP trainer as its starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# ---- Phase 1: BaCP pruning (magnitude, TARGET_SPARSITY_LOW, cubic schedule) ----
bacp_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 1e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": 'bacp_pruning',
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# ---- Phase 2: fine-tuning ----
# The mask is generated from the BaCP trainer's model; the fine-tuning run
# reuses that trainer's settings, its pruner and its saved checkpoint path.
bacp_trainer.generate_mask_from_model()
finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ('adamw', 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# Evaluation and the summary printout run whether or not training happened.
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (adamw, 1e-05)
[TRAINER] No scheduler initialized
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Pruning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Target sparsity: 0.95
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_bacp_pruning.pt
[BaCP TRAINER] Mask generated from current model.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Initialized models
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_bacp_pruning.pt
[TRAINER] Weights loaded
[TRAINER] Optimizer type w/ learning rate: (adamw, 2e-05)
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] No scheduler initialized
[TRAINER] Finetuning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Current sparsity: 0.9500
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_bacp_finetune.pt
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.95_bacp_finetune.pt
[TRAINER] Weights loaded successfully
[TRAINER] Model Sparsity: 0.95


  return forward_call(*args, **kwargs)
                                                           


TRAINING STATISTICS SUMMARY

Performance Metrics:
------------------------------
  Accuracy:     83.41%

Model Information:
------------------------------
  Total Parameters:     124,647,170
  Trainable Parameters: 124,647,170
  Model Sparsity:       0.9500 (95.00%)

Training Configuration:
------------------------------
  Model:                roberta-base
  Task:                 sst2
  Learning Type:        bacp_finetune
  Batch Size:           64
  Learning Rate:        2e-05
  Optimizer:            adamw
  Epochs:               10

System Information:
------------------------------
  Device:               cuda
  Mixed Precision:      True
  Workers:              24





In [19]:
# Baseline checkpoint handed to the BaCP trainer as its starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# ---- Phase 1: BaCP pruning (magnitude, TARGET_SPARSITY_MID, cubic schedule) ----
bacp_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 1e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": 'bacp_pruning',
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# ---- Phase 2: fine-tuning ----
# The mask is generated from the BaCP trainer's model; the fine-tuning run
# reuses that trainer's settings, its pruner and its saved checkpoint path.
bacp_trainer.generate_mask_from_model()
finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ('adamw', 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# Evaluation and the summary printout run whether or not training happened.
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (adamw, 1e-05)
[TRAINER] No scheduler initialized
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Pruning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Target sparsity: 0.97
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.97_bacp_pruning.pt
[BaCP TRAINER] Mask generated from current model.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Initialized models
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.97_bacp_pruning.pt
[TRAINER] Weights loaded
[TRAINER] Optimizer type w/ learning rate: (adamw, 2e-05)
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] No scheduler initialized
[TRAINER] Finetuning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Current sparsity: 0.9700
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.97_bacp_finetune.pt
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.97_bacp_finetune.pt
[TRAINER] Weights loaded successfully
[TRAINER] Model Sparsity: 0.97


                                                           


TRAINING STATISTICS SUMMARY

Performance Metrics:
------------------------------
  Accuracy:     83.17%

Model Information:
------------------------------
  Total Parameters:     124,647,170
  Trainable Parameters: 124,647,170
  Model Sparsity:       0.9700 (97.00%)

Training Configuration:
------------------------------
  Model:                roberta-base
  Task:                 sst2
  Learning Type:        bacp_finetune
  Batch Size:           64
  Learning Rate:        2e-05
  Optimizer:            adamw
  Epochs:               10

System Information:
------------------------------
  Device:               cuda
  Mixed Precision:      True
  Workers:              24





In [None]:
# Baseline checkpoint handed to the BaCP trainer as its starting weights.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# ---- Phase 1: BaCP pruning (magnitude, TARGET_SPARSITY_HIGH, cubic schedule) ----
bacp_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ('adamw', 1e-5),
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": 'bacp_pruning',
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# ---- Phase 2: fine-tuning ----
# The mask is generated from the BaCP trainer's model; the fine-tuning run
# reuses that trainer's settings, its pruner and its saved checkpoint path.
bacp_trainer.generate_mask_from_model()
finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ('adamw', 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# Evaluation and the summary printout run whether or not training happened.
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (adamw, 1e-05)
[TRAINER] No scheduler initialized
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Pruning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Target sparsity: 0.99
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.99_bacp_pruning.pt
[LOGGER] Log file created at location: ./log_records/roberta-base/sst2/bacp_pruning/magnitude_pruning/0.99/run_2.log


  return forward_call(*args, **kwargs)



[Pruner] Cubic Sparsity ratio increased to 0.483.



                                                                                                                                                

Epoch [1/5]: Avg Total Loss: 5.1706 | Avg PrC Loss: 2.5881 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4826 | Avg CE Loss: 0.0998 | Model Sparsity: 0.4832

[BaCP] weights saved!


                                                                                                                                                   

Retraining Epoch [1/10]: Avg Total Loss: 4.9427 | Avg PrC Loss: 2.6034 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.2736 | Avg CE Loss: 0.0657 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [2/10]: Avg Total Loss: 4.8798 | Avg PrC Loss: 2.6084 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.2154 | Avg CE Loss: 0.0560 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [3/10]: Avg Total Loss: 4.8414 | Avg PrC Loss: 2.6110 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1810 | Avg CE Loss: 0.0495 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [4/10]: Avg Total Loss: 4.8160 | Avg PrC Loss: 2.6125 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1581 | Avg CE Loss: 0.0454 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [5/10]: Avg Total Loss: 4.7984 | Avg PrC Loss: 2.6136 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1425 | Avg CE Loss: 0.0422 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [6/10]: Avg Total Loss: 4.7825 | Avg PrC Loss: 2.6143 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1290 | Avg CE Loss: 0.0393 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [7/10]: Avg Total Loss: 4.7705 | Avg PrC Loss: 2.6149 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1187 | Avg CE Loss: 0.0368 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [8/10]: Avg Total Loss: 4.7615 | Avg PrC Loss: 2.6154 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1110 | Avg CE Loss: 0.0351 | Model Sparsity: 0.4832



                                                                                                                                                   

Retraining Epoch [9/10]: Avg Total Loss: 4.7531 | Avg PrC Loss: 2.6157 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.1041 | Avg CE Loss: 0.0332 | Model Sparsity: 0.4832



                                                                                                                                                    

Retraining Epoch [10/10]: Avg Total Loss: 4.7457 | Avg PrC Loss: 2.6158 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.0980 | Avg CE Loss: 0.0318 | Model Sparsity: 0.4832



Training Epoch [2/5]:   0%|          | 0/1052 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.776.



                                                                                                                                                   

Epoch [2/5]: Avg Total Loss: 7.3457 | Avg PrC Loss: 2.6043 | Avg SnC Loss: 2.1973 | Avg FiC Loss: 2.4510 | Avg CE Loss: 0.0932 | Model Sparsity: 0.7762

[BaCP] weights saved!


                                                                                                                                                      

Retraining Epoch [1/10]: Avg Total Loss: 7.0924 | Avg PrC Loss: 2.6218 | Avg SnC Loss: 2.0879 | Avg FiC Loss: 2.3121 | Avg CE Loss: 0.0705 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [2/10]: Avg Total Loss: 7.0119 | Avg PrC Loss: 2.6267 | Avg SnC Loss: 2.0496 | Avg FiC Loss: 2.2731 | Avg CE Loss: 0.0625 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [3/10]: Avg Total Loss: 6.9561 | Avg PrC Loss: 2.6304 | Avg SnC Loss: 2.0227 | Avg FiC Loss: 2.2463 | Avg CE Loss: 0.0568 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [4/10]: Avg Total Loss: 6.9158 | Avg PrC Loss: 2.6328 | Avg SnC Loss: 2.0024 | Avg FiC Loss: 2.2273 | Avg CE Loss: 0.0532 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [5/10]: Avg Total Loss: 6.8841 | Avg PrC Loss: 2.6352 | Avg SnC Loss: 1.9866 | Avg FiC Loss: 2.2124 | Avg CE Loss: 0.0499 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [6/10]: Avg Total Loss: 6.8578 | Avg PrC Loss: 2.6370 | Avg SnC Loss: 1.9730 | Avg FiC Loss: 2.2003 | Avg CE Loss: 0.0474 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [7/10]: Avg Total Loss: 6.8325 | Avg PrC Loss: 2.6384 | Avg SnC Loss: 1.9605 | Avg FiC Loss: 2.1884 | Avg CE Loss: 0.0452 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [8/10]: Avg Total Loss: 6.8126 | Avg PrC Loss: 2.6396 | Avg SnC Loss: 1.9502 | Avg FiC Loss: 2.1797 | Avg CE Loss: 0.0431 | Model Sparsity: 0.7762



                                                                                                                                                      

Retraining Epoch [9/10]: Avg Total Loss: 6.7953 | Avg PrC Loss: 2.6408 | Avg SnC Loss: 1.9412 | Avg FiC Loss: 2.1717 | Avg CE Loss: 0.0416 | Model Sparsity: 0.7762



                                                                                                                                                       

Retraining Epoch [10/10]: Avg Total Loss: 6.7803 | Avg PrC Loss: 2.6417 | Avg SnC Loss: 1.9334 | Avg FiC Loss: 2.1649 | Avg CE Loss: 0.0402 | Model Sparsity: 0.7762



Training Epoch [3/5]:   0%|          | 0/1052 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.927.



                                                                                                                                                 

Epoch [3/5]: Avg Total Loss: 10.8468 | Avg PrC Loss: 2.5084 | Avg SnC Loss: 5.1309 | Avg FiC Loss: 3.0322 | Avg CE Loss: 0.1753 | Model Sparsity: 0.9266

[BaCP] weights saved!


                                                                                                                                                    

Retraining Epoch [1/10]: Avg Total Loss: 10.3944 | Avg PrC Loss: 2.5856 | Avg SnC Loss: 4.8543 | Avg FiC Loss: 2.7998 | Avg CE Loss: 0.1547 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [2/10]: Avg Total Loss: 10.0861 | Avg PrC Loss: 2.6090 | Avg SnC Loss: 4.6751 | Avg FiC Loss: 2.6738 | Avg CE Loss: 0.1282 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [3/10]: Avg Total Loss: 9.7695 | Avg PrC Loss: 2.6208 | Avg SnC Loss: 4.5053 | Avg FiC Loss: 2.5415 | Avg CE Loss: 0.1018 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [4/10]: Avg Total Loss: 9.5376 | Avg PrC Loss: 2.6261 | Avg SnC Loss: 4.3881 | Avg FiC Loss: 2.4394 | Avg CE Loss: 0.0840 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [5/10]: Avg Total Loss: 9.4257 | Avg PrC Loss: 2.6263 | Avg SnC Loss: 4.3280 | Avg FiC Loss: 2.3949 | Avg CE Loss: 0.0765 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [6/10]: Avg Total Loss: 9.3555 | Avg PrC Loss: 2.6279 | Avg SnC Loss: 4.2873 | Avg FiC Loss: 2.3683 | Avg CE Loss: 0.0720 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [7/10]: Avg Total Loss: 9.3049 | Avg PrC Loss: 2.6302 | Avg SnC Loss: 4.2560 | Avg FiC Loss: 2.3501 | Avg CE Loss: 0.0686 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [8/10]: Avg Total Loss: 9.2587 | Avg PrC Loss: 2.6324 | Avg SnC Loss: 4.2272 | Avg FiC Loss: 2.3337 | Avg CE Loss: 0.0653 | Model Sparsity: 0.9266



                                                                                                                                                      

Retraining Epoch [9/10]: Avg Total Loss: 9.2206 | Avg PrC Loss: 2.6345 | Avg SnC Loss: 4.2023 | Avg FiC Loss: 2.3208 | Avg CE Loss: 0.0630 | Model Sparsity: 0.9266



                                                                                                                                                       

Retraining Epoch [10/10]: Avg Total Loss: 9.1884 | Avg PrC Loss: 2.6366 | Avg SnC Loss: 4.1801 | Avg FiC Loss: 2.3108 | Avg CE Loss: 0.0610 | Model Sparsity: 0.9266



Training Epoch [4/5]:   0%|          | 0/1052 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.982.



                                                                                                                                                 

Epoch [4/5]: Avg Total Loss: 13.2896 | Avg PrC Loss: 2.4870 | Avg SnC Loss: 7.6135 | Avg FiC Loss: 3.0260 | Avg CE Loss: 0.1631 | Model Sparsity: 0.9821

[BaCP] weights saved!


                                                                                                                                                    

Retraining Epoch [1/10]: Avg Total Loss: 12.6000 | Avg PrC Loss: 2.5630 | Avg SnC Loss: 7.1451 | Avg FiC Loss: 2.7552 | Avg CE Loss: 0.1366 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [2/10]: Avg Total Loss: 12.2479 | Avg PrC Loss: 2.6013 | Avg SnC Loss: 6.8976 | Avg FiC Loss: 2.6297 | Avg CE Loss: 0.1192 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [3/10]: Avg Total Loss: 12.0731 | Avg PrC Loss: 2.6151 | Avg SnC Loss: 6.7783 | Avg FiC Loss: 2.5720 | Avg CE Loss: 0.1077 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [4/10]: Avg Total Loss: 11.9463 | Avg PrC Loss: 2.6229 | Avg SnC Loss: 6.6944 | Avg FiC Loss: 2.5302 | Avg CE Loss: 0.0988 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [5/10]: Avg Total Loss: 11.8531 | Avg PrC Loss: 2.6277 | Avg SnC Loss: 6.6332 | Avg FiC Loss: 2.4995 | Avg CE Loss: 0.0927 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [6/10]: Avg Total Loss: 11.7772 | Avg PrC Loss: 2.6316 | Avg SnC Loss: 6.5835 | Avg FiC Loss: 2.4742 | Avg CE Loss: 0.0878 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [7/10]: Avg Total Loss: 11.7132 | Avg PrC Loss: 2.6344 | Avg SnC Loss: 6.5418 | Avg FiC Loss: 2.4531 | Avg CE Loss: 0.0840 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [8/10]: Avg Total Loss: 11.6575 | Avg PrC Loss: 2.6368 | Avg SnC Loss: 6.5055 | Avg FiC Loss: 2.4346 | Avg CE Loss: 0.0806 | Model Sparsity: 0.9821



                                                                                                                                                      

Retraining Epoch [9/10]: Avg Total Loss: 11.6089 | Avg PrC Loss: 2.6392 | Avg SnC Loss: 6.4737 | Avg FiC Loss: 2.4183 | Avg CE Loss: 0.0777 | Model Sparsity: 0.9821



                                                                                                                                                       

Retraining Epoch [10/10]: Avg Total Loss: 11.5621 | Avg PrC Loss: 2.6411 | Avg SnC Loss: 6.4430 | Avg FiC Loss: 2.4027 | Avg CE Loss: 0.0752 | Model Sparsity: 0.9821



Training Epoch [5/5]:   0%|          | 0/1052 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.990.



                                                                                                                                                   

Epoch [5/5]: Avg Total Loss: 13.6351 | Avg PrC Loss: 2.6590 | Avg SnC Loss: 8.4997 | Avg FiC Loss: 2.4032 | Avg CE Loss: 0.0732 | Model Sparsity: 0.99

[BaCP] weights saved!


                                                                                                                                                      

Retraining Epoch [1/10]: Avg Total Loss: 13.5228 | Avg PrC Loss: 2.6550 | Avg SnC Loss: 8.4170 | Avg FiC Loss: 2.3801 | Avg CE Loss: 0.0706 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [2/10]: Avg Total Loss: 13.4768 | Avg PrC Loss: 2.6561 | Avg SnC Loss: 8.3814 | Avg FiC Loss: 2.3703 | Avg CE Loss: 0.0690 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [3/10]: Avg Total Loss: 13.4451 | Avg PrC Loss: 2.6575 | Avg SnC Loss: 8.3573 | Avg FiC Loss: 2.3625 | Avg CE Loss: 0.0678 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [4/10]: Avg Total Loss: 13.4177 | Avg PrC Loss: 2.6590 | Avg SnC Loss: 8.3366 | Avg FiC Loss: 2.3553 | Avg CE Loss: 0.0667 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [5/10]: Avg Total Loss: 13.3942 | Avg PrC Loss: 2.6596 | Avg SnC Loss: 8.3190 | Avg FiC Loss: 2.3497 | Avg CE Loss: 0.0659 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [6/10]: Avg Total Loss: 13.3716 | Avg PrC Loss: 2.6601 | Avg SnC Loss: 8.3030 | Avg FiC Loss: 2.3438 | Avg CE Loss: 0.0648 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [7/10]: Avg Total Loss: 13.3495 | Avg PrC Loss: 2.6610 | Avg SnC Loss: 8.2868 | Avg FiC Loss: 2.3381 | Avg CE Loss: 0.0635 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [8/10]: Avg Total Loss: 13.3336 | Avg PrC Loss: 2.6614 | Avg SnC Loss: 8.2755 | Avg FiC Loss: 2.3340 | Avg CE Loss: 0.0627 | Model Sparsity: 0.99



                                                                                                                                                      

Retraining Epoch [9/10]: Avg Total Loss: 13.3168 | Avg PrC Loss: 2.6619 | Avg SnC Loss: 8.2638 | Avg FiC Loss: 2.3294 | Avg CE Loss: 0.0618 | Model Sparsity: 0.99



                                                                                                                                                       

Retraining Epoch [10/10]: Avg Total Loss: 13.3009 | Avg PrC Loss: 2.6628 | Avg SnC Loss: 8.2518 | Avg FiC Loss: 2.3252 | Avg CE Loss: 0.0611 | Model Sparsity: 0.99

[BaCP TRAINER] Mask generated from current model.


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Initialized models
[TRAINER] Loading weights: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.99_bacp_pruning.pt
[TRAINER] Weights loaded
[TRAINER] Optimizer type w/ learning rate: (adamw, 2e-05)
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] No scheduler initialized
[TRAINER] Finetuning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Current sparsity: 0.9900
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_magnitude_pruning_0.99_bacp_finetune.pt
[LOGGER] Log file created at location: ./log_records/roberta-base/sst2/bacp_finetune/magnitude_pruning/0.99/run_2.log
[TRAINER] Training with mixed precision enabled
[TRAINER] Initial model sparsity: 0.99


                                                                                                      

Training epoch [1/10]: Avg Loss: 0.2564 | Avg Accuracy: 82.33 | Model Sparsity: 0.99

[TRAINER] weights saved!


                                                                                                      

Training epoch [2/10]: Avg Loss: 0.2363 | Avg Accuracy: 82.21 | Model Sparsity: 0.99



                                                                                                      

Training epoch [3/10]: Avg Loss: 0.2253 | Avg Accuracy: 81.85 | Model Sparsity: 0.99



                                                                                                      

Training epoch [4/10]: Avg Loss: 0.2158 | Avg Accuracy: 82.57 | Model Sparsity: 0.99

[TRAINER] weights saved!


Training Epoch [5/10]:  81%|████████▏ | 857/1052 [01:14<00:17, 11.46it/s, Loss=0.2051, Sparsity=0.99]

### SNIP-it Pruning

In [None]:
# BaCP with SNIP pruning at TARGET_SPARSITY_LOW, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (adamw, 1e-05)
[TRAINER] No scheduler initialized
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Pruning initialized
[TRAINER] Pruning type: snip_pruning
[TRAINER] Target sparsity: 0.95
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_snip_pruning_0.95_bacp_pruning.pt
[ERROR] Could not load weights. Path does not exist: ./research/roberta-base/sst2/roberta-base_sst2_snip_pruning_0.95_bacp_pruning.pt


Exception: Error loading weights: {path}

In [None]:
# BaCP with SNIP pruning at TARGET_SPARSITY_MID, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


[TRAINER] Image size: None
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (adamw, 1e-05)
[TRAINER] No scheduler initialized
[DATALOADERS] ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: sst2
[TRAINER] Batch size: 64
[TRAINER] Number of dataloders: 2
[TRAINER] Pruning initialized
[TRAINER] Pruning type: snip_pruning
[TRAINER] Target sparsity: 0.97
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/roberta-base/sst2/roberta-base_sst2_snip_pruning_0.97_bacp_pruning.pt
[ERROR] Could not load weights. Path does not exist: ./research/roberta-base/sst2/roberta-base_sst2_snip_pruning_0.97_bacp_pruning.pt


Exception: Error loading weights: {path}

In [0]:
# BaCP with SNIP pruning at TARGET_SPARSITY_HIGH, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "snip_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


### Wanda Pruning

In [0]:
# BaCP with Wanda pruning at TARGET_SPARSITY_LOW, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


In [0]:
# BaCP with Wanda pruning at TARGET_SPARSITY_MID, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)


In [None]:
# BaCP with Wanda pruning at TARGET_SPARSITY_HIGH, followed by a fine-tuning run.
# The pruning phase starts from the checkpoint path built here (a file named
# "*_baseline.pt" under ./research/<model>/<task>/).
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# --- Pruning phase -----------------------------------------------------------
# Collect the BaCP arguments in one mapping so the settings can be read at a glance.
bacp_pruning_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type_and_lr": ("adamw", 1e-5),
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": "cubic",
    "finetuned_weights": finetuned_weights,
    "learning_type": "bacp_pruning",
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**bacp_pruning_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args)
if TRAIN:
    # Skipped when TRAIN is False; everything below still runs in that case.
    bacp_trainer.train()

# --- Finetuning phase --------------------------------------------------------
# The mask is generated before the fine-tuning arguments are built, and the
# fine-tuning run reuses the BaCP trainer's settings, pruner and save path.
bacp_trainer.generate_mask_from_model()
bacp_finetune_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ("adamw", 2e-5),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "epochs": 10,
    "db": False,
}
training_args = TrainingArguments(**bacp_finetune_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# --- Evaluation --------------------------------------------------------------
metrics = trainer.evaluate()
print_statistics(metrics, trainer)
