In [0]:
%load_ext autoreload
%autoreload 2
# Autoreload mode 2: imported Python modules are reloaded before each cell executes, so edits to
# the project modules take effect without restarting the kernel.
# More details: https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To turn autoreload off, run `%autoreload 0`.

In [1]:
import os
import sys

# Hugging Face settings have to be in the environment BEFORE `datasets` is imported
# (directly below, or possibly indirectly through the project modules): `datasets` resolves
# HF_DATASETS_CACHE once, at import time, so assigning it after the import is silently ignored.
os.environ["HF_DATASETS_CACHE"] = "/dbfs/hf_datasets"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Put the parent directory on the module search path so the project imports below resolve.
sys.path.append(os.path.abspath('..'))

from constants import (
    TARGET_SPARSITY_LOW, TARGET_SPARSITY_MID, TARGET_SPARSITY_HIGH,
    BATCH_SIZE_CNN, BATCH_SIZE_VIT, BATCH_SIZE_LLM,
    EPOCHS_SMALL_MODEL, EPOCHS_LARGE_MODEL, EPOCHS_VIT
)
from utils import get_device, get_num_workers, load_weights, print_statistics
from unstructured_pruning import check_model_sparsity, check_sparsity_distribution
from trainer import TrainingArguments, Trainer
from bacp import BaCPTrainingArguments, BaCPTrainer

from datasets.utils.logging import disable_progress_bar

# Silence the per-dataset progress bars so the cell outputs stay readable.
disable_progress_bar()


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Query the compute device and the worker count from the project helpers and report both.
# In the cells visible in this notebook these two names are only printed here; they are not
# passed to any trainer.
DEVICE = get_device()
NUM_WORKERS = get_num_workers()
print(f"Using device: {DEVICE}")
print(f"Using {NUM_WORKERS} workers")

Using device: cuda
Using 288 workers


# ViT-Small

In [3]:
# Notebook-wide settings read by the experiment cells.
MODEL_NAME = "vit_small"  # passed as `model_name` and used to build checkpoint paths
MODEL_TASK = "cifar10"    # passed as `model_task` and used to build checkpoint paths
TRAIN = True              # cells guarded by `if TRAIN:` call .train() only when this is True

## Baseline Accuracy

In [11]:
# Baseline run: learning_type="baseline" and no pruning arguments are supplied.
# NOTE(review): BATCH_SIZE_CNN is used for this ViT although BATCH_SIZE_VIT is imported, and
# `epochs` is the literal 10 rather than EPOCHS_VIT — confirm both are intentional.
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    scheduler_type="linear_with_warmup", epochs=10,
    learning_type="baseline", db=False,
)
trainer = Trainer(training_args)

# Training is switched off by this literal guard, so the cell goes straight to evaluation.
if False:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

[TRAINER] Image size: 224
[TRAINER] Initialized models
[TRAINER] Optimizer type w/ learning rate: (sgd, 0.01)
[CV DATALOADERS] Loaded cifar10 with splits: ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: cifar10
[TRAINER] Batch size: 512
[TRAINER] Number of dataloders: 3
[TRAINER] Linear scheduler initialized with warmup steps: 83 and total steps: 830
[TRAINER] Pruning not initialized
[TRAINER] Saving model to: ./research/vit_small/cifar10/vit_small_cifar10_baseline.pt
[TRAINER] Loading weights: ./research/vit_small/cifar10/vit_small_cifar10_baseline.pt
[TRAINER] Weights loaded successfully
[TRAINER] Model Sparsity: 0.0


                                                           


TRAINING STATISTICS SUMMARY

Performance Metrics:
------------------------------
  Accuracy:     98.45%

Model Information:
------------------------------
  Total Parameters:     21,669,514
  Trainable Parameters: 21,669,514
  Model Sparsity:       0.0000 (0.00%)

Training Configuration:
------------------------------
  Model:                vit_small
  Task:                 cifar10
  Learning Type:        baseline
  Batch Size:           512
  Learning Rate:        0.01
  Optimizer:            sgd
  Epochs:               10

System Information:
------------------------------
  Device:               cuda
  Mixed Precision:      True
  Workers:              24



## Pruning Accuracy

### Magnitude Pruning

In [None]:
# Magnitude pruning to TARGET_SPARSITY_LOW with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME,
    model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN,
    optimizer_type_and_lr=('sgd', 0.01),
    pruning_type="magnitude_pruning",
    target_sparsity=TARGET_SPARSITY_LOW,
    sparsity_scheduler='cubic',
    finetuned_weights=finetuned_weights,
    learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# Gate training on the notebook-wide TRAIN flag (as the SNIP-it and Wanda cells do) instead of
# a hard-coded `if True:`, so a single switch controls every pruning run.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [None]:
# Magnitude pruning to TARGET_SPARSITY_MID with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME,
    model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN,
    optimizer_type_and_lr=('sgd', 0.01),
    pruning_type="magnitude_pruning",
    target_sparsity=TARGET_SPARSITY_MID,
    sparsity_scheduler='cubic',
    finetuned_weights=finetuned_weights,
    learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# Gate training on the notebook-wide TRAIN flag (as the SNIP-it and Wanda cells do) instead of
# a hard-coded `if True:`, so a single switch controls every pruning run.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# Magnitude pruning to TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME,
    model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN,
    optimizer_type_and_lr=('sgd', 0.01),
    pruning_type="magnitude_pruning",
    target_sparsity=TARGET_SPARSITY_HIGH,
    sparsity_scheduler='cubic',
    finetuned_weights=finetuned_weights,
    learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# Gate training on the notebook-wide TRAIN flag (as the SNIP-it and Wanda cells do) instead of
# a hard-coded `if True:`, so a single switch controls every pruning run.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### SNIP-it Pruning

In [0]:
# SNIP-it pruning to TARGET_SPARSITY_LOW with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_LOW, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# SNIP-it pruning to TARGET_SPARSITY_MID with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_MID, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# SNIP-it pruning to TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_HIGH, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### Wanda Pruning

In [0]:
# Wanda pruning to TARGET_SPARSITY_LOW with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="wanda_pruning", target_sparsity=TARGET_SPARSITY_LOW, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# Wanda pruning to TARGET_SPARSITY_MID with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="wanda_pruning", target_sparsity=TARGET_SPARSITY_MID, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# Wanda pruning to TARGET_SPARSITY_HIGH with a cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.01),
    pruning_type="wanda_pruning", target_sparsity=TARGET_SPARSITY_HIGH, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="pruning",
    db=False,
)
trainer = Trainer(training_args)

# The notebook-wide TRAIN flag decides whether the training pass runs before evaluation.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

## BaCP Accuracies

### Magnitude Pruning

In [0]:
# BaCP with magnitude pruning at TARGET_SPARSITY_LOW, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="magnitude_pruning", target_sparsity=TARGET_SPARSITY_LOW, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

[TRAINER] Image size: 224
[ERROR] Could not load weights: ./research/vit_small/cifar10/vit_small_cifar10_baseline.pt
[ERROR] Attempting partial load
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (sgd, 0.1)
[TRAINER] No scheduler initialized
[CV DATALOADERS] Loaded cifar10 with splits: ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: cifar10
[TRAINER] Batch size: 512
[TRAINER] Number of dataloders: 3
[TRAINER] Pruning initialized
[TRAINER] Pruning type: magnitude_pruning
[TRAINER] Target sparsity: 0.95
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/vit_small/cifar10/vit_small_cifar10_magnitude_pruning_0.95_bacp_pruning.pt
[LOGGER] Log file created at location: ./log_records/vit_small/cifar10/bacp_pruning/magnitude_pruning/0.95/run_1.log


Training Epoch [1/5]:   0%|          | 0/83 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.464.



                                                                                                                                          

Epoch [1/5]: Avg Total Loss: 7.3171 | Avg PrC Loss: 3.4440 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 3.4050 | Avg CE Loss: 0.4682 | Model Sparsity: 0.4636

[BaCP] weights saved!


                                                                                                                                               

Retraining Epoch [1/10]: Avg Total Loss: 5.9687 | Avg PrC Loss: 3.0596 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.7266 | Avg CE Loss: 0.1825 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [2/10]: Avg Total Loss: 5.4232 | Avg PrC Loss: 2.9024 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4520 | Avg CE Loss: 0.0688 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [3/10]: Avg Total Loss: 5.3227 | Avg PrC Loss: 2.8688 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4016 | Avg CE Loss: 0.0524 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [4/10]: Avg Total Loss: 5.3222 | Avg PrC Loss: 2.8673 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4020 | Avg CE Loss: 0.0529 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [5/10]: Avg Total Loss: 5.2615 | Avg PrC Loss: 2.8475 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.3720 | Avg CE Loss: 0.0420 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [6/10]: Avg Total Loss: 5.2608 | Avg PrC Loss: 2.8460 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.3739 | Avg CE Loss: 0.0409 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [7/10]: Avg Total Loss: 5.2738 | Avg PrC Loss: 2.8524 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.3791 | Avg CE Loss: 0.0423 | Model Sparsity: 0.4636

[BaCP] weights saved!


                                                                                                                                               

Retraining Epoch [8/10]: Avg Total Loss: 5.2806 | Avg PrC Loss: 2.8516 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.3852 | Avg CE Loss: 0.0438 | Model Sparsity: 0.4636

[BaCP] weights saved!


                                                                                                                                               

Retraining Epoch [9/10]: Avg Total Loss: 5.3404 | Avg PrC Loss: 2.8735 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4135 | Avg CE Loss: 0.0535 | Model Sparsity: 0.4636

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [10/10]: Avg Total Loss: 5.3722 | Avg PrC Loss: 2.8802 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.4309 | Avg CE Loss: 0.0611 | Model Sparsity: 0.4636

[BaCP] weights saved!


Training Epoch [2/5]:   0%|          | 0/83 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.745.



                                                                                                                                             

Epoch [2/5]: Avg Total Loss: 11.3006 | Avg PrC Loss: 3.5774 | Avg SnC Loss: 3.5319 | Avg FiC Loss: 3.6537 | Avg CE Loss: 0.5377 | Model Sparsity: 0.7448

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [1/10]: Avg Total Loss: 10.3438 | Avg PrC Loss: 3.3772 | Avg SnC Loss: 3.1900 | Avg FiC Loss: 3.3305 | Avg CE Loss: 0.4460 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [2/10]: Avg Total Loss: 9.6372 | Avg PrC Loss: 3.2520 | Avg SnC Loss: 2.9349 | Avg FiC Loss: 3.0978 | Avg CE Loss: 0.3525 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [3/10]: Avg Total Loss: 8.9316 | Avg PrC Loss: 3.1298 | Avg SnC Loss: 2.6860 | Avg FiC Loss: 2.8656 | Avg CE Loss: 0.2502 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [4/10]: Avg Total Loss: 8.5445 | Avg PrC Loss: 3.0626 | Avg SnC Loss: 2.5548 | Avg FiC Loss: 2.7364 | Avg CE Loss: 0.1907 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [5/10]: Avg Total Loss: 8.3619 | Avg PrC Loss: 3.0291 | Avg SnC Loss: 2.4935 | Avg FiC Loss: 2.6762 | Avg CE Loss: 0.1631 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [6/10]: Avg Total Loss: 8.2045 | Avg PrC Loss: 2.9994 | Avg SnC Loss: 2.4434 | Avg FiC Loss: 2.6224 | Avg CE Loss: 0.1394 | Model Sparsity: 0.7448



                                                                                                                                                 

Retraining Epoch [7/10]: Avg Total Loss: 8.6848 | Avg PrC Loss: 3.0876 | Avg SnC Loss: 2.6037 | Avg FiC Loss: 2.7823 | Avg CE Loss: 0.2113 | Model Sparsity: 0.7448

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [8/10]: Avg Total Loss: 8.1434 | Avg PrC Loss: 2.9883 | Avg SnC Loss: 2.4227 | Avg FiC Loss: 2.6012 | Avg CE Loss: 0.1312 | Model Sparsity: 0.7448



                                                                                                                                                

Retraining Epoch [9/10]: Avg Total Loss: 8.1232 | Avg PrC Loss: 2.9833 | Avg SnC Loss: 2.4178 | Avg FiC Loss: 2.5942 | Avg CE Loss: 0.1279 | Model Sparsity: 0.7448



                                                                                                                                                   

Retraining Epoch [10/10]: Avg Total Loss: 8.1368 | Avg PrC Loss: 2.9857 | Avg SnC Loss: 2.4216 | Avg FiC Loss: 2.5996 | Avg CE Loss: 0.1299 | Model Sparsity: 0.7448

[BaCP] weights saved!


Training Epoch [3/5]:   0%|          | 0/83 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.889.



                                                                                                                                             

Epoch [3/5]: Avg Total Loss: 11.7763 | Avg PrC Loss: 3.1649 | Avg SnC Loss: 5.4396 | Avg FiC Loss: 2.9073 | Avg CE Loss: 0.2645 | Model Sparsity: 0.8892

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [1/10]: Avg Total Loss: 10.6077 | Avg PrC Loss: 3.0008 | Avg SnC Loss: 4.8522 | Avg FiC Loss: 2.6165 | Avg CE Loss: 0.1382 | Model Sparsity: 0.8892



                                                                                                                                                

Retraining Epoch [2/10]: Avg Total Loss: 10.5374 | Avg PrC Loss: 2.9909 | Avg SnC Loss: 4.8176 | Avg FiC Loss: 2.5990 | Avg CE Loss: 0.1299 | Model Sparsity: 0.8892



                                                                                                                                                

Retraining Epoch [3/10]: Avg Total Loss: 10.5284 | Avg PrC Loss: 2.9904 | Avg SnC Loss: 4.8133 | Avg FiC Loss: 2.5958 | Avg CE Loss: 0.1289 | Model Sparsity: 0.8892



                                                                                                                                                  

Retraining Epoch [4/10]: Avg Total Loss: 10.5045 | Avg PrC Loss: 2.9845 | Avg SnC Loss: 4.8033 | Avg FiC Loss: 2.5899 | Avg CE Loss: 0.1268 | Model Sparsity: 0.8892



                                                                                                                                                

Retraining Epoch [5/10]: Avg Total Loss: 10.5319 | Avg PrC Loss: 2.9887 | Avg SnC Loss: 4.8175 | Avg FiC Loss: 2.5953 | Avg CE Loss: 0.1304 | Model Sparsity: 0.8892

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [6/10]: Avg Total Loss: 10.4941 | Avg PrC Loss: 2.9828 | Avg SnC Loss: 4.8000 | Avg FiC Loss: 2.5853 | Avg CE Loss: 0.1260 | Model Sparsity: 0.8892



                                                                                                                                                  

Retraining Epoch [7/10]: Avg Total Loss: 10.5033 | Avg PrC Loss: 2.9847 | Avg SnC Loss: 4.8046 | Avg FiC Loss: 2.5869 | Avg CE Loss: 0.1271 | Model Sparsity: 0.8892

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [8/10]: Avg Total Loss: 10.5208 | Avg PrC Loss: 2.9861 | Avg SnC Loss: 4.8140 | Avg FiC Loss: 2.5908 | Avg CE Loss: 0.1298 | Model Sparsity: 0.8892

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [9/10]: Avg Total Loss: 10.4903 | Avg PrC Loss: 2.9824 | Avg SnC Loss: 4.7987 | Avg FiC Loss: 2.5832 | Avg CE Loss: 0.1259 | Model Sparsity: 0.8892



                                                                                                                                                 

Retraining Epoch [10/10]: Avg Total Loss: 10.4771 | Avg PrC Loss: 2.9794 | Avg SnC Loss: 4.7923 | Avg FiC Loss: 2.5799 | Avg CE Loss: 0.1254 | Model Sparsity: 0.8892



Training Epoch [4/5]:   0%|          | 0/83 [00:00<?, ?it/s]


[Pruner] Cubic Sparsity ratio increased to 0.942.



                                                                                                                                             

Epoch [4/5]: Avg Total Loss: 14.9274 | Avg PrC Loss: 3.2198 | Avg SnC Loss: 8.3979 | Avg FiC Loss: 2.9990 | Avg CE Loss: 0.3107 | Model Sparsity: 0.9424

[BaCP] weights saved!


                                                                                                                                                

Retraining Epoch [1/10]: Avg Total Loss: 13.1700 | Avg PrC Loss: 3.0316 | Avg SnC Loss: 7.3124 | Avg FiC Loss: 2.6600 | Avg CE Loss: 0.1660 | Model Sparsity: 0.9424



                                                                                                                                                

Retraining Epoch [2/10]: Avg Total Loss: 12.8422 | Avg PrC Loss: 2.9945 | Avg SnC Loss: 7.1176 | Avg FiC Loss: 2.5945 | Avg CE Loss: 0.1356 | Model Sparsity: 0.9424



Retraining epoch [3/10]:   0%|          | 0/83 [00:00<?, ?it/s]

In [0]:
# BaCP with magnitude pruning at TARGET_SPARSITY_MID, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="magnitude_pruning", target_sparsity=TARGET_SPARSITY_MID, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# BaCP with magnitude pruning at TARGET_SPARSITY_HIGH, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="magnitude_pruning", target_sparsity=TARGET_SPARSITY_HIGH, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### SNIP-it Pruning

In [0]:
# BaCP with SNIP-it pruning at TARGET_SPARSITY_LOW, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_LOW, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# BaCP with SNIP-it pruning at TARGET_SPARSITY_MID, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_MID, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# BaCP with SNIP-it pruning at TARGET_SPARSITY_HIGH, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="snip_pruning", target_sparsity=TARGET_SPARSITY_HIGH, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

### Wanda Pruning

In [None]:
# BaCP with Wanda pruning at TARGET_SPARSITY_LOW, run as two phases in one cell:
#   1. a BaCPTrainer run with learning_type='bacp_pruning', given the baseline checkpoint path;
#   2. a regular Trainer run with learning_type="bacp_finetune", given the BaCP save path.
finetuned_weights = f"./research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"

# Phase 1: BaCP pruning
bacp_training_args = BaCPTrainingArguments(
    model_name=MODEL_NAME, model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_CNN, optimizer_type_and_lr=("sgd", 0.1),
    pruning_type="wanda_pruning", target_sparsity=TARGET_SPARSITY_LOW, sparsity_scheduler="cubic",
    finetuned_weights=finetuned_weights, learning_type="bacp_pruning",
    db=False,
)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
if TRAIN:
    bacp_trainer.train()

# Phase 2: finetuning. generate_mask_from_model() is called on the BaCP trainer first; the
# finetuning arguments then reuse its pruner, settings and save path.
bacp_trainer.generate_mask_from_model()
training_args = TrainingArguments(
    model_name=bacp_trainer.model_name, model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size, optimizer_type_and_lr=("adamw", 0.0001),
    pruner=bacp_trainer.get_pruner(), pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity, epochs=50,
    finetuned_weights=bacp_trainer.save_path, finetune=True,
    learning_type="bacp_finetune", db=False,
)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print_statistics(metrics, trainer)

[TRAINER] Image size: 224
[ERROR] Could not load weights: ./research/vit_small/cifar10/vit_small_cifar10_baseline.pt
[ERROR] Attempting partial load
[TRAINER] Weights loaded successfully
[TRAINER] Initialized BaCP models
[TRAINER] Optimizer type w/ learning rate: (sgd, 0.1)
[TRAINER] No scheduler initialized
[CV DATALOADERS] Loaded cifar10 with splits: ['train', 'validation', 'test']
[TRAINER] Data Initialized for model task: cifar10
[TRAINER] Batch size: 512
[TRAINER] Number of dataloders: 3
[TRAINER] Pruning initialized
[TRAINER] Pruning type: wanda_pruning
[TRAINER] Target sparsity: 0.95
[TRAINER] Sparsity scheduler: cubic
[TRAINER] Pruning epochs: 5
[TRAINER] Current sparsity: 0.0000
[TRAINER] Saving model to: ./research/vit_small/cifar10/vit_small_cifar10_wanda_pruning_0.95_bacp_pruning.pt
[LOGGER] Log file created at location: ./log_records/vit_small/cifar10/bacp_pruning/wanda_pruning/0.95/run_1.log
[Pruner] Adding hooks


Training Epoch [1/5]:   0%|          | 0/83 [00:00<?, ?it/s]Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 239, in _feed
    reader_close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 177, in close
    self._close()
  File "/usr/lib/python3.10/multiprocessing/connection.py", line 361, in _close
    _close(self._handle)
OSError: [Errno 9] Bad file descriptor
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7b8fcab83250>
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1664, in __del__
    self._shutdown_workers()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py", line 1647, in _shutdown_workers
    if w.is_alive():
  File "/usr/lib/python3.10/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only


[Pruner] Cubic Sparsity ratio increased to 0.464.


[Pruner] Removing hooks


                                                                                                                                          

Epoch [1/5]: Avg Total Loss: 7.8662 | Avg PrC Loss: 3.6228 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 3.6934 | Avg CE Loss: 0.5500 | Model Sparsity: 0.4636

[BaCP] weights saved!


                                                                                                                                             

Retraining Epoch [1/10]: Avg Total Loss: 7.1280 | Avg PrC Loss: 3.3778 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 3.3134 | Avg CE Loss: 0.4368 | Model Sparsity: 0.4636



                                                                                                                                             

Retraining Epoch [2/10]: Avg Total Loss: 6.4171 | Avg PrC Loss: 3.1752 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.9516 | Avg CE Loss: 0.2903 | Model Sparsity: 0.4636



                                                                                                                                             

Retraining Epoch [3/10]: Avg Total Loss: 5.8042 | Avg PrC Loss: 3.0101 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.6436 | Avg CE Loss: 0.1506 | Model Sparsity: 0.4636



                                                                                                                                               

Retraining Epoch [4/10]: Avg Total Loss: 5.5534 | Avg PrC Loss: 2.9397 | Avg SnC Loss: 0.0000 | Avg FiC Loss: 2.5170 | Avg CE Loss: 0.0967 | Model Sparsity: 0.4636



Exception in thread Thread-40 (_pin_memory_loop):                                                                                              
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/home/ubuntu/.local/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 772, in run_closure
    _threading_Thread_run(self)
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/pin_memory.py", line 61, in _pin_memory_loop
    do_one_step()
  File "/usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/pin_memory.py", line 37, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
  File "/usr/local/lib/python3.10/dist-packages/torch/multiprocessing/reduc

KeyboardInterrupt: 

    s.connect(address)
FileNotFoundError: [Errno 2] No such file or directory


In [0]:
# BaCP experiment: Wanda pruning to TARGET_SPARSITY_MID, then a finetuning run.

# Baseline checkpoint path, passed to the pruning run as `finetuned_weights`.
finetuned_weights = (
    f"./research/{MODEL_NAME}/{MODEL_TASK}/"
    f"{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
)

# ---- Pruning phase ----
pruning_phase_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_CNN,
    "optimizer_type_and_lr": ('sgd', 0.1),
    "pruning_type": 'wanda_pruning',
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": 'bacp_pruning',
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**pruning_phase_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
# TRAIN is the notebook-level switch; when it is falsy the training call is skipped.
if TRAIN:
    bacp_trainer.train()

# ---- Finetuning phase ----
# Runs unconditionally, i.e. also when the pruning training above was skipped.
bacp_trainer.generate_mask_from_model()
# The finetuning arguments reuse the BaCP trainer's model, task, batch size,
# pruner and sparsity settings, and read the weights path from its `save_path`.
finetune_phase_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ('adamw', 0.0001),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "epochs": 50,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "db": False,
}
training_args = TrainingArguments(**finetune_phase_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# Evaluation and reporting run regardless of TRAIN.
metrics = trainer.evaluate()
print_statistics(metrics, trainer)

In [0]:
# BaCP experiment: Wanda pruning to TARGET_SPARSITY_HIGH, then a finetuning run.

# Baseline checkpoint path, passed to the pruning run as `finetuned_weights`.
finetuned_weights = (
    f"./research/{MODEL_NAME}/{MODEL_TASK}/"
    f"{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
)

# ---- Pruning phase ----
pruning_phase_kwargs = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_CNN,
    "optimizer_type_and_lr": ('sgd', 0.1),
    "pruning_type": 'wanda_pruning',
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": 'bacp_pruning',
    "db": False,
}
bacp_training_args = BaCPTrainingArguments(**pruning_phase_kwargs)
bacp_trainer = BaCPTrainer(bacp_training_args=bacp_training_args)
# TRAIN is the notebook-level switch; when it is falsy the training call is skipped.
if TRAIN:
    bacp_trainer.train()

# ---- Finetuning phase ----
# Runs unconditionally, i.e. also when the pruning training above was skipped.
bacp_trainer.generate_mask_from_model()
# The finetuning arguments reuse the BaCP trainer's model, task, batch size,
# pruner and sparsity settings, and read the weights path from its `save_path`.
finetune_phase_kwargs = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type_and_lr": ('adamw', 0.0001),
    "pruner": bacp_trainer.get_pruner(),
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "epochs": 50,
    "finetuned_weights": bacp_trainer.save_path,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "db": False,
}
training_args = TrainingArguments(**finetune_phase_kwargs)
trainer = Trainer(training_args)
if TRAIN:
    trainer.train()

# Evaluation and reporting run regardless of TRAIN.
metrics = trainer.evaluate()
print_statistics(metrics, trainer)