# DistilBERT Testing Notebook

In [0]:
# Load IPython's autoreload extension and use mode 2, which reloads imported
# modules before running each cell so edits to the project's .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0

In [0]:
import sys
import os

# Configure Hugging Face through the environment BEFORE anything imports
# `datasets`: the library resolves HF_DATASETS_CACHE when it is first imported,
# so assigning it after the import leaves the default cache location in use.
os.environ["HF_DATASETS_CACHE"] = "/dbfs/hf_datasets"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Make the project modules in the parent directory importable. The membership
# check keeps sys.path from growing every time this cell is re-run.
project_root = os.path.abspath('..')
if project_root not in sys.path:
    sys.path.append(project_root)

from bacp import BaCPTrainer, BaCPTrainingArguments
from trainer import Trainer, TrainingArguments
from unstructured_pruning import check_sparsity_distribution

# Silence the per-dataset progress bars to keep cell output short.
from datasets.utils.logging import disable_progress_bar
disable_progress_bar()

# NOTE(review): the wildcard imports hide where names such as get_device,
# BATCH_SIZE_LLM and TARGET_SPARSITY_* are defined; consider importing them
# explicitly once their home modules are confirmed.
from utils import *
from constants import *

device = get_device()
print(f"{device = }")


In [0]:
# Notebook-wide settings read by the baseline and plain-pruning cells.
MODEL_NAME, MODEL_TASK = "distilbert-base-uncased", "wikitext2"
# When True, the plain-pruning cells call trainer.train() before evaluating.
TRAIN = True

## Baseline Accuracies

In [0]:
# Baseline configuration: no pruning arguments, learning_type="baseline".
baseline_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "scheduler_type": 'linear_with_warmup',
    "epochs": 50,
    "learning_type": "baseline",
}
training_args = TrainingArguments(**baseline_config)
trainer = Trainer(training_args=training_args)

# Hard-coded switch: baseline training is skipped, so this cell only evaluates.
retrain_baseline = False
if retrain_baseline:
    trainer.train()

metrics = trainer.evaluate()
print(f'\n{metrics}')

## Pruning Accuracies

### Magnitude Prune

In [0]:
# Magnitude pruning toward TARGET_SPARSITY_LOW with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Magnitude pruning toward TARGET_SPARSITY_MID with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Magnitude pruning toward TARGET_SPARSITY_HIGH: the third step of the
# LOW -> MID -> HIGH sweep. (This cell used to repeat TARGET_SPARSITY_LOW,
# which re-ran the first magnitude-pruning experiment instead.)
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME,
    model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_LLM,
    learning_rate=5e-5,
    optimizer_type='adamw',
    pruning_type="magnitude_pruning",
    target_sparsity=TARGET_SPARSITY_HIGH,
    sparsity_scheduler='cubic',
    finetuned_weights=finetuned_weights,
    learning_type="pruning",
)
trainer = Trainer(training_args)
# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

### Movement Prune

In [0]:
# Movement pruning toward TARGET_SPARSITY_LOW with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Movement pruning toward TARGET_SPARSITY_MID with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Movement pruning toward TARGET_SPARSITY_HIGH with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

### WandA Prune

In [0]:
# Wanda pruning toward TARGET_SPARSITY_LOW with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Wanda pruning toward TARGET_SPARSITY_MID with the cubic sparsity schedule.
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
pruning_config = {
    "model_name": MODEL_NAME,
    "model_task": MODEL_TASK,
    "batch_size": BATCH_SIZE_LLM,
    "learning_rate": 5e-5,
    "optimizer_type": 'adamw',
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": finetuned_weights,
    "learning_type": "pruning",
}
training_args = TrainingArguments(**pruning_config)
trainer = Trainer(training_args)

# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

In [0]:
# Wanda pruning toward TARGET_SPARSITY_HIGH: the third step of the
# LOW -> MID -> HIGH sweep. (This cell used to repeat TARGET_SPARSITY_LOW,
# which re-ran the first Wanda experiment instead.)
# The baseline checkpoint path is passed to the trainer as `finetuned_weights`.
finetuned_weights = f"/dbfs/research/{MODEL_NAME}/{MODEL_TASK}/{MODEL_NAME}_{MODEL_TASK}_baseline.pt"
training_args = TrainingArguments(
    model_name=MODEL_NAME,
    model_task=MODEL_TASK,
    batch_size=BATCH_SIZE_LLM,
    learning_rate=5e-5,
    optimizer_type='adamw',
    pruning_type="wanda_pruning",
    target_sparsity=TARGET_SPARSITY_HIGH,
    sparsity_scheduler='cubic',
    finetuned_weights=finetuned_weights,
    learning_type="pruning",
)
trainer = Trainer(training_args)
# Train only when the notebook-level TRAIN switch is on; evaluation always runs.
if TRAIN:
    trainer.train()

metrics = trainer.evaluate()
print(f"\n{metrics}")

# BaCP Accuracies

## SST-2

### Magnitude Pruning

In [0]:
# BaCP on SST-2: magnitude pruning toward TARGET_SPARSITY_LOW.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_LOW
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
    "learning_rate": 1e-5,
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning is skipped, so only evaluation runs.
run_finetune_phase = False
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: magnitude pruning toward TARGET_SPARSITY_MID.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_MID
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 2e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: magnitude pruning toward TARGET_SPARSITY_HIGH.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_HIGH
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning is skipped, so only evaluation runs.
run_finetune_phase = False
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

### Movement Pruning

In [0]:
# BaCP on SST-2: movement pruning toward TARGET_SPARSITY_LOW.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_LOW
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: movement pruning toward TARGET_SPARSITY_MID.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_MID
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: movement pruning toward TARGET_SPARSITY_HIGH.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_HIGH
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

### Wanda Pruning

In [0]:
# BaCP on SST-2: Wanda pruning toward TARGET_SPARSITY_LOW.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_LOW
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: Wanda pruning toward TARGET_SPARSITY_MID.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_MID
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# BaCP on SST-2: Wanda pruning toward TARGET_SPARSITY_HIGH.
# NOTE(review): BaCPTrainingArgumentsLLM, LLMTrainingArguments, LLMTrainer and
# BATCH_SIZE_DISTILBERT are not imported by name in this notebook; presumably
# the wildcard imports supply them — confirm before running.
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_HIGH
model_name, model_task = "distilbert-base-uncased", "sst2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_DISTILBERT,
    "finetuned_weights": trained_model_path,
    "pruning_type": pruning_type,
    "target_sparsity": target_sparsity,
    "sparsity_scheduler": 'cubic',
}
bacp_training_args = BaCPTrainingArgumentsLLM(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.cm_save_path,
    "epochs": 5,
    "pruner": pruner,
    "finetune": True,
    "learning_type": "bacp_finetune",
    "learning_rate": 1e-6,
}
llm_training_args = LLMTrainingArguments(**finetune_config)
llm_trainer = LLMTrainer(llm_training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")

## WikiText-2


### Magnitude Pruning

In [0]:
# BaCP on WikiText-2: magnitude pruning toward TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: magnitude pruning toward TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: magnitude pruning toward TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "magnitude_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

### Movement Prune

In [0]:
# BaCP on WikiText-2: movement pruning toward TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: movement pruning toward TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: movement pruning toward TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "movement_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

### WandA Prune

In [0]:
# BaCP on WikiText-2: Wanda pruning toward TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_LOW,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase runs in this cell.
run_bacp_phase = True
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: Wanda pruning toward TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_MID,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)

In [0]:
# BaCP on WikiText-2: Wanda pruning toward TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "wikitext2"
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{model_task}_baseline.pt"

bacp_config = {
    "model_name": model_name,
    "model_task": model_task,
    "batch_size": BATCH_SIZE_LLM,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": "wanda_pruning",
    "target_sparsity": TARGET_SPARSITY_HIGH,
    "sparsity_scheduler": 'cubic',
    "finetuned_weights": trained_model_path,
    "learning_type": 'bacp_pruning',
}
bacp_training_args = BaCPTrainingArguments(**bacp_config)
bacp_trainer = BaCPTrainer(bacp_training_args)

# Hard-coded switch: the BaCP pruning phase is skipped in this cell.
run_bacp_phase = False
if run_bacp_phase:
    bacp_trainer.train()

# Finetuning Phase
bacp_trainer.generate_mask_from_model()
finetune_config = {
    "model_name": bacp_trainer.model_name,
    "model_task": bacp_trainer.model_task,
    "batch_size": bacp_trainer.batch_size,
    "optimizer_type": 'adamw',
    "learning_rate": 1e-3,
    "pruning_type": bacp_trainer.pruning_type,
    "target_sparsity": bacp_trainer.target_sparsity,
    "finetuned_weights": bacp_trainer.save_path,
    "epochs": 50,
    "pruner": bacp_trainer.get_pruner(),
    "finetune": True,
    "learning_type": "bacp_finetune",
}
training_args = TrainingArguments(**finetune_config)
trainer = Trainer(training_args)

# Hard-coded switch: fine-tuning runs before the final evaluation.
run_finetune_phase = True
if run_finetune_phase:
    trainer.train()

metrics = trainer.evaluate()
print(metrics)