In [0]:
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload; run %autoreload 0
# Mode 2 reloads imported modules before each cell executes, so edits to the
# project modules used below are picked up without restarting the kernel.

In [0]:
%pip install torchinfo
%restart_python
# Installs torchinfo (source of the `summary` import in the next cell) and then
# restarts the Python process.
# NOTE(review): the package version is not pinned; consider `torchinfo==<version>`
# so that reruns of this notebook resolve the same release.

In [0]:
# Environment configuration must happen before any Hugging Face library is
# imported: `datasets` resolves HF_DATASETS_CACHE once, when it is first imported,
# so setting the variable afterwards does not change where datasets are cached.
import os

os.environ["HF_DATASETS_CACHE"] = "/dbfs/hf_datasets"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Third-party libraries
import torch
import torch.nn as nn
import torch.optim as optim
from datasets.utils.logging import disable_progress_bar
from torchinfo import summary
from tqdm import tqdm
from transformers import AutoModelForSequenceClassification
from transformers import AutoTokenizer
from transformers import Trainer, TrainingArguments

# Project modules. If they are not importable from the notebook's working
# directory, add the repository root to sys.path before this block.
from bacp import BaCPLearner, BaCPTrainer, BaCPTrainingArgumentsLLM
from models import EncoderProjectionNetwork, ClassificationNetwork
from unstructured_pruning import MagnitudePrune, MovementPrune, LocalMagnitudePrune, LocalMovementPrune, WandaPrune, PRUNER_DICT, check_model_sparsity
from LLM_trainer import LLMTrainer, LLMTrainingArguments
from dataset_utils import get_glue_data
from logger import Logger

# Star imports are kept last so that they shadow earlier names exactly as before.
# NOTE(review): `get_device` and the TARGET_SPARSITY_* constants are presumably
# provided by these two modules; explicit imports would make that visible.
from utils import *
from constants import *

# Silence the Hugging Face datasets progress bars in cell output.
disable_progress_bar()

device = get_device()
print(f"{device = }")

# Batch size handed to get_glue_data and the training arguments. Despite the
# name, the RoBERTa cells in this notebook use the same constant.
BATCH_SIZE_DISTILBERT = 64
# Worker count passed to get_glue_data.
NUM_WORKERS = 24


# Baseline Accuracies

## DistilBERT Accuracies

### QQP Dataset

In [0]:
# DistilBERT baseline on QQP: fine-tune (or reuse an existing checkpoint) and
# report the accuracy returned by the trainer.

# Model initialization
model_name = "distilbert-base-uncased"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hyperparameter initialization
learning_rate = 2e-5
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
epochs = 3
weight_decay = 0.01

# Data initialization
model_task = "qqp"
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainset_qqp, valset_qqp, testset_qqp = data["trainset"], data["valset"], data["testset"]
trainloader_qqp, valloader_qqp, testloader_qqp = data["trainloader"], data["valloader"], data["testloader"]

# Location of the baseline checkpoint that is loaded back below.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='baseline_accuracies',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path = save_path
)

trainer = LLMTrainer(
    model=model,
    training_args=training_args,
    trainloader=trainloader_qqp,
    valloader=valloader_qqp,
)

# Train only when no baseline checkpoint exists yet, so a fresh workspace does
# not fail on the load below and an existing checkpoint is never retrained.
# Delete the checkpoint file to force a new training run.
# Assumes LLMTrainer.train() writes its checkpoint to save_path -- TODO confirm.
if not os.path.exists(save_path):
    trainer.train()

# Evaluate the saved weights rather than whatever is currently in memory.
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


### SST-2 Dataset

In [0]:
# DistilBERT baseline on SST-2, using the argument-only LLMTrainer interface:
# the trainer receives just the training arguments.
model_name, model_task = "distilbert-base-uncased", "sst2"

# Path of the baseline checkpoint. It is assigned here but not passed to the
# trainer in this cell.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"

# A pruning method and sparsity are supplied, but pruning is disabled (prune=False).
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_LOW

training_args = LLMTrainingArguments(
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    epochs=3,
    learning_type="baseline",
    prune=False,
)
trainer = LLMTrainer(training_args=training_args)

# Training is switched off; flip the condition to retrain the baseline.
if False:
    trainer.train()

acc = trainer.evaluate()
print(f"Accuracy = {acc}")



## RoBERTa Accuracies

### QQP Dataset

In [0]:
# RoBERTa baseline on QQP: fine-tune (or reuse an existing checkpoint) and
# report the accuracy returned by the trainer.

# Model initialization
model_name = "roberta-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hyperparameter initialization
learning_rate = 2e-5
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
epochs = 3
weight_decay = 0.01

# Data initialization (the batch-size constant is shared with the DistilBERT runs)
model_task = "qqp"
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainset_qqp, valset_qqp, testset_qqp = data["trainset"], data["valset"], data["testset"]
trainloader_qqp, valloader_qqp, testloader_qqp = data["trainloader"], data["valloader"], data["testloader"]

# Location of the baseline checkpoint that is loaded back below.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='baseline_accuracies',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path = save_path
)

trainer = LLMTrainer(
    model=model,
    training_args=training_args,
    trainloader=trainloader_qqp,
    valloader=valloader_qqp,
)

# Train only when no baseline checkpoint exists yet, so a fresh workspace does
# not fail on the load below and an existing checkpoint is never retrained.
# Delete the checkpoint file to force a new training run.
# Assumes LLMTrainer.train() writes its checkpoint to save_path -- TODO confirm.
if not os.path.exists(save_path):
    trainer.train()

# Evaluate the saved weights rather than whatever is currently in memory.
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


### SST-2 Dataset

In [0]:
# RoBERTa baseline on SST-2: fine-tune (or reuse an existing checkpoint) and
# report the accuracy returned by the trainer.

# Model initialization
model_name = "roberta-base"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Hyperparameter initialization
learning_rate = 2e-5
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)
epochs = 3
weight_decay = 0.01

# Data initialization. The splits are bound to SST-2 specific names so they no
# longer overwrite the QQP datasets/loaders created by the QQP cells.
model_task = "sst2"
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainset_sst2, valset_sst2, testset_sst2 = data["trainset"], data["valset"], data["testset"]
trainloader_sst2, valloader_sst2, testloader_sst2 = data["trainloader"], data["valloader"], data["testloader"]

# Location of the baseline checkpoint that is loaded back below.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='baseline_accuracies',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path = save_path
)

trainer = LLMTrainer(
    model=model,
    training_args=training_args,
    trainloader=trainloader_sst2,
    valloader=valloader_sst2,
)

# Train only when no baseline checkpoint exists yet, so a fresh workspace does
# not fail on the load below and an existing checkpoint is never retrained.
# Delete the checkpoint file to force a new training run.
# Assumes LLMTrainer.train() writes its checkpoint to save_path -- TODO confirm.
if not os.path.exists(save_path):
    trainer.train()

# Evaluate the saved weights rather than whatever is currently in memory.
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


# Pruning Accuracies

## DistilBERT Accuracies

### SST-2 Dataset

#### Magnitude Prune

In [0]:
# DistilBERT / SST-2: magnitude pruning to TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_LOW

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: magnitude pruning to TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_MID

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: magnitude pruning to TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_HIGH

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

#### Movement Prune

In [0]:
# DistilBERT / SST-2: movement pruning to TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_LOW

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: movement pruning to TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_MID

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: movement pruning to TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_HIGH

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

#### Wanda Prune

In [0]:
# DistilBERT / SST-2: Wanda pruning to TARGET_SPARSITY_LOW.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_LOW

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: Wanda pruning to TARGET_SPARSITY_MID.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_MID

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# DistilBERT / SST-2: Wanda pruning to TARGET_SPARSITY_HIGH.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_HIGH

# Baseline checkpoint handed to the trainer as the starting weights.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

training_args = LLMTrainingArguments(
    # Pruning method, target sparsity and schedule
    pruning_type=pruning_type,
    target_sparsity=target_sparsity,
    pruning_scheduler="cubic",
    # Model, task and data settings
    model_name=model_name,
    model_task=model_task,
    batch_size=BATCH_SIZE_DISTILBERT,
    finetuned_weights=trained_model_path,
    # Optimisation settings
    learning_type='pruning',
    learning_rate=1e-5,
    epochs=50,
    recovery_epochs=0,
)
trainer = LLMTrainer(training_args=training_args)

# This cell always trains before evaluating.
trainer.train()
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

In [0]:
# Scratch cell: prunes `model` step by step, printing sparsity and accuracy after each step.
# NOTE(review): this cell defines none of its inputs. `model`, `trainloader`, `pruner`,
# `epochs` and `trainer` must already exist in the kernel, and none of the cells above
# assigns `trainloader` or `pruner`, so a fresh Restart & Run All is expected to stop
# here with a NameError. Define the inputs in this cell or remove it.
# NOTE(review): `check_sparsity` is not among the explicitly imported names
# (`check_model_sparsity` is); presumably it comes from `from utils import *` -- verify.
model.train()
# Run a single forward pass on the first training batch only; the output is discarded.
# Presumably this populates statistics the pruner needs -- TODO confirm.
for data in trainloader:
    model(input_ids=data["input_ids"].to(device), attention_mask=data["attention_mask"].to(device), labels=data["label"].to(device))
    break


# First pruning step: compute the mask, apply it, report the resulting sparsity.
pruner.prune(model)
pruner.apply_mask(model)
print(check_sparsity(model))

# NOTE(review): `trainer` is whichever trainer the last executed cell left behind;
# verify that it wraps this same `model`, otherwise the accuracy printed here does
# not reflect the pruning performed above.
acc = trainer.evaluate()
print(f"Accuracy = {acc}")

# Remaining epochs-1 steps: advance the pruner's ratio, re-prune, and re-evaluate.
for i in range(epochs-1):
    pruner.ratio_step()
    pruner.prune(model)
    pruner.apply_mask(model)
    print(check_sparsity(model))

    acc = trainer.evaluate()
    print(f"Accuracy = {acc}")


## RoBERTa Accuracies

### SST-2 Dataset

#### Magnitude Prune

In [0]:
# RoBERTa / SST-2: magnitude pruning to TARGET_SPARSITY_LOW, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "magnitude_pruning", 5
pruner = MagnitudePrune(epochs, TARGET_SPARSITY_LOW)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_LOW}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_LOW,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: magnitude pruning to TARGET_SPARSITY_MID, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "magnitude_pruning", 5
pruner = MagnitudePrune(epochs, TARGET_SPARSITY_MID)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_MID}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_MID,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: magnitude pruning to TARGET_SPARSITY_HIGH, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "magnitude_pruning", 5
pruner = MagnitudePrune(epochs, TARGET_SPARSITY_HIGH)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_HIGH}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_HIGH,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


#### Movement Prune

In [0]:
# RoBERTa / SST-2: movement pruning to TARGET_SPARSITY_LOW, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "movement_pruning", 5
pruner = MovementPrune(epochs, TARGET_SPARSITY_LOW)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_LOW}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_LOW,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: movement pruning to TARGET_SPARSITY_MID, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "movement_pruning", 5
pruner = MovementPrune(epochs, TARGET_SPARSITY_MID)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_MID}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_MID,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: movement pruning to TARGET_SPARSITY_HIGH, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings
pruning_type, recovery_epochs = "movement_pruning", 5
pruner = MovementPrune(epochs, TARGET_SPARSITY_HIGH)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_HIGH}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_HIGH,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


#### Wanda Prune

In [0]:
# RoBERTa / SST-2: Wanda pruning to TARGET_SPARSITY_LOW, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings; unlike the other pruners used in this
# notebook, WandaPrune also receives the model.
pruning_type, recovery_epochs = "wanda_pruning", 5
pruner = WandaPrune(epochs, TARGET_SPARSITY_LOW, model)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_LOW}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_LOW,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: Wanda pruning to TARGET_SPARSITY_MID, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings; unlike the other pruners used in this
# notebook, WandaPrune also receives the model.
pruning_type, recovery_epochs = "wanda_pruning", 5
pruner = WandaPrune(epochs, TARGET_SPARSITY_MID, model)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_MID}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_MID,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# RoBERTa / SST-2: Wanda pruning to TARGET_SPARSITY_HIGH, starting from the
# saved baseline weights.
model_name, model_task = "roberta-base", "sst2"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Restore the baseline checkpoint into the freshly built model.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pth"
baseline_state = torch.load(trained_model_path, map_location="cpu")
model.load_state_dict(baseline_state)

# Hyperparameters
learning_rate, epochs, weight_decay = 2e-5, 10, 0.01
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Data loaders for the task
data = get_glue_data(model_name, tokenizer, model_task, BATCH_SIZE_DISTILBERT, NUM_WORKERS)
trainloader, valloader, testloader = (
    data[split] for split in ("trainloader", "valloader", "testloader")
)

# Pruner and pruning schedule settings; unlike the other pruners used in this
# notebook, WandaPrune also receives the model.
pruning_type, recovery_epochs = "wanda_pruning", 5
pruner = WandaPrune(epochs, TARGET_SPARSITY_HIGH, model)

# Checkpoint for the pruned model; reloaded after training for evaluation.
save_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_{pruning_type}_{TARGET_SPARSITY_HIGH}.pth"
training_args = LLMTrainingArguments(
    model_name=model_name,
    model_task=model_task,
    learning_rate=learning_rate,
    epochs=epochs,
    batch_size=BATCH_SIZE_DISTILBERT,
    optimizer=optimizer,
    weight_decay=weight_decay,
    learning_type='pruning',
    log_epochs=True,
    enable_tqdm=True,
    enable_mixed_precision=True,
    save_path=save_path,
    pruner=pruner,
    pruning_type=pruning_type,
    recovery_epochs=recovery_epochs,
    target_sparsity=TARGET_SPARSITY_HIGH,
)
trainer = LLMTrainer(
    model=model, training_args=training_args, trainloader=trainloader, valloader=valloader
)

# This cell always trains, then evaluates the checkpoint written to save_path.
trainer.train()
model.load_state_dict(torch.load(save_path, map_location='cpu'))
acc = trainer.evaluate()
print(f"Accuracy = {acc}")


# BaCP Accuracies

## Magnitude Pruning

In [0]:
# BaCP with magnitude pruning at TARGET_SPARSITY_LOW on DistilBERT / SST-2.
# Two phases: (1) BaCP training, (2) fine-tuning with the resulting mask.
pruning_type = "magnitude_pruning"
target_sparsity = TARGET_SPARSITY_LOW
model_name = "distilbert-base-uncased"
model_task = "sst2"
# Baseline checkpoint handed to the BaCP training arguments.
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# Phase 1: BaCP training
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type,
    target_sparsity,
    model_name,
    model_task,
    BATCH_SIZE_DISTILBERT,
    trained_model_path,
    )
bacp_trainer = BaCPTrainer(bacp_training_args)
if True:
    bacp_trainer.train()

# Finetuning Phase
# Build the mask from the BaCP-trained model and reuse the trainer's pruner so
# that fine-tuning runs with pruning itself disabled (prune=False).
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

llm_training_args = LLMTrainingArguments(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
    # Same learning_type as the sibling BaCP cell for the MID sparsity target,
    # so all BaCP fine-tuning runs are labelled consistently.
    learning_type="bacp_finetune"
)
llm_trainer = LLMTrainer(llm_training_args)
if True:
    llm_trainer.train()

acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, magnitude pruning at TARGET_SPARSITY_MID.
# Each flag gates only the matching .train() call. Both are currently off, so
# this cell only builds the trainers, generates the mask and evaluates.
RUN_BACP_TRAINING = False
RUN_FINETUNING = False

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_MID
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
# NOTE(review): this is the only BaCP cell that passes learning_type explicitly;
# the sibling cells omit it -- confirm whether the default is equivalent.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
    learning_type="bacp_finetune",
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, magnitude pruning at TARGET_SPARSITY_HIGH.
# Each flag gates only the matching .train() call. Both are currently off, so
# this cell only builds the trainers, generates the mask and evaluates.
RUN_BACP_TRAINING = False
RUN_FINETUNING = False

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "magnitude_pruning", TARGET_SPARSITY_HIGH
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


## Movement Pruning

In [0]:
# BaCP experiment: DistilBERT on SST-2, movement pruning at TARGET_SPARSITY_LOW.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_LOW
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, movement pruning at TARGET_SPARSITY_MID.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_MID
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, movement pruning at TARGET_SPARSITY_HIGH.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "movement_pruning", TARGET_SPARSITY_HIGH
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


## Wanda Pruning

In [0]:
# BaCP experiment: DistilBERT on SST-2, Wanda pruning at TARGET_SPARSITY_LOW.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_LOW
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, Wanda pruning at TARGET_SPARSITY_MID.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_MID
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
# BaCP experiment: DistilBERT on SST-2, Wanda pruning at TARGET_SPARSITY_HIGH.
# Each flag gates only the matching .train() call; everything else in the cell
# (object construction, mask generation, evaluation) always runs.
RUN_BACP_TRAINING = True
RUN_FINETUNING = True

# Experiment identity and the baseline checkpoint path derived from it.
model_name, model_task = "distilbert-base-uncased", "sst2"
pruning_type, target_sparsity = "wanda_pruning", TARGET_SPARSITY_HIGH
trained_model_path = f"/dbfs/research/{model_name}/{model_task}/{model_name}_baseline.pt"

# BaCP Phase
# BaCPTrainingArgumentsLLM is called positionally, so the argument order matters.
bacp_training_args = BaCPTrainingArgumentsLLM(
    pruning_type, target_sparsity, model_name, model_task,
    BATCH_SIZE_DISTILBERT, trained_model_path,
)
bacp_trainer = BaCPTrainer(bacp_training_args)
if RUN_BACP_TRAINING:
    bacp_trainer.train()

# Finetuning Phase
# Call order kept from the original; get_pruner() presumably relies on the
# mask generated just before it -- TODO confirm in bacp.py.
bacp_trainer.generate_mask_from_model()
pruner = bacp_trainer.get_pruner()

# Finetuning settings are copied from the BaCP trainer, whose cm_save_path is
# handed over as finetuned_weights.
finetune_kwargs = dict(
    pruning_type=bacp_trainer.pruning_type,
    target_sparsity=bacp_trainer.target_sparsity,
    model_name=bacp_trainer.model_name,
    model_task=bacp_trainer.model_task,
    batch_size=bacp_trainer.batch_size,
    finetuned_weights=bacp_trainer.cm_save_path,
    epochs=5,
    prune=False,
    pruner=pruner,
    finetune=True,
)
llm_training_args = LLMTrainingArguments(**finetune_kwargs)
llm_trainer = LLMTrainer(llm_training_args)
if RUN_FINETUNING:
    llm_trainer.train()

# Report the accuracy returned by the finetuning trainer.
acc = llm_trainer.evaluate()
print(f"Accuracy = {acc}")


In [0]:
%sh
# Ad-hoc check: list the entries directly under /Workspace/.
ls /Workspace/
