In [1]:
import os
import sys
sys.path.append("../../../../")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.save_module import save_module
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_magnitude
)

In [3]:
name= "YahooAnswersTopics"
device = torch.device("cuda:0")
checkpoint = None
batch_size=16
num_workers=4
num_samples=16
magnitude_ratio=0.4
seed=44
include_layers=["attention", "intermediate", "output"]
exclude_layers=None

In [4]:
script_start_time = datetime.now()
print(f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

Script started at: 2024-08-21 20:22:59


In [5]:
model_config = ModelConfig(name, device)
num_labels = model_config.config["num_labels"]
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'fabriceyhc/bert-base-uncased-yahoo_answers_topics', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'YahooAnswersTopics', 'num_labels': 10, 'cache_dir': 'Models'}




The model fabriceyhc/bert-base-uncased-yahoo_answers_topics is loaded.




In [6]:
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=batch_size, num_workers=num_workers, do_cache=True, seed=seed
)

{'dataset_name': 'YahooAnswersTopics', 'path': 'yahoo_answers_topics', 'config_name': 'yahoo_answers_topics', 'text_column': 'question_title', 'label_column': 'topic', 'cache_dir': 'Datasets/Yahoo', 'task_type': 'classification'}




Loading cached dataset YahooAnswersTopics.




The dataset YahooAnswersTopics is loaded




In [7]:
all_samples = SamplingDataset(
    train_dataloader, 200, num_samples, num_labels, False, 4, device=device, resample=False, seed=seed
)

In [8]:
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

In [9]:
module = copy.deepcopy(model)
prune_magnitude(module, sparsity_ratio=magnitude_ratio, include_layers=include_layers, exclude_layers=exclude_layers)
print("Evaluate the pruned model")
result = evaluate_model(model, model_config, test_dataloader)
# save_module(module, "Modules/", f"magnitude_{name}_{magnitude_ratio}p.pt")

Evaluate the pruned model




Evaluating:   0%|                                                                             | 0/1875 [00:26<…

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Loss: 1.0014




Precision: 0.6875, Recall: 0.6865, F1-Score: 0.6838




              precision    recall  f1-score   support

           0       0.57      0.55      0.56      2972
           1       0.74      0.67      0.70      3016
           2       0.71      0.78      0.74      2985
           3       0.54      0.53      0.53      3023
           4       0.81      0.82      0.82      3039
           5       0.90      0.84      0.87      3076
           6       0.60      0.43      0.50      2965
           7       0.62      0.74      0.67      3031
           8       0.63      0.76      0.69      2932
           9       0.75      0.75      0.75      2961

    accuracy                           0.69     30000
   macro avg       0.69      0.69      0.68     30000
weighted avg       0.69      0.69      0.68     30000





In [10]:
for concern in range(num_labels):
    print(f"--{concern}--")
    valid = copy.deepcopy(valid_dataloader)
    similar(model, module, valid, concern, num_samples, num_labels, device=device, seed=seed)

--0--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7603520169230096, 0.7603520169230096)




CCA coefficients mean non-concern: (0.7631685935634227, 0.7631685935634227)




Linear CKA concern: 0.8948187795505315




Linear CKA non-concern: 0.8976299088038353




Kernel CKA concern: 0.829101072243262




Kernel CKA non-concern: 0.8607683478983651




--1--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7623931705139153, 0.7623931705139153)




CCA coefficients mean non-concern: (0.764446820487368, 0.764446820487368)




Linear CKA concern: 0.9064111969837633




Linear CKA non-concern: 0.8946311411037177




Kernel CKA concern: 0.8440301556428182




Kernel CKA non-concern: 0.8552285297987873




--2--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7524195614882411, 0.7524195614882411)




CCA coefficients mean non-concern: (0.7627105764708588, 0.7627105764708588)




Linear CKA concern: 0.8988166106071983




Linear CKA non-concern: 0.896300525751935




Kernel CKA concern: 0.85235117970703




Kernel CKA non-concern: 0.8501149515396937




--3--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7580157074521753, 0.7580157074521753)




CCA coefficients mean non-concern: (0.7637177922374999, 0.7637177922374999)




Linear CKA concern: 0.9032394243719439




Linear CKA non-concern: 0.8967898502914977




Kernel CKA concern: 0.8465273947936007




Kernel CKA non-concern: 0.8644837754131147




--4--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7557568936307818, 0.7557568936307818)




CCA coefficients mean non-concern: (0.7648859418576336, 0.7648859418576336)




Linear CKA concern: 0.9095437536155773




Linear CKA non-concern: 0.8953359662109903




Kernel CKA concern: 0.8513718608020453




Kernel CKA non-concern: 0.855226796054968




--5--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.756774389965269, 0.756774389965269)




CCA coefficients mean non-concern: (0.7611095131455813, 0.7611095131455813)




Linear CKA concern: 0.925212043966884




Linear CKA non-concern: 0.8935908680716599




Kernel CKA concern: 0.8758377397859712




Kernel CKA non-concern: 0.8485770277322299




--6--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7637207875537546, 0.7637207875537546)




CCA coefficients mean non-concern: (0.7649152069128433, 0.7649152069128433)




Linear CKA concern: 0.8896498823729737




Linear CKA non-concern: 0.8950372666455002




Kernel CKA concern: 0.7985944314344604




Kernel CKA non-concern: 0.8611700363239859




--7--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7579208051389905, 0.7579208051389905)




CCA coefficients mean non-concern: (0.7647525542940301, 0.7647525542940301)




Linear CKA concern: 0.9167393999942471




Linear CKA non-concern: 0.8974725889493035




Kernel CKA concern: 0.8657007091089723




Kernel CKA non-concern: 0.8632860446996238




--8--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7544574049022587, 0.7544574049022587)




CCA coefficients mean non-concern: (0.7639281383742276, 0.7639281383742276)




Linear CKA concern: 0.9083606598004456




Linear CKA non-concern: 0.8945750670377371




Kernel CKA concern: 0.8504356158217692




Kernel CKA non-concern: 0.8583099537562071




--9--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7581265306229062, 0.7581265306229062)




CCA coefficients mean non-concern: (0.763653216874023, 0.763653216874023)




Linear CKA concern: 0.909194001468322




Linear CKA non-concern: 0.8949762662943619




Kernel CKA concern: 0.8568885273144136




Kernel CKA non-concern: 0.8599761174039382




In [11]:
get_sparsity(module)

(0.39681683125100553,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.3999989827473958,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.3999989827473958,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.3999989827473958,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.3999989827473958,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.39999983045789933,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.39999983045789933,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.3999989827473958,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.3999989827473958,
  'bert