In [1]:
import os
import sys

# Make the project's `utils` package importable. The path is relative, so it is
# resolved against the kernel's current working directory (presumably the
# notebook's own folder, four levels below the repository root -- TODO confirm).
repo_root = "../../../../"
# Guard the append so that re-running this cell does not pile up duplicate
# entries in sys.path.
if repo_root not in sys.path:
    sys.path.append(repo_root)

# Must be set before any tokenizer is created. Presumably this silences the
# tokenizers fork warning triggered by multi-worker DataLoaders.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.save_module import save_module
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_wanda
)

In [3]:
# --- Experiment configuration -------------------------------------------------
# Key used to build the ModelConfig and to select the dataset in load_data.
name = "OSDG"

# The recorded run used the second GPU. Fall back to the first GPU, then to the
# CPU, so the notebook still runs on machines with fewer devices instead of
# failing at the first `.to(device)`.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Placeholder only: rebound by the value returned from load_model below.
checkpoint = None

# Dataloader settings forwarded to load_data.
batch_size = 32
num_workers = 48

# Passed to SamplingDataset and to similar().
num_samples = 16

# Forwarded to prune_wanda as `sparsity_ratio`.
wanda_ratio = 0.5

# Shared seed for load_data, SamplingDataset and similar().
seed = 44

# Layer filters forwarded to prune_wanda.
include_layers = ["attention", "intermediate", "output"]
exclude_layers = None

In [4]:
# Record when this run began and echo it in a human-readable form.
script_start_time = datetime.now()
print(
    f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}"
)

Script started at: 2024-08-19 14:47:23


In [5]:
# Build the configuration object for the selected experiment and target device.
model_config = ModelConfig(name, device)
# Number of output classes, read from the configuration mapping.
num_labels = model_config.config["num_labels"]
# load_model returns three values; the third rebinds `checkpoint`, replacing the
# None placeholder set in the configuration cell.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
# Fetch the three dataloaders for the configured dataset. The same seed is
# reused throughout the notebook.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name,
    batch_size=batch_size,
    num_workers=num_workers,
    seed=seed,
)

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




In [7]:
# Samples drawn from the training data; these are later passed to prune_wanda.
all_samples = SamplingDataset(
    train_dataloader,
    # NOTE(review): this slot receives a class index in the per-concern loop
    # further down. 200 is outside range(num_labels), so together with the
    # `False` flag below it presumably means "sample from every class".
    # TODO confirm against SamplingDataset.
    200,
    num_samples,
    num_labels,
    False,
    4,  # NOTE(review): meaning of this positional value is not visible here -- verify.
    device=device,
    resample=False,
    seed=seed,
)

In [8]:
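# Baseline evaluation of the unpruned model is left disabled here; the output of
# an earlier run is kept as comments in the next cell for reference.
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)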

In [9]:
# Evaluate the original model
# Evaluating: 100%|█████████████████████████████████████████████████████████████████████| 200/200 [03:16<00:00,  1.02it/s]
# Loss: 0.9485
# Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793
#               precision    recall  f1-score   support

#            0       0.77      0.66      0.71       797
#            1       0.84      0.72      0.78       775
#            2       0.88      0.87      0.88       795
#            3       0.87      0.83      0.85      1110
#            4       0.86      0.80      0.83      1260
#            5       0.88      0.69      0.77       882
#            6       0.85      0.80      0.83       940
#            7       0.49      0.61      0.54       473
#            8       0.66      0.85      0.74       746
#            9       0.62      0.73      0.67       689
#           10       0.75      0.79      0.77       670
#           11       0.62      0.81      0.70       312
#           12       0.73      0.81      0.77       665
#           13       0.83      0.85      0.84       314
#           14       0.85      0.78      0.81       756
#           15       0.97      0.98      0.97      1607

#     accuracy                           0.80     12791
#    macro avg       0.78      0.79      0.78     12791
# weighted avg       0.81      0.80      0.80     12791

In [10]:
# Work on a deep copy so that `model` is left untouched; both the original and
# the pruned copy are needed by the similarity comparison later on.
module = copy.deepcopy(model)

# prune_wanda's return value is not used; the pruned weights are read back from
# `module` afterwards, so it presumably prunes in place.
prune_wanda(
    module,
    model_config,
    all_samples,
    sparsity_ratio=wanda_ratio,
    include_layers=include_layers,
    exclude_layers=exclude_layers,
)

print("Evaluate the pruned model")
result = evaluate_model(module, model_config, test_dataloader)

# Persisting the pruned module is currently disabled.
# save_module(module, "Modules/", f"wanda_{name}_{wanda_ratio}p.pt")

Evaluate the pruned model




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:35,  2.09it/s]

Evaluating:   1%|          | 2/200 [00:00<01:28,  2.24it/s]

Evaluating:   2%|▏         | 3/200 [00:01<01:26,  2.28it/s]

Evaluating:   2%|▏         | 4/200 [00:01<01:24,  2.31it/s]

Evaluating:   2%|▎         | 5/200 [00:02<01:24,  2.32it/s]

Evaluating:   3%|▎         | 6/200 [00:02<01:23,  2.33it/s]

Evaluating:   4%|▎         | 7/200 [00:03<01:22,  2.33it/s]

Evaluating:   4%|▍         | 8/200 [00:03<01:22,  2.33it/s]

Evaluating:   4%|▍         | 9/200 [00:03<01:21,  2.33it/s]

Evaluating:   5%|▌         | 10/200 [00:04<01:21,  2.33it/s]

Evaluating:   6%|▌         | 11/200 [00:04<01:20,  2.33it/s]

Evaluating:   6%|▌         | 12/200 [00:05<01:20,  2.33it/s]

Evaluating:   6%|▋         | 13/200 [00:05<01:20,  2.33it/s]

Evaluating:   7%|▋         | 14/200 [00:06<01:19,  2.33it/s]

Evaluating:   8%|▊         | 15/200 [00:06<01:19,  2.33it/s]

Evaluating:   8%|▊         | 16/200 [00:06<01:19,  2.32it/s]

Evaluating:   8%|▊         | 17/200 [00:07<01:18,  2.32it/s]

Evaluating:   9%|▉         | 18/200 [00:07<01:18,  2.32it/s]

Evaluating:  10%|▉         | 19/200 [00:08<01:18,  2.32it/s]

Evaluating:  10%|█         | 20/200 [00:08<01:17,  2.32it/s]

Evaluating:  10%|█         | 21/200 [00:09<01:17,  2.31it/s]

Evaluating:  11%|█         | 22/200 [00:09<01:17,  2.29it/s]

Evaluating:  12%|█▏        | 23/200 [00:09<01:18,  2.24it/s]

Evaluating:  12%|█▏        | 24/200 [00:10<01:20,  2.20it/s]

Evaluating:  12%|█▎        | 25/200 [00:10<01:22,  2.12it/s]

Evaluating:  13%|█▎        | 26/200 [00:11<01:26,  2.02it/s]

Evaluating:  14%|█▎        | 27/200 [00:12<01:29,  1.93it/s]

Evaluating:  14%|█▍        | 28/200 [00:12<01:33,  1.84it/s]

Evaluating:  14%|█▍        | 29/200 [00:13<01:37,  1.75it/s]

Evaluating:  15%|█▌        | 30/200 [00:13<01:42,  1.67it/s]

Evaluating:  16%|█▌        | 31/200 [00:14<01:45,  1.60it/s]

Evaluating:  16%|█▌        | 32/200 [00:15<01:47,  1.56it/s]

Evaluating:  16%|█▋        | 33/200 [00:16<01:48,  1.53it/s]

Evaluating:  17%|█▋        | 34/200 [00:16<01:51,  1.49it/s]

Evaluating:  18%|█▊        | 35/200 [00:17<01:54,  1.45it/s]

Evaluating:  18%|█▊        | 36/200 [00:18<01:56,  1.40it/s]

Evaluating:  18%|█▊        | 37/200 [00:19<02:00,  1.35it/s]

Evaluating:  19%|█▉        | 38/200 [00:19<02:03,  1.31it/s]

Evaluating:  20%|█▉        | 39/200 [00:20<02:04,  1.29it/s]

Evaluating:  20%|██        | 40/200 [00:21<02:09,  1.23it/s]

Evaluating:  20%|██        | 41/200 [00:22<02:17,  1.15it/s]

Evaluating:  21%|██        | 42/200 [00:23<02:22,  1.11it/s]

Evaluating:  22%|██▏       | 43/200 [00:24<02:25,  1.08it/s]

Evaluating:  22%|██▏       | 44/200 [00:25<02:27,  1.06it/s]

Evaluating:  22%|██▎       | 45/200 [00:26<02:28,  1.04it/s]

Evaluating:  23%|██▎       | 46/200 [00:27<02:28,  1.04it/s]

Evaluating:  24%|██▎       | 47/200 [00:28<02:28,  1.03it/s]

Evaluating:  24%|██▍       | 48/200 [00:29<02:28,  1.03it/s]

Evaluating:  24%|██▍       | 49/200 [00:30<02:27,  1.02it/s]

Evaluating:  25%|██▌       | 50/200 [00:31<02:27,  1.02it/s]

Evaluating:  26%|██▌       | 51/200 [00:32<02:26,  1.02it/s]

Evaluating:  26%|██▌       | 52/200 [00:33<02:25,  1.02it/s]

Evaluating:  26%|██▋       | 53/200 [00:34<02:24,  1.02it/s]

Evaluating:  27%|██▋       | 54/200 [00:35<02:23,  1.02it/s]

Evaluating:  28%|██▊       | 55/200 [00:36<02:21,  1.02it/s]

Evaluating:  28%|██▊       | 56/200 [00:37<02:21,  1.02it/s]

Evaluating:  28%|██▊       | 57/200 [00:38<02:20,  1.02it/s]

Evaluating:  29%|██▉       | 58/200 [00:39<02:19,  1.02it/s]

Evaluating:  30%|██▉       | 59/200 [00:40<02:17,  1.02it/s]

Evaluating:  30%|███       | 60/200 [00:41<02:16,  1.02it/s]

Evaluating:  30%|███       | 61/200 [00:42<02:16,  1.02it/s]

Evaluating:  31%|███       | 62/200 [00:43<02:15,  1.02it/s]

Evaluating:  32%|███▏      | 63/200 [00:44<02:11,  1.04it/s]

Evaluating:  32%|███▏      | 64/200 [00:45<02:08,  1.06it/s]

Evaluating:  32%|███▎      | 65/200 [00:45<02:06,  1.07it/s]

Evaluating:  33%|███▎      | 66/200 [00:46<02:04,  1.08it/s]

Evaluating:  34%|███▎      | 67/200 [00:47<02:02,  1.08it/s]

Evaluating:  34%|███▍      | 68/200 [00:48<02:01,  1.08it/s]

Evaluating:  34%|███▍      | 69/200 [00:49<02:00,  1.09it/s]

Evaluating:  35%|███▌      | 70/200 [00:50<01:59,  1.09it/s]

Evaluating:  36%|███▌      | 71/200 [00:51<01:57,  1.09it/s]

Evaluating:  36%|███▌      | 72/200 [00:52<01:56,  1.09it/s]

Evaluating:  36%|███▋      | 73/200 [00:53<01:55,  1.10it/s]

Evaluating:  37%|███▋      | 74/200 [00:54<01:55,  1.10it/s]

Evaluating:  38%|███▊      | 75/200 [00:55<01:54,  1.10it/s]

Evaluating:  38%|███▊      | 76/200 [00:55<01:53,  1.10it/s]

Evaluating:  38%|███▊      | 77/200 [00:56<01:52,  1.09it/s]

Evaluating:  39%|███▉      | 78/200 [00:57<01:51,  1.09it/s]

Evaluating:  40%|███▉      | 79/200 [00:58<01:50,  1.09it/s]

Evaluating:  40%|████      | 80/200 [00:59<01:49,  1.09it/s]

Evaluating:  40%|████      | 81/200 [01:00<01:48,  1.09it/s]

Evaluating:  41%|████      | 82/200 [01:01<01:47,  1.10it/s]

Evaluating:  42%|████▏     | 83/200 [01:02<01:46,  1.10it/s]

Evaluating:  42%|████▏     | 84/200 [01:03<01:45,  1.10it/s]

Evaluating:  42%|████▎     | 85/200 [01:04<01:45,  1.09it/s]

Evaluating:  43%|████▎     | 86/200 [01:05<01:44,  1.10it/s]

Evaluating:  44%|████▎     | 87/200 [01:06<01:43,  1.10it/s]

Evaluating:  44%|████▍     | 88/200 [01:06<01:42,  1.09it/s]

Evaluating:  44%|████▍     | 89/200 [01:07<01:41,  1.09it/s]

Evaluating:  45%|████▌     | 90/200 [01:08<01:40,  1.09it/s]

Evaluating:  46%|████▌     | 91/200 [01:09<01:39,  1.10it/s]

Evaluating:  46%|████▌     | 92/200 [01:10<01:38,  1.10it/s]

Evaluating:  46%|████▋     | 93/200 [01:11<01:37,  1.10it/s]

Evaluating:  47%|████▋     | 94/200 [01:12<01:36,  1.09it/s]

Evaluating:  48%|████▊     | 95/200 [01:13<01:35,  1.10it/s]

Evaluating:  48%|████▊     | 96/200 [01:14<01:35,  1.09it/s]

Evaluating:  48%|████▊     | 97/200 [01:15<01:34,  1.09it/s]

Evaluating:  49%|████▉     | 98/200 [01:16<01:33,  1.09it/s]

Evaluating:  50%|████▉     | 99/200 [01:16<01:32,  1.09it/s]

Evaluating:  50%|█████     | 100/200 [01:17<01:31,  1.09it/s]

Evaluating:  50%|█████     | 101/200 [01:18<01:30,  1.10it/s]

Evaluating:  51%|█████     | 102/200 [01:19<01:29,  1.09it/s]

Evaluating:  52%|█████▏    | 103/200 [01:20<01:28,  1.09it/s]

Evaluating:  52%|█████▏    | 104/200 [01:21<01:27,  1.09it/s]

Evaluating:  52%|█████▎    | 105/200 [01:22<01:26,  1.09it/s]

Evaluating:  53%|█████▎    | 106/200 [01:23<01:26,  1.09it/s]

Evaluating:  54%|█████▎    | 107/200 [01:24<01:25,  1.09it/s]

Evaluating:  54%|█████▍    | 108/200 [01:25<01:24,  1.09it/s]

Evaluating:  55%|█████▍    | 109/200 [01:26<01:23,  1.09it/s]

Evaluating:  55%|█████▌    | 110/200 [01:27<01:22,  1.09it/s]

Evaluating:  56%|█████▌    | 111/200 [01:27<01:21,  1.09it/s]

Evaluating:  56%|█████▌    | 112/200 [01:28<01:20,  1.09it/s]

Evaluating:  56%|█████▋    | 113/200 [01:29<01:19,  1.09it/s]

Evaluating:  57%|█████▋    | 114/200 [01:30<01:18,  1.09it/s]

Evaluating:  57%|█████▊    | 115/200 [01:31<01:17,  1.09it/s]

Evaluating:  58%|█████▊    | 116/200 [01:32<01:16,  1.09it/s]

Evaluating:  58%|█████▊    | 117/200 [01:33<01:15,  1.09it/s]

Evaluating:  59%|█████▉    | 118/200 [01:34<01:14,  1.09it/s]

Evaluating:  60%|█████▉    | 119/200 [01:35<01:14,  1.09it/s]

Evaluating:  60%|██████    | 120/200 [01:36<01:13,  1.09it/s]

Evaluating:  60%|██████    | 121/200 [01:37<01:12,  1.09it/s]

Evaluating:  61%|██████    | 122/200 [01:38<01:11,  1.09it/s]

Evaluating:  62%|██████▏   | 123/200 [01:38<01:10,  1.09it/s]

Evaluating:  62%|██████▏   | 124/200 [01:39<01:09,  1.09it/s]

Evaluating:  62%|██████▎   | 125/200 [01:40<01:08,  1.09it/s]

Evaluating:  63%|██████▎   | 126/200 [01:41<01:07,  1.09it/s]

Evaluating:  64%|██████▎   | 127/200 [01:42<01:06,  1.09it/s]

Evaluating:  64%|██████▍   | 128/200 [01:43<01:06,  1.09it/s]

Evaluating:  64%|██████▍   | 129/200 [01:44<01:05,  1.09it/s]

Evaluating:  65%|██████▌   | 130/200 [01:45<01:04,  1.09it/s]

Evaluating:  66%|██████▌   | 131/200 [01:46<01:03,  1.09it/s]

Evaluating:  66%|██████▌   | 132/200 [01:47<01:02,  1.09it/s]

Evaluating:  66%|██████▋   | 133/200 [01:48<01:01,  1.09it/s]

Evaluating:  67%|██████▋   | 134/200 [01:49<01:00,  1.09it/s]

Evaluating:  68%|██████▊   | 135/200 [01:49<00:59,  1.09it/s]

Evaluating:  68%|██████▊   | 136/200 [01:50<00:58,  1.09it/s]

Evaluating:  68%|██████▊   | 137/200 [01:51<00:58,  1.09it/s]

Evaluating:  69%|██████▉   | 138/200 [01:52<00:57,  1.09it/s]

Evaluating:  70%|██████▉   | 139/200 [01:53<00:56,  1.09it/s]

Evaluating:  70%|███████   | 140/200 [01:54<00:55,  1.09it/s]

Evaluating:  70%|███████   | 141/200 [01:55<00:54,  1.09it/s]

Evaluating:  71%|███████   | 142/200 [01:56<00:53,  1.09it/s]

Evaluating:  72%|███████▏  | 143/200 [01:57<00:52,  1.09it/s]

Evaluating:  72%|███████▏  | 144/200 [01:58<00:51,  1.09it/s]

Evaluating:  72%|███████▎  | 145/200 [01:59<00:50,  1.09it/s]

Evaluating:  73%|███████▎  | 146/200 [02:00<00:49,  1.09it/s]

Evaluating:  74%|███████▎  | 147/200 [02:00<00:48,  1.09it/s]

Evaluating:  74%|███████▍  | 148/200 [02:01<00:47,  1.09it/s]

Evaluating:  74%|███████▍  | 149/200 [02:02<00:46,  1.09it/s]

Evaluating:  75%|███████▌  | 150/200 [02:03<00:45,  1.09it/s]

Evaluating:  76%|███████▌  | 151/200 [02:04<00:44,  1.09it/s]

Evaluating:  76%|███████▌  | 152/200 [02:05<00:43,  1.09it/s]

Evaluating:  76%|███████▋  | 153/200 [02:06<00:43,  1.09it/s]

Evaluating:  77%|███████▋  | 154/200 [02:07<00:42,  1.09it/s]

Evaluating:  78%|███████▊  | 155/200 [02:08<00:41,  1.09it/s]

Evaluating:  78%|███████▊  | 156/200 [02:09<00:40,  1.09it/s]

Evaluating:  78%|███████▊  | 157/200 [02:10<00:39,  1.09it/s]

Evaluating:  79%|███████▉  | 158/200 [02:11<00:38,  1.09it/s]

Evaluating:  80%|███████▉  | 159/200 [02:11<00:37,  1.09it/s]

Evaluating:  80%|████████  | 160/200 [02:12<00:36,  1.09it/s]

Evaluating:  80%|████████  | 161/200 [02:13<00:35,  1.09it/s]

Evaluating:  81%|████████  | 162/200 [02:14<00:34,  1.09it/s]

Evaluating:  82%|████████▏ | 163/200 [02:15<00:33,  1.09it/s]

Evaluating:  82%|████████▏ | 164/200 [02:16<00:32,  1.09it/s]

Evaluating:  82%|████████▎ | 165/200 [02:17<00:32,  1.09it/s]

Evaluating:  83%|████████▎ | 166/200 [02:18<00:31,  1.09it/s]

Evaluating:  84%|████████▎ | 167/200 [02:19<00:30,  1.09it/s]

Evaluating:  84%|████████▍ | 168/200 [02:20<00:29,  1.09it/s]

Evaluating:  84%|████████▍ | 169/200 [02:21<00:28,  1.09it/s]

Evaluating:  85%|████████▌ | 170/200 [02:22<00:27,  1.09it/s]

Evaluating:  86%|████████▌ | 171/200 [02:22<00:26,  1.09it/s]

Evaluating:  86%|████████▌ | 172/200 [02:23<00:25,  1.09it/s]

Evaluating:  86%|████████▋ | 173/200 [02:24<00:24,  1.09it/s]

Evaluating:  87%|████████▋ | 174/200 [02:25<00:23,  1.09it/s]

Evaluating:  88%|████████▊ | 175/200 [02:26<00:22,  1.09it/s]

Evaluating:  88%|████████▊ | 176/200 [02:27<00:21,  1.09it/s]

Evaluating:  88%|████████▊ | 177/200 [02:28<00:21,  1.09it/s]

Evaluating:  89%|████████▉ | 178/200 [02:29<00:20,  1.09it/s]

Evaluating:  90%|████████▉ | 179/200 [02:30<00:19,  1.09it/s]

Evaluating:  90%|█████████ | 180/200 [02:31<00:18,  1.09it/s]

Evaluating:  90%|█████████ | 181/200 [02:32<00:17,  1.08it/s]

Evaluating:  91%|█████████ | 182/200 [02:33<00:16,  1.08it/s]

Evaluating:  92%|█████████▏| 183/200 [02:34<00:15,  1.08it/s]

Evaluating:  92%|█████████▏| 184/200 [02:34<00:14,  1.08it/s]

Evaluating:  92%|█████████▎| 185/200 [02:35<00:13,  1.07it/s]

Evaluating:  93%|█████████▎| 186/200 [02:36<00:12,  1.08it/s]

Evaluating:  94%|█████████▎| 187/200 [02:37<00:12,  1.07it/s]

Evaluating:  94%|█████████▍| 188/200 [02:38<00:11,  1.08it/s]

Evaluating:  94%|█████████▍| 189/200 [02:39<00:10,  1.07it/s]

Evaluating:  95%|█████████▌| 190/200 [02:40<00:09,  1.08it/s]

Evaluating:  96%|█████████▌| 191/200 [02:41<00:08,  1.08it/s]

Evaluating:  96%|█████████▌| 192/200 [02:42<00:07,  1.08it/s]

Evaluating:  96%|█████████▋| 193/200 [02:43<00:06,  1.08it/s]

Evaluating:  97%|█████████▋| 194/200 [02:44<00:05,  1.08it/s]

Evaluating:  98%|█████████▊| 195/200 [02:45<00:04,  1.08it/s]

Evaluating:  98%|█████████▊| 196/200 [02:46<00:03,  1.08it/s]

Evaluating:  98%|█████████▊| 197/200 [02:47<00:02,  1.08it/s]

Evaluating:  99%|█████████▉| 198/200 [02:47<00:01,  1.08it/s]

Evaluating: 100%|█████████▉| 199/200 [02:48<00:00,  1.08it/s]

Evaluating: 100%|██████████| 200/200 [02:49<00:00,  1.12it/s]

Evaluating: 100%|██████████| 200/200 [02:49<00:00,  1.18it/s]




Loss: 0.9330




Precision: 0.7677, Recall: 0.7693, F1-Score: 0.7639




              precision    recall  f1-score   support

           0       0.75      0.63      0.68       797
           1       0.83      0.69      0.75       775
           2       0.87      0.87      0.87       795
           3       0.87      0.81      0.84      1110
           4       0.83      0.81      0.82      1260
           5       0.90      0.67      0.77       882
           6       0.85      0.78      0.81       940
           7       0.46      0.56      0.51       473
           8       0.66      0.84      0.74       746
           9       0.55      0.73      0.62       689
          10       0.74      0.78      0.76       670
          11       0.65      0.77      0.71       312
          12       0.68      0.80      0.73       665
          13       0.83      0.85      0.84       314
          14       0.86      0.77      0.81       756
          15       0.96      0.96      0.96      1607

    accuracy                           0.79     12791
   macro avg       0.77   




In [11]:
# For every class ("concern"), compare the original model with the pruned copy.
# similar() is handed the validation dataloader and the sampling parameters
# (concern, num_samples, num_labels, seed) directly, so no samples are built
# here: the positive/negative SamplingDataset objects previously constructed on
# each iteration were never read and have been removed.
for concern in range(num_labels):
    print(f"--{concern}--")
    similar(
        model,
        module,
        valid_dataloader,
        concern,
        num_samples,
        num_labels,
        device=device,
        seed=seed,
    )

--0--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7285056283563428, 0.7285056283563428)




CCA coefficients mean non-concern: (0.7353052360545831, 0.7353052360545831)




Linear CKA concern: 0.9345302639110677




Linear CKA non-concern: 0.883297852154053




Kernel CKA concern: 0.9235366288868616




Kernel CKA non-concern: 0.8954454060634802




--1--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7323923344947308, 0.7323923344947308)




CCA coefficients mean non-concern: (0.7352961071473088, 0.7352961071473088)




Linear CKA concern: 0.9045104065632273




Linear CKA non-concern: 0.8783157982773925




Kernel CKA concern: 0.9016595686115412




Kernel CKA non-concern: 0.8891634792360046




--2--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7330674425982607, 0.7330674425982607)




CCA coefficients mean non-concern: (0.7340070019457844, 0.7340070019457844)




Linear CKA concern: 0.9346781896946136




Linear CKA non-concern: 0.8785966992121998




Kernel CKA concern: 0.9217603795840971




Kernel CKA non-concern: 0.8899704957816229




--3--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7273449550678598, 0.7273449550678598)




CCA coefficients mean non-concern: (0.734171556185653, 0.734171556185653)




Linear CKA concern: 0.8938954392502179




Linear CKA non-concern: 0.8815749919305922




Kernel CKA concern: 0.8882730925286755




Kernel CKA non-concern: 0.8936822666638385




--4--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7373758129283947, 0.7373758129283947)




CCA coefficients mean non-concern: (0.7343500042413817, 0.7343500042413817)




Linear CKA concern: 0.9232349962221302




Linear CKA non-concern: 0.8793649607266539




Kernel CKA concern: 0.9156871707833648




Kernel CKA non-concern: 0.8921738021995524




--5--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7217193669871398, 0.7217193669871398)




CCA coefficients mean non-concern: (0.736441323367012, 0.736441323367012)




Linear CKA concern: 0.8604347302912694




Linear CKA non-concern: 0.8860130473353919




Kernel CKA concern: 0.8456679243927164




Kernel CKA non-concern: 0.898019247181131




--6--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7233652264756902, 0.7233652264756902)




CCA coefficients mean non-concern: (0.7365184599701875, 0.7365184599701875)




Linear CKA concern: 0.8976937815532063




Linear CKA non-concern: 0.8780576676210678




Kernel CKA concern: 0.8838517813601204




Kernel CKA non-concern: 0.8929871118405062




--7--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7341591582711211, 0.7341591582711211)




CCA coefficients mean non-concern: (0.7349998045999129, 0.7349998045999129)




Linear CKA concern: 0.8977401894298733




Linear CKA non-concern: 0.8836451595964606




Kernel CKA concern: 0.89287354069375




Kernel CKA non-concern: 0.8956039631488719




--8--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7329502398942048, 0.7329502398942048)




CCA coefficients mean non-concern: (0.7353869897857245, 0.7353869897857245)




Linear CKA concern: 0.9001816548586681




Linear CKA non-concern: 0.881411587206041




Kernel CKA concern: 0.8932262542298794




Kernel CKA non-concern: 0.8943022347267859




--9--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7343479327931742, 0.7343479327931742)




CCA coefficients mean non-concern: (0.7346789740284035, 0.7346789740284035)




Linear CKA concern: 0.9315278647936174




Linear CKA non-concern: 0.877817323975794




Kernel CKA concern: 0.9156049564959229




Kernel CKA non-concern: 0.8916577631314471




--10--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7335469994713374, 0.7335469994713374)




CCA coefficients mean non-concern: (0.736391121887931, 0.736391121887931)




Linear CKA concern: 0.9294746918819645




Linear CKA non-concern: 0.8774448671963261




Kernel CKA concern: 0.9158511575997236




Kernel CKA non-concern: 0.8925626062992527




--11--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7314250481734472, 0.7314250481734472)




CCA coefficients mean non-concern: (0.7360962105443595, 0.7360962105443595)




Linear CKA concern: 0.9150959753081004




Linear CKA non-concern: 0.8825899608273275




Kernel CKA concern: 0.9021073792036716




Kernel CKA non-concern: 0.8954907914547784




--12--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7291819603860934, 0.7291819603860934)




CCA coefficients mean non-concern: (0.7368749510977878, 0.7368749510977878)




Linear CKA concern: 0.9180888327535247




Linear CKA non-concern: 0.8830111277269803




Kernel CKA concern: 0.9081438386715689




Kernel CKA non-concern: 0.8963670651086021




--13--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7383323692120793, 0.7383323692120793)




CCA coefficients mean non-concern: (0.7347066920161509, 0.7347066920161509)




Linear CKA concern: 0.917274520488022




Linear CKA non-concern: 0.8821898118866105




Kernel CKA concern: 0.9044932563740762




Kernel CKA non-concern: 0.8933623929452977




--14--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.736909841212441, 0.736909841212441)




CCA coefficients mean non-concern: (0.7346249933817416, 0.7346249933817416)




Linear CKA concern: 0.9173584120401286




Linear CKA non-concern: 0.8811899916185567




Kernel CKA concern: 0.9039377177173292




Kernel CKA non-concern: 0.8943247391288349




--15--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.7238660792715249, 0.7238660792715249)




CCA coefficients mean non-concern: (0.7344402069893445, 0.7344402069893445)




Linear CKA concern: 0.8633578288160105




Linear CKA non-concern: 0.8838539310545878




Kernel CKA concern: 0.8510329635598497




Kernel CKA non-concern: 0.8960912046038546




In [12]:
# Display the sparsity of the pruned module. Per the recorded output, the result
# is a pair: an overall ratio and a dict mapping each parameter name to its own
# sparsity.
get_sparsity(module)

(0.4959948842155738,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.5,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.5,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.5,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.5,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.5,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.5,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.5,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.5,
  'bert.encoder.layer.1.attention.self.key.bias': 0.0,
  'bert.encoder.layer.1.attention.self.value.weight': 0.5,
  'bert.encoder.