In [1]:
import os
import sys
# Make the project root importable; presumably this is what lets the `utils.*`
# imports in the next cell resolve.
# NOTE(review): the path is relative to the kernel's working directory — confirm the
# notebook is always launched from its own folder.
_repo_root = "../../../../"
if _repo_root not in sys.path:  # guard: re-running this cell must not stack duplicate entries
    sys.path.append(_repo_root)

# Set before any tokenizer is created. Presumably this silences the Hugging Face
# tokenizers fork/parallelism warning when DataLoader workers are used — confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.save_module import save_module
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_wanda
)

In [3]:
# --- Experiment configuration ---------------------------------------------------
name = "OSDG"  # key passed to ModelConfig(...) and load_data(...)

# Prefer GPU index 1 (the originally hard-coded choice), but fall back so the notebook
# still runs on hosts with a single GPU or with no CUDA device at all.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

checkpoint = None  # placeholder; rebound by load_model(...) in a later cell
batch_size = 32  # forwarded to load_data
num_workers = 48  # forwarded to load_data
num_samples = 16  # forwarded to SamplingDataset and similar
wanda_ratio = 0.6  # passed to prune_wanda as sparsity_ratio
seed = 44  # shared by data loading, sampling and the similarity analysis
include_layers = ["attention", "intermediate", "output"]  # passed to prune_wanda
exclude_layers = None  # passed to prune_wanda

In [4]:
# Remember when this run began and echo it in a human-readable form.
script_start_time = datetime.now()
_start_stamp = script_start_time.strftime('%Y-%m-%d %H:%M:%S')
print(f"Script started at: {_start_stamp}")

Script started at: 2024-08-19 14:52:52


In [5]:
# Build the configuration for the selected model and read the label count from it.
model_config = ModelConfig(name, device)
num_labels = model_config.config["num_labels"]

# load_model returns three values; `checkpoint` here rebinds the `checkpoint = None`
# placeholder set in the configuration cell.
loaded_parts = load_model(model_config)
model, tokenizer, checkpoint = loaded_parts

Loading the model.




{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
# Create the train / validation / test loaders for the chosen dataset from the
# shared configuration values.
loader_settings = {"batch_size": batch_size, "num_workers": num_workers, "seed": seed}
train_dataloader, valid_dataloader, test_dataloader = load_data(name, **loader_settings)

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




In [7]:
# Samples that are later handed to prune_wanda.
# NOTE(review): 200 occupies the positional slot where the per-concern loop passes a
# class index, and the following flag is False — presumably this means "exclude class
# 200", i.e. draw from every class because labels stop at num_labels - 1. The meaning
# of the positional 4 is not visible here. Confirm both against SamplingDataset.
sampling_options = dict(device=device, resample=False, seed=seed)
all_samples = SamplingDataset(
    train_dataloader,
    200,
    num_samples,
    num_labels,
    False,
    4,
    **sampling_options,
)

In [8]:
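# Baseline evaluation of the original (unpruned) model. Left disabled because a full
# pass over the test set took about three minutes; the recorded output is kept in the
# next cell for reference.
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)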

In [9]:
# Recorded output from an earlier run of the baseline evaluation (original, unpruned model):
# Evaluate the original model
# Evaluating: 100%|█████████████████████████████████████████████████████████████████████| 200/200 [03:16<00:00,  1.02it/s]
# Loss: 0.9485
# Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793
#               precision    recall  f1-score   support

#            0       0.77      0.66      0.71       797
#            1       0.84      0.72      0.78       775
#            2       0.88      0.87      0.88       795
#            3       0.87      0.83      0.85      1110
#            4       0.86      0.80      0.83      1260
#            5       0.88      0.69      0.77       882
#            6       0.85      0.80      0.83       940
#            7       0.49      0.61      0.54       473
#            8       0.66      0.85      0.74       746
#            9       0.62      0.73      0.67       689
#           10       0.75      0.79      0.77       670
#           11       0.62      0.81      0.70       312
#           12       0.73      0.81      0.77       665
#           13       0.83      0.85      0.84       314
#           14       0.85      0.78      0.81       756
#           15       0.97      0.98      0.97      1607

#     accuracy                           0.80     12791
#    macro avg       0.78      0.79      0.78     12791
# weighted avg       0.81      0.80      0.80     12791

In [10]:
# Work on a deep copy so the original `model` remains available for the
# original-vs-pruned comparison performed later.
module = copy.deepcopy(model)

# prune_wanda's return value is not used; it appears to modify `module` in place
# (the sparsity report further down is taken from `module`).
wanda_options = {
    "sparsity_ratio": wanda_ratio,
    "include_layers": include_layers,
    "exclude_layers": exclude_layers,
}
prune_wanda(module, model_config, all_samples, **wanda_options)

print("Evaluate the pruned model")
result = evaluate_model(module, model_config, test_dataloader)
# save_module(module, "Modules/", f"wanda_{name}_{wanda_ratio}p.pt")

Evaluate the pruned model




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:41,  1.96it/s]

Evaluating:   1%|          | 2/200 [00:00<01:30,  2.18it/s]

Evaluating:   2%|▏         | 3/200 [00:01<01:27,  2.26it/s]

Evaluating:   2%|▏         | 4/200 [00:01<01:25,  2.29it/s]

Evaluating:   2%|▎         | 5/200 [00:02<01:24,  2.31it/s]

Evaluating:   3%|▎         | 6/200 [00:02<01:23,  2.33it/s]

Evaluating:   4%|▎         | 7/200 [00:03<01:22,  2.33it/s]

Evaluating:   4%|▍         | 8/200 [00:03<01:22,  2.34it/s]

Evaluating:   4%|▍         | 9/200 [00:03<01:21,  2.34it/s]

Evaluating:   5%|▌         | 10/200 [00:04<01:21,  2.34it/s]

Evaluating:   6%|▌         | 11/200 [00:04<01:20,  2.34it/s]

Evaluating:   6%|▌         | 12/200 [00:05<01:20,  2.34it/s]

Evaluating:   6%|▋         | 13/200 [00:05<01:20,  2.34it/s]

Evaluating:   7%|▋         | 14/200 [00:06<01:19,  2.33it/s]

Evaluating:   8%|▊         | 15/200 [00:06<01:19,  2.33it/s]

Evaluating:   8%|▊         | 16/200 [00:06<01:18,  2.33it/s]

Evaluating:   8%|▊         | 17/200 [00:07<01:18,  2.33it/s]

Evaluating:   9%|▉         | 18/200 [00:07<01:18,  2.33it/s]

Evaluating:  10%|▉         | 19/200 [00:08<01:17,  2.33it/s]

Evaluating:  10%|█         | 20/200 [00:08<01:17,  2.32it/s]

Evaluating:  10%|█         | 21/200 [00:09<01:17,  2.32it/s]

Evaluating:  11%|█         | 22/200 [00:09<01:16,  2.32it/s]

Evaluating:  12%|█▏        | 23/200 [00:09<01:16,  2.31it/s]

Evaluating:  12%|█▏        | 24/200 [00:10<01:16,  2.29it/s]

Evaluating:  12%|█▎        | 25/200 [00:10<01:17,  2.26it/s]

Evaluating:  13%|█▎        | 26/200 [00:11<01:19,  2.18it/s]

Evaluating:  14%|█▎        | 27/200 [00:11<01:22,  2.10it/s]

Evaluating:  14%|█▍        | 28/200 [00:12<01:24,  2.03it/s]

Evaluating:  14%|█▍        | 29/200 [00:12<01:27,  1.96it/s]

Evaluating:  15%|█▌        | 30/200 [00:13<01:31,  1.86it/s]

Evaluating:  16%|█▌        | 31/200 [00:14<01:35,  1.77it/s]

Evaluating:  16%|█▌        | 32/200 [00:14<01:39,  1.69it/s]

Evaluating:  16%|█▋        | 33/200 [00:15<01:42,  1.63it/s]

Evaluating:  17%|█▋        | 34/200 [00:16<01:44,  1.58it/s]

Evaluating:  18%|█▊        | 35/200 [00:16<01:47,  1.54it/s]

Evaluating:  18%|█▊        | 36/200 [00:17<01:49,  1.49it/s]

Evaluating:  18%|█▊        | 37/200 [00:18<01:54,  1.42it/s]

Evaluating:  19%|█▉        | 38/200 [00:19<01:57,  1.38it/s]

Evaluating:  20%|█▉        | 39/200 [00:19<01:59,  1.35it/s]

Evaluating:  20%|██        | 40/200 [00:20<02:00,  1.33it/s]

Evaluating:  20%|██        | 41/200 [00:21<02:00,  1.32it/s]

Evaluating:  21%|██        | 42/200 [00:22<02:00,  1.31it/s]

Evaluating:  22%|██▏       | 43/200 [00:23<02:02,  1.28it/s]

Evaluating:  22%|██▏       | 44/200 [00:23<02:05,  1.24it/s]

Evaluating:  22%|██▎       | 45/200 [00:24<02:07,  1.21it/s]

Evaluating:  23%|██▎       | 46/200 [00:25<02:08,  1.20it/s]

Evaluating:  24%|██▎       | 47/200 [00:26<02:09,  1.18it/s]

Evaluating:  24%|██▍       | 48/200 [00:27<02:09,  1.17it/s]

Evaluating:  24%|██▍       | 49/200 [00:28<02:09,  1.17it/s]

Evaluating:  25%|██▌       | 50/200 [00:29<02:08,  1.16it/s]

Evaluating:  26%|██▌       | 51/200 [00:29<02:08,  1.16it/s]

Evaluating:  26%|██▌       | 52/200 [00:30<02:07,  1.16it/s]

Evaluating:  26%|██▋       | 53/200 [00:31<02:06,  1.16it/s]

Evaluating:  27%|██▋       | 54/200 [00:32<02:06,  1.16it/s]

Evaluating:  28%|██▊       | 55/200 [00:33<02:05,  1.16it/s]

Evaluating:  28%|██▊       | 56/200 [00:34<02:04,  1.15it/s]

Evaluating:  28%|██▊       | 57/200 [00:35<02:03,  1.15it/s]

Evaluating:  29%|██▉       | 58/200 [00:36<02:03,  1.15it/s]

Evaluating:  30%|██▉       | 59/200 [00:36<02:02,  1.15it/s]

Evaluating:  30%|███       | 60/200 [00:37<02:01,  1.15it/s]

Evaluating:  30%|███       | 61/200 [00:38<02:00,  1.15it/s]

Evaluating:  31%|███       | 62/200 [00:39<01:59,  1.15it/s]

Evaluating:  32%|███▏      | 63/200 [00:40<01:58,  1.15it/s]

Evaluating:  32%|███▏      | 64/200 [00:41<01:58,  1.15it/s]

Evaluating:  32%|███▎      | 65/200 [00:42<01:57,  1.15it/s]

Evaluating:  33%|███▎      | 66/200 [00:43<01:59,  1.12it/s]

Evaluating:  34%|███▎      | 67/200 [00:44<02:00,  1.10it/s]

Evaluating:  34%|███▍      | 68/200 [00:44<02:00,  1.09it/s]

Evaluating:  34%|███▍      | 69/200 [00:45<02:00,  1.09it/s]

Evaluating:  35%|███▌      | 70/200 [00:46<02:00,  1.08it/s]

Evaluating:  36%|███▌      | 71/200 [00:47<02:04,  1.04it/s]

Evaluating:  36%|███▌      | 72/200 [00:49<02:23,  1.12s/it]

Evaluating:  36%|███▋      | 73/200 [00:50<02:35,  1.23s/it]

Evaluating:  37%|███▋      | 74/200 [00:52<02:47,  1.33s/it]

Evaluating:  38%|███▊      | 75/200 [00:53<02:56,  1.41s/it]

Evaluating:  38%|███▊      | 76/200 [00:55<03:03,  1.48s/it]

Evaluating:  38%|███▊      | 77/200 [00:57<03:12,  1.56s/it]

Evaluating:  39%|███▉      | 78/200 [00:59<03:17,  1.62s/it]

Evaluating:  40%|███▉      | 79/200 [01:00<03:21,  1.66s/it]

Evaluating:  40%|████      | 80/200 [01:02<03:22,  1.68s/it]

Evaluating:  40%|████      | 81/200 [01:04<03:23,  1.71s/it]

Evaluating:  41%|████      | 82/200 [01:06<03:22,  1.72s/it]

Evaluating:  42%|████▏     | 83/200 [01:07<03:22,  1.73s/it]

Evaluating:  42%|████▏     | 84/200 [01:09<03:21,  1.74s/it]

Evaluating:  42%|████▎     | 85/200 [01:11<03:19,  1.74s/it]

Evaluating:  43%|████▎     | 86/200 [01:13<03:18,  1.74s/it]

Evaluating:  44%|████▎     | 87/200 [01:14<03:16,  1.74s/it]

Evaluating:  44%|████▍     | 88/200 [01:16<03:15,  1.75s/it]

Evaluating:  44%|████▍     | 89/200 [01:18<03:13,  1.75s/it]

Evaluating:  45%|████▌     | 90/200 [01:20<03:11,  1.74s/it]

Evaluating:  46%|████▌     | 91/200 [01:21<03:09,  1.74s/it]

Evaluating:  46%|████▌     | 92/200 [01:23<03:08,  1.74s/it]

Evaluating:  46%|████▋     | 93/200 [01:25<03:06,  1.74s/it]

Evaluating:  47%|████▋     | 94/200 [01:27<03:04,  1.74s/it]

Evaluating:  48%|████▊     | 95/200 [01:28<03:03,  1.75s/it]

Evaluating:  48%|████▊     | 96/200 [01:30<03:01,  1.75s/it]

Evaluating:  48%|████▊     | 97/200 [01:32<02:59,  1.74s/it]

Evaluating:  49%|████▉     | 98/200 [01:34<02:58,  1.75s/it]

Evaluating:  50%|████▉     | 99/200 [01:35<02:56,  1.74s/it]

Evaluating:  50%|█████     | 100/200 [01:37<02:54,  1.74s/it]

Evaluating:  50%|█████     | 101/200 [01:39<02:52,  1.74s/it]

Evaluating:  51%|█████     | 102/200 [01:41<02:50,  1.74s/it]

Evaluating:  52%|█████▏    | 103/200 [01:42<02:48,  1.74s/it]

Evaluating:  52%|█████▏    | 104/200 [01:44<02:46,  1.73s/it]

Evaluating:  52%|█████▎    | 105/200 [01:46<02:45,  1.74s/it]

Evaluating:  53%|█████▎    | 106/200 [01:47<02:43,  1.74s/it]

Evaluating:  54%|█████▎    | 107/200 [01:49<02:41,  1.74s/it]

Evaluating:  54%|█████▍    | 108/200 [01:51<02:39,  1.73s/it]

Evaluating:  55%|█████▍    | 109/200 [01:53<02:37,  1.74s/it]

Evaluating:  55%|█████▌    | 110/200 [01:54<02:36,  1.73s/it]

Evaluating:  56%|█████▌    | 111/200 [01:56<02:34,  1.73s/it]

Evaluating:  56%|█████▌    | 112/200 [01:58<02:32,  1.73s/it]

Evaluating:  56%|█████▋    | 113/200 [02:00<02:30,  1.73s/it]

Evaluating:  57%|█████▋    | 114/200 [02:01<02:29,  1.74s/it]

Evaluating:  57%|█████▊    | 115/200 [02:03<02:26,  1.73s/it]

Evaluating:  58%|█████▊    | 116/200 [02:04<02:08,  1.53s/it]

Evaluating:  58%|█████▊    | 117/200 [02:05<01:59,  1.44s/it]

Evaluating:  59%|█████▉    | 118/200 [02:06<01:46,  1.29s/it]

Evaluating:  60%|█████▉    | 119/200 [02:07<01:37,  1.20s/it]

Evaluating:  60%|██████    | 120/200 [02:08<01:30,  1.13s/it]

Evaluating:  60%|██████    | 121/200 [02:09<01:25,  1.08s/it]

Evaluating:  61%|██████    | 122/200 [02:10<01:21,  1.05s/it]

Evaluating:  62%|██████▏   | 123/200 [02:11<01:24,  1.10s/it]

Evaluating:  62%|██████▏   | 124/200 [02:12<01:20,  1.05s/it]

Evaluating:  62%|██████▎   | 125/200 [02:13<01:16,  1.03s/it]

Evaluating:  63%|██████▎   | 126/200 [02:14<01:14,  1.01s/it]

Evaluating:  64%|██████▎   | 127/200 [02:15<01:12,  1.01it/s]

Evaluating:  64%|██████▍   | 128/200 [02:16<01:13,  1.01s/it]

Evaluating:  64%|██████▍   | 129/200 [02:18<01:24,  1.18s/it]

Evaluating:  65%|██████▌   | 130/200 [02:19<01:31,  1.30s/it]

Evaluating:  66%|██████▌   | 131/200 [02:21<01:35,  1.39s/it]

Evaluating:  66%|██████▌   | 132/200 [02:23<01:37,  1.44s/it]

Evaluating:  66%|██████▋   | 133/200 [02:24<01:39,  1.49s/it]

Evaluating:  67%|██████▋   | 134/200 [02:26<01:40,  1.52s/it]

Evaluating:  68%|██████▊   | 135/200 [02:27<01:40,  1.54s/it]

Evaluating:  68%|██████▊   | 136/200 [02:29<01:39,  1.56s/it]

Evaluating:  68%|██████▊   | 137/200 [02:31<01:38,  1.56s/it]

Evaluating:  69%|██████▉   | 138/200 [02:32<01:37,  1.57s/it]

Evaluating:  70%|██████▉   | 139/200 [02:34<01:35,  1.57s/it]

Evaluating:  70%|███████   | 140/200 [02:35<01:34,  1.58s/it]

Evaluating:  70%|███████   | 141/200 [02:37<01:33,  1.58s/it]

Evaluating:  71%|███████   | 142/200 [02:38<01:31,  1.58s/it]

Evaluating:  72%|███████▏  | 143/200 [02:40<01:30,  1.58s/it]

Evaluating:  72%|███████▏  | 144/200 [02:42<01:29,  1.59s/it]

Evaluating:  72%|███████▎  | 145/200 [02:43<01:27,  1.60s/it]

Evaluating:  73%|███████▎  | 146/200 [02:45<01:26,  1.60s/it]

Evaluating:  74%|███████▎  | 147/200 [02:46<01:24,  1.60s/it]

Evaluating:  74%|███████▍  | 148/200 [02:48<01:23,  1.60s/it]

Evaluating:  74%|███████▍  | 149/200 [02:50<01:21,  1.60s/it]

Evaluating:  75%|███████▌  | 150/200 [02:51<01:20,  1.60s/it]

Evaluating:  76%|███████▌  | 151/200 [02:53<01:18,  1.60s/it]

Evaluating:  76%|███████▌  | 152/200 [02:55<01:17,  1.61s/it]

Evaluating:  76%|███████▋  | 153/200 [02:56<01:15,  1.61s/it]

Evaluating:  77%|███████▋  | 154/200 [02:58<01:13,  1.61s/it]

Evaluating:  78%|███████▊  | 155/200 [02:59<01:12,  1.60s/it]

Evaluating:  78%|███████▊  | 156/200 [03:01<01:10,  1.60s/it]

Evaluating:  78%|███████▊  | 157/200 [03:03<01:08,  1.60s/it]

Evaluating:  79%|███████▉  | 158/200 [03:04<01:07,  1.60s/it]

Evaluating:  80%|███████▉  | 159/200 [03:06<01:05,  1.60s/it]

Evaluating:  80%|████████  | 160/200 [03:07<01:03,  1.60s/it]

Evaluating:  80%|████████  | 161/200 [03:09<01:02,  1.60s/it]

Evaluating:  81%|████████  | 162/200 [03:11<01:00,  1.60s/it]

Evaluating:  82%|████████▏ | 163/200 [03:12<00:59,  1.60s/it]

Evaluating:  82%|████████▏ | 164/200 [03:14<00:57,  1.60s/it]

Evaluating:  82%|████████▎ | 165/200 [03:15<00:55,  1.59s/it]

Evaluating:  83%|████████▎ | 166/200 [03:17<00:54,  1.60s/it]

Evaluating:  84%|████████▎ | 167/200 [03:19<00:52,  1.60s/it]

Evaluating:  84%|████████▍ | 168/200 [03:20<00:51,  1.60s/it]

Evaluating:  84%|████████▍ | 169/200 [03:22<00:49,  1.60s/it]

Evaluating:  85%|████████▌ | 170/200 [03:23<00:48,  1.60s/it]

Evaluating:  86%|████████▌ | 171/200 [03:25<00:46,  1.60s/it]

Evaluating:  86%|████████▌ | 172/200 [03:27<00:44,  1.60s/it]

Evaluating:  86%|████████▋ | 173/200 [03:28<00:43,  1.60s/it]

Evaluating:  87%|████████▋ | 174/200 [03:30<00:41,  1.59s/it]

Evaluating:  88%|████████▊ | 175/200 [03:31<00:39,  1.59s/it]

Evaluating:  88%|████████▊ | 176/200 [03:33<00:38,  1.60s/it]

Evaluating:  88%|████████▊ | 177/200 [03:35<00:36,  1.60s/it]

Evaluating:  89%|████████▉ | 178/200 [03:36<00:35,  1.60s/it]

Evaluating:  90%|████████▉ | 179/200 [03:38<00:33,  1.59s/it]

Evaluating:  90%|█████████ | 180/200 [03:39<00:31,  1.60s/it]

Evaluating:  90%|█████████ | 181/200 [03:41<00:30,  1.60s/it]

Evaluating:  91%|█████████ | 182/200 [03:43<00:28,  1.60s/it]

Evaluating:  92%|█████████▏| 183/200 [03:44<00:27,  1.60s/it]

Evaluating:  92%|█████████▏| 184/200 [03:46<00:25,  1.60s/it]

Evaluating:  92%|█████████▎| 185/200 [03:47<00:23,  1.59s/it]

Evaluating:  93%|█████████▎| 186/200 [03:49<00:22,  1.60s/it]

Evaluating:  94%|█████████▎| 187/200 [03:50<00:20,  1.59s/it]

Evaluating:  94%|█████████▍| 188/200 [03:52<00:19,  1.59s/it]

Evaluating:  94%|█████████▍| 189/200 [03:54<00:17,  1.59s/it]

Evaluating:  95%|█████████▌| 190/200 [03:55<00:15,  1.59s/it]

Evaluating:  96%|█████████▌| 191/200 [03:57<00:14,  1.59s/it]

Evaluating:  96%|█████████▌| 192/200 [03:58<00:12,  1.60s/it]

Evaluating:  96%|█████████▋| 193/200 [04:00<00:11,  1.59s/it]

Evaluating:  97%|█████████▋| 194/200 [04:02<00:09,  1.59s/it]

Evaluating:  98%|█████████▊| 195/200 [04:03<00:07,  1.59s/it]

Evaluating:  98%|█████████▊| 196/200 [04:05<00:06,  1.59s/it]

Evaluating:  98%|█████████▊| 197/200 [04:06<00:04,  1.59s/it]

Evaluating:  99%|█████████▉| 198/200 [04:08<00:03,  1.59s/it]

Evaluating: 100%|█████████▉| 199/200 [04:10<00:01,  1.59s/it]

Evaluating: 100%|██████████| 200/200 [04:11<00:00,  1.53s/it]

Evaluating: 100%|██████████| 200/200 [04:11<00:00,  1.26s/it]




Loss: 0.9410




Precision: 0.7445, Recall: 0.7266, F1-Score: 0.7273




              precision    recall  f1-score   support

           0       0.75      0.57      0.65       797
           1       0.82      0.60      0.69       775
           2       0.88      0.83      0.86       795
           3       0.86      0.78      0.82      1110
           4       0.78      0.81      0.80      1260
           5       0.91      0.63      0.75       882
           6       0.84      0.74      0.79       940
           7       0.45      0.37      0.40       473
           8       0.60      0.79      0.69       746
           9       0.44      0.76      0.56       689
          10       0.75      0.71      0.73       670
          11       0.67      0.69      0.68       312
          12       0.64      0.78      0.70       665
          13       0.82      0.84      0.83       314
          14       0.84      0.74      0.79       756
          15       0.87      0.96      0.91      1607

    accuracy                           0.75     12791
   macro avg       0.74   




In [11]:
# Compare the original and the pruned model one class ("concern") at a time.
shared_sampling = dict(device=device, resample=False, seed=seed)
for concern in range(num_labels):
    print(f"--{concern}--")
    # NOTE(review): the two datasets below are built but never passed to `similar`;
    # check whether a later cell depends on them before dropping this work.
    positive_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, True, 4, **shared_sampling
    )
    negative_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, False, 4, **shared_sampling
    )
    similar(
        model,
        module,
        valid_dataloader,
        concern,
        num_samples,
        num_labels,
        device=device,
        seed=seed,
    )

--0--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6272300602434997, 0.6272300602434997)




CCA coefficients mean non-concern: (0.6251031293379349, 0.6251031293379349)




Linear CKA concern: 0.7425310275627316




Linear CKA non-concern: 0.6149154590542169




Kernel CKA concern: 0.7268473604191663




Kernel CKA non-concern: 0.6449215172011605




--1--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6235139966148322, 0.6235139966148322)




CCA coefficients mean non-concern: (0.6289507462544686, 0.6289507462544686)




Linear CKA concern: 0.6300089153134476




Linear CKA non-concern: 0.6257122943453405




Kernel CKA concern: 0.6412431206791329




Kernel CKA non-concern: 0.6455998923257635




--2--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6308255623251035, 0.6308255623251035)




CCA coefficients mean non-concern: (0.6249286245946049, 0.6249286245946049)




Linear CKA concern: 0.7733249024158668




Linear CKA non-concern: 0.6149622639251848




Kernel CKA concern: 0.7452706405645622




Kernel CKA non-concern: 0.6350353119377112




--3--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6205567500690481, 0.6205567500690481)




CCA coefficients mean non-concern: (0.6266708858803237, 0.6266708858803237)




Linear CKA concern: 0.6572385736790876




Linear CKA non-concern: 0.6229100904223751




Kernel CKA concern: 0.6422529321376019




Kernel CKA non-concern: 0.6460490997828418




--4--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.630943628540446, 0.630943628540446)




CCA coefficients mean non-concern: (0.6262925294152749, 0.6262925294152749)




Linear CKA concern: 0.7208155361343648




Linear CKA non-concern: 0.613790759706646




Kernel CKA concern: 0.7025981521650644




Kernel CKA non-concern: 0.6390538168191335




--5--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6117479322567261, 0.6117479322567261)




CCA coefficients mean non-concern: (0.6270502905450555, 0.6270502905450555)




Linear CKA concern: 0.6035196350725064




Linear CKA non-concern: 0.628263015135531




Kernel CKA concern: 0.587822098932205




Kernel CKA non-concern: 0.6527068527591717




--6--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6229788463351631, 0.6229788463351631)




CCA coefficients mean non-concern: (0.6284833186594648, 0.6284833186594648)




Linear CKA concern: 0.7117076281384669




Linear CKA non-concern: 0.6139118149425715




Kernel CKA concern: 0.7087899644718274




Kernel CKA non-concern: 0.6362931639899547




--7--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6284493736216753, 0.6284493736216753)




CCA coefficients mean non-concern: (0.6265711210099403, 0.6265711210099403)




Linear CKA concern: 0.6734185874150573




Linear CKA non-concern: 0.6252506363069941




Kernel CKA concern: 0.6598941406820311




Kernel CKA non-concern: 0.6525109488691514




--8--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6269411269077103, 0.6269411269077103)




CCA coefficients mean non-concern: (0.6271770541884779, 0.6271770541884779)




Linear CKA concern: 0.6208098217130508




Linear CKA non-concern: 0.6243896404972581




Kernel CKA concern: 0.6020246025977489




Kernel CKA non-concern: 0.6532563705525998




--9--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6364010994589047, 0.6364010994589047)




CCA coefficients mean non-concern: (0.6266405640297309, 0.6266405640297309)




Linear CKA concern: 0.7944994580248678




Linear CKA non-concern: 0.6073040363119877




Kernel CKA concern: 0.762992780269991




Kernel CKA non-concern: 0.6367904880105214




--10--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6353327591405061, 0.6353327591405061)




CCA coefficients mean non-concern: (0.6278908751693302, 0.6278908751693302)




Linear CKA concern: 0.747483800673385




Linear CKA non-concern: 0.6133089071913319




Kernel CKA concern: 0.7251961512224346




Kernel CKA non-concern: 0.6337831071579594




--11--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6296083896653389, 0.6296083896653389)




CCA coefficients mean non-concern: (0.6284610501951233, 0.6284610501951233)




Linear CKA concern: 0.6773343964604838




Linear CKA non-concern: 0.6223393390882981




Kernel CKA concern: 0.6614640779165253




Kernel CKA non-concern: 0.6518386684340988




--12--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.617505333051283, 0.617505333051283)




CCA coefficients mean non-concern: (0.6308026129701383, 0.6308026129701383)




Linear CKA concern: 0.6979280791150977




Linear CKA non-concern: 0.6286162481322731




Kernel CKA concern: 0.6808896372615545




Kernel CKA non-concern: 0.660565098407734




--13--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6308838228512073, 0.6308838228512073)




CCA coefficients mean non-concern: (0.626110242906857, 0.626110242906857)




Linear CKA concern: 0.7317812781217782




Linear CKA non-concern: 0.6198461917047114




Kernel CKA concern: 0.718735264355465




Kernel CKA non-concern: 0.6435063021703008




--14--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.633418226066532, 0.633418226066532)




CCA coefficients mean non-concern: (0.6265875468368406, 0.6265875468368406)




Linear CKA concern: 0.7360461252590548




Linear CKA non-concern: 0.6142116186231246




Kernel CKA concern: 0.7039643918011805




Kernel CKA non-concern: 0.6432940374429545




--15--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.6209390727260458, 0.6209390727260458)




CCA coefficients mean non-concern: (0.6266705905997167, 0.6266705905997167)




Linear CKA concern: 0.6779150451288851




Linear CKA non-concern: 0.6194842009540751




Kernel CKA concern: 0.6440448251465667




Kernel CKA non-concern: 0.6479393351088323




In [12]:
# Report the sparsity of the pruned module. As the output below shows, the result is a
# pair: an overall ratio followed by a dict of per-parameter ratios (biases stay at 0.0).
get_sparsity(module)

(0.5944834517193173,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.5989583333333334,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.5989583333333334,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.5989583333333334,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.5989583333333334,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.5989583333333334,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.5999348958333334,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.5989583333333334,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.5989583333333334,
  'bert.en