In [1]:
import os
import sys
sys.path.append("../../../../")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.save_module import save_module
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_magnitude
)

In [3]:
name= "OSDG"
device = torch.device("cuda:0")
checkpoint = None
batch_size=32
num_workers=48
num_samples=16
magnitude_ratio=0.3
seed=44
include_layers=["attention", "intermediate", "output"]
exclude_layers=None

In [4]:
script_start_time = datetime.now()
print(f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

Script started at: 2024-08-19 22:36:42


In [5]:
model_config = ModelConfig(name, device)
num_labels = model_config.config["num_labels"]
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=batch_size, num_workers=num_workers, do_cache=True, seed=seed
)

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




In [7]:
all_samples = SamplingDataset(
    train_dataloader, 200, num_samples, num_labels, False, 4, device=device, resample=False, seed=seed
)

In [8]:
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

In [9]:
# Evaluate the original model
# Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [03:16<00:00,  1.02it/s]
# Loss: 0.9485
# Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793
#               precision    recall  f1-score   support

#            0       0.77      0.66      0.71       797
#            1       0.84      0.72      0.78       775
#            2       0.88      0.87      0.88       795
#            3       0.87      0.83      0.85      1110
#            4       0.86      0.80      0.83      1260
#            5       0.88      0.69      0.77       882
#            6       0.85      0.80      0.83       940
#            7       0.49      0.61      0.54       473
#            8       0.66      0.85      0.74       746
#            9       0.62      0.73      0.67       689
#           10       0.75      0.79      0.77       670
#           11       0.62      0.81      0.70       312
#           12       0.73      0.81      0.77       665
#           13       0.83      0.85      0.84       314
#           14       0.85      0.78      0.81       756
#           15       0.97      0.98      0.97      1607

#     accuracy                           0.80     12791
#    macro avg       0.78      0.79      0.78     12791
# weighted avg       0.81      0.80      0.80     12791

In [10]:
module = copy.deepcopy(model)
prune_magnitude(module, sparsity_ratio=magnitude_ratio, include_layers=include_layers, exclude_layers=exclude_layers)
print("Evaluate the pruned model")
result = evaluate_model(model, model_config, test_dataloader)
# save_module(module, "Modules/", f"magnitude_{name}_{magnitude_ratio}p.pt")

Evaluate the pruned model




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<02:43,  1.21it/s]

Evaluating:   1%|          | 2/200 [00:01<01:57,  1.69it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:42,  1.92it/s]

Evaluating:   2%|?둞         | 4/200 [00:02<01:35,  2.05it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:31,  2.14it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:28,  2.18it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:26,  2.22it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:25,  2.24it/s]

Evaluating:   4%|?둜         | 9/200 [00:04<01:25,  2.24it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:25,  2.22it/s]

Evaluating:   6%|?둛         | 11/200 [00:05<01:26,  2.18it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:28,  2.13it/s]

Evaluating:   6%|?둚         | 13/200 [00:06<01:32,  2.02it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:37,  1.91it/s]

Evaluating:   8%|?둙         | 15/200 [00:07<01:41,  1.82it/s]

Evaluating:   8%|?둙         | 16/200 [00:08<01:45,  1.74it/s]

Evaluating:   8%|?둙         | 17/200 [00:08<01:50,  1.65it/s]

Evaluating:   9%|?둘         | 18/200 [00:09<01:56,  1.56it/s]

Evaluating:  10%|?둘         | 19/200 [00:10<02:01,  1.49it/s]

Evaluating:  10%|?둗         | 20/200 [00:11<02:11,  1.37it/s]

Evaluating:  10%|?둗         | 21/200 [00:12<02:17,  1.30it/s]

Evaluating:  11%|?둗         | 22/200 [00:12<02:22,  1.25it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:13<02:25,  1.22it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:14<02:38,  1.11it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:15<02:47,  1.05it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:16<02:53,  1.01it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:18<02:56,  1.02s/it]

Evaluating:  14%|?둗?둜        | 28/200 [00:19<02:58,  1.04s/it]

Evaluating:  14%|?둗?둜        | 29/200 [00:20<02:59,  1.05s/it]

Evaluating:  15%|?둗?둛        | 30/200 [00:21<03:00,  1.06s/it]

Evaluating:  16%|?둗?둛        | 31/200 [00:22<03:00,  1.07s/it]

Evaluating:  16%|?둗?둛        | 32/200 [00:23<03:00,  1.07s/it]

Evaluating:  16%|?둗?둚        | 33/200 [00:24<02:59,  1.08s/it]

Evaluating:  17%|?둗?둚        | 34/200 [00:25<02:59,  1.08s/it]

Evaluating:  18%|?둗?둙        | 35/200 [00:26<02:58,  1.08s/it]

Evaluating:  18%|?둗?둙        | 36/200 [00:27<02:58,  1.09s/it]

Evaluating:  18%|?둗?둙        | 37/200 [00:29<03:02,  1.12s/it]

Evaluating:  19%|?둗?둘        | 38/200 [00:30<03:06,  1.15s/it]

Evaluating:  20%|?둗?둘        | 39/200 [00:31<03:09,  1.17s/it]

Evaluating:  20%|?둗?둗        | 40/200 [00:32<03:10,  1.19s/it]

Evaluating:  20%|?둗?둗        | 41/200 [00:33<03:11,  1.20s/it]

Evaluating:  21%|?둗?둗        | 42/200 [00:35<03:10,  1.21s/it]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:36<03:10,  1.22s/it]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:37<03:10,  1.22s/it]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:38<03:09,  1.22s/it]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:40<03:08,  1.22s/it]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:41<03:07,  1.23s/it]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:42<03:06,  1.23s/it]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:43<03:05,  1.23s/it]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:45<03:03,  1.23s/it]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:46<03:02,  1.23s/it]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:47<03:01,  1.23s/it]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:48<03:00,  1.23s/it]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:49<02:59,  1.23s/it]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:51<02:58,  1.23s/it]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:52<02:57,  1.23s/it]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:53<02:56,  1.23s/it]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:54<02:54,  1.23s/it]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:56<02:53,  1.23s/it]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:57<02:52,  1.23s/it]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:58<02:50,  1.23s/it]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:59<02:49,  1.23s/it]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [01:01<02:48,  1.23s/it]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [01:02<02:47,  1.23s/it]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [01:03<02:45,  1.23s/it]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [01:04<02:44,  1.23s/it]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [01:05<02:43,  1.23s/it]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [01:07<02:42,  1.23s/it]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [01:08<02:40,  1.23s/it]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [01:09<02:39,  1.23s/it]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [01:10<02:38,  1.23s/it]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [01:12<02:37,  1.23s/it]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [01:13<02:35,  1.23s/it]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [01:14<02:34,  1.23s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [01:15<02:33,  1.23s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [01:16<02:32,  1.23s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [01:18<02:30,  1.23s/it]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [01:19<02:29,  1.23s/it]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [01:20<02:28,  1.23s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [01:21<02:27,  1.23s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [01:23<02:26,  1.23s/it]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [01:24<02:24,  1.23s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [01:25<02:23,  1.23s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [01:26<02:22,  1.23s/it]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [01:28<02:21,  1.23s/it]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [01:29<02:20,  1.23s/it]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [01:30<02:18,  1.23s/it]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [01:31<02:17,  1.23s/it]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [01:32<02:16,  1.23s/it]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [01:34<02:15,  1.23s/it]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [01:35<02:14,  1.23s/it]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [01:36<02:12,  1.23s/it]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [01:37<02:11,  1.23s/it]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [01:39<02:10,  1.23s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [01:40<02:09,  1.23s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [01:41<02:08,  1.23s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [01:42<02:07,  1.23s/it]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:44<02:05,  1.23s/it]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:45<02:04,  1.23s/it]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:46<02:03,  1.23s/it]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:47<02:01,  1.23s/it]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:48<02:00,  1.23s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:50<01:59,  1.23s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:51<01:58,  1.23s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:52<01:56,  1.23s/it]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:53<01:55,  1.23s/it]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:55<01:54,  1.23s/it]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:56<01:53,  1.23s/it]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:57<01:51,  1.23s/it]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:58<01:50,  1.23s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [02:00<01:49,  1.23s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [02:01<01:47,  1.23s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [02:02<01:46,  1.23s/it]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [02:03<01:45,  1.23s/it]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [02:04<01:44,  1.23s/it]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [02:06<01:43,  1.23s/it]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [02:07<01:41,  1.23s/it]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [02:08<01:40,  1.23s/it]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [02:09<01:39,  1.23s/it]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [02:11<01:38,  1.23s/it]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [02:12<01:37,  1.23s/it]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [02:13<01:35,  1.23s/it]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [02:14<01:34,  1.23s/it]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [02:15<01:33,  1.23s/it]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [02:17<01:32,  1.23s/it]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [02:18<01:30,  1.23s/it]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [02:19<01:29,  1.23s/it]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [02:20<01:28,  1.23s/it]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [02:22<01:27,  1.23s/it]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [02:23<01:25,  1.23s/it]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [02:24<01:24,  1.23s/it]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [02:25<01:23,  1.23s/it]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [02:27<01:22,  1.23s/it]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [02:28<01:21,  1.23s/it]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [02:29<01:19,  1.23s/it]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [02:30<01:18,  1.23s/it]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [02:31<01:17,  1.23s/it]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [02:33<01:16,  1.23s/it]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [02:34<01:14,  1.23s/it]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [02:35<01:13,  1.23s/it]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [02:36<01:12,  1.23s/it]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [02:38<01:11,  1.23s/it]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [02:39<01:09,  1.23s/it]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [02:40<01:08,  1.23s/it]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [02:41<01:07,  1.23s/it]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [02:43<01:06,  1.23s/it]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [02:44<01:05,  1.23s/it]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [02:45<01:03,  1.23s/it]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [02:46<01:02,  1.23s/it]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [02:47<01:01,  1.23s/it]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [02:49<01:00,  1.23s/it]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [02:50<00:59,  1.23s/it]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [02:51<00:58,  1.23s/it]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [02:52<00:56,  1.24s/it]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [02:54<00:55,  1.24s/it]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [02:55<00:54,  1.23s/it]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [02:56<00:53,  1.23s/it]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [02:57<00:51,  1.23s/it]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [02:59<00:50,  1.23s/it]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [03:00<00:49,  1.24s/it]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [03:01<00:48,  1.23s/it]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [03:02<00:46,  1.23s/it]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [03:03<00:45,  1.23s/it]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [03:05<00:44,  1.23s/it]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [03:06<00:43,  1.23s/it]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [03:07<00:41,  1.23s/it]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [03:08<00:40,  1.23s/it]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [03:10<00:39,  1.23s/it]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [03:11<00:38,  1.23s/it]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [03:12<00:36,  1.23s/it]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [03:13<00:35,  1.23s/it]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [03:15<00:34,  1.23s/it]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [03:16<00:33,  1.23s/it]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [03:17<00:31,  1.23s/it]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [03:18<00:30,  1.23s/it]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [03:19<00:29,  1.23s/it]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [03:21<00:28,  1.23s/it]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [03:22<00:27,  1.23s/it]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [03:23<00:25,  1.23s/it]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [03:24<00:24,  1.23s/it]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [03:26<00:23,  1.23s/it]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [03:27<00:22,  1.23s/it]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [03:28<00:20,  1.23s/it]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [03:29<00:19,  1.23s/it]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [03:31<00:18,  1.23s/it]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [03:32<00:17,  1.23s/it]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [03:33<00:15,  1.23s/it]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [03:34<00:14,  1.23s/it]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [03:35<00:13,  1.23s/it]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [03:37<00:12,  1.23s/it]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [03:38<00:11,  1.23s/it]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [03:39<00:09,  1.23s/it]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [03:40<00:08,  1.23s/it]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [03:42<00:07,  1.23s/it]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [03:43<00:06,  1.23s/it]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [03:44<00:04,  1.23s/it]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [03:45<00:03,  1.23s/it]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [03:47<00:02,  1.23s/it]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [03:48<00:01,  1.23s/it]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [03:49<00:00,  1.21s/it]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [03:49<00:00,  1.15s/it]




Loss: 0.9485




Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793




              precision    recall  f1-score   support

           0       0.77      0.66      0.71       797
           1       0.84      0.72      0.78       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.69      0.77       882
           6       0.85      0.80      0.83       940
           7       0.49      0.61      0.54       473
           8       0.66      0.85      0.74       746
           9       0.62      0.73      0.67       689
          10       0.75      0.79      0.77       670
          11       0.62      0.81      0.70       312
          12       0.73      0.81      0.77       665
          13       0.83      0.85      0.84       314
          14       0.85      0.78      0.81       756
          15       0.97      0.98      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




In [11]:
for concern in range(num_labels):
    print(f"--{concern}--")
    positive_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, True, 4, device=device, resample=False, seed=seed
    )
    negative_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, False, 4, device=device, resample=False, seed=seed
    )
    similar(model, module, valid_dataloader, concern, num_samples, num_labels, device=device, seed=seed)

--0--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.831068257121122, 0.831068257121122)




CCA coefficients mean non-concern: (0.8441595751734212, 0.8441595751734212)




Linear CKA concern: 0.9738395966959208




Linear CKA non-concern: 0.9674094657281553




Kernel CKA concern: 0.9668430436139632




Kernel CKA non-concern: 0.9690359565302572




--1--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8334461411175614, 0.8334461411175614)




CCA coefficients mean non-concern: (0.8446490033908034, 0.8446490033908034)




Linear CKA concern: 0.9700128499700185




Linear CKA non-concern: 0.9669615845389624




Kernel CKA concern: 0.9637468635017802




Kernel CKA non-concern: 0.9681939064897804




--2--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8391877528228648, 0.8391877528228648)




CCA coefficients mean non-concern: (0.843495999680554, 0.843495999680554)




Linear CKA concern: 0.9793174683231914




Linear CKA non-concern: 0.9665304647598332




Kernel CKA concern: 0.9722937016385764




Kernel CKA non-concern: 0.9679449235664063




--3--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8402904222068376, 0.8402904222068376)




CCA coefficients mean non-concern: (0.8418189417888919, 0.8418189417888919)




Linear CKA concern: 0.9694738737998547




Linear CKA non-concern: 0.9673198495318781




Kernel CKA concern: 0.9639971989929808




Kernel CKA non-concern: 0.9686888740447196




--4--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8478193190592056, 0.8478193190592056)




CCA coefficients mean non-concern: (0.8429568482469104, 0.8429568482469104)




Linear CKA concern: 0.9803159694666504




Linear CKA non-concern: 0.9659084897170966




Kernel CKA concern: 0.9746067416276166




Kernel CKA non-concern: 0.967904718696344




--5--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.842506701068457, 0.842506701068457)




CCA coefficients mean non-concern: (0.8428487191418247, 0.8428487191418247)




Linear CKA concern: 0.9716100698337461




Linear CKA non-concern: 0.9673034253541007




Kernel CKA concern: 0.9648267369763227




Kernel CKA non-concern: 0.968736865750337




--6--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8332312885487271, 0.8332312885487271)




CCA coefficients mean non-concern: (0.8430425001455353, 0.8430425001455353)




Linear CKA concern: 0.9740632709963699




Linear CKA non-concern: 0.9662940978449277




Kernel CKA concern: 0.9712054654173079




Kernel CKA non-concern: 0.9680267234325056




--7--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8447723461560855, 0.8447723461560855)




CCA coefficients mean non-concern: (0.8430701500729362, 0.8430701500729362)




Linear CKA concern: 0.9727413517826561




Linear CKA non-concern: 0.9675409140119562




Kernel CKA concern: 0.9692080112365536




Kernel CKA non-concern: 0.9689596761779323




--8--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8387545368791602, 0.8387545368791602)




CCA coefficients mean non-concern: (0.8437113900130826, 0.8437113900130826)




Linear CKA concern: 0.9704777131157647




Linear CKA non-concern: 0.9672799629303304




Kernel CKA concern: 0.9651278404158524




Kernel CKA non-concern: 0.9688262854379882




--9--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8313536351554877, 0.8313536351554877)




CCA coefficients mean non-concern: (0.843591558582816, 0.843591558582816)




Linear CKA concern: 0.9733223397182326




Linear CKA non-concern: 0.9668288591946906




Kernel CKA concern: 0.9653506795999623




Kernel CKA non-concern: 0.968908697594421




--10--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8376811025323065, 0.8376811025323065)




CCA coefficients mean non-concern: (0.8443115326424219, 0.8443115326424219)




Linear CKA concern: 0.9718681870436092




Linear CKA non-concern: 0.9660390219842985




Kernel CKA concern: 0.9660334407257577




Kernel CKA non-concern: 0.967794715918686




--11--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8398657321872339, 0.8398657321872339)




CCA coefficients mean non-concern: (0.8436214456909142, 0.8436214456909142)




Linear CKA concern: 0.971996254230643




Linear CKA non-concern: 0.9675772001683952




Kernel CKA concern: 0.9646882004292233




Kernel CKA non-concern: 0.9691284016020194




--12--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.840381891775204, 0.840381891775204)




CCA coefficients mean non-concern: (0.8428169085420496, 0.8428169085420496)




Linear CKA concern: 0.974730244772421




Linear CKA non-concern: 0.9672714942456396




Kernel CKA concern: 0.9706753370012438




Kernel CKA non-concern: 0.9688867475242248




--13--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8410885476557188, 0.8410885476557188)




CCA coefficients mean non-concern: (0.8439942634042091, 0.8439942634042091)




Linear CKA concern: 0.9753768085735152




Linear CKA non-concern: 0.9668258828392337




Kernel CKA concern: 0.9683475755976827




Kernel CKA non-concern: 0.9680893072655644




--14--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8411883185423137, 0.8411883185423137)




CCA coefficients mean non-concern: (0.8431194687874329, 0.8431194687874329)




Linear CKA concern: 0.9762039293319166




Linear CKA non-concern: 0.9664976913513084




Kernel CKA concern: 0.9696362265620787




Kernel CKA non-concern: 0.9685076149843134




--15--




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8481493649643577, 0.8481493649643577)




CCA coefficients mean non-concern: (0.8416454021773245, 0.8416454021773245)




Linear CKA concern: 0.9649629226865248




Linear CKA non-concern: 0.9682323333773646




Kernel CKA concern: 0.9580075880771178




Kernel CKA non-concern: 0.9697255243147499




In [12]:
get_sparsity(module)

(0.29759659416128587,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.2999996609157986,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.2999996609157986,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.2999996609157986,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.2999996609157986,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.2999996609157986,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.2999996609157986,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.2999996609157986,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.2999996609157986,
  'bert.e