In [1]:
import os
import sys

sys.path.append("../../../../")
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.load_model import load_model
from utils.model_utils.save_module import save_module
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_concern_identification,
    recover_tangling_identification,
)

In [3]:
name = "YahooAnswersTopics"
device = torch.device("cuda:0")
checkpoint = None
batch_size = 16
num_workers = 4
num_samples = 16
ci_ratio = 0.3
seed = 44
include_layers = ["attention", "intermediate", "output"]
exclude_layers = None

In [4]:
script_start_time = datetime.now()
print(f"Script started at: {script_start_time.strftime('%Y-%m-%d %H:%M:%S')}")

Script started at: 2024-08-25 07:06:20


In [5]:
model_config = ModelConfig(name, device)
num_labels = model_config.config["num_labels"]
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'fabriceyhc/bert-base-uncased-yahoo_answers_topics', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'YahooAnswersTopics', 'num_labels': 10, 'cache_dir': 'Models'}




The model fabriceyhc/bert-base-uncased-yahoo_answers_topics is loaded.




In [6]:
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=batch_size, num_workers=num_workers, do_cache=True, seed=seed
)

{'dataset_name': 'YahooAnswersTopics', 'path': 'yahoo_answers_topics', 'config_name': 'yahoo_answers_topics', 'text_column': 'question_title', 'label_column': 'topic', 'cache_dir': 'Datasets/Yahoo', 'task_type': 'classification'}




Loading cached dataset YahooAnswersTopics.




The dataset YahooAnswersTopics is loaded




In [7]:
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

In [8]:
for concern in range(num_labels):
    train = copy.deepcopy(train_dataloader)
    valid = copy.deepcopy(valid_dataloader)
    positive_samples = SamplingDataset(
        train, concern, num_samples // 2, num_labels, True, 4, device=device, resample=False, seed=seed
    )
    negative_samples = SamplingDataset(
        train, concern, num_samples // 2, num_labels, False, 4, device=device, resample=False, seed=seed
    )
    all_samples = SamplingDataset(
        train, 200, num_samples // 2, num_labels, False, 4, device=device, resample=False, seed=seed
    )

    module = copy.deepcopy(model)

    prune_concern_identification(
        module,
        model_config,
        positive_samples,
        negative_samples,
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ci_ratio,
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, model_config, test_dataloader)
    get_sparsity(module)

    similar(model, module, valid, concern, num_samples, num_labels, device=device, seed=seed)

    # save_module(module, "Modules/", f"ci_{name}_{ci_ratio}p.pt")

  attn_output = torch.nn.functional.scaled_dot_product_attention(


Evaluate the pruned model 0




Evaluating:   0%|                                                                   | 0/1875 [00:46<?, ?it/s]

Loss: 1.0101




Precision: 0.6799, Recall: 0.6787, F1-Score: 0.6762




              precision    recall  f1-score   support

           0       0.56      0.55      0.55      2972
           1       0.73      0.66      0.70      3016
           2       0.72      0.77      0.74      2985
           3       0.53      0.51      0.52      3023
           4       0.80      0.82      0.81      3039
           5       0.90      0.82      0.86      3076
           6       0.57      0.42      0.49      2965
           7       0.60      0.75      0.66      3031
           8       0.64      0.76      0.69      2932
           9       0.74      0.74      0.74      2961

    accuracy                           0.68     30000
   macro avg       0.68      0.68      0.68     30000
weighted avg       0.68      0.68      0.68     30000





adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8944044719206737, 0.8944044719206737)




CCA coefficients mean non-concern: (0.8859150193299319, 0.8859150193299319)




Linear CKA concern: 0.9817991752427826




Linear CKA non-concern: 0.9724635751819891




Kernel CKA concern: 0.9685056167148162




Kernel CKA non-concern: 0.9582759755549636




Evaluate the pruned model 1




Evaluating:   0%|                                                                   | 0/1875 [01:45<?, ?it/s]

Loss: 1.0048




Precision: 0.6836, Recall: 0.6800, F1-Score: 0.6788




              precision    recall  f1-score   support

           0       0.57      0.55      0.56      2972
           1       0.73      0.67      0.70      3016
           2       0.71      0.77      0.74      2985
           3       0.52      0.53      0.53      3023
           4       0.84      0.79      0.81      3039
           5       0.91      0.82      0.86      3076
           6       0.58      0.43      0.49      2965
           7       0.59      0.75      0.66      3031
           8       0.66      0.74      0.70      2932
           9       0.73      0.76      0.74      2961

    accuracy                           0.68     30000
   macro avg       0.68      0.68      0.68     30000
weighted avg       0.68      0.68      0.68     30000





adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9024885740008456, 0.9024885740008456)




CCA coefficients mean non-concern: (0.8911290131817858, 0.8911290131817858)




Linear CKA concern: 0.988962882971103




Linear CKA non-concern: 0.9726211025478484




Kernel CKA concern: 0.9814448640198944




Kernel CKA non-concern: 0.960527708375459




Evaluate the pruned model 2




Evaluating:   0%|                                                                   | 0/1875 [01:47<?, ?it/s]

Loss: 1.0055




Precision: 0.6822, Recall: 0.6794, F1-Score: 0.6777




              precision    recall  f1-score   support

           0       0.56      0.55      0.56      2972
           1       0.74      0.65      0.69      3016
           2       0.71      0.77      0.74      2985
           3       0.53      0.52      0.52      3023
           4       0.82      0.80      0.81      3039
           5       0.91      0.82      0.86      3076
           6       0.57      0.43      0.49      2965
           7       0.59      0.75      0.66      3031
           8       0.65      0.75      0.70      2932
           9       0.75      0.74      0.74      2961

    accuracy                           0.68     30000
   macro avg       0.68      0.68      0.68     30000
weighted avg       0.68      0.68      0.68     30000





adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8901029582503339, 0.8901029582503339)




CCA coefficients mean non-concern: (0.8891570261925503, 0.8891570261925503)




Linear CKA concern: 0.9910036999234629




Linear CKA non-concern: 0.9676624316523729




Kernel CKA concern: 0.9856894897767255




Kernel CKA non-concern: 0.9493682002224816




Evaluate the pruned model 3




Evaluating:   0%|                                                                   | 0/1875 [01:46<?, ?it/s]

Loss: 1.0043




Precision: 0.6820, Recall: 0.6805, F1-Score: 0.6780




              precision    recall  f1-score   support

           0       0.56      0.55      0.55      2972
           1       0.74      0.67      0.70      3016
           2       0.72      0.76      0.74      2985
           3       0.54      0.52      0.53      3023
           4       0.81      0.82      0.81      3039
           5       0.91      0.82      0.86      3076
           6       0.58      0.42      0.48      2965
           7       0.60      0.75      0.66      3031
           8       0.64      0.76      0.69      2932
           9       0.73      0.75      0.74      2961

    accuracy                           0.68     30000
   macro avg       0.68      0.68      0.68     30000
weighted avg       0.68      0.68      0.68     30000





adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.8996963116035979, 0.8996963116035979)




CCA coefficients mean non-concern: (0.9024454978025902, 0.9024454978025902)




Linear CKA concern: 0.9870405942980487




Linear CKA non-concern: 0.9817912707496502




Kernel CKA concern: 0.9791072193052895




Kernel CKA non-concern: 0.974604297747883




Evaluate the pruned model 4




Evaluating:   0%|                                                                   | 0/1875 [01:48<?, ?it/s]

Loss: 1.0069




Precision: 0.6805, Recall: 0.6787, F1-Score: 0.6760




              precision    recall  f1-score   support

           0       0.56      0.55      0.56      2972
           1       0.74      0.66      0.69      3016
           2       0.71      0.77      0.74      2985
           3       0.54      0.51      0.52      3023
           4       0.81      0.82      0.81      3039
           5       0.91      0.82      0.86      3076
           6       0.58      0.41      0.48      2965
           7       0.59      0.75      0.66      3031
           8       0.63      0.76      0.69      2932
           9       0.74      0.74      0.74      2961

    accuracy                           0.68     30000
   macro avg       0.68      0.68      0.68     30000
weighted avg       0.68      0.68      0.68     30000





adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9030679805895548, 0.9030679805895548)




CCA coefficients mean non-concern: (0.8915680408202035, 0.8915680408202035)




Linear CKA concern: 0.9914205603021851




Linear CKA non-concern: 0.9729565703772475




Kernel CKA concern: 0.9852488094320682




Kernel CKA non-concern: 0.959788351668348




Evaluate the pruned model 5




Evaluating:   0%|                                                                   | 0/1875 [01:56<?, ?it/s]

Unexpected exception formatting exception. Falling back to standard exception




Traceback (most recent call last):


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)


  File "C:\Users\Administrator\AppData\Local\Temp\ipykernel_10708\3617918227.py", line 27, in <module>
    result = evaluate_model(module, model_config, test_dataloader)


  File "C:\Users\Administrator\Documents\Lab\DecomposeTransformer\utils\model_utils\evaluate.py", line 32, in evaluate_model
    for batch in tqdm(


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\tqdm\notebook.py", line 250, in __iter__
    for obj in it:


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\tqdm\std.py", line 1181, in __iter__
    for obj in iterable:


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\torch\utils\data\dataloader.py", line 440, in __iter__
    return self._get_iterator()


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\torch\utils\data\dataloader.py", line 388, in _get_iterator
    return _MultiProcessingDataLoaderIter(self)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\torch\utils\data\dataloader.py", line 1038, in __init__
    w.start()


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\multiprocessing\process.py", line 121, in start
    self._popen = self._Popen(self)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\multiprocessing\context.py", line 224, in _Popen
    return _default_context.get_context().Process._Popen(process_obj)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\multiprocessing\context.py", line 327, in _Popen
    return Popen(process_obj)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\multiprocessing\popen_spawn_win32.py", line 93, in __init__
    reduction.dump(process_obj, to_child)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\multiprocessing\reduction.py", line 60, in dump
    ForkingPickler(file, protocol).dump(obj)


MemoryError



During handling of the above exception, another exception occurred:



Traceback (most recent call last):


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\executing\executing.py", line 317, in executing
    args = executing_cache[key]


KeyError: (<code object run_code at 0x0000010CA9D3C240, file "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\interactiveshell.py", line 3472>, 1153900462656, 74)



During handling of the above exception, another exception occurred:



Traceback (most recent call last):


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\interactiveshell.py", line 2105, in showtraceback
    stb = self.InteractiveTB.structured_traceback(


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\ultratb.py", line 1396, in structured_traceback
    return FormattedTB.structured_traceback(


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\ultratb.py", line 1287, in structured_traceback
    return VerboseTB.structured_traceback(


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\ultratb.py", line 1140, in structured_traceback
    formatted_exception = self.format_exception_as_a_whole(etype, evalue, etb, number_of_lines_of_context,


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\ultratb.py", line 1030, in format_exception_as_a_whole
    self.get_records(etb, number_of_lines_of_context, tb_offset) if etb else []


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\IPython\core\ultratb.py", line 1127, in get_records
    res = list(stack_data.FrameInfo.stack_data(etb, options=options))[tb_offset:]


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\stack_data\core.py", line 565, in stack_data
    yield from collapse_repeated(


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\stack_data\utils.py", line 84, in collapse_repeated
    yield from map(mapper, original_group)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\stack_data\core.py", line 555, in mapper
    return cls(f, options)


  File "C:\Users\Administrator\anaconda3\envs\DecomposeTransformer\lib\site-packages\stack_data\core.py", line 520, in __init__
    self.executing = Source.executing(frame_or_tb)
