In [1]:
import os
import sys

# Extend the import search path four directory levels up so that the project's
# `utils` package (imported in the next cell) can be resolved -- presumably this
# is the repository root; the path is relative to the kernel's working directory.
# The membership check keeps the cell idempotent: re-running it no longer piles
# duplicate entries onto sys.path.
_PROJECT_ROOT = "../../../../"
if _PROJECT_ROOT not in sys.path:
    sys.path.append(_PROJECT_ROOT)

# Turn off parallelism in the Hugging Face `tokenizers` library; must be set
# before the tokenizer is used (commonly done to avoid fork-related warnings
# when DataLoader worker processes are spawned).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.load_model import load_model
from utils.model_utils.save_module import save_module
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_concern_identification,
    recover_tangling_identification,
)

In [3]:
# ---- Experiment configuration -------------------------------------------------
# Key passed to ModelConfig and load_data to select the model/dataset pair.
name = "YahooAnswersTopics"
# Prefer the first GPU, but fall back to the CPU so the notebook still runs
# (slowly) on machines without CUDA instead of failing at the first tensor move.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Placeholder; reassigned from the return value of load_model() further down.
checkpoint = None
# DataLoader settings forwarded to load_data().
batch_size = 16
num_workers = 4
# Forwarded to SamplingDataset and similar() in the pruning cell.
num_samples = 4
# Passed as `sparsity_ratio` to prune_concern_identification().
ci_ratio = 0.4
# Seed forwarded to SamplingDataset and similar() for reproducible sampling.
seed = 44
# Layer-name filters forwarded to prune_concern_identification().
include_layers = ["attention", "intermediate", "output"]
exclude_layers = None

In [None]:
# Capture the wall-clock time at which this run began and report it.
script_start_time = datetime.now()
print(f"Script started at: {script_start_time:%Y-%m-%d %H:%M:%S}")

In [4]:
# Build the project configuration object for the selected model/dataset and device.
model_config = ModelConfig(name, device)
# Number of labels read from the config; it bounds the per-concern pruning loop below.
num_labels = model_config.config["num_labels"]
# load_model returns the model, its tokenizer and a checkpoint value; the latter
# overwrites the `checkpoint = None` placeholder set in the configuration cell.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.
{'model_name': 'fabriceyhc/bert-base-uncased-yahoo_answers_topics', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'YahooAnswersTopics', 'num_labels': 10, 'cache_dir': 'Models'}
The model fabriceyhc/bert-base-uncased-yahoo_answers_topics is loaded.


In [5]:
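# NOTE: `test_dataloader` is not defined until load_data() runs in the pruning cell
# further down; load the data first before re-enabling this baseline evaluation.
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)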

In [6]:
# Evaluate the original model
# Evaluating: 100%|██████████| 1875/1875 [30:03<00:00,  1.04it/s]
# Loss: 1.0044
# Precision: 0.6874, Recall: 0.6865, F1-Score: 0.6839
#               precision    recall  f1-score   support

#            0       0.57      0.57      0.57      6000
#            1       0.74      0.66      0.69      6000
#            2       0.71      0.78      0.74      6000
#            3       0.54      0.53      0.53      6000
#            4       0.80      0.82      0.81      6000
#            5       0.90      0.84      0.87      6000
#            6       0.61      0.43      0.50      6000
#            7       0.62      0.73      0.67      6000
#            8       0.64      0.76      0.70      6000
#            9       0.75      0.75      0.75      6000

#     accuracy                           0.69     60000
#    macro avg       0.69      0.69      0.68     60000
# weighted avg       0.69      0.69      0.68     60000

In [7]:
# The dataset splits do not depend on the concern, so load them once up front
# instead of re-reading the cached dataset on every loop iteration.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=batch_size, num_workers=num_workers, do_cache=True
)

# For every label ("concern"): sample data, prune a fresh copy of the model
# with prune_concern_identification, then evaluate the pruned copy and compare
# it with the original model.
for concern in range(num_labels):
    # The two sample sets differ only in the boolean flag (True vs. False) --
    # presumably "samples of the concern class" vs. "samples of other classes";
    # TODO confirm against SamplingDataset. The positional `4` that follows the
    # flag is kept as in the original call; its meaning is not visible here.
    positive_samples = SamplingDataset(
        train_dataloader,
        concern,
        num_samples,
        num_labels,
        True,
        4,
        device=device,
        resample=False,
        seed=seed,
    )
    negative_samples = SamplingDataset(
        train_dataloader,
        concern,
        num_samples,
        num_labels,
        False,
        4,
        device=device,
        resample=False,
        seed=seed,
    )
    # NOTE(review): `all_samples` is not consumed anywhere in this cell, and the
    # concern value 200 lies outside range(num_labels). Kept unchanged in case
    # a later cell reads it -- verify and delete if truly unused.
    all_samples = SamplingDataset(
        train_dataloader,
        200,
        num_samples,
        num_labels,
        False,
        4,
        device=device,
        resample=False,
        seed=seed,
    )

    # Prune a deep copy so the original `model` stays intact for the comparison
    # below and for the next concern.
    module = copy.deepcopy(model)

    prune_concern_identification(
        module,
        model_config,
        positive_samples,
        negative_samples,
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ci_ratio,
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, model_config, test_dataloader)
    get_sparsity(module)

    # Compare the original model with the pruned copy on the validation split.
    similar(
        model,
        module,
        valid_dataloader,
        concern,
        num_samples,
        num_labels,
        device=device,
        seed=seed,
    )

    # NOTE(review): the file name below does not include `concern`, so enabling
    # this line as-is would write every concern's module to the same file.
    # save_module(module, "Modules/", f"ci_{name}_{ci_ratio}p.pt")

{'dataset_name': 'YahooAnswersTopics', 'path': 'yahoo_answers_topics', 'config_name': 'yahoo_answers_topics', 'text_column': 'question_title', 'label_column': 'topic', 'cache_dir': 'Datasets/Yahoo', 'task_type': 'classification'}
Loading cached dataset YahooAnswersTopics.
The dataset YahooAnswersTopics is loaded
Evaluate the pruned model 0


Evaluating: 100%|██████████| 1875/1875 [09:44<00:00,  3.21it/s]


Loss: 1.0367
Precision: 0.6741, Recall: 0.6699, F1-Score: 0.6684
              precision    recall  f1-score   support

           0       0.57      0.56      0.56      6000
           1       0.73      0.63      0.68      6000
           2       0.71      0.75      0.73      6000
           3       0.54      0.49      0.51      6000
           4       0.80      0.80      0.80      6000
           5       0.91      0.80      0.85      6000
           6       0.53      0.42      0.47      6000
           7       0.56      0.75      0.64      6000
           8       0.64      0.76      0.69      6000
           9       0.75      0.73      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.67      0.67      0.67     60000
weighted avg       0.67      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [10:03<00:00,  3.11it/s]


Loss: 1.0286
Precision: 0.6772, Recall: 0.6718, F1-Score: 0.6702
              precision    recall  f1-score   support

           0       0.55      0.57      0.56      6000
           1       0.74      0.63      0.68      6000
           2       0.71      0.76      0.74      6000
           3       0.54      0.51      0.52      6000
           4       0.81      0.80      0.81      6000
           5       0.91      0.80      0.85      6000
           6       0.57      0.40      0.47      6000
           7       0.55      0.75      0.64      6000
           8       0.65      0.75      0.70      6000
           9       0.74      0.73      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.68      0.67      0.67     60000
weighted avg       0.68      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [10:43<00:00,  2.92it/s]


Loss: 1.0356
Precision: 0.6761, Recall: 0.6702, F1-Score: 0.6687
              precision    recall  f1-score   support

           0       0.55      0.57      0.56      6000
           1       0.74      0.62      0.68      6000
           2       0.72      0.76      0.74      6000
           3       0.52      0.51      0.52      6000
           4       0.81      0.80      0.81      6000
           5       0.92      0.78      0.85      6000
           6       0.57      0.40      0.47      6000
           7       0.56      0.75      0.64      6000
           8       0.64      0.76      0.69      6000
           9       0.73      0.74      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.68      0.67      0.67     60000
weighted avg       0.68      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [11:18<00:00,  2.76it/s]


Loss: 1.0318
Precision: 0.6760, Recall: 0.6707, F1-Score: 0.6690
              precision    recall  f1-score   support

           0       0.56      0.56      0.56      6000
           1       0.73      0.64      0.68      6000
           2       0.73      0.74      0.74      6000
           3       0.54      0.51      0.52      6000
           4       0.80      0.81      0.80      6000
           5       0.92      0.79      0.85      6000
           6       0.56      0.40      0.47      6000
           7       0.55      0.75      0.64      6000
           8       0.63      0.77      0.69      6000
           9       0.74      0.74      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.68      0.67      0.67     60000
weighted avg       0.68      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [11:14<00:00,  2.78it/s]


Loss: 1.0458
Precision: 0.6718, Recall: 0.6663, F1-Score: 0.6651
              precision    recall  f1-score   support

           0       0.54      0.57      0.55      6000
           1       0.74      0.62      0.68      6000
           2       0.73      0.74      0.73      6000
           3       0.53      0.50      0.51      6000
           4       0.78      0.83      0.80      6000
           5       0.92      0.77      0.84      6000
           6       0.54      0.41      0.46      6000
           7       0.56      0.74      0.64      6000
           8       0.64      0.76      0.69      6000
           9       0.74      0.73      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.67      0.67      0.67     60000
weighted avg       0.67      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [11:00<00:00,  2.84it/s]


Loss: 1.0431
Precision: 0.6725, Recall: 0.6678, F1-Score: 0.6669
              precision    recall  f1-score   support

           0       0.56      0.57      0.56      6000
           1       0.74      0.61      0.67      6000
           2       0.75      0.73      0.74      6000
           3       0.51      0.52      0.52      6000
           4       0.81      0.80      0.80      6000
           5       0.91      0.80      0.85      6000
           6       0.51      0.41      0.45      6000
           7       0.58      0.74      0.65      6000
           8       0.63      0.77      0.69      6000
           9       0.73      0.75      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.67      0.67      0.67     60000
weighted avg       0.67      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [11:11<00:00,  2.79it/s]


Loss: 1.0283
Precision: 0.6769, Recall: 0.6729, F1-Score: 0.6711
              precision    recall  f1-score   support

           0       0.57      0.56      0.56      6000
           1       0.73      0.64      0.68      6000
           2       0.71      0.76      0.73      6000
           3       0.53      0.51      0.52      6000
           4       0.81      0.81      0.81      6000
           5       0.92      0.79      0.85      6000
           6       0.56      0.42      0.48      6000
           7       0.57      0.74      0.64      6000
           8       0.64      0.77      0.70      6000
           9       0.74      0.74      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.68      0.67      0.67     60000
weighted avg       0.68      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [11:12<00:00,  2.79it/s]


Loss: 1.0475
Precision: 0.6738, Recall: 0.6675, F1-Score: 0.6667
              precision    recall  f1-score   support

           0       0.56      0.56      0.56      6000
           1       0.74      0.62      0.67      6000
           2       0.73      0.74      0.74      6000
           3       0.52      0.51      0.52      6000
           4       0.81      0.79      0.80      6000
           5       0.92      0.78      0.85      6000
           6       0.53      0.41      0.46      6000
           7       0.56      0.75      0.64      6000
           8       0.63      0.77      0.69      6000
           9       0.73      0.74      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.67      0.67      0.67     60000
weighted avg       0.67      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [10:51<00:00,  2.88it/s]


Loss: 1.0720
Precision: 0.6709, Recall: 0.6614, F1-Score: 0.6620
              precision    recall  f1-score   support

           0       0.56      0.55      0.55      6000
           1       0.75      0.58      0.66      6000
           2       0.74      0.73      0.73      6000
           3       0.51      0.52      0.51      6000
           4       0.82      0.78      0.80      6000
           5       0.91      0.78      0.84      6000
           6       0.49      0.43      0.46      6000
           7       0.56      0.74      0.64      6000
           8       0.62      0.78      0.69      6000
           9       0.75      0.72      0.73      6000

    accuracy                           0.66     60000
   macro avg       0.67      0.66      0.66     60000
weighted avg       0.67      0.66      0.66     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa

Evaluating: 100%|██████████| 1875/1875 [10:22<00:00,  3.01it/s]


Loss: 1.0314
Precision: 0.6755, Recall: 0.6711, F1-Score: 0.6696
              precision    recall  f1-score   support

           0       0.56      0.56      0.56      6000
           1       0.73      0.63      0.68      6000
           2       0.71      0.76      0.73      6000
           3       0.53      0.51      0.52      6000
           4       0.81      0.80      0.80      6000
           5       0.91      0.80      0.85      6000
           6       0.56      0.41      0.47      6000
           7       0.56      0.74      0.64      6000
           8       0.64      0.76      0.70      6000
           9       0.74      0.74      0.74      6000

    accuracy                           0.67     60000
   macro avg       0.68      0.67      0.67     60000
weighted avg       0.68      0.67      0.67     60000

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking squa