In [1]:
import os
import sys
# Turn off the `tokenizers` library's own parallelism. This is set in the very
# first cell, i.e. before any tokenizer or dataloader is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"
# Make modules located four directories above the working directory importable
# (presumably where the project's `utils` package lives — confirm).
sys.path.append("../../../../")

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.load_model import load_model
from utils.model_utils.save_module import save_module
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_magnitude,
    prune_concern_identification,
    recover_tangling_identification,
)

In [3]:
# Experiment configuration: every value a reader might tune lives in this cell.
name = "IMDB"  # dataset/model key handed to ModelConfig and load_data
# Prefer the first CUDA device, but fall back to the CPU so the notebook still
# runs (slowly) on machines without a GPU instead of failing on the first
# transfer to the device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
checkpoint = None  # placeholder; reassigned from load_model's return value
batch_size = 32  # forwarded to load_data
num_workers = 48  # forwarded to load_data
num_samples = 16  # forwarded to SamplingDataset and similar
concern = 0  # target label forwarded to SamplingDataset and similar
magnitude_ratio = 0.1  # sparsity_ratio for prune_magnitude
ci_ratio = 0.4  # sparsity_ratio for prune_concern_identification
ti_ratio = 0.1  # recovery_ratio for recover_tangling_identification
include_layers = ["attention", "intermediate", "output"]  # layer groups passed to the pruning calls

In [4]:
# Build the configuration object for the model/dataset named in `name` on `device`.
model_config = ModelConfig(name, device)
# Number of classes, read from the configuration (2 for IMDB, per the log printed below).
num_labels = model_config.config["num_labels"]
# load_model returns the model, its tokenizer and a checkpoint value; the latter
# overwrites the `checkpoint = None` placeholder set in the configuration cell.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.
{'model_name': 'textattack/bert-base-uncased-imdb', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'IMDB', 'num_labels': 2, 'cache_dir': 'Models'}
The model textattack/bert-base-uncased-imdb is loaded.


In [5]:
# Build the three dataloaders (train / validation / test) for the dataset named in `name`.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name,
    num_workers=num_workers,
    batch_size=batch_size,
)

{'dataset_name': 'IMDB', 'path': 'imdb', 'config_name': 'plain_text', 'text_column': 'text', 'label_column': 'label', 'cache_dir': 'Datasets/IMDB', 'task_type': 'classification'}
Loading cached dataset IMDB.
The dataset IMDB is loaded


In [6]:
def _sample_from_train(label, positive):
    """Build a SamplingDataset over the training dataloader.

    `label` and `positive` are forwarded as the second and fifth positional
    arguments of SamplingDataset; all remaining arguments are identical for
    the three sample sets created below.

    NOTE(review): judging by the variable names used below, `positive`
    presumably selects samples of `label` (True) versus samples of the other
    labels (False) — confirm against SamplingDataset.
    """
    return SamplingDataset(
        train_dataloader,
        label,
        num_samples,
        num_labels,
        positive,
        4,
        device=device,
        resample=False,
    )


# Created in the same order as before: positive, negative, then "all".
positive_samples = _sample_from_train(concern, True)
negative_samples = _sample_from_train(concern, False)
# NOTE(review): 200 is not a valid label for this task (num_labels is 2), so
# together with False this presumably draws from every class — confirm.
all_samples = _sample_from_train(200, False)

In [7]:
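# Baseline evaluation of the original (unpruned) model. Disabled by default:
# a full pass over the test set takes several minutes. Uncomment to reproduce.
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)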

In [8]:
# Recorded output from an earlier run of the baseline evaluation (reference only, not code):
# Evaluate the original model
# Evaluating: 100%|█████████████████████████████████████████████████████████████████████| 782/782 [05:36<00:00,  2.32it/s]
# Loss: 0.3423
# Precision: 0.9306, Recall: 0.9303, F1-Score: 0.9303
#               precision    recall  f1-score   support

#            0       0.92      0.94      0.93     12500
#            1       0.94      0.92      0.93     12500

#     accuracy                           0.93     25000
#    macro avg       0.93      0.93      0.93     25000
# weighted avg       0.93      0.93      0.93     25000

In [9]:
# Work on a deep copy so that `model` stays untouched; later cells pass it
# alongside `module` to the pruning, recovery and similarity calls.
module = copy.deepcopy(model)
# Magnitude pruning of the selected layer groups at `magnitude_ratio`
# (presumably modifies `module` in place, since the return value is discarded).
prune_magnitude(module, sparsity_ratio=magnitude_ratio, include_layers=include_layers)

In [10]:
# Concern identification: prune `module` at `ci_ratio` using the positive and
# negative samples of `concern`; `model` is passed alongside it.
# NOTE(review): `module` is not re-copied in this cell, so running the cell
# twice presumably prunes twice — re-run the deepcopy cell first.
prune_concern_identification(
    model, module, model_config, positive_samples, negative_samples,
    sparsity_ratio=ci_ratio,
    include_layers=include_layers,
)

In [11]:
# Evaluate `module` on the test dataloader after magnitude pruning and
# concern identification (before the recovery step).
print("Evaluate the pruned model")
result = evaluate_model(module, model_config, test_dataloader)
# get_sparsity(module)

Evaluate the pruned model


Evaluating: 100%|██████████| 782/782 [06:03<00:00,  2.15it/s]


Loss: 0.7852
Precision: 0.7879, Recall: 0.6539, F1-Score: 0.6083
              precision    recall  f1-score   support

           0       0.59      1.00      0.74     12500
           1       0.98      0.31      0.47     12500

    accuracy                           0.65     25000
   macro avg       0.79      0.65      0.61     25000
weighted avg       0.79      0.65      0.61     25000



In [12]:
# Compare the original `model` with the pruned `module` on the validation
# dataloader; the printed log reports CCA coefficients followed by two scores.
similar(model, module, valid_dataloader, concern, num_samples, num_labels, device=device)

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
CCA coefficients mean: (0.5433493594006527, 0.5433493594006527)
CCA coefficients sum: (368.0388546534932, 368.0388546534932)
CCA coefficients mean: (0.5451780714295638, 0.5451780714295638)
CCA coefficients sum: (369.8632166900369, 369.8632166900369)
0.5670135844400125
0.26540227562591734


In [13]:
# Tangling identification: recovery step on `module` at `ti_ratio`, driven by
# the negative samples; `model` is passed alongside it.
# NOTE(review): like the pruning cell, this presumably modifies `module` in
# place, so the cell is not safe to run twice without rebuilding `module`.
recover_tangling_identification(
    model, module, model_config, negative_samples,
    include_layers=include_layers,
    recovery_ratio=ti_ratio,
)

In [14]:
# Re-evaluate `module` on the test dataloader after the recovery step.
# This overwrites the `result` stored by the earlier pruned-model evaluation.
result = evaluate_model(module, model_config, test_dataloader)

Evaluating: 100%|██████████| 782/782 [06:13<00:00,  2.09it/s]


Loss: 0.3230
Precision: 0.9270, Recall: 0.9259, F1-Score: 0.9258
              precision    recall  f1-score   support

           0       0.91      0.95      0.93     12500
           1       0.95      0.90      0.92     12500

    accuracy                           0.93     25000
   macro avg       0.93      0.93      0.93     25000
weighted avg       0.93      0.93      0.93     25000



In [15]:
# Repeat the model-vs-module similarity check on the validation dataloader,
# this time after the recovery step, for comparison with the earlier run.
similar(model, module, valid_dataloader, concern, num_samples, num_labels, device=device)

adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
adding eps to diagonal and taking inverse
taking square root
dot products...
trying to take final svd
computed everything!
CCA coefficients mean: (0.8291382499619583, 0.8291382499619583)
CCA coefficients sum: (627.7335690551771, 627.7335690551771)
CCA coefficients mean: (0.8217907297861077, 0.8217907297861077)
CCA coefficients sum: (621.7892149505208, 621.7892149505208)
0.9522767673558088
0.9232366924736657


In [16]:
# get_sparsity(module)

In [17]:
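# NOTE: ci_ratio - ti_ratio evaluates to 0.30000000000000004 in floating point, which
# would end up verbatim in the file name; format it (e.g. `:.1f`) before re-enabling.
# save_module(module, "Modules/", f"citi_{name}_{ci_ratio-ti_ratio}p.pt")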

In [18]:
        # importance_score = torch.abs(current_weight) * torch.abs((y).reshape((-1, 1)))
