In [1]:
import os
import sys
# Add a directory four levels up to the import search path; presumably this is
# the repository root that holds the project-local `utils` package imported in
# the next cell -- confirm. The path is relative, so it resolves against the
# kernel's current working directory.
sys.path.append("../../../../")
# NOTE(review): presumably switches off the HuggingFace tokenizers' internal
# parallelism to avoid fork-related warnings when data-loading workers are
# spawned -- confirm. It is set here, before the model libraries are imported.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_magnitude,
    prune_concern_identification,
    recover_tangling_identification,
)

In [3]:
# Run configuration: the dataset/model key and the device to run on.
name = "OSDG"
# Use the first CUDA device when one is visible; otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Resolve the model settings for `name`; the class count is read from them.
model_config = ModelConfig(name, device)
num_labels = model_config.config["num_labels"]

# `checkpoint` is bound solely from load_model's return value, so no
# placeholder assignment is needed before this call.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.
{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}
The model sadickam/sdg-classification-bert is loaded.


In [4]:
# Build the train/validation/test loaders for the dataset selected by `name`.
# NOTE(review): assumes `num_workers` is forwarded to the underlying data
# loaders -- confirm in utils.dataset_utils.load_dataset.
# The requested worker count is capped at the number of CPUs this host reports
# so smaller machines are not oversubscribed; hosts with 48 or more CPUs still
# get 48 workers. `os.cpu_count()` may return None, hence the `or 1` fallback.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=32, num_workers=min(48, os.cpu_count() or 1)
)

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}
Loading cached dataset OSDG.
The dataset OSDG is loaded


In [5]:
# Index of the module being processed in this run; passed to the samplers below.
i = 0
# Sample count handed to the positive and negative samplers.
num_samples = 64

color_print(f"Start Time:{datetime.now():%H:%M:%S}")
color_print(f"#Module {i} in progress....")

# NOTE(review): the positional arguments are presumably (dataloader, target
# class index, sample count, number of labels, positive flag, batch size) --
# confirm against the SamplingDataset signature.
# NOTE(review): no random seed is set anywhere in the visible cells; if the
# sampler draws randomly, these sets will differ between runs -- confirm.
positive_samples = SamplingDataset(
    train_dataloader,
    i,
    num_samples,
    num_labels,
    True,
    4,
    device=device,
)
negative_samples = SamplingDataset(
    train_dataloader,
    i,
    num_samples,
    num_labels,
    False,
    4,
    device=device,
)
# NOTE(review): index 200 lies outside the 16 labels reported when the model
# was loaded; presumably this makes the "negative" sampler draw from every
# class -- confirm.
all_samples = SamplingDataset(
    train_dataloader,
    200,
    20,
    num_labels,
    False,
    4,
    device=device,
)

Start Time:04:45:20
#Module 0 in progress....


In [6]:
# Only announces the baseline step: the evaluation call below is commented out,
# so this cell produces no metrics for the unpruned model.
print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

Evaluate the original model


In [7]:
# Work on an independent deep copy so the loaded `model` is left as-is; the
# later pruning cells are given both `model` and `module`.
module = copy.deepcopy(model)
# First pruning pass on the copy at a 0.1 sparsity ratio, restricted to the
# listed layer-name groups. The return value is not used.
prune_magnitude(
    module,
    include_layers=["attention", "intermediate", "output"],
    sparsity_ratio=0.1,
)

In [8]:
# Evaluation of the magnitude-pruned copy is disabled; only sparsity is shown.
# result = evaluate_model(module, model_config, test_dataloader)
# Last expression of the cell, so its return value is rendered as the output
# (an overall figure followed by a per-parameter mapping, per the recorded run).
get_sparsity(module)

(0.09919858441371328,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.09999932183159722,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.09999932183159722,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.09999932183159722,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.09999932183159722,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.09999974568684895,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.09999974568684895,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.09999932183159722,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.09999932183159722,
 

In [9]:
# Concern-identification pruning at a 0.5 sparsity ratio. The call receives the
# loaded `model`, the working copy `module`, and the positive sample set.
# NOTE(review): presumably `model` acts as the reference and `module` is
# modified in place (the return value is discarded) -- confirm in
# utils.prune_utils.prune.
prune_concern_identification(
    model, module, positive_samples,
    sparsity_ratio=0.5,
    include_layers=["attention", "intermediate", "output"],
)

In [10]:
# Evaluation after concern identification is disabled; only sparsity is shown.
# result = evaluate_model(module, model_config, test_dataloader)
# Rendered as the cell output because it is the last expression.
get_sparsity(module)

(0.49600633007311706,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.5,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.5,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.5001339382595487,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.5,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.5,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.5,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.5,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.5,
  'bert.encoder.layer.1.attention.self.key.bias': 0.0,
  'bert.encoder.layer.1.attention.self.value.weight': 0.5,


In [11]:
# Tangling-identification recovery pass with a 0.1 recovery ratio, driven by
# the negative sample set and limited to the same layer-name groups as the
# pruning passes.
# NOTE(review): presumably restores part of the pruned weights in `module`
# using `model` as the source (the return value is discarded) -- confirm in
# utils.prune_utils.prune.
recover_tangling_identification(
    model, module, negative_samples,
    include_layers=["attention", "intermediate", "output"],
    recovery_ratio=0.1,
)

In [12]:
# Evaluate the module after the pruning and recovery passes using
# `test_dataloader`; `result` keeps whatever evaluate_model returns.
result = evaluate_model(module, model_config, test_dataloader)
# Saving the module is disabled. NOTE(review): `save_module` is not imported in
# the visible import cell -- import it before re-enabling this line.
# save_module(module, "Modules/", "citi_osdg_40p.pt")
# Last expression of the cell, so the sparsity report is rendered as output.
get_sparsity(module)

Evaluating: 100%|██████████| 200/200 [03:22<00:00,  1.01s/it]


Loss: 0.9497
Precision: 0.7684, Recall: 0.7689, F1-Score: 0.7640
              precision    recall  f1-score   support

           0       0.74      0.63      0.68       797
           1       0.86      0.67      0.75       775
           2       0.87      0.87      0.87       795
           3       0.88      0.79      0.83      1110
           4       0.84      0.81      0.82      1260
           5       0.89      0.69      0.78       882
           6       0.82      0.79      0.81       940
           7       0.46      0.56      0.51       473
           8       0.65      0.84      0.73       746
           9       0.55      0.72      0.62       689
          10       0.75      0.77      0.76       670
          11       0.68      0.76      0.72       312
          12       0.65      0.81      0.72       665
          13       0.83      0.85      0.84       314
          14       0.85      0.76      0.81       756
          15       0.97      0.96      0.97      1607

    accuracy   

(0.3968077456594038,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.4001346164279514,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.40000025431315106,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.40000025431315106,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.4000006781684028,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.4000006781684028,
  'bert.