In [1]:
import copy
import os
import sys
import torch
# Extend the import search path with the directory four levels up; presumably
# this is the repository root holding the `utils` package imported in the next
# cell — TODO confirm.
# NOTE(review): the path is relative, so it is resolved against the kernel's
# current working directory rather than this notebook's location.
sys.path.append("../../../../")

In [2]:
import utils
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity
from utils.helper import ModelConfig
from utils.dataset_utils.load_dataset import load_data

In [3]:
# Key shared by the model configuration and the dataset loader below.
name = "OSDG"

# Run on the first CUDA GPU when one is visible to torch; otherwise stay on CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Build the configuration object for the entry identified by `name`, bound to
# the device selected above; it is passed to load_model and evaluate_model later.
model_config = ModelConfig(name, device)

In [5]:
# Load the classification model and its tokenizer; the third returned value is
# not needed in this notebook and is discarded.
model, tokenizer, _ = load_model(model_config)

# Build the train / validation / test loaders for the same dataset key.
train_dataloader, valid_dataloader, test_dataloader = load_data(name, batch_size=32)

Loading the model.
{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}
The model sadickam/sdg-classification-bert is loaded.
{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}
Loading cached dataset OSDG.
The dataset OSDG is loaded


In [6]:
from utils.prune_utils.prune import prune_norm_distribution

In [7]:
# Prune the selected layer groups to a 40% target sparsity. The return value is
# not captured; the sparsity report further down suggests `model` is modified
# in place — TODO confirm against prune_norm_distribution.
prune_norm_distribution(
    model,
    include_layers=["attention", "intermediate", "output"],
    sparsity_ratio=0.4,
)

In [8]:
# Evaluate the model on the test split after pruning. The progress bar and the
# loss / precision / recall / F1 report shown below are emitted by
# evaluate_model itself; its return value is kept in `result`.
result = evaluate_model(model, model_config, test_dataloader)

Evaluating: 100%|██████████| 200/200 [02:37<00:00,  1.27it/s]


Loss: 0.9370
Precision: 0.7743, Recall: 0.7774, F1-Score: 0.7718
              precision    recall  f1-score   support

           0       0.76      0.65      0.70       797
           1       0.84      0.71      0.77       775
           2       0.88      0.88      0.88       795
           3       0.87      0.81      0.84      1110
           4       0.84      0.81      0.82      1260
           5       0.89      0.69      0.78       882
           6       0.86      0.77      0.81       940
           7       0.47      0.56      0.51       473
           8       0.66      0.84      0.74       746
           9       0.56      0.74      0.64       689
          10       0.75      0.78      0.76       670
          11       0.69      0.79      0.73       312
          12       0.68      0.81      0.74       665
          13       0.82      0.86      0.84       314
          14       0.85      0.78      0.81       756
          15       0.98      0.96      0.97      1607

    accuracy   

In [9]:
# Display the sparsity report for the model. Per the output below this is a
# tuple: a single float (apparently the overall sparsity — TODO confirm) and a
# dict mapping parameter names to their individual sparsity.
get_sparsity(model)

(0.3967962647635211,
 {'bert.encoder.layer.0.attention.self.query.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.self.query.bias': 0.0,
  'bert.encoder.layer.0.attention.self.key.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.self.key.bias': 0.0,
  'bert.encoder.layer.0.attention.self.value.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.self.value.bias': 0.0,
  'bert.encoder.layer.0.attention.output.dense.weight': 0.4000006781684028,
  'bert.encoder.layer.0.attention.output.dense.bias': 0.0,
  'bert.encoder.layer.0.intermediate.dense.weight': 0.40000025431315106,
  'bert.encoder.layer.0.intermediate.dense.bias': 0.0,
  'bert.encoder.layer.0.output.dense.weight': 0.40000025431315106,
  'bert.encoder.layer.0.output.dense.bias': 0.0,
  'bert.encoder.layer.1.attention.self.query.weight': 0.4000006781684028,
  'bert.encoder.layer.1.attention.self.query.bias': 0.0,
  'bert.encoder.layer.1.attention.self.key.weight': 0.4000006781684028,
  'bert.