In [1]:
import os
import sys
# Add the directory four levels up to the module search path — presumably the
# project root, so that the `utils` package imported in the next cell resolves.
sys.path.append("../../../../")
# Set before the model/tokenizer are loaded. NOTE(review): this is the Hugging Face
# tokenizers switch for its internal parallelism; "false" is commonly used to avoid
# fork-related warnings when DataLoader workers are spawned — confirm that is the intent.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.save_module import save_module
from utils.model_utils.load_model import load_model
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_magnitude
)

In [3]:
# --- Experiment configuration: everything a reader might tune lives here ---

# Model / dataset key; passed to both ModelConfig and load_data further down.
name = "OSDG"
device = torch.device("cuda:0")
# Placeholder only: rebound by the load_model(...) call in a later cell.
checkpoint = None

# Data loading and sampling
seed = 44
batch_size = 32
num_workers = 48
num_samples = 16

# Magnitude pruning: forwarded to prune_magnitude as `sparsity_ratio`,
# together with the layer-name filters below.
magnitude_ratio = 0.3
include_layers = ["attention", "intermediate", "output"]
exclude_layers = None

In [4]:
# Record when the run began and echo it in a human-readable form.
script_start_time = datetime.now()
_started_label = script_start_time.strftime('%Y-%m-%d %H:%M:%S')
print(f"Script started at: {_started_label}")

Script started at: 2024-08-19 22:50:47


In [5]:
# Build the configuration for the model selected by `name`, bound to `device`.
model_config = ModelConfig(name, device)
# Number of target labels, read from the loaded configuration dictionary.
num_labels = model_config.config["num_labels"]
# Load the model and its tokenizer. This also rebinds `checkpoint`, which the
# configuration cell initialised to None.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
# Build the train / validation / test dataloaders for the dataset selected by `name`,
# using the batch size, worker count and seed from the configuration cell.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name,
    batch_size=batch_size,
    num_workers=num_workers,
    do_cache=True,
    seed=seed,
)

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




In [7]:
# Draw a sample set from the training dataloader.
# NOTE(review): the second positional argument sits where the per-concern loop
# later passes a label index, and the fifth where it passes True/False for
# positive/negative sampling; 200 is outside range(num_labels), so this
# presumably selects samples from every label — confirm against SamplingDataset.
# The meaning of the literal 4 is not visible from this notebook.
all_samples = SamplingDataset(
    train_dataloader,
    200,
    num_samples,
    num_labels,
    False,
    4,
    device=device,
    resample=False,
    seed=seed,
)

In [8]:
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

In [9]:
# Evaluate the original model
# Evaluating: 100%|██████████| 200/200 [03:16<00:00,  1.02it/s]
# Loss: 0.9485
# Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793
#               precision    recall  f1-score   support

#            0       0.77      0.66      0.71       797
#            1       0.84      0.72      0.78       775
#            2       0.88      0.87      0.88       795
#            3       0.87      0.83      0.85      1110
#            4       0.86      0.80      0.83      1260
#            5       0.88      0.69      0.77       882
#            6       0.85      0.80      0.83       940
#            7       0.49      0.61      0.54       473
#            8       0.66      0.85      0.74       746
#            9       0.62      0.73      0.67       689
#           10       0.75      0.79      0.77       670
#           11       0.62      0.81      0.70       312
#           12       0.73      0.81      0.77       665
#           13       0.83      0.85      0.84       314
#           14       0.85      0.78      0.81       756
#           15       0.97      0.98      0.97      1607

#     accuracy                           0.80     12791
#    macro avg       0.78      0.79      0.78     12791
# weighted avg       0.81      0.80      0.80     12791

In [10]:
# Prune a deep copy so the original `model` is left untouched; the later
# per-concern cell compares `model` against `module`.
module = copy.deepcopy(model)
# The return value is not used; the pruned weights are expected to live in
# `module` afterwards (presumably pruned in place — confirm in prune_magnitude).
prune_magnitude(
    module,
    sparsity_ratio=magnitude_ratio,
    include_layers=include_layers,
    exclude_layers=exclude_layers,
)
print("Evaluate the pruned model")
# Score the pruned copy. Passing `model` here (as the cell previously did) would
# re-evaluate the unpruned baseline and report it as the pruned result.
result = evaluate_model(module, model_config, test_dataloader)
# save_module(module, "Modules/", f"magnitude_{name}_{magnitude_ratio}p.pt")

Evaluate the pruned model




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<03:06,  1.07it/s]

Evaluating:   1%|          | 2/200 [00:01<02:06,  1.57it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:47,  1.84it/s]

Evaluating:   2%|?둞         | 4/200 [00:02<01:38,  1.99it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:32,  2.10it/s]

Evaluating:   3%|?둝         | 6/200 [00:03<01:29,  2.16it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:27,  2.20it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:25,  2.23it/s]

Evaluating:   4%|?둜         | 9/200 [00:04<01:24,  2.25it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.26it/s]

Evaluating:   6%|?둛         | 11/200 [00:05<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.28it/s]

Evaluating:   6%|?둚         | 13/200 [00:06<01:21,  2.28it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 15/200 [00:07<01:21,  2.26it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:22,  2.22it/s]

Evaluating:   8%|?둙         | 17/200 [00:08<01:24,  2.15it/s]

Evaluating:   9%|?둘         | 18/200 [00:08<01:27,  2.07it/s]

Evaluating:  10%|?둘         | 19/200 [00:09<01:31,  1.98it/s]

Evaluating:  10%|?둗         | 20/200 [00:09<01:36,  1.87it/s]

Evaluating:  10%|?둗         | 21/200 [00:10<01:41,  1.76it/s]

Evaluating:  11%|?둗         | 22/200 [00:10<01:45,  1.69it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:11<01:47,  1.64it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:12<01:53,  1.55it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:13<01:59,  1.46it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:13<02:05,  1.39it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:14<02:18,  1.25it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:15<02:28,  1.16it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:16<02:34,  1.11it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:17<02:38,  1.07it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:18<02:40,  1.05it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:19<02:42,  1.03it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:20<02:43,  1.02it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:21<02:43,  1.02it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:22<02:43,  1.01it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:23<02:43,  1.01it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:24<02:42,  1.00it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:25<02:41,  1.00it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:26<02:41,  1.00s/it]

Evaluating:  20%|?둗?둗        | 40/200 [00:27<02:41,  1.01s/it]

Evaluating:  20%|?둗?둗        | 41/200 [00:29<02:50,  1.07s/it]

Evaluating:  21%|?둗?둗        | 42/200 [00:30<02:56,  1.12s/it]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:31<03:00,  1.15s/it]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:32<03:02,  1.17s/it]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:34<03:03,  1.18s/it]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:35<03:03,  1.19s/it]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:36<03:03,  1.20s/it]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:37<03:03,  1.21s/it]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:38<03:02,  1.21s/it]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:40<03:01,  1.21s/it]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:41<03:00,  1.21s/it]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:42<02:59,  1.21s/it]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:43<02:58,  1.21s/it]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:45<02:57,  1.22s/it]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:46<02:55,  1.21s/it]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:47<02:54,  1.21s/it]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:48<02:53,  1.21s/it]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:49<02:52,  1.22s/it]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:51<02:51,  1.22s/it]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:52<02:50,  1.22s/it]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:53<02:48,  1.21s/it]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:54<02:46,  1.21s/it]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:55<02:45,  1.21s/it]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:57<02:44,  1.21s/it]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:58<02:43,  1.21s/it]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:59<02:41,  1.21s/it]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [01:00<02:40,  1.21s/it]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [01:01<02:39,  1.20s/it]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [01:03<02:37,  1.21s/it]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [01:04<02:36,  1.21s/it]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [01:05<02:32,  1.18s/it]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [01:06<02:28,  1.16s/it]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [01:07<02:25,  1.15s/it]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [01:08<02:23,  1.14s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [01:09<02:20,  1.13s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [01:11<02:19,  1.12s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [01:12<02:17,  1.12s/it]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [01:13<02:16,  1.12s/it]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [01:14<02:14,  1.11s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [01:15<02:13,  1.11s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [01:16<02:12,  1.11s/it]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [01:17<02:10,  1.11s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [01:18<02:09,  1.11s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [01:19<02:08,  1.11s/it]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [01:21<02:07,  1.11s/it]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [01:22<02:06,  1.11s/it]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [01:23<02:05,  1.11s/it]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [01:24<02:04,  1.11s/it]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [01:25<02:02,  1.11s/it]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [01:26<02:02,  1.11s/it]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [01:27<02:01,  1.11s/it]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [01:28<02:00,  1.11s/it]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [01:29<01:59,  1.11s/it]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [01:31<01:58,  1.11s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [01:32<01:57,  1.11s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [01:33<01:55,  1.12s/it]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [01:34<01:54,  1.11s/it]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:35<01:53,  1.11s/it]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:36<01:52,  1.11s/it]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:37<01:51,  1.12s/it]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:38<01:50,  1.12s/it]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:39<01:49,  1.12s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:41<01:48,  1.12s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:42<01:47,  1.12s/it]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:43<01:46,  1.12s/it]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:44<01:50,  1.18s/it]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:46<01:56,  1.26s/it]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:47<02:00,  1.31s/it]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:48<02:02,  1.35s/it]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:50<02:03,  1.38s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:51<02:04,  1.40s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:53<02:03,  1.41s/it]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:54<02:03,  1.42s/it]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:56<02:02,  1.43s/it]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:57<01:28,  1.03s/it]




KeyboardInterrupt: 

In [None]:
# Keyword options shared by both SamplingDataset calls in the loop below.
sampler_options = dict(device=device, resample=False, seed=seed)

# For every label ("concern"), compare the original model with the pruned module.
for concern in range(num_labels):
    print(f"--{concern}--")
    # NOTE(review): positive_samples / negative_samples are built but never read
    # in this notebook. They are kept because any side effects of constructing a
    # SamplingDataset are not visible from here — confirm before removing.
    positive_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, True, 4, **sampler_options
    )
    negative_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, False, 4, **sampler_options
    )
    similar(
        model,
        module,
        valid_dataloader,
        concern,
        num_samples,
        num_labels,
        device=device,
        seed=seed,
    )

In [None]:
# Inspect the sparsity of the pruned copy (`module`); as the last expression in
# the cell, whatever get_sparsity returns is displayed as the cell output.
get_sparsity(module)