In [1]:
import copy
import os.path
import sys

In [2]:
# Make the parent of the current working directory importable (presumably so
# that the project-local `utils` package used by the following imports can be
# found when the notebook is started from a sub-directory -- TODO confirm).
pwd = os.getcwd()
_parent_dir = os.path.dirname(pwd)
# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [3]:
from utils.model_utils.evaluate import evaluate_model
from utils.model_utils.load_model import *
from utils.model_utils.model_config import ModelConfig
from utils.dataset_utils.load_dataset import load_data
from utils.decompose_utils.weight_remover import WeightRemoverBert
from utils.decompose_utils.concern_identification import ConcernIdentificationBert
from utils.decompose_utils.tangling_identification import TanglingIdentification
from transformers import AutoConfig
from utils.model_utils.save_module import save_module
from datetime import datetime
from utils.decompose_utils.concern_modularization import ConcernModularizationBert
from utils.decompose_utils.sampling import sampling_class
from utils.dataset_utils.load_dataset import convert_dataset_labels_to_binary, extract_and_convert_dataloader
import torch

In [4]:
# Active experiment selection. These four values are consumed by the
# configuration cell and by the per-label loop further down.
model_name, model_type = "sadickam/sdg-classification-bert", "pretrained"
data, num_labels = "OSDG", 16

# Alternative setups (inactive). To switch, replace the values above with one
# of the following combinations:
#   model_name = "textattack/bert-base-uncased-imdb"
#   model_type = "pretrained", data = "IMDb", num_labels = 2
#
#   model_name = "fabriceyhc/bert-base-uncased-yahoo_answers_topics"
#   model_type = "pretrained", data = "Yahoo", num_labels = 10

In [5]:
# Select the compute device. The second GPU (index 1) is preferred, as in the
# original cell, but only when it actually exists: on a single-GPU machine
# "cuda:1" is an invalid ordinal and would fail later when tensors or models
# are moved to the device. Fall back to GPU 0, and to the CPU when CUDA is not
# available at all.
if torch.cuda.is_available():
    device = torch.device("cuda:1" if torch.cuda.device_count() > 1 else "cuda:0")
else:
    device = torch.device("cpu")

In [6]:
# Load the transformers configuration for the selected model, with the
# classification head sized to `num_labels`.
config = AutoConfig.from_pretrained(model_name, num_labels=num_labels)

# No checkpoint is selected here; None is forwarded to ModelConfig as-is.
checkpoint_name = None

# Bundle every setting the project helpers (model loading, data loading,
# evaluation) receive into a single ModelConfig object.
model_config = ModelConfig(
    _device=device,
    _checkpoint_name=checkpoint_name,
    _transformer_config=config,
    _data=data,
    _model_type=model_type,
    _model_name=model_name,
)

In [7]:
def pppp(module1, ci1, ti1, model_config):
    # NOTE(review): the body of this function is not present in this copy of
    # the notebook, so the cell as shown is not valid Python. It is called once
    # per label at the end of the loop in the next cell, and its return value
    # is passed to `evaluate_model` as the model to evaluate.
    # Presumably it builds the final module from `module1` using `ci1`/`ti1`
    # and prints the integer lists visible in the recorded output -- TODO
    # restore the body from the original notebook and confirm.
    

In [8]:
def _count_nonzero(weight):
    """Return the number of non-zero entries of ``weight`` as a Python number."""
    return torch.sum(weight != 0).item()


def _record_nonzero_counts(module, num_layers, ff1, ff2, pooler, classifier):
    """Append the current non-zero weight counts of ``module`` to the trackers.

    Args:
        module: Model exposing ``bert.encoder.layer[k].intermediate.dense``,
            ``bert.encoder.layer[k].output.dense``, ``bert.pooler.dense`` and
            ``classifier``, each with a ``weight`` tensor.
        num_layers: Number of encoder layers to inspect.
        ff1: One list per layer; receives the count for ``intermediate.dense``.
        ff2: One list per layer; receives the count for ``output.dense``.
        pooler: Flat list; receives the count for ``bert.pooler.dense``.
        classifier: Flat list; receives the count for ``classifier``.
    """
    for layer_idx in range(num_layers):
        layer = module.bert.encoder.layer[layer_idx]
        ff1[layer_idx].append(_count_nonzero(layer.intermediate.dense.weight))
        ff2[layer_idx].append(_count_nonzero(layer.output.dense.weight))
    pooler.append(_count_nonzero(module.bert.pooler.dense.weight))
    classifier.append(_count_nonzero(module.classifier.weight))


# Build and evaluate one module per class label.
for i in range(num_labels):
    # The model is reloaded for every label so each module starts from the
    # same weights.
    # NOTE(review): the dataloaders are rebuilt on every iteration as well;
    # this is kept because it is not visible here whether `load_data` produces
    # a different split on each call.
    model, tokenizer, checkpoint = load_classification_model(model_config, train_mode=False)

    train_dataloader, valid_dataloader, test_dataloader = load_data(
        model_config, batch_size=32, test_size=0.3
    )
    print("Start Time:" + datetime.now().strftime("%H:%M:%S"))
    print("#Module " + str(i) + " in progress....")
    num_samples = 64

    # Samples for label `i` (flag True) and for its complement (flag False).
    positive_samples = sampling_class(
        train_dataloader, i, num_samples, num_labels, True, 4, device=device
    )
    negative_samples = sampling_class(
        train_dataloader, i, num_samples, num_labels, False, 4, device=device
    )

    # NOTE(review): 200 is not a valid label index for this model; together
    # with the False flag this presumably draws samples from every class.
    # Confirm against `sampling_class`.
    all_samples = sampling_class(
        train_dataloader, 200, 20, num_labels, False, 4, device=device
    )

    # `module1` is the copy that the propagation passes below operate on.
    module1 = copy.deepcopy(model)
    w = WeightRemoverBert(model, p=0.9)
    ci1 = ConcernIdentificationBert(model, p=0.4)
    ti1 = TanglingIdentification(model, p=0.5)

    # Non-zero weight counts over time: the first entry is taken from the
    # freshly loaded model, then one entry is added after every processed batch.
    num_layers = config.num_hidden_layers
    ff1 = [[] for _ in range(num_layers)]
    ff2 = [[] for _ in range(num_layers)]
    pooler = []
    classifier = []
    _record_nonzero_counts(model, num_layers, ff1, ff2, pooler, classifier)
    print("origin")

    # The three passes share the same shape: announce the phase, run the
    # propagator on each batch without gradients, then record the counts.
    # The log labels are kept verbatim from the original cell.
    phases = (
        ("Start Positive CI sparse", w, all_samples),
        ("Start Positive CI after sparse", ci1, positive_samples),
        ("Start Negative TI", ti1, negative_samples),
    )
    for phase_label, propagator, samples in phases:
        print(phase_label)
        for batch in samples:
            input_ids, attn_mask, _, total_sampled = batch
            with torch.no_grad():
                propagator.propagate(module1, input_ids)
            _record_nonzero_counts(module1, num_layers, ff1, ff2, pooler, classifier)

    # Collect test batches until at least 100 examples have been gathered.
    # NOTE(review): these lists are not read anywhere in this cell. They are
    # retained because the body of `pppp` is not visible and may rely on
    # notebook-level globals; remove them once that is confirmed not to be
    # the case.
    collected_input_ids = []
    collected_attention_mask = []
    collected_labels = []
    count = 0

    for batch in test_dataloader:
        if count >= 100:
            break

        input_ids = batch["input_ids"].to(model_config.device)
        attention_mask = batch["attention_mask"].to(model_config.device)
        labels = batch["labels"].to(model_config.device)

        collected_input_ids.append(input_ids)
        collected_attention_mask.append(attention_mask)
        collected_labels.append(labels)

        # Advance by the number of examples in this batch.
        count += input_ids.size(0)

    # Binary (label `i` vs. rest) versions of the three splits. Only the test
    # split is used below; the other two are kept for the same reason as the
    # collected lists above.
    converted_train_dataloader = convert_dataset_labels_to_binary(train_dataloader, i, True)
    converted_valid_dataloader = convert_dataset_labels_to_binary(valid_dataloader, i, True)
    converted_test_dataloader = convert_dataset_labels_to_binary(test_dataloader, i, True)

    module2 = pppp(module1, ci1, ti1, model_config)
    result = evaluate_model(module2, model_config, converted_test_dataloader)


Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:47:07
#Module 0 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[1, 2, 7, 9, 10, 12, 13, 14, 15]
[0]
[0, 12, 10, 0, 0, 0, 0, 11, 0, 16, 14, 0, 8, 10, 8, 16]
[16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 50/50 [00:10<00:00,  4.62it/s]


Loss: 0.5246
Precision: 0.8194, Recall: 0.7742, F1-Score: 0.7659
              precision    recall  f1-score   support

           0       0.94      0.59      0.72       797
           1       0.70      0.96      0.81       797

    accuracy                           0.77      1594
   macro avg       0.82      0.77      0.77      1594
weighted avg       0.82      0.77      0.77      1594

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:47:48
#Module 1 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 2, 7, 8, 9, 10, 12, 15]
[1, 13]
[15, 0, 8, 0, 0, 0, 0, 11, 9, 16, 14, 0, 11, 0, 0, 16]
[0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 0, 0]


Evaluating: 100%|██████████| 49/49 [00:11<00:00,  4.36it/s]


Loss: 0.4987
Precision: 0.8162, Recall: 0.8155, F1-Score: 0.8154
              precision    recall  f1-score   support

           0       0.83      0.79      0.81       775
           1       0.80      0.84      0.82       775

    accuracy                           0.82      1550
   macro avg       0.82      0.82      0.82      1550
weighted avg       0.82      0.82      0.82      1550

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:48:30
#Module 2 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 8, 9, 10, 12, 15]
[2]
[16, 10, 0, 0, 0, 0, 0, 12, 10, 16, 15, 0, 8, 0, 0, 16]
[0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 50/50 [00:11<00:00,  4.36it/s]


Loss: 0.3963
Precision: 0.8417, Recall: 0.8220, F1-Score: 0.8194
              precision    recall  f1-score   support

           0       0.92      0.70      0.80       795
           1       0.76      0.94      0.84       795

    accuracy                           0.82      1590
   macro avg       0.84      0.82      0.82      1590
weighted avg       0.84      0.82      0.82      1590

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:49:07
#Module 3 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 2, 5, 7, 8, 9, 10, 12, 14, 15]
[3]
[16, 10, 14, 0, 0, 8, 0, 10, 9, 16, 14, 0, 10, 0, 8, 16]
[0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 70/70 [00:16<00:00,  4.30it/s]


Loss: 0.3878
Precision: 0.8526, Recall: 0.8441, F1-Score: 0.8432
              precision    recall  f1-score   support

           0       0.91      0.77      0.83      1110
           1       0.80      0.92      0.86      1110

    accuracy                           0.84      2220
   macro avg       0.85      0.84      0.84      2220
weighted avg       0.85      0.84      0.84      2220

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:49:50
#Module 4 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 8, 9, 10, 12, 15]
[2, 4]
[16, 9, 0, 0, 0, 0, 0, 15, 9, 16, 14, 0, 9, 0, 0, 16]
[0, 0, 11, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 79/79 [00:18<00:00,  4.24it/s]


Loss: 0.6225
Precision: 0.7861, Recall: 0.7754, F1-Score: 0.7733
              precision    recall  f1-score   support

           0       0.84      0.68      0.75      1260
           1       0.73      0.87      0.80      1260

    accuracy                           0.78      2520
   macro avg       0.79      0.78      0.77      2520
weighted avg       0.79      0.78      0.77      2520

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:50:38
#Module 5 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 2, 7, 9, 10, 12, 15]
[5]
[16, 11, 11, 0, 0, 0, 0, 10, 0, 16, 15, 0, 9, 0, 0, 16]
[0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 56/56 [00:12<00:00,  4.31it/s]


Loss: 0.4209
Precision: 0.8236, Recall: 0.8152, F1-Score: 0.8140
              precision    recall  f1-score   support

           0       0.88      0.73      0.80       882
           1       0.77      0.90      0.83       882

    accuracy                           0.82      1764
   macro avg       0.82      0.82      0.81      1764
weighted avg       0.82      0.82      0.81      1764

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:51:21
#Module 6 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 8, 9, 10, 12, 15]
[11, 6]
[15, 11, 0, 0, 0, 0, 0, 14, 14, 16, 13, 0, 12, 0, 0, 16]
[0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 9, 0, 0, 0, 0]


Evaluating: 100%|██████████| 59/59 [00:13<00:00,  4.26it/s]


Loss: 0.4260
Precision: 0.8386, Recall: 0.8314, F1-Score: 0.8305
              precision    recall  f1-score   support

           0       0.89      0.76      0.82       940
           1       0.79      0.90      0.84       940

    accuracy                           0.83      1880
   macro avg       0.84      0.83      0.83      1880
weighted avg       0.84      0.83      0.83      1880

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:52:02
#Module 7 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 8, 9, 10, 12, 15]
[7]
[16, 0, 0, 0, 0, 0, 0, 0, 10, 16, 14, 0, 8, 0, 0, 16]
[0, 0, 0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 30/30 [00:06<00:00,  4.29it/s]


Loss: 0.5624
Precision: 0.7400, Recall: 0.7135, F1-Score: 0.7054
              precision    recall  f1-score   support

           0       0.82      0.55      0.66       473
           1       0.66      0.88      0.75       473

    accuracy                           0.71       946
   macro avg       0.74      0.71      0.71       946
weighted avg       0.74      0.71      0.71       946

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:52:38
#Module 8 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 9, 10, 15]
[8]
[16, 9, 0, 0, 0, 0, 0, 12, 0, 16, 16, 0, 0, 0, 0, 16]
[0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 47/47 [00:10<00:00,  4.29it/s]


Loss: 0.4757
Precision: 0.8347, Recall: 0.7909, F1-Score: 0.7838
              precision    recall  f1-score   support

           0       0.96      0.61      0.74       746
           1       0.71      0.97      0.82       746

    accuracy                           0.79      1492
   macro avg       0.83      0.79      0.78      1492
weighted avg       0.83      0.79      0.78      1492

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:53:19
#Module 9 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 2, 7, 10, 12, 15]
[8, 9]
[16, 0, 8, 0, 0, 0, 0, 13, 0, 0, 13, 0, 9, 0, 0, 16]
[0, 0, 0, 0, 0, 0, 0, 0, 9, 16, 0, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 44/44 [00:10<00:00,  4.36it/s]


Loss: 0.6339
Precision: 0.7874, Recall: 0.7199, F1-Score: 0.7024
              precision    recall  f1-score   support

           0       0.93      0.48      0.63       689
           1       0.65      0.96      0.77       689

    accuracy                           0.72      1378
   macro avg       0.79      0.72      0.70      1378
weighted avg       0.79      0.72      0.70      1378

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:53:56
#Module 10 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 2, 8, 9, 12, 15]
[10]
[16, 9, 8, 0, 0, 0, 0, 0, 11, 16, 0, 0, 10, 0, 0, 16]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 0, 0]


Evaluating: 100%|██████████| 42/42 [00:09<00:00,  4.26it/s]


Loss: 0.4882
Precision: 0.8035, Recall: 0.7828, F1-Score: 0.7791
              precision    recall  f1-score   support

           0       0.88      0.65      0.75       670
           1       0.72      0.91      0.81       670

    accuracy                           0.78      1340
   macro avg       0.80      0.78      0.78      1340
weighted avg       0.80      0.78      0.78      1340

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:54:37
#Module 11 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 9, 10, 15]
[2, 8, 11, 12, 13, 14]
[16, 10, 0, 0, 0, 0, 0, 13, 0, 16, 12, 0, 0, 0, 0, 16]
[0, 0, 8, 0, 0, 0, 0, 0, 14, 0, 0, 16, 8, 10, 9, 0]


Evaluating: 100%|██████████| 20/20 [00:04<00:00,  4.37it/s]


Loss: 0.5731
Precision: 0.8243, Recall: 0.7949, F1-Score: 0.7901
              precision    recall  f1-score   support

           0       0.92      0.64      0.76       312
           1       0.73      0.95      0.82       312

    accuracy                           0.79       624
   macro avg       0.82      0.79      0.79       624
weighted avg       0.82      0.79      0.79       624

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:55:18
#Module 12 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 7, 8, 9, 10, 15]
[13, 12, 5, 14]
[16, 12, 0, 0, 0, 0, 0, 9, 14, 16, 15, 0, 0, 0, 0, 16]
[0, 0, 0, 0, 0, 13, 0, 0, 0, 0, 0, 0, 16, 13, 10, 0]


Evaluating: 100%|██████████| 42/42 [00:09<00:00,  4.28it/s]


Loss: 0.6199
Precision: 0.8446, Recall: 0.8391, F1-Score: 0.8385
              precision    recall  f1-score   support

           0       0.89      0.78      0.83       665
           1       0.80      0.90      0.85       665

    accuracy                           0.84      1330
   macro avg       0.84      0.84      0.84      1330
weighted avg       0.84      0.84      0.84      1330

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:55:59
#Module 13 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 2, 7, 9, 10, 12, 14, 15]
[8, 13, 5]
[16, 13, 8, 0, 0, 0, 0, 10, 0, 16, 11, 0, 10, 0, 8, 16]
[0, 0, 0, 0, 0, 11, 0, 0, 9, 0, 0, 0, 0, 16, 0, 0]


Evaluating: 100%|██████████| 20/20 [00:04<00:00,  4.38it/s]


Loss: 0.4293
Precision: 0.8486, Recall: 0.8248, F1-Score: 0.8218
              precision    recall  f1-score   support

           0       0.94      0.69      0.80       314
           1       0.76      0.96      0.85       314

    accuracy                           0.82       628
   macro avg       0.85      0.82      0.82       628
weighted avg       0.85      0.82      0.82       628

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:56:31
#Module 14 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 1, 5, 7, 8, 9, 10, 13, 15]
[12, 14]
[15, 10, 0, 0, 0, 8, 0, 11, 9, 16, 14, 0, 0, 8, 0, 16]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 16, 0]


Evaluating: 100%|██████████| 48/48 [00:11<00:00,  4.33it/s]


Loss: 0.5449
Precision: 0.8415, Recall: 0.8320, F1-Score: 0.8308
              precision    recall  f1-score   support

           0       0.90      0.75      0.82       756
           1       0.78      0.92      0.84       756

    accuracy                           0.83      1512
   macro avg       0.84      0.83      0.83      1512
weighted avg       0.84      0.83      0.83      1512

Directory /home/Minwoo/LESN/Decompose/DecomposeBERT/Models/Configs/pretrained/sadickam/sdg-classification-bert exists.
Loading the model.
Start Time:15:57:13
#Module 15 in progress....
origin
Start Positive CI sparse
Start Positive CI after sparse
Start Negative TI
[0, 2, 4, 7, 9, 10, 12, 14]
[13, 15]
[15, 0, 10, 0, 10, 0, 0, 11, 0, 16, 15, 0, 14, 0, 12, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13, 0, 16]


Evaluating: 100%|██████████| 101/101 [00:23<00:00,  4.22it/s]

Loss: 0.1580
Precision: 0.9719, Recall: 0.9711, F1-Score: 0.9711
              precision    recall  f1-score   support

           0       0.99      0.95      0.97      1607
           1       0.95      0.99      0.97      1607

    accuracy                           0.97      3214
   macro avg       0.97      0.97      0.97      3214
weighted avg       0.97      0.97      0.97      3214




