In [1]:
import os
import sys
# Extend the import search path four directories up (presumably the repository
# root that holds the `utils` package imported in the next cell -- confirm).
# The entry is relative, so it is resolved against the current working
# directory: start the notebook from its own folder.
sys.path.append("../../../../")
# Must be set before the tokenizer library is imported/used.
# NOTE(review): presumably disables Hugging Face tokenizers' internal
# parallelism to avoid fork warnings with multi-worker DataLoaders -- confirm.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import copy
import torch
from datetime import datetime
from utils.helper import ModelConfig, color_print
from utils.dataset_utils.load_dataset import (
    load_data,
)
from utils.model_utils.load_model import load_model
from utils.model_utils.save_module import save_module
from utils.model_utils.evaluate import evaluate_model, get_sparsity, similar
from utils.dataset_utils.sampling import SamplingDataset
from utils.prune_utils.prune import (
    prune_concern_identification,
    recover_tangling_identification,
)

In [3]:
# --- Experiment configuration (everything a reader might tune) ---

# Model / dataset selection and target device.
name = "OSDG"
device = torch.device("cuda:0")
checkpoint = None  # rebound by load_model() in a later cell

# Reproducibility.
seed = 44

# Data loading.
batch_size = 32
num_workers = 48

# Sampling and pruning.
num_samples = 16
ci_ratio = 0.3  # passed as `sparsity_ratio` to prune_concern_identification

# Layer filters forwarded to the pruning routine.
include_layers = ["attention", "intermediate", "output"]
exclude_layers = None

In [4]:
# Record when the run began and echo it as a human-readable stamp.
script_start_time = datetime.now()
_start_stamp = script_start_time.strftime('%Y-%m-%d %H:%M:%S')
print(f"Script started at: {_start_stamp}")

Script started at: 2024-08-19 18:41:06


In [5]:
# Build the model configuration for the dataset/model selected by `name`,
# bound to the target `device`.
model_config = ModelConfig(name, device)
# Number of classes, read from the config mapping; it is the upper bound of
# the per-concern loop further down.
num_labels = model_config.config["num_labels"]
# load_model returns a 3-tuple. Note that `checkpoint` is rebound here,
# replacing the `checkpoint = None` placeholder from the configuration cell.
model, tokenizer, checkpoint = load_model(model_config)

Loading the model.




{'model_name': 'sadickam/sdg-classification-bert', 'task_type': 'classification', 'architectures': 'bert', 'dataset_name': 'OSDG', 'num_labels': 16, 'cache_dir': 'Models'}




The model sadickam/sdg-classification-bert is loaded.




In [6]:
# print("Evaluate the original model")
# result = evaluate_model(model, model_config, test_dataloader)

In [7]:
# Evaluate the original model
# Evaluating: 100%|██████████| 200/200 [03:16<00:00,  1.02it/s]
# Loss: 0.9485
# Precision: 0.7801, Recall: 0.7867, F1-Score: 0.7793
#               precision    recall  f1-score   support

#            0       0.77      0.66      0.71       797
#            1       0.84      0.72      0.78       775
#            2       0.88      0.87      0.88       795
#            3       0.87      0.83      0.85      1110
#            4       0.86      0.80      0.83      1260
#            5       0.88      0.69      0.77       882
#            6       0.85      0.80      0.83       940
#            7       0.49      0.61      0.54       473
#            8       0.66      0.85      0.74       746
#            9       0.62      0.73      0.67       689
#           10       0.75      0.79      0.77       670
#           11       0.62      0.81      0.70       312
#           12       0.73      0.81      0.77       665
#           13       0.83      0.85      0.84       314
#           14       0.85      0.78      0.81       756
#           15       0.97      0.98      0.97      1607

#     accuracy                           0.80     12791
#    macro avg       0.78      0.79      0.78     12791
# weighted avg       0.81      0.80      0.80     12791

In [8]:
# Concern-identification pruning, one pruned copy of the model per label.
#
# For every label ("concern") the original model is deep-copied, pruned with
# positive/negative samples drawn for that label, evaluated on the test split,
# and compared against the unpruned model on the validation split.

# Load the dataloaders once. None of the arguments depend on `concern`, so
# calling load_data inside the loop repeated the same work (and the same log
# lines) for every label.
# NOTE(review): assumes the returned dataloaders can be iterated repeatedly
# with equivalent results -- confirm against load_data.
train_dataloader, valid_dataloader, test_dataloader = load_data(
    name, batch_size=batch_size, num_workers=num_workers, do_cache=True
)

# This sample set is built from arguments that are identical for every label,
# so it is created once instead of once per iteration. It is not consumed in
# this cell; the name is kept defined in case a later cell relies on it.
all_samples = SamplingDataset(
    train_dataloader, 200, num_samples, num_labels, False, 4, device=device, resample=False, seed=seed
)

for concern in range(num_labels):
    # Sample sets for the current label; the fifth positional argument is the
    # only difference between the two (True for positive, False for negative).
    positive_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, True, 4, device=device, resample=False, seed=seed
    )
    negative_samples = SamplingDataset(
        train_dataloader, concern, num_samples, num_labels, False, 4, device=device, resample=False, seed=seed
    )

    # Work on a private copy so the original `model` stays untouched and can
    # serve as the reference in `similar` below and as the source for the
    # next iteration's copy.
    module = copy.deepcopy(model)

    prune_concern_identification(
        module,
        model_config,
        positive_samples,
        negative_samples,
        include_layers=include_layers,
        exclude_layers=exclude_layers,
        sparsity_ratio=ci_ratio,
    )

    print(f"Evaluate the pruned model {concern}")
    result = evaluate_model(module, model_config, test_dataloader)
    get_sparsity(module)

    # Compare the original and pruned models on the validation split for the
    # current label.
    similar(model, module, valid_dataloader, concern, num_samples, num_labels, device=device, seed=seed)

    # NOTE(review): the file name below has no `concern` component, so if this
    # is re-enabled every iteration would presumably target the same path;
    # add `concern` to the name before using it.
    # save_module(module, "Modules/", f"ci_{name}_{ci_ratio}p.pt")

{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 0




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:01<03:45,  1.13s/it]

Evaluating:   1%|          | 2/200 [00:02<04:02,  1.22s/it]

Evaluating:   2%|?둞         | 3/200 [00:03<03:44,  1.14s/it]

Evaluating:   2%|?둞         | 4/200 [00:04<03:37,  1.11s/it]

Evaluating:   2%|?둝         | 5/200 [00:05<03:39,  1.13s/it]

Evaluating:   3%|?둝         | 6/200 [00:06<03:33,  1.10s/it]

Evaluating:   4%|?둝         | 7/200 [00:07<03:29,  1.09s/it]

Evaluating:   4%|?둜         | 8/200 [00:08<03:35,  1.12s/it]

Evaluating:   4%|?둜         | 9/200 [00:10<03:30,  1.10s/it]

Evaluating:   5%|?둛         | 10/200 [00:11<03:27,  1.09s/it]

Evaluating:   6%|?둛         | 11/200 [00:12<03:25,  1.09s/it]

Evaluating:   6%|?둛         | 12/200 [00:13<03:22,  1.08s/it]

Evaluating:   6%|?둚         | 13/200 [00:14<03:20,  1.07s/it]

Evaluating:   7%|?둚         | 14/200 [00:15<03:25,  1.10s/it]

Evaluating:   8%|?둙         | 15/200 [00:16<03:09,  1.03s/it]

Evaluating:   8%|?둙         | 16/200 [00:17<02:51,  1.07it/s]

Evaluating:   8%|?둙         | 17/200 [00:17<02:38,  1.15it/s]

Evaluating:   9%|?둘         | 18/200 [00:18<02:29,  1.22it/s]

Evaluating:  10%|?둘         | 19/200 [00:19<02:22,  1.27it/s]

Evaluating:  10%|?둗         | 20/200 [00:19<02:18,  1.30it/s]

Evaluating:  10%|?둗         | 21/200 [00:20<02:14,  1.33it/s]

Evaluating:  11%|?둗         | 22/200 [00:21<02:12,  1.34it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:22<02:10,  1.36it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:22<02:08,  1.37it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:23<02:07,  1.37it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:24<02:05,  1.38it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:24<02:04,  1.39it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:25<02:03,  1.39it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:26<02:02,  1.39it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:27<02:02,  1.39it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:27<02:01,  1.39it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:28<02:00,  1.39it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:29<02:00,  1.39it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:29<01:59,  1.39it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:30<01:58,  1.39it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:31<01:58,  1.39it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:32<01:57,  1.39it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:32<01:56,  1.39it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:33<01:55,  1.39it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:34<01:55,  1.39it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:35<01:54,  1.39it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:35<01:53,  1.39it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:36<01:52,  1.39it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:37<01:52,  1.39it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:37<01:51,  1.39it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:38<01:50,  1.39it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:39<01:50,  1.39it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:40<01:49,  1.39it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:40<01:48,  1.39it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:41<01:47,  1.39it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:42<01:47,  1.39it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:42<01:46,  1.39it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:43<01:45,  1.39it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:44<01:45,  1.39it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:45<01:44,  1.39it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:45<01:43,  1.39it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:46<01:42,  1.39it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:47<01:42,  1.39it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:47<01:41,  1.39it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:48<01:40,  1.39it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:49<01:40,  1.38it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:50<01:39,  1.39it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:50<01:39,  1.38it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:51<01:38,  1.38it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:52<01:37,  1.38it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:53<01:36,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:53<01:35,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:54<01:35,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:55<01:34,  1.39it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:55<01:33,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:56<01:33,  1.37it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:57<01:36,  1.33it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:58<01:39,  1.28it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:59<01:49,  1.15it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [01:00<01:55,  1.08it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [01:01<02:06,  1.02s/it]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [01:02<02:07,  1.03s/it]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [01:03<02:07,  1.04s/it]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [01:04<02:07,  1.05s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [01:05<02:07,  1.06s/it]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [01:07<02:06,  1.06s/it]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [01:08<02:07,  1.08s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [01:09<02:13,  1.14s/it]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [01:10<02:08,  1.11s/it]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [01:11<02:06,  1.10s/it]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [01:12<01:56,  1.02s/it]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [01:13<01:45,  1.08it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [01:13<01:38,  1.14it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [01:14<01:33,  1.19it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [01:15<01:29,  1.23it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [01:16<01:26,  1.26it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [01:16<01:24,  1.28it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [01:17<01:22,  1.30it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [01:18<01:21,  1.31it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [01:19<01:19,  1.32it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [01:19<01:18,  1.32it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [01:20<01:17,  1.33it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:21<01:16,  1.33it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:22<01:16,  1.33it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:22<01:15,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:23<01:14,  1.33it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:24<01:13,  1.34it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:25<01:12,  1.34it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:25<01:11,  1.34it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:26<01:10,  1.35it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:27<01:09,  1.35it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:28<01:08,  1.35it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:28<01:08,  1.35it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:29<01:07,  1.35it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:30<01:06,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:30<01:05,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:31<01:05,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:32<01:04,  1.35it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:33<01:03,  1.35it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:33<01:02,  1.35it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:34<01:02,  1.35it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:35<01:01,  1.35it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:36<01:00,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:36<00:59,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:37<00:59,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:38<00:58,  1.35it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:39<00:57,  1.35it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:39<00:57,  1.35it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:40<00:56,  1.35it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:41<00:55,  1.35it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:42<00:54,  1.35it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:42<00:53,  1.35it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:43<00:53,  1.35it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:44<00:52,  1.35it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:45<00:51,  1.35it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:45<00:50,  1.35it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:46<00:50,  1.35it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:47<00:49,  1.35it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:48<00:48,  1.35it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:48<00:48,  1.35it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:49<00:47,  1.35it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:50<00:46,  1.35it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:50<00:45,  1.35it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:51<00:45,  1.35it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:52<00:44,  1.35it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:53<00:43,  1.35it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:53<00:42,  1.35it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:54<00:42,  1.35it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:55<00:41,  1.35it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:56<00:40,  1.35it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:56<00:39,  1.35it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:57<00:39,  1.35it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:58<00:38,  1.35it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:59<00:37,  1.35it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:59<00:36,  1.35it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [02:00<00:36,  1.35it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [02:01<00:35,  1.35it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [02:02<00:34,  1.35it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [02:02<00:34,  1.35it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [02:03<00:33,  1.35it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [02:04<00:32,  1.35it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [02:05<00:31,  1.35it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [02:05<00:31,  1.35it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [02:06<00:30,  1.36it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [02:07<00:29,  1.36it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [02:07<00:28,  1.36it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [02:08<00:28,  1.36it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [02:09<00:27,  1.35it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [02:10<00:26,  1.35it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [02:10<00:25,  1.35it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [02:11<00:25,  1.35it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [02:12<00:24,  1.35it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [02:13<00:23,  1.35it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [02:13<00:22,  1.35it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [02:14<00:22,  1.35it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [02:15<00:21,  1.35it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [02:16<00:20,  1.35it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [02:16<00:19,  1.35it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [02:17<00:19,  1.35it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [02:18<00:18,  1.35it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [02:19<00:17,  1.35it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [02:19<00:17,  1.35it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [02:20<00:16,  1.35it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [02:21<00:15,  1.36it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [02:21<00:14,  1.36it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:22<00:13,  1.36it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:23<00:13,  1.36it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:24<00:12,  1.35it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:24<00:11,  1.35it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:25<00:11,  1.35it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:26<00:10,  1.35it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:27<00:09,  1.35it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:27<00:08,  1.35it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:28<00:08,  1.35it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:29<00:07,  1.35it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:30<00:06,  1.35it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:30<00:05,  1.35it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:31<00:05,  1.35it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:32<00:04,  1.35it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:33<00:03,  1.35it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:33<00:02,  1.35it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:34<00:02,  1.35it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:35<00:01,  1.35it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:36<00:00,  1.35it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:36<00:00,  1.41it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:36<00:00,  1.28it/s]




Loss: 0.9439




Precision: 0.7776, Recall: 0.7839, F1-Score: 0.7766




              precision    recall  f1-score   support

           0       0.76      0.67      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.68      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.66      0.85      0.75       746
           9       0.59      0.73      0.66       689
          10       0.75      0.79      0.77       670
          11       0.61      0.80      0.69       312
          12       0.72      0.81      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9131958871211384, 0.9131958871211384)




CCA coefficients mean non-concern: (0.9179786619596061, 0.9179786619596061)




Linear CKA concern: 0.9924484064334164




Linear CKA non-concern: 0.9859977665848112




Kernel CKA concern: 0.9903306181023317




Kernel CKA non-concern: 0.9862956683925856




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 1




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:35,  2.09it/s]

Evaluating:   1%|          | 2/200 [00:00<01:36,  2.06it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:35,  2.06it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:35,  2.05it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:35,  2.05it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:34,  2.04it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:34,  2.04it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:33,  2.05it/s]

Evaluating:   4%|?둜         | 9/200 [00:04<01:33,  2.04it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:33,  2.03it/s]

Evaluating:   6%|?둛         | 11/200 [00:05<01:33,  2.03it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:33,  2.02it/s]

Evaluating:   6%|?둚         | 13/200 [00:06<01:32,  2.01it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:32,  2.01it/s]

Evaluating:   8%|?둙         | 15/200 [00:07<01:32,  2.01it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:31,  2.01it/s]

Evaluating:   8%|?둙         | 17/200 [00:08<01:31,  2.01it/s]

Evaluating:   9%|?둘         | 18/200 [00:08<01:30,  2.00it/s]

Evaluating:  10%|?둘         | 19/200 [00:09<01:30,  2.01it/s]

Evaluating:  10%|?둗         | 20/200 [00:09<01:29,  2.01it/s]

Evaluating:  10%|?둗         | 21/200 [00:10<01:29,  2.01it/s]

Evaluating:  11%|?둗         | 22/200 [00:10<01:28,  2.01it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:11<01:28,  2.01it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:11<01:27,  2.01it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:12<01:27,  2.00it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:12<01:26,  2.00it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:13<01:26,  2.00it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:13<01:25,  2.00it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:14<01:25,  2.00it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:14<01:24,  2.00it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:15<01:24,  2.00it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:15<01:23,  2.00it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:16<01:24,  1.98it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:16<01:24,  1.97it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:17<01:24,  1.96it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:17<01:24,  1.95it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:18<01:24,  1.94it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:19<01:25,  1.89it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:19<01:26,  1.86it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:20<01:27,  1.83it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:20<01:27,  1.82it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:21<01:27,  1.81it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:21<01:27,  1.80it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:22<01:28,  1.77it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:23<01:29,  1.73it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:23<01:30,  1.71it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:24<01:31,  1.68it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:24<01:31,  1.66it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:25<01:30,  1.66it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:26<01:30,  1.66it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:26<01:32,  1.62it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:27<01:33,  1.59it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:28<01:34,  1.55it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:28<01:36,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:29<01:38,  1.48it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:30<01:38,  1.46it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:30<01:38,  1.45it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:31<01:38,  1.44it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:32<01:38,  1.43it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:32<01:37,  1.43it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:33<01:37,  1.43it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:34<01:36,  1.43it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:35<01:35,  1.43it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:35<01:35,  1.42it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:36<01:34,  1.42it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:37<01:34,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:37<01:33,  1.42it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:38<01:33,  1.42it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:39<01:32,  1.42it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:40<01:31,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:40<01:30,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:41<01:29,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:42<01:29,  1.42it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:42<01:28,  1.42it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:43<01:28,  1.42it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:44<01:27,  1.41it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:44<01:27,  1.41it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:45<01:26,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:46<01:25,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:47<01:25,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:47<01:24,  1.41it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:48<01:23,  1.42it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:49<01:22,  1.42it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:49<01:21,  1.42it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:50<01:21,  1.42it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:51<01:20,  1.42it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:52<01:19,  1.42it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:52<01:18,  1.42it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:53<01:18,  1.42it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:54<01:17,  1.42it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:54<01:16,  1.42it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:55<01:16,  1.42it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:56<01:15,  1.42it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:57<01:16,  1.38it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:57<01:18,  1.34it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:58<01:19,  1.31it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:59<01:19,  1.29it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:00<01:19,  1.28it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:01<01:19,  1.27it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:01<01:18,  1.27it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:02<01:18,  1.26it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:03<01:17,  1.26it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:04<01:17,  1.26it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:04<01:16,  1.26it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:05<01:15,  1.26it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:06<01:14,  1.26it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:07<01:13,  1.26it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:08<01:13,  1.26it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:08<01:12,  1.26it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:09<01:11,  1.26it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:10<01:10,  1.26it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:11<01:09,  1.26it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:12<01:09,  1.26it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:12<01:08,  1.26it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:13<01:07,  1.26it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:14<01:06,  1.26it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:15<01:06,  1.26it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:16<01:05,  1.26it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:16<01:04,  1.26it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:17<01:03,  1.25it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:18<01:03,  1.25it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:19<01:02,  1.25it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:20<01:01,  1.26it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:20<01:00,  1.26it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:21<00:59,  1.26it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:22<00:58,  1.26it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:23<00:57,  1.26it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:24<00:57,  1.26it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:24<00:56,  1.26it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:25<00:55,  1.27it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:26<00:53,  1.28it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:27<00:52,  1.30it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:27<00:51,  1.31it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:28<00:49,  1.32it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:29<00:48,  1.33it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:30<00:48,  1.33it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:30<00:47,  1.33it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:31<00:45,  1.35it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:32<00:44,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:33<00:43,  1.38it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:33<00:42,  1.39it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:34<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:35<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:35<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:36<00:39,  1.40it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:37<00:38,  1.40it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:38<00:37,  1.40it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:38<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:39<00:36,  1.39it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:40<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:40<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:41<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:42<00:33,  1.39it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:43<00:33,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:43<00:32,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:44<00:31,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:45<00:30,  1.39it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:45<00:30,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:46<00:29,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:47<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:48<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:48<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:49<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:50<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:50<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:51<00:24,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:52<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:53<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:53<00:22,  1.39it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:54<00:21,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:55<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:56<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:56<00:19,  1.39it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:57<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:58<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:58<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:59<00:16,  1.39it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [02:00<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [02:01<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [02:01<00:14,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:02<00:13,  1.39it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:03<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:03<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:04<00:11,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:05<00:10,  1.39it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:06<00:10,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:06<00:09,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:07<00:08,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:08<00:07,  1.39it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:08<00:07,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:09<00:06,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:10<00:05,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:11<00:05,  1.39it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:11<00:04,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:12<00:03,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:13<00:02,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:14<00:02,  1.39it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:14<00:01,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:15<00:00,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:16<00:00,  1.45it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:16<00:00,  1.47it/s]




Loss: 0.9419




Precision: 0.7774, Recall: 0.7835, F1-Score: 0.7764




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.84      0.71      0.77       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.82       940
           7       0.48      0.59      0.53       473
           8       0.66      0.86      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.80      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9162529864720084, 0.9162529864720084)




CCA coefficients mean non-concern: (0.9193388733438361, 0.9193388733438361)




Linear CKA concern: 0.9896019889959103




Linear CKA non-concern: 0.9857991215251943




Kernel CKA concern: 0.9881429145811608




Kernel CKA non-concern: 0.9862807095739208




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 2




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:35,  2.08it/s]

Evaluating:   1%|          | 2/200 [00:00<01:36,  2.05it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:36,  2.05it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:36,  2.04it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:35,  2.04it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:35,  2.04it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:34,  2.04it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:34,  2.03it/s]

Evaluating:   4%|?둜         | 9/200 [00:04<01:34,  2.03it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:34,  2.02it/s]

Evaluating:   6%|?둛         | 11/200 [00:05<01:34,  2.01it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:33,  2.01it/s]

Evaluating:   6%|?둚         | 13/200 [00:06<01:33,  2.00it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 15/200 [00:07<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 17/200 [00:08<01:31,  2.00it/s]

Evaluating:   9%|?둘         | 18/200 [00:08<01:31,  2.00it/s]

Evaluating:  10%|?둘         | 19/200 [00:09<01:30,  2.00it/s]

Evaluating:  10%|?둗         | 20/200 [00:09<01:30,  2.00it/s]

Evaluating:  10%|?둗         | 21/200 [00:10<01:29,  2.00it/s]

Evaluating:  11%|?둗         | 22/200 [00:10<01:29,  2.00it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:11<01:28,  2.00it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:11<01:28,  2.00it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:12<01:27,  2.00it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:12<01:27,  1.99it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:13<01:26,  1.99it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:13<01:26,  1.99it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:14<01:25,  1.99it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:14<01:25,  1.99it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:15<01:25,  1.98it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:15<01:25,  1.97it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:16<01:25,  1.94it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:17<01:26,  1.92it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:17<01:27,  1.88it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:18<01:28,  1.85it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:18<01:29,  1.82it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:19<01:29,  1.80it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:19<01:29,  1.79it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:20<01:29,  1.78it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:21<01:30,  1.76it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:21<01:32,  1.71it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:22<01:33,  1.68it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:22<01:35,  1.63it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:23<01:37,  1.58it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:24<01:39,  1.55it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:24<01:40,  1.53it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:25<01:40,  1.51it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:26<01:40,  1.50it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:26<01:39,  1.50it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:27<01:39,  1.50it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:28<01:38,  1.50it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:29<01:42,  1.44it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:29<01:44,  1.40it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:30<01:45,  1.37it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:31<01:46,  1.35it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:32<01:47,  1.34it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:32<01:46,  1.33it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:33<01:46,  1.32it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:34<01:46,  1.32it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:35<01:45,  1.32it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:35<01:45,  1.31it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:36<01:44,  1.31it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:37<01:44,  1.31it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:38<01:43,  1.31it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:38<01:42,  1.31it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:39<01:41,  1.31it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:40<01:40,  1.31it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:41<01:40,  1.31it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:42<01:39,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:42<01:38,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:43<01:37,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:44<01:36,  1.31it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:45<01:36,  1.31it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:45<01:35,  1.31it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:46<01:34,  1.31it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:47<01:33,  1.31it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:48<01:32,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:48<01:32,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:49<01:31,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:50<01:30,  1.31it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:51<01:30,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:51<01:29,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:52<01:28,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:53<01:27,  1.31it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:54<01:27,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:55<01:26,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:55<01:25,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:56<01:24,  1.31it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:57<01:23,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:58<01:23,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:58<01:22,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:59<01:21,  1.31it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [01:00<01:20,  1.32it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [01:01<01:19,  1.32it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [01:01<01:18,  1.32it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [01:02<01:18,  1.32it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:03<01:17,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:04<01:16,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:04<01:15,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:05<01:15,  1.31it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:06<01:14,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:07<01:13,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:07<01:13,  1.31it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:08<01:12,  1.31it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:09<01:11,  1.32it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:10<01:10,  1.31it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:10<01:10,  1.31it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:11<01:09,  1.31it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:12<01:08,  1.31it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:13<01:07,  1.31it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:14<01:06,  1.32it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:14<01:06,  1.32it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:15<01:05,  1.32it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:16<01:04,  1.32it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:17<01:03,  1.32it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:17<01:03,  1.32it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:18<01:02,  1.32it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:19<01:01,  1.32it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:20<01:00,  1.32it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:20<01:00,  1.31it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:21<00:59,  1.31it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:22<00:58,  1.31it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:23<00:57,  1.32it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:23<00:57,  1.32it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:24<00:56,  1.31it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:25<00:55,  1.31it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:26<00:54,  1.31it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:26<00:54,  1.31it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:27<00:53,  1.31it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:28<00:52,  1.31it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:29<00:51,  1.31it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:30<00:51,  1.31it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:30<00:50,  1.31it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:31<00:49,  1.31it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:32<00:49,  1.31it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:33<00:48,  1.31it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:33<00:47,  1.31it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:34<00:46,  1.31it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:35<00:45,  1.31it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:36<00:44,  1.31it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:36<00:44,  1.31it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:37<00:43,  1.32it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:38<00:42,  1.31it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:39<00:41,  1.31it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:39<00:41,  1.31it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:40<00:40,  1.31it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:41<00:39,  1.31it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:42<00:38,  1.31it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:42<00:38,  1.31it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:43<00:37,  1.32it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:44<00:36,  1.31it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:45<00:35,  1.32it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:46<00:34,  1.32it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:46<00:34,  1.32it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:47<00:33,  1.32it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:48<00:32,  1.32it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:49<00:31,  1.32it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:49<00:31,  1.32it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:50<00:30,  1.32it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:51<00:29,  1.32it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:52<00:28,  1.32it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:52<00:28,  1.32it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:53<00:27,  1.32it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:54<00:26,  1.32it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:55<00:25,  1.32it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:55<00:25,  1.31it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:56<00:24,  1.31it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:57<00:23,  1.31it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:58<00:22,  1.31it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:58<00:22,  1.31it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:59<00:21,  1.31it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [02:00<00:20,  1.30it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [02:01<00:19,  1.31it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [02:02<00:19,  1.31it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [02:02<00:18,  1.31it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [02:03<00:17,  1.31it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [02:04<00:16,  1.31it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [02:05<00:16,  1.31it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [02:05<00:15,  1.31it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:06<00:14,  1.31it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:07<00:13,  1.31it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:08<00:12,  1.31it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:08<00:12,  1.31it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:09<00:11,  1.31it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:10<00:10,  1.31it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:11<00:09,  1.30it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:11<00:09,  1.30it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:12<00:08,  1.30it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:13<00:07,  1.31it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:14<00:06,  1.30it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:15<00:06,  1.30it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:15<00:05,  1.31it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:16<00:04,  1.31it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:17<00:03,  1.31it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:18<00:03,  1.31it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:18<00:02,  1.31it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:19<00:01,  1.31it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:20<00:00,  1.31it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:21<00:00,  1.36it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:21<00:00,  1.42it/s]




Loss: 0.9437




Precision: 0.7778, Recall: 0.7838, F1-Score: 0.7766




              precision    recall  f1-score   support

           0       0.75      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.68      0.77       882
           6       0.86      0.80      0.82       940
           7       0.47      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.76      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.80      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9144215939798642, 0.9144215939798642)




CCA coefficients mean non-concern: (0.9180734722085134, 0.9180734722085134)




Linear CKA concern: 0.9928452125278048




Linear CKA non-concern: 0.9855993177990924




Kernel CKA concern: 0.9905648159105322




Kernel CKA non-concern: 0.9860908577120168




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 3




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:35,  2.08it/s]

Evaluating:   1%|          | 2/200 [00:00<01:36,  2.05it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:36,  2.05it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:35,  2.04it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:35,  2.05it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:35,  2.04it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:34,  2.04it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:34,  2.03it/s]

Evaluating:   4%|?둜         | 9/200 [00:04<01:34,  2.03it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:33,  2.02it/s]

Evaluating:   6%|?둛         | 11/200 [00:05<01:33,  2.02it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:33,  2.01it/s]

Evaluating:   6%|?둚         | 13/200 [00:06<01:33,  2.00it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 15/200 [00:07<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:32,  2.00it/s]

Evaluating:   8%|?둙         | 17/200 [00:08<01:31,  1.99it/s]

Evaluating:   9%|?둘         | 18/200 [00:08<01:31,  1.99it/s]

Evaluating:  10%|?둘         | 19/200 [00:09<01:30,  2.00it/s]

Evaluating:  10%|?둗         | 20/200 [00:09<01:30,  2.00it/s]

Evaluating:  10%|?둗         | 21/200 [00:10<01:29,  2.00it/s]

Evaluating:  11%|?둗         | 22/200 [00:10<01:29,  2.00it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:11<01:28,  2.00it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:11<01:28,  2.00it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:12<01:27,  2.00it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:12<01:27,  2.00it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:13<01:26,  2.00it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:13<01:26,  1.99it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:14<01:26,  1.99it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:14<01:26,  1.97it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:15<01:26,  1.96it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:16<01:27,  1.93it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:16<01:27,  1.91it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:17<01:27,  1.89it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:17<01:29,  1.85it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:18<01:29,  1.83it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:18<01:30,  1.81it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:19<01:30,  1.79it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:19<01:30,  1.78it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:20<01:31,  1.76it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:21<01:33,  1.71it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:21<01:34,  1.67it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:22<01:35,  1.65it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:23<01:35,  1.64it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:23<01:38,  1.57it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:24<01:41,  1.51it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:25<01:43,  1.48it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:25<01:44,  1.45it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:26<01:44,  1.44it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:27<01:44,  1.43it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:27<01:44,  1.42it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:28<01:44,  1.42it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:29<01:44,  1.41it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:30<01:43,  1.41it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:30<01:43,  1.41it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:31<01:42,  1.41it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:32<01:42,  1.40it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:32<01:41,  1.40it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:33<01:40,  1.40it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:34<01:40,  1.40it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:35<01:39,  1.40it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:35<01:38,  1.40it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:36<01:37,  1.40it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:37<01:37,  1.40it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:37<01:36,  1.40it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:38<01:38,  1.36it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:39<01:41,  1.31it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:40<01:43,  1.27it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:41<01:45,  1.25it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:42<01:45,  1.23it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:42<01:46,  1.22it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:43<01:48,  1.18it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:44<01:46,  1.20it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:45<01:47,  1.17it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:46<01:45,  1.19it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:47<01:45,  1.17it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:48<01:43,  1.19it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:48<01:41,  1.21it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:49<01:39,  1.22it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:50<01:38,  1.22it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:51<01:37,  1.23it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:52<01:35,  1.23it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:52<01:34,  1.23it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:53<01:34,  1.23it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:54<01:33,  1.23it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:55<01:32,  1.24it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:56<01:31,  1.24it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:56<01:30,  1.24it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:57<01:29,  1.24it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:58<01:28,  1.24it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:59<01:27,  1.24it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [01:00<01:26,  1.25it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [01:00<01:24,  1.27it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [01:01<01:22,  1.29it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [01:02<01:21,  1.29it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [01:03<01:19,  1.30it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [01:03<01:18,  1.31it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:04<01:17,  1.31it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:05<01:16,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:06<01:15,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:06<01:15,  1.32it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:07<01:14,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:08<01:13,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:09<01:12,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:10<01:12,  1.32it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:10<01:11,  1.32it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:11<01:10,  1.32it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:12<01:09,  1.32it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:13<01:08,  1.32it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:13<01:08,  1.32it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:14<01:07,  1.32it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:15<01:06,  1.32it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:16<01:05,  1.33it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:16<01:04,  1.33it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:17<01:04,  1.33it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:18<01:03,  1.33it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:19<01:02,  1.33it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:19<01:01,  1.33it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:20<01:01,  1.32it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:21<01:00,  1.32it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:22<00:59,  1.33it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:22<00:58,  1.33it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:23<00:57,  1.34it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:24<00:56,  1.33it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:25<00:55,  1.35it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:25<00:54,  1.36it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:26<00:53,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:27<00:52,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:27<00:51,  1.38it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:28<00:50,  1.38it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:29<00:49,  1.38it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:30<00:49,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:30<00:48,  1.39it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:31<00:47,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:32<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:32<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:33<00:45,  1.39it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:34<00:44,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:35<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:35<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:36<00:42,  1.39it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:37<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:37<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:38<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:39<00:39,  1.39it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:40<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:40<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:41<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:42<00:36,  1.39it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:43<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:43<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:44<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:45<00:33,  1.39it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:45<00:33,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:46<00:32,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:47<00:31,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:48<00:30,  1.39it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:48<00:30,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:49<00:29,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:50<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:50<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:51<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:52<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:53<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:53<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:54<00:24,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:55<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:55<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:56<00:22,  1.39it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:57<00:21,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:58<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:58<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:59<00:19,  1.39it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [02:00<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [02:01<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [02:01<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [02:02<00:16,  1.39it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [02:03<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [02:03<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [02:04<00:14,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:05<00:13,  1.39it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:06<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:06<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:07<00:11,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:08<00:10,  1.39it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:08<00:10,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:09<00:09,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:10<00:08,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:11<00:07,  1.39it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:11<00:07,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:12<00:06,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:13<00:05,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:13<00:05,  1.39it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:14<00:04,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:15<00:03,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:16<00:02,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:16<00:02,  1.39it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:17<00:01,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:18<00:00,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:18<00:00,  1.45it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:18<00:00,  1.44it/s]




Loss: 0.9433




Precision: 0.7780, Recall: 0.7842, F1-Score: 0.7769




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.77       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.68      0.77       882
           6       0.86      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.76      0.79      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9163156477307943, 0.9163156477307943)




CCA coefficients mean non-concern: (0.9178847923917189, 0.9178847923917189)




Linear CKA concern: 0.9906536070591734




Linear CKA non-concern: 0.985558276890214




Kernel CKA concern: 0.9895862475304144




Kernel CKA non-concern: 0.9862795238992658




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 4




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.34it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:24,  2.31it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:24,  2.30it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.31it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:23,  2.30it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:23,  2.30it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:22,  2.30it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:22,  2.30it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.30it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:21,  2.30it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:21,  2.30it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.29it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:19,  2.29it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.29it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:18,  2.29it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:18,  2.29it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.29it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:17,  2.29it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:17,  2.29it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:16,  2.29it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.29it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.29it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.28it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:15,  2.23it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:16,  2.21it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:16,  2.19it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:16,  2.17it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:17,  2.12it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:15<01:18,  2.08it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:20,  2.03it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:21,  1.98it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:22,  1.94it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:23,  1.91it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:25,  1.86it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:27,  1.80it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:30,  1.74it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:31,  1.71it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:32,  1.68it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:32,  1.66it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:33,  1.64it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:33,  1.62it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:35,  1.58it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:36,  1.56it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:36,  1.54it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:36,  1.53it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:36,  1.52it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:36,  1.52it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:34,  1.51it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:35,  1.49it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:35,  1.47it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:36,  1.46it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:35,  1.45it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:35,  1.44it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:35,  1.44it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:34,  1.43it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:34,  1.43it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:33,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:35<01:33,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:32,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:31,  1.43it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:31,  1.43it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:30,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:34,  1.35it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:37,  1.31it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:38,  1.27it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:42<01:39,  1.26it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:39,  1.24it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:39,  1.23it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:39,  1.23it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:45<01:38,  1.22it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:46<01:38,  1.22it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:47<01:37,  1.22it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:47<01:36,  1.22it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:48<01:36,  1.22it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:49<01:33,  1.24it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:50<01:31,  1.25it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:50<01:30,  1.27it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:51<01:28,  1.28it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:52<01:27,  1.28it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:53<01:26,  1.28it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:54<01:25,  1.28it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:54<01:24,  1.29it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:55<01:23,  1.29it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:56<01:21,  1.31it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:57<01:19,  1.33it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:57<01:17,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:58<01:16,  1.36it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:59<01:15,  1.37it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:59<01:14,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:00<01:13,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:01<01:12,  1.38it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:02<01:11,  1.38it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:02<01:11,  1.37it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:03<01:10,  1.37it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:04<01:10,  1.37it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:05<01:09,  1.37it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:05<01:08,  1.37it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:06<01:07,  1.38it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:07<01:06,  1.38it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:07<01:06,  1.37it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:08<01:05,  1.38it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:09<01:04,  1.38it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:10<01:03,  1.38it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:10<01:03,  1.38it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:11<01:02,  1.38it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:12<01:01,  1.38it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:13<01:00,  1.38it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:13<01:00,  1.38it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:14<00:59,  1.38it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:15<00:58,  1.38it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:15<00:58,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:16<00:57,  1.38it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:17<00:56,  1.38it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:18<00:56,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:18<00:55,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:19<00:54,  1.37it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:20<00:53,  1.38it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:21<00:53,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:21<00:52,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:22<00:51,  1.37it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:23<00:51,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:23<00:50,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:24<00:49,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:25<00:48,  1.37it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:26<00:48,  1.36it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:26<00:47,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:27<00:46,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:28<00:45,  1.37it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:29<00:45,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:29<00:44,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:30<00:43,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:31<00:43,  1.37it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:32<00:42,  1.38it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:32<00:41,  1.38it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:33<00:40,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:34<00:39,  1.38it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:34<00:39,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:35<00:38,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:36<00:37,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:37<00:37,  1.37it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:37<00:36,  1.37it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:38<00:35,  1.37it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:39<00:35,  1.36it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:40<00:34,  1.36it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:40<00:33,  1.37it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:41<00:32,  1.37it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:42<00:32,  1.37it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:42<00:31,  1.37it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:43<00:30,  1.37it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:44<00:29,  1.37it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:45<00:29,  1.38it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:45<00:28,  1.37it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:46<00:27,  1.37it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:47<00:26,  1.37it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:48<00:26,  1.37it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:48<00:25,  1.37it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:49<00:24,  1.37it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:50<00:24,  1.37it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:50<00:23,  1.37it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:51<00:22,  1.38it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:52<00:21,  1.37it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:53<00:21,  1.38it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:53<00:20,  1.37it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:54<00:19,  1.37it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:55<00:18,  1.38it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:56<00:18,  1.37it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:56<00:17,  1.37it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:57<00:16,  1.37it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:58<00:16,  1.37it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:58<00:15,  1.37it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:59<00:14,  1.37it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:00<00:13,  1.37it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:01<00:13,  1.38it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:01<00:12,  1.38it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:02<00:11,  1.38it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:03<00:10,  1.38it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:04<00:10,  1.38it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:04<00:09,  1.38it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:05<00:08,  1.37it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:06<00:07,  1.38it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:06<00:07,  1.37it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:07<00:06,  1.37it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:08<00:05,  1.37it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:09<00:05,  1.37it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:09<00:04,  1.37it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:10<00:03,  1.38it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:11<00:02,  1.38it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:12<00:02,  1.37it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:12<00:01,  1.38it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:13<00:00,  1.38it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.43it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.49it/s]




Loss: 0.9438




Precision: 0.7774, Recall: 0.7839, F1-Score: 0.7765




              precision    recall  f1-score   support

           0       0.75      0.66      0.70       797
           1       0.85      0.71      0.77       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.79      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9223493562164323, 0.9223493562164323)




CCA coefficients mean non-concern: (0.9182266836901366, 0.9182266836901366)




Linear CKA concern: 0.9934767080051486




Linear CKA non-concern: 0.9857459276114039




Kernel CKA concern: 0.9920484827836986




Kernel CKA non-concern: 0.9864297753268148




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 5




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.38it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.34it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:24,  2.32it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:24,  2.32it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:24,  2.31it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.31it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:23,  2.30it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:23,  2.30it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:22,  2.30it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:22,  2.30it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.30it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:21,  2.30it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:21,  2.30it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:20,  2.30it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:20,  2.30it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.30it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:19,  2.29it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.29it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:18,  2.29it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:18,  2.29it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.29it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:17,  2.29it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:09<01:17,  2.29it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.28it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.28it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:14,  2.28it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:14,  2.28it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:14,  2.28it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:14,  2.23it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:15,  2.21it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:15,  2.19it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:15<01:15,  2.16it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:16,  2.14it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:16<01:18,  2.06it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:20,  2.01it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:17<01:21,  1.97it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:21,  1.95it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:18<01:23,  1.90it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:26,  1.82it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:28,  1.76it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:20<01:30,  1.71it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:31,  1.68it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:31,  1.66it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:32,  1.65it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:32,  1.64it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:23<01:32,  1.61it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:34,  1.58it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:34,  1.56it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:25<01:35,  1.55it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:34,  1.54it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:34,  1.53it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:27<01:34,  1.52it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:33,  1.53it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:32,  1.53it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:29<01:34,  1.49it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:30<01:35,  1.46it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:36,  1.44it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:36,  1.43it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:32<01:36,  1.42it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:36,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:36,  1.40it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:34<01:35,  1.40it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:35<01:35,  1.40it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:34,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:34,  1.39it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:37<01:33,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:32,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:32,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:39<01:31,  1.39it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:40<01:30,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:29,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:29,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:42<01:28,  1.39it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:43<01:27,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:27,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:26,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:45<01:25,  1.39it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:24,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:24,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:47<01:23,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:48<01:22,  1.39it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:22,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:21,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:50<01:20,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:19,  1.39it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:19,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:52<01:18,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:53<01:17,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:17,  1.39it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:16,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:55<01:15,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:14,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:14,  1.39it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:57<01:13,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:58<01:12,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:12,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:11,  1.39it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:00<01:10,  1.39it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:09,  1.39it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:09,  1.39it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:08,  1.39it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:03<01:07,  1.39it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:06,  1.39it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:05<01:06,  1.39it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:05<01:05,  1.39it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:04,  1.39it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:04,  1.39it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<01:03,  1.39it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:08<01:02,  1.39it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<01:01,  1.39it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:10<01:01,  1.39it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:10<01:00,  1.39it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:11<00:59,  1.39it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:12<00:59,  1.39it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:13<00:58,  1.39it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:13<00:57,  1.39it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:14<00:56,  1.39it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:56,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:15<00:55,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:16<00:54,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:53,  1.39it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:18<00:53,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:18<00:52,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:19<00:51,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:20<00:51,  1.39it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:20<00:50,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:21<00:49,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:22<00:48,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:23<00:48,  1.39it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:23<00:47,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:24<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:25<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:26<00:45,  1.39it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:26<00:44,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:27<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:28<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:28<00:42,  1.39it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:29<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:30<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:31<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:31<00:39,  1.39it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:32<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:33<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:33<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:34<00:36,  1.39it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:35<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:36<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:36<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:37<00:33,  1.39it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:38<00:33,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:38<00:32,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:39<00:31,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:40<00:30,  1.39it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:41<00:30,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:41<00:29,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:42<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:43<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:44<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:44<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:45<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:46<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:46<00:24,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:47<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:48<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:49<00:22,  1.39it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:49<00:21,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:50<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:51<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:51<00:19,  1.39it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:52<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:53<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:54<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:54<00:16,  1.39it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:55<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:56<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:56<00:14,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:57<00:13,  1.39it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:58<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:59<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:59<00:11,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:00<00:10,  1.39it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:01<00:10,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:02<00:09,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:02<00:08,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:03<00:07,  1.39it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:04<00:07,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:04<00:06,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:05<00:05,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:05,  1.39it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:07<00:04,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:07<00:03,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:08<00:02,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:09<00:02,  1.39it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:09<00:01,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:10<00:00,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.45it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.52it/s]




Loss: 0.9424




Precision: 0.7775, Recall: 0.7835, F1-Score: 0.7765




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.84      0.71      0.77       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.69      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.66      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.79      0.77       670
          11       0.63      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.83      0.86      0.85       314
          14       0.86      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9153142056981338, 0.9153142056981338)




CCA coefficients mean non-concern: (0.9184638035785471, 0.9184638035785471)




Linear CKA concern: 0.9907629107109578




Linear CKA non-concern: 0.9855504997045141




Kernel CKA concern: 0.9886280650467378




Kernel CKA non-concern: 0.9863627063654354




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 6




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.33it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:24,  2.31it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:24,  2.30it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.30it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.30it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:23,  2.29it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.29it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:22,  2.29it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.29it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.29it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:21,  2.29it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.29it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.29it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.28it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.29it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:18,  2.28it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.28it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.28it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.28it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.28it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.27it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:14,  2.25it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:14,  2.23it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:15,  2.21it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:15,  2.18it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:15<01:16,  2.14it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:17,  2.10it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:16<01:19,  2.04it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:22,  1.94it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:26,  1.85it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:28,  1.80it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:29,  1.76it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:30,  1.73it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:31,  1.71it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:31,  1.70it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:31,  1.69it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:31,  1.68it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:33,  1.62it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:35,  1.59it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:36,  1.56it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:36,  1.54it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:36,  1.53it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:36,  1.53it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:36,  1.52it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:34,  1.51it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:35,  1.49it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:36,  1.46it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:37,  1.44it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:37,  1.42it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:37,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:37,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:37,  1.40it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:36,  1.40it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:36,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:35,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:34,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:34,  1.39it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:33,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:32,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:32,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:31,  1.39it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:30,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:29,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:29,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:28,  1.39it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:27,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:27,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:26,  1.39it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:46<01:25,  1.39it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:24,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:24,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:23,  1.39it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:49<01:22,  1.39it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:22,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:21,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:51<01:20,  1.39it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:19,  1.39it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:19,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:18,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:54<01:17,  1.39it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:17,  1.39it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:16,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:15,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:14,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:14,  1.39it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:13,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:59<01:12,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:11,  1.39it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:12,  1.36it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:13,  1.34it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:02<01:13,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:13,  1.31it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:12,  1.30it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:12,  1.30it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:05<01:11,  1.29it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:06<01:11,  1.29it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:10,  1.29it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:07<01:09,  1.29it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:08<01:09,  1.29it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:09<01:08,  1.29it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:09<01:07,  1.28it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:10<01:07,  1.28it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:11<01:06,  1.28it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:12<01:05,  1.29it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:13<01:04,  1.29it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:13<01:03,  1.29it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:14<01:02,  1.29it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:15<01:02,  1.29it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:16<01:01,  1.29it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:16<01:00,  1.29it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:17<00:59,  1.29it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:18<00:58,  1.29it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:19<00:58,  1.29it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:20<00:57,  1.29it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:20<00:56,  1.29it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:21<00:55,  1.29it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:22<00:55,  1.29it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:23<00:54,  1.29it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:23<00:53,  1.29it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:24<00:52,  1.29it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:25<00:51,  1.29it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:26<00:50,  1.30it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:26<00:50,  1.30it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:27<00:49,  1.30it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:28<00:47,  1.32it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:29<00:46,  1.34it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:29<00:45,  1.35it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:30<00:44,  1.36it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:31<00:43,  1.37it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:32<00:42,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:32<00:41,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:33<00:40,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:34<00:40,  1.37it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:35<00:39,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:35<00:38,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:36<00:37,  1.38it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:37<00:36,  1.38it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:37<00:36,  1.38it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:38<00:35,  1.38it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:39<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:40<00:34,  1.38it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:40<00:33,  1.38it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:41<00:32,  1.37it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:42<00:31,  1.38it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:42<00:31,  1.38it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:43<00:30,  1.38it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:44<00:29,  1.38it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:45<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:45<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:46<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:47<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:48<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:48<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:49<00:24,  1.38it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:50<00:23,  1.38it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:50<00:23,  1.37it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:51<00:22,  1.37it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:52<00:21,  1.37it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:53<00:21,  1.37it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:53<00:20,  1.37it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:54<00:19,  1.37it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:55<00:19,  1.36it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:56<00:18,  1.37it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:56<00:17,  1.36it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:57<00:16,  1.36it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:58<00:16,  1.36it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:59<00:15,  1.36it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:59<00:14,  1.36it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:00<00:14,  1.35it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:01<00:13,  1.35it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:01<00:12,  1.35it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:02<00:11,  1.36it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:03<00:11,  1.36it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:04<00:10,  1.36it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:04<00:09,  1.36it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:05<00:08,  1.36it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:06<00:08,  1.36it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:07<00:07,  1.36it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:07<00:06,  1.36it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:08<00:05,  1.36it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:09<00:05,  1.36it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:10<00:04,  1.36it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:10<00:03,  1.36it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:11<00:02,  1.36it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:12<00:02,  1.36it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:12<00:01,  1.37it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:13<00:00,  1.37it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.42it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.49it/s]




Loss: 0.9438




Precision: 0.7779, Recall: 0.7836, F1-Score: 0.7767




              precision    recall  f1-score   support

           0       0.75      0.67      0.71       797
           1       0.85      0.71      0.77       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.84      0.80      0.82       940
           7       0.48      0.59      0.53       473
           8       0.66      0.85      0.74       746
           9       0.59      0.73      0.66       689
          10       0.76      0.78      0.77       670
          11       0.63      0.80      0.70       312
          12       0.72      0.80      0.76       665
          13       0.83      0.86      0.84       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.915108075696245, 0.915108075696245)




CCA coefficients mean non-concern: (0.9179831736715174, 0.9179831736715174)




Linear CKA concern: 0.9913633858295647




Linear CKA non-concern: 0.9856593634579166




Kernel CKA concern: 0.9898822699879019




Kernel CKA non-concern: 0.9864132309571088




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 7




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.33it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:24,  2.31it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:24,  2.30it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.30it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.30it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:23,  2.29it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.29it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:22,  2.29it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.29it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.29it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:21,  2.29it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.29it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.29it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.29it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.28it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.28it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:18,  2.28it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.28it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.28it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.28it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.28it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.28it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:14,  2.28it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:14,  2.23it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:15,  2.19it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:16,  2.17it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:15<01:16,  2.15it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:16,  2.13it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:16<01:18,  2.07it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:19,  2.03it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:17<01:20,  2.00it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:20,  1.98it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:23,  1.90it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:25,  1.83it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:27,  1.78it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:20<01:28,  1.75it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:29,  1.72it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:31,  1.67it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:32,  1.64it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:33,  1.62it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:23<01:33,  1.61it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:33,  1.60it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:33,  1.59it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:25<01:37,  1.51it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:40,  1.45it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:43,  1.40it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:44,  1.38it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:45,  1.36it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:45,  1.34it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:45,  1.33it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:45,  1.33it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:32<01:45,  1.32it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:44,  1.32it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:43,  1.32it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:34<01:43,  1.32it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:35<01:42,  1.32it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:41,  1.32it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:41,  1.32it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:37<01:40,  1.31it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:38<01:39,  1.31it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:39,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:39<01:38,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:40<01:37,  1.31it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:41<01:36,  1.31it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:35,  1.31it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:42<01:35,  1.31it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:43<01:34,  1.32it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:44<01:33,  1.32it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:32,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:45<01:32,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:46<01:31,  1.31it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:47<01:31,  1.31it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:48<01:30,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:48<01:29,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:49<01:28,  1.31it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:50<01:27,  1.31it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:51<01:27,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:51<01:26,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:52<01:25,  1.31it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:53<01:24,  1.31it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:54<01:23,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:54<01:23,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:55<01:22,  1.31it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:56<01:21,  1.31it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:57<01:21,  1.31it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:57<01:20,  1.31it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:58<01:19,  1.31it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:59<01:18,  1.32it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [01:00<01:17,  1.31it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:01<01:16,  1.31it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:01<01:16,  1.32it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:02<01:15,  1.32it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:03<01:14,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:04<01:13,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:04<01:12,  1.32it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:05<01:12,  1.32it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:06<01:11,  1.32it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:07<01:10,  1.31it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:07<01:09,  1.32it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:08<01:09,  1.32it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:09<01:08,  1.32it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:10<01:06,  1.33it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:10<01:05,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:11<01:04,  1.36it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:12<01:02,  1.37it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:12<01:01,  1.37it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:13<01:00,  1.38it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:14<01:00,  1.38it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:15<00:59,  1.38it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:15<00:58,  1.39it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:16<00:57,  1.39it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:17<00:56,  1.39it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:18<00:56,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:18<00:55,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:19<00:54,  1.39it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:20<00:54,  1.39it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:20<00:53,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:21<00:52,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:22<00:51,  1.39it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:23<00:51,  1.39it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:23<00:50,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:24<00:49,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:25<00:48,  1.39it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:25<00:48,  1.39it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:26<00:47,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:27<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:28<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:28<00:45,  1.39it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:29<00:44,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:30<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:30<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:31<00:42,  1.39it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:32<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:33<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:33<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:34<00:39,  1.39it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:35<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:36<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:36<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:37<00:36,  1.39it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:38<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:38<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:39<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:40<00:33,  1.39it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:41<00:33,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:41<00:32,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:42<00:31,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:43<00:30,  1.39it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:43<00:30,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:44<00:29,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:45<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:46<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:46<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:47<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:48<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:48<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:49<00:24,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:50<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:51<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:51<00:22,  1.39it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:52<00:21,  1.38it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:53<00:20,  1.38it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:54<00:20,  1.38it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:54<00:19,  1.39it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:55<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:56<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:56<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:57<00:16,  1.39it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:58<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:59<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:59<00:14,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [02:00<00:13,  1.39it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [02:01<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [02:01<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:02<00:11,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:03<00:10,  1.39it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:04<00:10,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:04<00:09,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:05<00:08,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:06<00:07,  1.39it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:06<00:07,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:07<00:06,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:08<00:05,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:09<00:05,  1.39it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:09<00:04,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:10<00:03,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:11<00:02,  1.39it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:12<00:02,  1.39it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:12<00:01,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:13<00:00,  1.39it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.44it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:14<00:00,  1.49it/s]




Loss: 0.9438




Precision: 0.7781, Recall: 0.7839, F1-Score: 0.7768




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.83       940
           7       0.47      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.59      0.73      0.65       689
          10       0.75      0.79      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.80      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9193721758818886, 0.9193721758818886)




CCA coefficients mean non-concern: (0.9177789335478214, 0.9177789335478214)




Linear CKA concern: 0.9894248814267471




Linear CKA non-concern: 0.9864574283420963




Kernel CKA concern: 0.9886022425693719




Kernel CKA non-concern: 0.9871229263164104




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 8




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:24,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.29it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.29it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.29it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:23,  2.29it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.28it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.28it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.28it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.28it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.28it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.28it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.28it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.28it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.28it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.28it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.27it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.28it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.28it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.28it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.28it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.27it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:14,  2.26it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:15,  2.23it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:15,  2.20it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:16,  2.17it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:17,  2.13it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:19,  2.07it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:20,  2.03it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:22,  1.96it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:26,  1.87it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:28,  1.81it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:29,  1.77it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:30,  1.74it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:20<01:30,  1.73it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:30,  1.71it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:32,  1.68it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:34,  1.64it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:35,  1.61it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:23<01:35,  1.59it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:35,  1.58it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:35,  1.57it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:25<01:35,  1.56it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:34,  1.56it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:34,  1.56it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:27<01:35,  1.53it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:36,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:36,  1.49it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:29<01:36,  1.48it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:36,  1.48it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:35,  1.47it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:36,  1.45it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:37,  1.43it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:37,  1.42it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:36,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:34<01:36,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:36,  1.41it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:35,  1.40it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:34,  1.40it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:33,  1.40it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:33,  1.41it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:32,  1.41it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:39<01:31,  1.41it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:31,  1.41it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:30,  1.40it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:30,  1.40it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:29,  1.40it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:28,  1.40it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:27,  1.40it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:27,  1.40it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:26,  1.40it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:25,  1.40it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:46<01:24,  1.40it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:24,  1.40it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:23,  1.40it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:23,  1.40it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:49<01:22,  1.40it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:21,  1.40it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:20,  1.40it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:51<01:20,  1.40it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:19,  1.40it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:18,  1.40it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:17,  1.40it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:54<01:16,  1.40it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:16,  1.40it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:15,  1.40it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:14,  1.40it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:14,  1.40it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:13,  1.41it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:12,  1.40it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:59<01:12,  1.40it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:11,  1.41it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:10,  1.40it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:09,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:09,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:08,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:07,  1.41it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:06,  1.40it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:06,  1.41it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:05<01:05,  1.40it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:04,  1.40it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:04,  1.41it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:03,  1.40it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<01:02,  1.41it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:09<01:01,  1.41it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<01:01,  1.41it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:10<01:00,  1.41it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:11<00:59,  1.41it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:11<00:59,  1.41it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:12<00:58,  1.40it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:13<00:57,  1.40it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:14<00:57,  1.40it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:14<00:56,  1.40it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:55,  1.40it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:16<00:54,  1.40it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:16<00:54,  1.40it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:53,  1.40it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:18<00:52,  1.40it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:19<00:51,  1.40it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:19<00:51,  1.40it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:20<00:50,  1.40it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:21<00:49,  1.40it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:21<00:49,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:22<00:48,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:23<00:47,  1.40it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:24<00:46,  1.41it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:24<00:46,  1.40it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:25<00:45,  1.40it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:26<00:44,  1.40it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:26<00:44,  1.40it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:27<00:43,  1.41it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:28<00:42,  1.41it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:28<00:42,  1.40it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:29<00:41,  1.41it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:30<00:40,  1.40it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:31<00:39,  1.41it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:31<00:39,  1.41it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:32<00:38,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:33<00:37,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:33<00:36,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:34<00:36,  1.41it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:35<00:35,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:36<00:34,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:36<00:34,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:37<00:33,  1.41it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:38<00:32,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:38<00:32,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:39<00:31,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:40<00:30,  1.41it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:41<00:29,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:41<00:29,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:42<00:28,  1.40it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:43<00:27,  1.41it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:43<00:27,  1.40it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:44<00:26,  1.40it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:45<00:25,  1.40it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:46<00:24,  1.40it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:46<00:24,  1.40it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:47<00:23,  1.40it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:48<00:22,  1.41it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:48<00:22,  1.41it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:49<00:21,  1.40it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:50<00:20,  1.41it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:51<00:19,  1.40it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:51<00:19,  1.40it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:52<00:18,  1.40it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:53<00:17,  1.40it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:53<00:17,  1.40it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:54<00:16,  1.40it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:55<00:15,  1.40it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:56<00:15,  1.40it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:56<00:14,  1.40it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:57<00:13,  1.40it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:58<00:12,  1.40it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:58<00:12,  1.40it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:59<00:11,  1.40it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:00<00:10,  1.40it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:01<00:09,  1.40it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:01<00:09,  1.40it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:02<00:08,  1.40it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:03<00:07,  1.41it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:03<00:07,  1.40it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:04<00:06,  1.41it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:05<00:05,  1.40it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:04,  1.40it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:06<00:04,  1.41it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:07<00:03,  1.40it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:08<00:02,  1.40it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:08<00:02,  1.40it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:09<00:01,  1.40it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:10<00:00,  1.40it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.46it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.53it/s]




Loss: 0.9441




Precision: 0.7777, Recall: 0.7835, F1-Score: 0.7764




              precision    recall  f1-score   support

           0       0.75      0.66      0.71       797
           1       0.85      0.71      0.77       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.82       940
           7       0.48      0.59      0.53       473
           8       0.66      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.73      0.81      0.77       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.98      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9176607645845222, 0.9176607645845222)




CCA coefficients mean non-concern: (0.9172857328659937, 0.9172857328659937)




Linear CKA concern: 0.9909561402333511




Linear CKA non-concern: 0.9847721742575822




Kernel CKA concern: 0.9894073154225246




Kernel CKA non-concern: 0.9856890759115742




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 9




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:24,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.29it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.29it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.29it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.29it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.29it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.28it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.28it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.28it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.28it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.28it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:20,  2.27it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.27it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:20,  2.27it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.27it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.27it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.27it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.27it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:17,  2.27it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.27it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.27it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.26it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:16,  2.25it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:16,  2.23it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:16,  2.21it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:16,  2.18it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:17,  2.15it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:15<01:19,  2.08it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:21,  2.03it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:22,  2.00it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:23,  1.94it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:25,  1.89it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:26,  1.85it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:27,  1.82it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:19<01:28,  1.79it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:29,  1.76it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:20<01:30,  1.73it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:33,  1.68it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:34,  1.64it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:22<01:34,  1.62it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:35,  1.61it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:23<01:35,  1.60it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:24<01:35,  1.59it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:34,  1.58it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:25<01:34,  1.58it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:36,  1.54it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:37,  1.51it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:27<01:38,  1.49it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:28<01:38,  1.48it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:38,  1.47it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:29<01:37,  1.46it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:30<01:37,  1.46it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:37,  1.45it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:36,  1.45it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:32<01:35,  1.45it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:35,  1.45it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:34,  1.45it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:34<01:34,  1.44it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:33,  1.45it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:32,  1.45it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:31,  1.45it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:37<01:31,  1.45it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:30,  1.45it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:29,  1.45it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:39<01:31,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:31,  1.40it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:31,  1.38it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:31,  1.38it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:42<01:30,  1.38it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:30,  1.37it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:29,  1.37it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:29,  1.37it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:45<01:27,  1.38it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:27,  1.37it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:46<01:26,  1.37it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:47<01:25,  1.38it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:25,  1.37it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:24,  1.37it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:49<01:24,  1.37it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:50<01:23,  1.36it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:22,  1.37it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:51<01:22,  1.37it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:52<01:21,  1.36it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:53<01:20,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:19,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:54<01:19,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:55<01:18,  1.36it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:56<01:17,  1.36it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:17,  1.36it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:57<01:16,  1.36it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:58<01:15,  1.37it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:14,  1.36it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:59<01:14,  1.36it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:00<01:13,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:01<01:12,  1.36it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:11,  1.36it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:02<01:10,  1.37it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:03<01:10,  1.36it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:04<01:09,  1.37it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:08,  1.37it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:05<01:08,  1.37it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:06<01:07,  1.37it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:06,  1.36it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:07<01:05,  1.37it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:08<01:04,  1.38it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:09<01:04,  1.37it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:09<01:03,  1.37it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:10<01:02,  1.37it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:11<01:02,  1.37it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:12<01:01,  1.37it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:12<01:00,  1.37it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:13<01:00,  1.36it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:14<00:59,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:15<00:58,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:15<00:57,  1.37it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:16<00:57,  1.36it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:17<00:56,  1.36it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:17<00:55,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:18<00:54,  1.37it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:19<00:54,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:20<00:53,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:20<00:52,  1.37it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:21<00:51,  1.37it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:22<00:51,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:23<00:50,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:23<00:49,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:24<00:49,  1.37it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:25<00:48,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:25<00:47,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:26<00:46,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:27<00:46,  1.37it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:28<00:45,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:28<00:44,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:29<00:43,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:30<00:43,  1.37it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:31<00:42,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:31<00:41,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:32<00:40,  1.37it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:33<00:40,  1.37it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:34<00:39,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:34<00:38,  1.37it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:35<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:36<00:36,  1.40it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:36<00:35,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:37<00:34,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:38<00:33,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:38<00:32,  1.43it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:39<00:32,  1.43it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:40<00:31,  1.44it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:41<00:30,  1.44it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:41<00:29,  1.44it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:42<00:29,  1.44it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:43<00:28,  1.44it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:43<00:27,  1.44it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:44<00:27,  1.44it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:45<00:26,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:45<00:25,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:46<00:25,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:47<00:24,  1.44it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:47<00:23,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:48<00:22,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:49<00:22,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:50<00:21,  1.44it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:50<00:20,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:51<00:20,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:52<00:19,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:52<00:18,  1.44it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:53<00:18,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:54<00:17,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:54<00:16,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:55<00:16,  1.44it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:56<00:15,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:57<00:14,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:57<00:13,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:58<00:13,  1.44it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:59<00:12,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:59<00:11,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:00<00:11,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:01<00:10,  1.44it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:01<00:09,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:02<00:09,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:03<00:08,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:03<00:07,  1.44it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:04<00:06,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:05<00:06,  1.44it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:06<00:05,  1.44it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:04,  1.44it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:07<00:04,  1.44it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:08<00:03,  1.44it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:08<00:02,  1.44it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:09<00:02,  1.44it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:10<00:01,  1.44it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:10<00:00,  1.44it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.49it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.52it/s]




Loss: 0.9445




Precision: 0.7775, Recall: 0.7836, F1-Score: 0.7763




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.82       940
           7       0.48      0.60      0.53       473
           8       0.66      0.85      0.74       746
           9       0.59      0.73      0.66       689
          10       0.76      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9173982949202032, 0.9173982949202032)




CCA coefficients mean non-concern: (0.9168343362284701, 0.9168343362284701)




Linear CKA concern: 0.9920294752371019




Linear CKA non-concern: 0.9846127625520404




Kernel CKA concern: 0.9899920653000724




Kernel CKA non-concern: 0.9856480694128665




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 10




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.35it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:24,  2.33it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:24,  2.32it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:24,  2.32it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:23,  2.32it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:23,  2.32it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:22,  2.31it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:22,  2.31it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:22,  2.31it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:21,  2.31it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:21,  2.31it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:20,  2.31it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:20,  2.31it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:20,  2.31it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:19,  2.31it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:19,  2.31it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:18,  2.31it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:18,  2.30it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:18,  2.30it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:17,  2.30it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:17,  2.30it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:09<01:16,  2.30it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:16,  2.30it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:16,  2.30it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:15,  2.30it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:15,  2.30it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:14,  2.30it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:14,  2.30it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:14,  2.29it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:13,  2.29it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:13<01:13,  2.29it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:12,  2.29it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:14<01:13,  2.26it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:13,  2.24it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:15<01:13,  2.22it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:14,  2.20it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:16<01:15,  2.16it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:17,  2.08it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:17<01:18,  2.03it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:20,  1.97it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:18<01:21,  1.93it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:22,  1.89it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:19<01:24,  1.85it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:20<01:25,  1.82it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:27,  1.76it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:21<01:29,  1.71it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:30,  1.69it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:22<01:30,  1.67it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:23<01:30,  1.65it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:30,  1.64it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:24<01:32,  1.61it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:25<01:33,  1.57it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:34,  1.55it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:26<01:34,  1.53it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:27<01:34,  1.52it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:34,  1.51it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:28<01:34,  1.51it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:29<01:33,  1.50it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:30<01:34,  1.48it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:30<01:36,  1.44it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:31<01:36,  1.42it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:32<01:36,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:36,  1.41it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:33<01:36,  1.40it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:34<01:36,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:35<01:35,  1.39it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:35,  1.38it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:36<01:35,  1.37it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:37<01:35,  1.36it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:34,  1.36it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:38<01:34,  1.36it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:39<01:33,  1.36it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:40<01:33,  1.35it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:32,  1.35it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:41<01:31,  1.35it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:42<01:30,  1.35it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:43<01:30,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:29,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:44<01:28,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:45<01:27,  1.35it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:27,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:26,  1.36it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:47<01:25,  1.36it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:48<01:24,  1.36it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:23,  1.36it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:23,  1.36it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:50<01:22,  1.36it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:21,  1.36it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:20,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:52<01:20,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:53<01:19,  1.36it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:18,  1.36it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:18,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:55<01:17,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:16,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:16,  1.35it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:15,  1.36it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:58<01:14,  1.36it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:13,  1.36it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:12,  1.36it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:12,  1.36it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:11,  1.36it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:10,  1.36it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:09,  1.36it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:09,  1.36it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:08,  1.36it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:05<01:07,  1.37it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:06,  1.36it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:06,  1.36it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:05,  1.37it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<01:04,  1.36it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:09<01:04,  1.36it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<01:03,  1.36it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:10<01:02,  1.36it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:11<01:01,  1.36it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:12<01:00,  1.37it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:12<00:59,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:13<00:59,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:14<00:58,  1.37it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:15<00:57,  1.37it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:56,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:16<00:56,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:17<00:55,  1.36it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:55,  1.36it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:18<00:54,  1.36it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:19<00:53,  1.36it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:20<00:52,  1.36it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:20<00:52,  1.36it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:21<00:51,  1.37it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:22<00:50,  1.38it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:23<00:49,  1.38it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:23<00:48,  1.37it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:24<00:47,  1.38it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:25<00:47,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:25<00:46,  1.37it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:26<00:45,  1.38it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:27<00:45,  1.37it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:28<00:44,  1.38it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:28<00:43,  1.38it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:29<00:42,  1.37it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:30<00:42,  1.38it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:31<00:41,  1.38it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:31<00:40,  1.38it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:32<00:39,  1.38it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:33<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:33<00:37,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:34<00:36,  1.42it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:35<00:35,  1.42it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:35<00:35,  1.43it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:36<00:34,  1.43it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:37<00:33,  1.43it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:38<00:32,  1.43it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:38<00:32,  1.43it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:39<00:31,  1.43it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:40<00:30,  1.43it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:40<00:30,  1.43it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:41<00:29,  1.43it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:42<00:28,  1.43it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:42<00:27,  1.43it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:43<00:27,  1.43it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:44<00:26,  1.43it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:45<00:25,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:45<00:25,  1.43it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:46<00:24,  1.43it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:47<00:23,  1.43it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:47<00:23,  1.43it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:48<00:22,  1.43it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:49<00:21,  1.43it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:49<00:20,  1.43it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:50<00:20,  1.43it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:51<00:19,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:52<00:18,  1.43it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:52<00:18,  1.43it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:53<00:17,  1.43it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:54<00:16,  1.43it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:54<00:16,  1.43it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:55<00:15,  1.43it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:56<00:14,  1.43it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:56<00:13,  1.43it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:57<00:13,  1.43it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:58<00:12,  1.43it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:59<00:11,  1.43it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:59<00:11,  1.43it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:00<00:10,  1.43it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:01<00:09,  1.43it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:01<00:09,  1.43it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:02<00:08,  1.43it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:03<00:07,  1.43it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:03<00:06,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:04<00:06,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:05<00:05,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:04,  1.43it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:06<00:04,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:07<00:03,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:08<00:02,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:08<00:02,  1.43it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:09<00:01,  1.43it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:10<00:00,  1.43it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.49it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.53it/s]




Loss: 0.9441




Precision: 0.7777, Recall: 0.7836, F1-Score: 0.7765




              precision    recall  f1-score   support

           0       0.75      0.66      0.71       797
           1       0.85      0.71      0.77       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.66      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9188937176187417, 0.9188937176187417)




CCA coefficients mean non-concern: (0.9181894529317338, 0.9181894529317338)




Linear CKA concern: 0.9904006477270663




Linear CKA non-concern: 0.9855195746727906




Kernel CKA concern: 0.9884228180475138




Kernel CKA non-concern: 0.9864554696008283




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 11




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.38it/s]

Evaluating:   1%|          | 2/200 [00:00<01:24,  2.33it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.31it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.29it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:24,  2.29it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.29it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.29it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:22,  2.29it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.28it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:21,  2.28it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.28it/s]

Evaluating:   8%|?둙         | 16/200 [00:06<01:20,  2.28it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.28it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:19,  2.28it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.28it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.28it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.27it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:17,  2.27it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.27it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:17,  2.27it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:16,  2.27it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.27it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:15,  2.27it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.27it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.27it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:14,  2.27it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:14,  2.26it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:14,  2.25it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:15<01:27,  1.89it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:34,  1.74it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:37,  1.68it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:17<01:39,  1.64it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:40,  1.61it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:18<01:40,  1.60it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:19<01:40,  1.59it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:19<01:40,  1.58it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:20<01:40,  1.57it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:21<01:40,  1.57it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:21<01:39,  1.56it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:22<01:39,  1.56it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:22<01:38,  1.56it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:23<01:38,  1.56it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:24<01:37,  1.56it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:24<01:37,  1.55it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:25<01:36,  1.55it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:26<01:35,  1.56it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:26<01:34,  1.57it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:27<01:33,  1.57it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:28<01:32,  1.58it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:28<01:31,  1.59it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:29<01:30,  1.60it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:29<01:29,  1.60it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:30<01:28,  1.61it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:31<01:27,  1.61it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:26,  1.61it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:32<01:26,  1.61it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:33<01:25,  1.61it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:25,  1.61it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:34<01:25,  1.59it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:26,  1.56it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:26,  1.54it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:27,  1.53it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:27,  1.52it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:26,  1.51it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:27,  1.49it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:39<01:29,  1.45it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:30,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:31,  1.39it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:31,  1.37it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:42<01:31,  1.36it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:31,  1.35it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:31,  1.35it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:30,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:45<01:29,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:28,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:46<01:27,  1.35it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:47<01:27,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:26,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:25,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:49<01:25,  1.35it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:50<01:24,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:23,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:51<01:22,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:52<01:22,  1.35it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:53<01:21,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:20,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:54<01:20,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:55<01:19,  1.35it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:56<01:18,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:17,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:57<01:16,  1.35it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:58<01:16,  1.35it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:59<01:15,  1.35it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:59<01:14,  1.35it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:00<01:13,  1.35it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:01<01:13,  1.35it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:02<01:12,  1.35it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:02<01:11,  1.35it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:03<01:10,  1.35it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:04<01:10,  1.35it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:09,  1.35it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:05<01:08,  1.35it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:06<01:08,  1.35it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:07<01:07,  1.35it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:07<01:06,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:08<01:05,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:09<01:05,  1.35it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:10<01:04,  1.35it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:10<01:03,  1.35it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:11<01:02,  1.35it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:12<01:02,  1.35it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:13<01:01,  1.35it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:13<01:00,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:14<01:00,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:15<00:59,  1.35it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:16<00:58,  1.35it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:16<00:57,  1.35it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:17<00:56,  1.35it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:18<00:55,  1.37it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:18<00:54,  1.39it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:19<00:52,  1.40it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:20<00:51,  1.41it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:21<00:51,  1.41it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:21<00:50,  1.41it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:22<00:49,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:23<00:48,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:23<00:48,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:24<00:47,  1.41it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:25<00:46,  1.42it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:26<00:45,  1.41it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:26<00:45,  1.41it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:27<00:44,  1.41it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:28<00:44,  1.41it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:28<00:43,  1.41it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:29<00:42,  1.41it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:30<00:41,  1.42it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:30<00:40,  1.42it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:31<00:40,  1.42it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:32<00:39,  1.42it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:33<00:38,  1.42it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:33<00:38,  1.42it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:34<00:37,  1.42it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:35<00:36,  1.42it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:35<00:35,  1.42it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:36<00:35,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:37<00:34,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:38<00:33,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:38<00:33,  1.42it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:39<00:32,  1.42it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:40<00:31,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:40<00:31,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:41<00:30,  1.41it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:42<00:29,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:42<00:28,  1.42it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:43<00:28,  1.42it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:44<00:27,  1.42it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:45<00:26,  1.42it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:45<00:26,  1.42it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:46<00:25,  1.41it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:47<00:24,  1.41it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:47<00:24,  1.42it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:48<00:23,  1.42it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:49<00:22,  1.42it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:50<00:21,  1.42it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:50<00:21,  1.42it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:51<00:20,  1.42it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:52<00:19,  1.42it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:52<00:19,  1.42it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:53<00:18,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:54<00:17,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:54<00:16,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:55<00:16,  1.42it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:56<00:15,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:57<00:14,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:57<00:14,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:58<00:13,  1.42it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:59<00:12,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:59<00:11,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:00<00:11,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:01<00:10,  1.41it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:02<00:09,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:02<00:09,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:03<00:08,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:04<00:07,  1.42it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:04<00:07,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:05<00:06,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:06<00:05,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:04,  1.42it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:07<00:04,  1.42it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:08<00:03,  1.42it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:09<00:02,  1.42it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:09<00:02,  1.42it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:10<00:01,  1.42it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:11<00:00,  1.42it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.48it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.52it/s]




Loss: 0.9435




Precision: 0.7778, Recall: 0.7841, F1-Score: 0.7768




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.88      0.69      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.78      0.77       670
          11       0.61      0.81      0.70       312
          12       0.72      0.81      0.76       665
          13       0.84      0.85      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9193390059294791, 0.9193390059294791)




CCA coefficients mean non-concern: (0.9177821632676089, 0.9177821632676089)




Linear CKA concern: 0.9902710957135447




Linear CKA non-concern: 0.986474395177252




Kernel CKA concern: 0.9885467738591716




Kernel CKA non-concern: 0.987259225657475




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 12




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.38it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.29it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.29it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:25,  2.28it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.28it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.28it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:23,  2.28it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.28it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.27it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.27it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:20,  2.27it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.27it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.27it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:18,  2.27it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:18,  2.27it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.26it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:10<01:17,  2.26it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:17,  2.26it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.26it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:16,  2.26it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.25it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:15,  2.23it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:16,  2.20it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:15<01:16,  2.16it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:16,  2.15it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:17,  2.12it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:19,  2.04it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:21,  1.99it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:22,  1.95it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:26,  1.86it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:28,  1.80it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:30,  1.75it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:20<01:31,  1.72it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:31,  1.71it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:31,  1.69it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:31,  1.68it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:31,  1.68it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:23<01:30,  1.67it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:30,  1.66it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:30,  1.65it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:25<01:43,  1.45it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:26<01:51,  1.33it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:57,  1.26it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:27<02:00,  1.21it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:28<02:00,  1.20it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:29<02:00,  1.20it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:30<01:59,  1.20it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:31<01:56,  1.22it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:31<01:54,  1.23it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:32<01:52,  1.24it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:33<01:50,  1.25it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:34<01:49,  1.26it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:35<01:48,  1.26it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:35<01:45,  1.29it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:36<01:43,  1.31it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:37<01:41,  1.32it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:38<01:40,  1.33it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:38<01:38,  1.34it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:39<01:36,  1.36it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:40<01:34,  1.37it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:40<01:33,  1.38it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:41<01:32,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:42<01:30,  1.40it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:43<01:29,  1.40it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:43<01:29,  1.40it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:44<01:28,  1.41it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:45<01:27,  1.41it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:45<01:26,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:46<01:25,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:47<01:24,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:47<01:24,  1.41it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:48<01:22,  1.43it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:49<01:20,  1.45it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:50<01:19,  1.46it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:50<01:18,  1.46it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:51<01:17,  1.47it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:52<01:16,  1.47it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:52<01:15,  1.47it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:53<01:15,  1.48it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:54<01:14,  1.48it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:54<01:13,  1.48it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:55<01:13,  1.48it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:56<01:12,  1.48it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:56<01:11,  1.48it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:57<01:11,  1.47it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:58<01:10,  1.47it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:58<01:10,  1.47it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:59<01:09,  1.47it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [01:00<01:08,  1.47it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [01:00<01:07,  1.48it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:01<01:06,  1.48it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:02<01:06,  1.48it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:02<01:05,  1.48it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:03<01:05,  1.47it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:04<01:04,  1.47it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:04<01:03,  1.47it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:05<01:03,  1.47it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:06<01:02,  1.47it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:01,  1.48it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:07<01:00,  1.48it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:08<01:00,  1.48it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<00:59,  1.48it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:09<00:58,  1.48it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:10<00:58,  1.48it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:11<00:57,  1.48it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:11<00:56,  1.48it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:12<00:56,  1.48it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:13<00:55,  1.48it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:13<00:54,  1.48it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:14<00:54,  1.48it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:15<00:53,  1.48it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:53,  1.47it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:16<00:52,  1.47it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:17<00:51,  1.47it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:51,  1.45it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:18<00:51,  1.43it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:19<00:51,  1.42it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:19<00:51,  1.41it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:20<00:50,  1.40it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:21<00:50,  1.40it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:22<00:49,  1.40it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:22<00:48,  1.40it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:23<00:48,  1.39it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:24<00:47,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:25<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:25<00:46,  1.39it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:26<00:45,  1.39it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:27<00:44,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:27<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:28<00:43,  1.39it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:29<00:42,  1.39it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:30<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:30<00:41,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:31<00:40,  1.39it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:32<00:39,  1.39it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:32<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:33<00:38,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:34<00:37,  1.39it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:35<00:36,  1.39it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:35<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:36<00:35,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:37<00:34,  1.39it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:37<00:33,  1.39it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:38<00:33,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:39<00:32,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:40<00:31,  1.39it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:40<00:30,  1.39it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:41<00:30,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:42<00:29,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:43<00:28,  1.39it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:43<00:28,  1.39it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:44<00:27,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:45<00:26,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:45<00:25,  1.39it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:46<00:25,  1.39it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:47<00:24,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:48<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:48<00:23,  1.39it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:49<00:22,  1.39it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:50<00:21,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:50<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:51<00:20,  1.39it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:52<00:19,  1.39it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:53<00:18,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:53<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:54<00:17,  1.39it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:55<00:16,  1.39it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:55<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:56<00:15,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:57<00:14,  1.39it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:58<00:13,  1.39it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:58<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:59<00:12,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [02:00<00:11,  1.39it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [02:01<00:10,  1.39it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:01<00:10,  1.39it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:02<00:09,  1.40it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:03<00:08,  1.40it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:03<00:07,  1.39it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:04<00:07,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:05<00:06,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:06<00:05,  1.39it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:06<00:05,  1.39it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:07<00:04,  1.40it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:08<00:03,  1.42it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:08<00:02,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:09<00:02,  1.44it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:10<00:01,  1.45it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:10<00:00,  1.46it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.52it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:11<00:00,  1.52it/s]




Loss: 0.9429




Precision: 0.7775, Recall: 0.7839, F1-Score: 0.7765




              precision    recall  f1-score   support

           0       0.75      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.88       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.86      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.67      0.85      0.75       746
           9       0.60      0.73      0.66       689
          10       0.75      0.78      0.77       670
          11       0.62      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.83      0.86      0.84       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9174170346798085, 0.9174170346798085)




CCA coefficients mean non-concern: (0.9181418602515559, 0.9181418602515559)




Linear CKA concern: 0.9879893637072323




Linear CKA non-concern: 0.9858724532974463




Kernel CKA concern: 0.9866287124016708




Kernel CKA non-concern: 0.9868525573672275




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 13




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.31it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.29it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.28it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:25,  2.28it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.27it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:24,  2.27it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.27it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.27it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:21,  2.26it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.26it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:20,  2.26it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:20,  2.26it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.26it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:19,  2.26it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.26it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:18,  2.26it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:18,  2.25it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:11<01:17,  2.25it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:17,  2.25it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.25it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:16,  2.25it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:16,  2.25it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.24it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:15,  2.24it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:15,  2.22it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:15,  2.21it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:15<01:15,  2.18it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:16,  2.16it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:16,  2.14it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:17,  2.11it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:18,  2.07it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:22,  1.95it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:26,  1.86it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:28,  1.81it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:29,  1.77it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:30,  1.74it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:30,  1.73it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:21<01:30,  1.71it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:30,  1.70it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:32,  1.65it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:23<01:34,  1.61it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:35,  1.58it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:24<01:36,  1.55it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:25<01:36,  1.54it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:36,  1.53it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:26<01:36,  1.52it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:27<01:36,  1.52it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:35,  1.51it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:29<01:34,  1.51it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:34,  1.51it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:33,  1.51it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:33,  1.50it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:32,  1.50it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:32,  1.50it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:31,  1.50it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:30,  1.50it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:30,  1.50it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:29,  1.50it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:35<01:28,  1.50it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:28,  1.49it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:30,  1.45it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:37<01:30,  1.43it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:31,  1.41it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:31,  1.40it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:31,  1.39it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:40<01:30,  1.39it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:30,  1.38it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:29,  1.38it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:42<01:28,  1.38it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:43<01:28,  1.38it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:28,  1.37it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:27,  1.38it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:45<01:26,  1.37it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:25,  1.37it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:24,  1.38it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:24,  1.37it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:48<01:23,  1.38it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:22,  1.37it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:22,  1.37it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:50<01:21,  1.37it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:20,  1.37it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:20,  1.37it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:19,  1.37it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:53<01:18,  1.37it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:18,  1.37it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:17,  1.37it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:16,  1.37it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:15,  1.37it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:15,  1.37it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:14,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:59<01:13,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:13,  1.37it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:11,  1.39it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:09,  1.40it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:08,  1.42it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:07,  1.43it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:06,  1.43it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:03<01:05,  1.43it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:04,  1.44it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:05<01:03,  1.44it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:05<01:03,  1.44it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:02,  1.44it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:01,  1.44it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<01:01,  1.44it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:08<01:00,  1.44it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<00:59,  1.44it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:10<00:59,  1.44it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:10<00:58,  1.44it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:11<00:57,  1.44it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:12<00:56,  1.44it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:12<00:56,  1.44it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:13<00:55,  1.44it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:14<00:54,  1.44it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:54,  1.44it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:15<00:53,  1.44it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:16<00:52,  1.44it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:51,  1.44it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:17<00:51,  1.44it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:18<00:50,  1.44it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:19<00:49,  1.44it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:19<00:49,  1.44it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:20<00:48,  1.44it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:21<00:47,  1.45it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:21<00:47,  1.45it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:22<00:46,  1.44it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:23<00:45,  1.45it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:24<00:44,  1.45it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:24<00:44,  1.45it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:25<00:43,  1.45it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:26<00:42,  1.45it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:26<00:42,  1.45it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:27<00:41,  1.45it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:28<00:40,  1.45it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:28<00:40,  1.45it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:29<00:39,  1.45it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:30<00:38,  1.45it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:30<00:38,  1.44it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:31<00:37,  1.44it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:32<00:36,  1.44it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:32<00:36,  1.44it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:33<00:35,  1.44it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:34<00:34,  1.44it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:35<00:34,  1.44it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:35<00:33,  1.44it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:36<00:32,  1.44it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:37<00:31,  1.44it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:37<00:31,  1.44it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:38<00:30,  1.44it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:39<00:29,  1.44it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:39<00:29,  1.45it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:40<00:28,  1.44it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:41<00:27,  1.45it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:42<00:27,  1.44it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:42<00:26,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:43<00:25,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:44<00:24,  1.44it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:44<00:24,  1.44it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:45<00:23,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:46<00:22,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:46<00:22,  1.44it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:47<00:21,  1.44it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:48<00:20,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:48<00:20,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:49<00:19,  1.44it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:50<00:18,  1.44it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:51<00:18,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:51<00:17,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:52<00:16,  1.44it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:53<00:15,  1.44it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:53<00:15,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:54<00:14,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:55<00:13,  1.44it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:55<00:13,  1.44it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:56<00:12,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:57<00:11,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:57<00:11,  1.44it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [01:58<00:10,  1.44it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [01:59<00:09,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:00<00:09,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:00<00:08,  1.44it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:01<00:07,  1.44it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:02<00:06,  1.44it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:02<00:06,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:03<00:05,  1.43it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:04<00:04,  1.43it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:04<00:04,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:05<00:03,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:06<00:02,  1.43it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:07<00:02,  1.44it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:07<00:01,  1.44it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:08<00:00,  1.44it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:09<00:00,  1.50it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:09<00:00,  1.55it/s]




Loss: 0.9432




Precision: 0.7779, Recall: 0.7842, F1-Score: 0.7768




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.77       775
           2       0.87      0.88      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.69      0.77       882
           6       0.86      0.80      0.83       940
           7       0.48      0.59      0.53       473
           8       0.66      0.85      0.74       746
           9       0.59      0.73      0.66       689
          10       0.76      0.78      0.77       670
          11       0.61      0.80      0.70       312
          12       0.72      0.81      0.76       665
          13       0.83      0.86      0.85       314
          14       0.86      0.78      0.81       756
          15       0.98      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9204318160965856, 0.9204318160965856)




CCA coefficients mean non-concern: (0.919274487094692, 0.919274487094692)




Linear CKA concern: 0.9919897115129007




Linear CKA non-concern: 0.9860339117639955




Kernel CKA concern: 0.9898724600260577




Kernel CKA non-concern: 0.9866564033294849




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 14




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.29it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.28it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:25,  2.28it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.28it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.27it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:23,  2.27it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.27it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.27it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:22,  2.27it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.27it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:21,  2.26it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:20,  2.26it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:20,  2.27it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:19,  2.26it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.26it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:19,  2.26it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.26it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:18,  2.26it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:17,  2.26it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:11<01:17,  2.26it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:17,  2.25it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.26it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:16,  2.26it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:15,  2.25it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:15,  2.22it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:16,  2.20it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:15<01:16,  2.18it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:15<01:16,  2.16it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:16<01:18,  2.10it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:16<01:19,  2.05it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:17<01:20,  2.02it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:17<01:20,  1.99it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:18<01:21,  1.97it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:18<01:22,  1.92it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:19<01:24,  1.87it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:19<01:25,  1.83it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:20<01:26,  1.81it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:20<01:26,  1.78it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:21<01:28,  1.74it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:22<01:29,  1.71it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:22<01:29,  1.69it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:23<01:29,  1.68it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:23<01:30,  1.67it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:24<01:31,  1.63it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:25<01:31,  1.61it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:25<01:32,  1.59it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:26<01:33,  1.56it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:27<01:33,  1.55it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:27<01:34,  1.53it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:28<01:33,  1.53it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:29<01:33,  1.52it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:29<01:33,  1.51it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:30<01:32,  1.51it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:31<01:32,  1.51it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:31<01:31,  1.51it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:32<01:31,  1.50it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:33<01:29,  1.51it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:33<01:29,  1.51it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:34<01:28,  1.51it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:35<01:29,  1.49it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:35<01:30,  1.45it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:36<01:32,  1.42it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:37<01:32,  1.40it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:38<01:33,  1.39it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:38<01:33,  1.38it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:39<01:32,  1.37it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:40<01:32,  1.36it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:31,  1.36it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:41<01:31,  1.36it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:42<01:30,  1.36it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:43<01:30,  1.36it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:29,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:44<01:28,  1.35it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:45<01:28,  1.35it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:27,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:26,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:47<01:25,  1.35it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:48<01:25,  1.35it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:24,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:23,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:50<01:22,  1.35it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:22,  1.35it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:21,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:52<01:20,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:53<01:19,  1.35it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:17,  1.37it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:16,  1.39it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:55<01:14,  1.40it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:13,  1.41it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:12,  1.41it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:57<01:12,  1.42it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:58<01:11,  1.42it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:10,  1.42it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:09,  1.42it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:00<01:08,  1.42it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:08,  1.42it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:07,  1.43it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:02<01:06,  1.42it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:03<01:06,  1.42it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:05,  1.43it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:04<01:04,  1.43it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:05<01:03,  1.43it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:03,  1.43it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:02,  1.43it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:07<01:01,  1.43it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:08<01:00,  1.43it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<01:00,  1.43it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:09<00:59,  1.43it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:10<00:58,  1.43it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:11<00:58,  1.43it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:11<00:57,  1.43it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:12<00:56,  1.43it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:13<00:56,  1.43it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:14<00:55,  1.43it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:14<00:54,  1.43it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:15<00:54,  1.43it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:16<00:53,  1.43it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:16<00:52,  1.42it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:17<00:51,  1.42it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:18<00:51,  1.43it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:18<00:50,  1.42it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:19<00:49,  1.43it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:20<00:49,  1.43it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:21<00:48,  1.43it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:21<00:47,  1.43it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:22<00:47,  1.42it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:23<00:46,  1.42it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:23<00:45,  1.42it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:24<00:44,  1.43it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:25<00:44,  1.43it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:25<00:43,  1.43it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:26<00:42,  1.43it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:27<00:42,  1.43it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:28<00:41,  1.43it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:28<00:40,  1.43it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:29<00:39,  1.43it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:30<00:39,  1.43it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:30<00:38,  1.43it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:31<00:37,  1.43it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:32<00:37,  1.43it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:32<00:36,  1.43it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:33<00:35,  1.42it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:34<00:35,  1.42it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:35<00:34,  1.43it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:35<00:33,  1.44it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:36<00:32,  1.46it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:37<00:31,  1.47it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:37<00:30,  1.48it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:38<00:29,  1.48it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:39<00:28,  1.49it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:39<00:28,  1.49it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:40<00:27,  1.49it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:41<00:26,  1.49it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:41<00:26,  1.49it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:42<00:25,  1.49it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:43<00:24,  1.49it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:43<00:24,  1.49it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:44<00:23,  1.49it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:45<00:22,  1.49it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:45<00:22,  1.50it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:46<00:21,  1.49it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:47<00:20,  1.50it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:47<00:20,  1.50it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:48<00:19,  1.49it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:49<00:18,  1.50it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:49<00:18,  1.50it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:50<00:17,  1.50it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:51<00:16,  1.50it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:51<00:16,  1.50it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:52<00:15,  1.49it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:53<00:14,  1.49it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:53<00:14,  1.49it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:54<00:13,  1.50it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:55<00:12,  1.49it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:55<00:12,  1.49it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:56<00:11,  1.49it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:57<00:11,  1.43it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [01:58<00:10,  1.40it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [01:58<00:10,  1.38it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [01:59<00:09,  1.36it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:00<00:08,  1.35it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:01<00:08,  1.34it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:01<00:07,  1.34it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:02<00:06,  1.33it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:03<00:06,  1.33it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:04<00:05,  1.33it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:04<00:04,  1.33it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:05<00:03,  1.33it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:06<00:03,  1.33it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:07<00:02,  1.33it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:07<00:01,  1.33it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:08<00:00,  1.33it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:09<00:00,  1.38it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:09<00:00,  1.55it/s]




Loss: 0.9437




Precision: 0.7782, Recall: 0.7847, F1-Score: 0.7772




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.77       775
           2       0.87      0.88      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.83       940
           7       0.48      0.60      0.53       473
           8       0.66      0.86      0.75       746
           9       0.60      0.74      0.66       689
          10       0.76      0.79      0.77       670
          11       0.62      0.81      0.70       312
          12       0.72      0.80      0.76       665
          13       0.84      0.86      0.85       314
          14       0.85      0.78      0.81       756
          15       0.97      0.97      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9201176372009959, 0.9201176372009959)




CCA coefficients mean non-concern: (0.9178258586171898, 0.9178258586171898)




Linear CKA concern: 0.9907760074154024




Linear CKA non-concern: 0.9854545972540433




Kernel CKA concern: 0.9893007314456447




Kernel CKA non-concern: 0.9865451852286604




{'dataset_name': 'OSDG', 'path': 'albertmartinez/OSDG', 'config_name': '2024-01-01', 'text_column': 'text', 'label_column': 'labels', 'cache_dir': 'Datasets/OSDG', 'task_type': 'classification'}




Loading cached dataset OSDG.




The dataset OSDG is loaded




Evaluate the pruned model 15




Evaluating:   0%|          | 0/200 [00:00<?, ?it/s]

Evaluating:   0%|          | 1/200 [00:00<01:23,  2.37it/s]

Evaluating:   1%|          | 2/200 [00:00<01:25,  2.32it/s]

Evaluating:   2%|?둞         | 3/200 [00:01<01:25,  2.30it/s]

Evaluating:   2%|?둞         | 4/200 [00:01<01:25,  2.28it/s]

Evaluating:   2%|?둝         | 5/200 [00:02<01:25,  2.28it/s]

Evaluating:   3%|?둝         | 6/200 [00:02<01:25,  2.27it/s]

Evaluating:   4%|?둝         | 7/200 [00:03<01:24,  2.27it/s]

Evaluating:   4%|?둜         | 8/200 [00:03<01:24,  2.27it/s]

Evaluating:   4%|?둜         | 9/200 [00:03<01:24,  2.27it/s]

Evaluating:   5%|?둛         | 10/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 11/200 [00:04<01:23,  2.27it/s]

Evaluating:   6%|?둛         | 12/200 [00:05<01:22,  2.27it/s]

Evaluating:   6%|?둚         | 13/200 [00:05<01:22,  2.26it/s]

Evaluating:   7%|?둚         | 14/200 [00:06<01:22,  2.26it/s]

Evaluating:   8%|?둙         | 15/200 [00:06<01:21,  2.26it/s]

Evaluating:   8%|?둙         | 16/200 [00:07<01:21,  2.26it/s]

Evaluating:   8%|?둙         | 17/200 [00:07<01:21,  2.26it/s]

Evaluating:   9%|?둘         | 18/200 [00:07<01:20,  2.26it/s]

Evaluating:  10%|?둘         | 19/200 [00:08<01:20,  2.26it/s]

Evaluating:  10%|?둗         | 20/200 [00:08<01:19,  2.26it/s]

Evaluating:  10%|?둗         | 21/200 [00:09<01:19,  2.26it/s]

Evaluating:  11%|?둗         | 22/200 [00:09<01:18,  2.25it/s]

Evaluating:  12%|?둗?둞        | 23/200 [00:10<01:18,  2.26it/s]

Evaluating:  12%|?둗?둞        | 24/200 [00:10<01:18,  2.26it/s]

Evaluating:  12%|?둗?둝        | 25/200 [00:11<01:17,  2.25it/s]

Evaluating:  13%|?둗?둝        | 26/200 [00:11<01:17,  2.25it/s]

Evaluating:  14%|?둗?둝        | 27/200 [00:11<01:16,  2.25it/s]

Evaluating:  14%|?둗?둜        | 28/200 [00:12<01:16,  2.25it/s]

Evaluating:  14%|?둗?둜        | 29/200 [00:12<01:16,  2.25it/s]

Evaluating:  15%|?둗?둛        | 30/200 [00:13<01:15,  2.24it/s]

Evaluating:  16%|?둗?둛        | 31/200 [00:13<01:15,  2.25it/s]

Evaluating:  16%|?둗?둛        | 32/200 [00:14<01:14,  2.25it/s]

Evaluating:  16%|?둗?둚        | 33/200 [00:14<01:14,  2.25it/s]

Evaluating:  17%|?둗?둚        | 34/200 [00:16<02:28,  1.12it/s]

Evaluating:  18%|?둗?둙        | 35/200 [00:17<02:33,  1.07it/s]

Evaluating:  18%|?둗?둙        | 36/200 [00:18<02:24,  1.13it/s]

Evaluating:  18%|?둗?둙        | 37/200 [00:18<02:11,  1.24it/s]

Evaluating:  19%|?둗?둘        | 38/200 [00:19<01:59,  1.35it/s]

Evaluating:  20%|?둗?둘        | 39/200 [00:20<01:48,  1.49it/s]

Evaluating:  20%|?둗?둗        | 40/200 [00:20<01:38,  1.62it/s]

Evaluating:  20%|?둗?둗        | 41/200 [00:21<01:31,  1.73it/s]

Evaluating:  21%|?둗?둗        | 42/200 [00:21<01:26,  1.82it/s]

Evaluating:  22%|?둗?둗?둞       | 43/200 [00:22<01:23,  1.88it/s]

Evaluating:  22%|?둗?둗?둞       | 44/200 [00:22<01:20,  1.93it/s]

Evaluating:  22%|?둗?둗?둝       | 45/200 [00:22<01:18,  1.96it/s]

Evaluating:  23%|?둗?둗?둝       | 46/200 [00:23<01:17,  1.99it/s]

Evaluating:  24%|?둗?둗?둝       | 47/200 [00:23<01:16,  2.01it/s]

Evaluating:  24%|?둗?둗?둜       | 48/200 [00:24<01:15,  2.02it/s]

Evaluating:  24%|?둗?둗?둜       | 49/200 [00:24<01:14,  2.02it/s]

Evaluating:  25%|?둗?둗?둛       | 50/200 [00:25<01:15,  2.00it/s]

Evaluating:  26%|?둗?둗?둛       | 51/200 [00:25<01:15,  1.96it/s]

Evaluating:  26%|?둗?둗?둛       | 52/200 [00:26<01:17,  1.92it/s]

Evaluating:  26%|?둗?둗?둚       | 53/200 [00:27<01:18,  1.88it/s]

Evaluating:  27%|?둗?둗?둚       | 54/200 [00:27<01:19,  1.84it/s]

Evaluating:  28%|?둗?둗?둙       | 55/200 [00:28<01:20,  1.80it/s]

Evaluating:  28%|?둗?둗?둙       | 56/200 [00:28<01:22,  1.75it/s]

Evaluating:  28%|?둗?둗?둙       | 57/200 [00:29<01:23,  1.71it/s]

Evaluating:  29%|?둗?둗?둘       | 58/200 [00:30<01:24,  1.68it/s]

Evaluating:  30%|?둗?둗?둘       | 59/200 [00:30<01:24,  1.67it/s]

Evaluating:  30%|?둗?둗?둗       | 60/200 [00:31<01:25,  1.64it/s]

Evaluating:  30%|?둗?둗?둗       | 61/200 [00:32<01:28,  1.56it/s]

Evaluating:  31%|?둗?둗?둗       | 62/200 [00:32<01:31,  1.51it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 63/200 [00:33<01:32,  1.48it/s]

Evaluating:  32%|?둗?둗?둗?둞      | 64/200 [00:34<01:33,  1.46it/s]

Evaluating:  32%|?둗?둗?둗?둝      | 65/200 [00:34<01:33,  1.44it/s]

Evaluating:  33%|?둗?둗?둗?둝      | 66/200 [00:35<01:33,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둝      | 67/200 [00:36<01:33,  1.43it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 68/200 [00:36<01:32,  1.42it/s]

Evaluating:  34%|?둗?둗?둗?둜      | 69/200 [00:37<01:32,  1.42it/s]

Evaluating:  35%|?둗?둗?둗?둛      | 70/200 [00:38<01:31,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 71/200 [00:39<01:31,  1.42it/s]

Evaluating:  36%|?둗?둗?둗?둛      | 72/200 [00:39<01:30,  1.41it/s]

Evaluating:  36%|?둗?둗?둗?둚      | 73/200 [00:40<01:29,  1.41it/s]

Evaluating:  37%|?둗?둗?둗?둚      | 74/200 [00:41<01:29,  1.41it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 75/200 [00:41<01:28,  1.41it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 76/200 [00:42<01:28,  1.41it/s]

Evaluating:  38%|?둗?둗?둗?둙      | 77/200 [00:43<01:27,  1.41it/s]

Evaluating:  39%|?둗?둗?둗?둘      | 78/200 [00:44<01:26,  1.42it/s]

Evaluating:  40%|?둗?둗?둗?둘      | 79/200 [00:44<01:25,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 80/200 [00:45<01:24,  1.41it/s]

Evaluating:  40%|?둗?둗?둗?둗      | 81/200 [00:46<01:24,  1.41it/s]

Evaluating:  41%|?둗?둗?둗?둗      | 82/200 [00:46<01:23,  1.41it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 83/200 [00:47<01:22,  1.41it/s]

Evaluating:  42%|?둗?둗?둗?둗?둞     | 84/200 [00:48<01:22,  1.41it/s]

Evaluating:  42%|?둗?둗?둗?둗?둝     | 85/200 [00:49<01:21,  1.41it/s]

Evaluating:  43%|?둗?둗?둗?둗?둝     | 86/200 [00:49<01:20,  1.41it/s]

Evaluating:  44%|?둗?둗?둗?둗?둝     | 87/200 [00:50<01:20,  1.41it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 88/200 [00:51<01:19,  1.41it/s]

Evaluating:  44%|?둗?둗?둗?둗?둜     | 89/200 [00:51<01:18,  1.41it/s]

Evaluating:  45%|?둗?둗?둗?둗?둛     | 90/200 [00:52<01:17,  1.41it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 91/200 [00:53<01:16,  1.42it/s]

Evaluating:  46%|?둗?둗?둗?둗?둛     | 92/200 [00:53<01:16,  1.42it/s]

Evaluating:  46%|?둗?둗?둗?둗?둚     | 93/200 [00:54<01:15,  1.42it/s]

Evaluating:  47%|?둗?둗?둗?둗?둚     | 94/200 [00:55<01:14,  1.42it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 95/200 [00:56<01:14,  1.42it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 96/200 [00:56<01:13,  1.41it/s]

Evaluating:  48%|?둗?둗?둗?둗?둙     | 97/200 [00:57<01:12,  1.42it/s]

Evaluating:  49%|?둗?둗?둗?둗?둘     | 98/200 [00:58<01:12,  1.41it/s]

Evaluating:  50%|?둗?둗?둗?둗?둘     | 99/200 [00:58<01:11,  1.42it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 100/200 [00:59<01:10,  1.42it/s]

Evaluating:  50%|?둗?둗?둗?둗?둗     | 101/200 [01:00<01:10,  1.41it/s]

Evaluating:  51%|?둗?둗?둗?둗?둗     | 102/200 [01:01<01:09,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 103/200 [01:01<01:08,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둞    | 104/200 [01:02<01:08,  1.41it/s]

Evaluating:  52%|?둗?둗?둗?둗?둗?둝    | 105/200 [01:03<01:07,  1.41it/s]

Evaluating:  53%|?둗?둗?둗?둗?둗?둝    | 106/200 [01:03<01:06,  1.41it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둝    | 107/200 [01:04<01:06,  1.41it/s]

Evaluating:  54%|?둗?둗?둗?둗?둗?둜    | 108/200 [01:05<01:05,  1.41it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둜    | 109/200 [01:06<01:04,  1.41it/s]

Evaluating:  55%|?둗?둗?둗?둗?둗?둛    | 110/200 [01:06<01:03,  1.41it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 111/200 [01:07<01:03,  1.41it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둛    | 112/200 [01:08<01:02,  1.41it/s]

Evaluating:  56%|?둗?둗?둗?둗?둗?둚    | 113/200 [01:08<01:01,  1.41it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둚    | 114/200 [01:09<01:00,  1.41it/s]

Evaluating:  57%|?둗?둗?둗?둗?둗?둙    | 115/200 [01:10<01:00,  1.41it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 116/200 [01:10<00:59,  1.41it/s]

Evaluating:  58%|?둗?둗?둗?둗?둗?둙    | 117/200 [01:11<00:58,  1.41it/s]

Evaluating:  59%|?둗?둗?둗?둗?둗?둘    | 118/200 [01:12<00:58,  1.41it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둘    | 119/200 [01:13<00:57,  1.41it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 120/200 [01:13<00:56,  1.42it/s]

Evaluating:  60%|?둗?둗?둗?둗?둗?둗    | 121/200 [01:14<00:55,  1.42it/s]

Evaluating:  61%|?둗?둗?둗?둗?둗?둗    | 122/200 [01:15<00:55,  1.42it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 123/200 [01:15<00:54,  1.42it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둞   | 124/200 [01:16<00:53,  1.42it/s]

Evaluating:  62%|?둗?둗?둗?둗?둗?둗?둝   | 125/200 [01:17<00:53,  1.41it/s]

Evaluating:  63%|?둗?둗?둗?둗?둗?둗?둝   | 126/200 [01:18<00:52,  1.42it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둝   | 127/200 [01:18<00:51,  1.42it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 128/200 [01:19<00:50,  1.41it/s]

Evaluating:  64%|?둗?둗?둗?둗?둗?둗?둜   | 129/200 [01:20<00:50,  1.42it/s]

Evaluating:  65%|?둗?둗?둗?둗?둗?둗?둛   | 130/200 [01:20<00:49,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 131/200 [01:21<00:48,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둛   | 132/200 [01:22<00:48,  1.41it/s]

Evaluating:  66%|?둗?둗?둗?둗?둗?둗?둚   | 133/200 [01:23<00:47,  1.41it/s]

Evaluating:  67%|?둗?둗?둗?둗?둗?둗?둚   | 134/200 [01:23<00:46,  1.41it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 135/200 [01:24<00:45,  1.42it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 136/200 [01:25<00:45,  1.42it/s]

Evaluating:  68%|?둗?둗?둗?둗?둗?둗?둙   | 137/200 [01:25<00:44,  1.42it/s]

Evaluating:  69%|?둗?둗?둗?둗?둗?둗?둘   | 138/200 [01:26<00:43,  1.42it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둘   | 139/200 [01:27<00:42,  1.42it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 140/200 [01:27<00:42,  1.42it/s]

Evaluating:  70%|?둗?둗?둗?둗?둗?둗?둗   | 141/200 [01:28<00:41,  1.42it/s]

Evaluating:  71%|?둗?둗?둗?둗?둗?둗?둗   | 142/200 [01:29<00:41,  1.41it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 143/200 [01:30<00:40,  1.42it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둞  | 144/200 [01:30<00:39,  1.42it/s]

Evaluating:  72%|?둗?둗?둗?둗?둗?둗?둗?둝  | 145/200 [01:31<00:38,  1.41it/s]

Evaluating:  73%|?둗?둗?둗?둗?둗?둗?둗?둝  | 146/200 [01:32<00:38,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둝  | 147/200 [01:32<00:37,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 148/200 [01:33<00:36,  1.41it/s]

Evaluating:  74%|?둗?둗?둗?둗?둗?둗?둗?둜  | 149/200 [01:34<00:36,  1.41it/s]

Evaluating:  75%|?둗?둗?둗?둗?둗?둗?둗?둛  | 150/200 [01:35<00:35,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 151/200 [01:35<00:34,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둛  | 152/200 [01:36<00:33,  1.41it/s]

Evaluating:  76%|?둗?둗?둗?둗?둗?둗?둗?둚  | 153/200 [01:37<00:33,  1.42it/s]

Evaluating:  77%|?둗?둗?둗?둗?둗?둗?둗?둚  | 154/200 [01:37<00:32,  1.42it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 155/200 [01:38<00:31,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 156/200 [01:39<00:31,  1.41it/s]

Evaluating:  78%|?둗?둗?둗?둗?둗?둗?둗?둙  | 157/200 [01:39<00:30,  1.41it/s]

Evaluating:  79%|?둗?둗?둗?둗?둗?둗?둗?둘  | 158/200 [01:40<00:29,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둘  | 159/200 [01:41<00:29,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 160/200 [01:42<00:28,  1.41it/s]

Evaluating:  80%|?둗?둗?둗?둗?둗?둗?둗?둗  | 161/200 [01:42<00:27,  1.41it/s]

Evaluating:  81%|?둗?둗?둗?둗?둗?둗?둗?둗  | 162/200 [01:43<00:26,  1.41it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 163/200 [01:44<00:26,  1.41it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둞 | 164/200 [01:44<00:25,  1.41it/s]

Evaluating:  82%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 165/200 [01:45<00:24,  1.41it/s]

Evaluating:  83%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 166/200 [01:46<00:24,  1.41it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둝 | 167/200 [01:47<00:23,  1.41it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 168/200 [01:47<00:22,  1.41it/s]

Evaluating:  84%|?둗?둗?둗?둗?둗?둗?둗?둗?둜 | 169/200 [01:48<00:21,  1.41it/s]

Evaluating:  85%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 170/200 [01:49<00:21,  1.41it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 171/200 [01:49<00:20,  1.41it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둛 | 172/200 [01:50<00:19,  1.42it/s]

Evaluating:  86%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 173/200 [01:51<00:19,  1.42it/s]

Evaluating:  87%|?둗?둗?둗?둗?둗?둗?둗?둗?둚 | 174/200 [01:51<00:18,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 175/200 [01:52<00:17,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 176/200 [01:53<00:16,  1.42it/s]

Evaluating:  88%|?둗?둗?둗?둗?둗?둗?둗?둗?둙 | 177/200 [01:54<00:16,  1.42it/s]

Evaluating:  89%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 178/200 [01:54<00:15,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둘 | 179/200 [01:55<00:14,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 180/200 [01:56<00:14,  1.42it/s]

Evaluating:  90%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 181/200 [01:56<00:13,  1.42it/s]

Evaluating:  91%|?둗?둗?둗?둗?둗?둗?둗?둗?둗 | 182/200 [01:57<00:12,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 183/200 [01:58<00:11,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둞| 184/200 [01:59<00:11,  1.42it/s]

Evaluating:  92%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 185/200 [01:59<00:10,  1.42it/s]

Evaluating:  93%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 186/200 [02:00<00:09,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둝| 187/200 [02:01<00:09,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 188/200 [02:01<00:08,  1.42it/s]

Evaluating:  94%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둜| 189/200 [02:02<00:07,  1.42it/s]

Evaluating:  95%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 190/200 [02:03<00:07,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 191/200 [02:03<00:06,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둛| 192/200 [02:04<00:05,  1.42it/s]

Evaluating:  96%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 193/200 [02:05<00:04,  1.42it/s]

Evaluating:  97%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둚| 194/200 [02:06<00:04,  1.42it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 195/200 [02:06<00:03,  1.41it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 196/200 [02:07<00:02,  1.41it/s]

Evaluating:  98%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둙| 197/200 [02:08<00:02,  1.41it/s]

Evaluating:  99%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 198/200 [02:08<00:01,  1.42it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둘| 199/200 [02:09<00:00,  1.42it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.47it/s]

Evaluating: 100%|?둗?둗?둗?둗?둗?둗?둗?둗?둗?둗| 200/200 [02:10<00:00,  1.54it/s]




Loss: 0.9448




Precision: 0.7786, Recall: 0.7836, F1-Score: 0.7770




              precision    recall  f1-score   support

           0       0.76      0.66      0.71       797
           1       0.85      0.71      0.78       775
           2       0.88      0.87      0.87       795
           3       0.87      0.83      0.85      1110
           4       0.86      0.80      0.83      1260
           5       0.89      0.68      0.77       882
           6       0.85      0.80      0.82       940
           7       0.48      0.60      0.53       473
           8       0.66      0.85      0.74       746
           9       0.60      0.73      0.66       689
          10       0.76      0.78      0.77       670
          11       0.64      0.80      0.71       312
          12       0.72      0.81      0.76       665
          13       0.84      0.85      0.84       314
          14       0.85      0.78      0.81       756
          15       0.97      0.98      0.97      1607

    accuracy                           0.80     12791
   macro avg       0.78   




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




adding eps to diagonal and taking inverse




taking square root




dot products...




trying to take final svd




computed everything!




CCA coefficients mean concern: (0.9205710611067252, 0.9205710611067252)




CCA coefficients mean non-concern: (0.9157052931702654, 0.9157052931702654)




Linear CKA concern: 0.9871949644456689




Linear CKA non-concern: 0.9852018812362567




Kernel CKA concern: 0.9851944431117843




Kernel CKA non-concern: 0.9857268944003947


