In [7]:
# necessary libs to run deepod and pyod
!pip install pyod
!pip install deepod
!pip install numpy==1.24.4 --force-reinstall

Collecting numpy==1.24.4
  Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Using cached numpy-1.24.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.4
    Uninstalling numpy-1.24.4:
      Successfully uninstalled numpy-1.24.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.24.4 which is incompatible.
torchvision 0.21.0+cu124 requires torch==2.6.0, but you have torch 1.13.0 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.24.4 which is incompatible.
treescope 0.1.9 requires numpy>=1.25.2, but you have numpy 1.24.4 which is incompatible.
pymc 5.22.0 requires numpy>=1.25.0, but you have 

Collecting pyod
  Using cached pyod-2.0.5-py3-none-any.whl.metadata (46 kB)
Using cached pyod-2.0.5-py3-none-any.whl (200 kB)
Installing collected packages: pyod
Successfully installed pyod-2.0.5


In [2]:
# Execution environment selector read by the path-setup cell below:
# 'local' or 'colab' (any other value makes that cell raise ValueError).
running='colab'

In [3]:
import os

In [6]:
# Resolve `project_path` for the selected environment and make it the current
# working directory, so later relative paths are resolved against it.
if running=='local':
    # Remember the directory the kernel was in the first time this cell ran.
    # Deriving the project root from that fixed point (instead of the current
    # cwd) keeps the cell idempotent: re-running it no longer climbs one
    # directory higher on every execution.
    _notebook_start_dir = globals().get('_notebook_start_dir', os.getcwd())
    os.chdir(os.path.dirname(_notebook_start_dir))
    project_path = os.getcwd()
elif running=='colab':
    # Colab: the repository lives on Google Drive, which must be mounted first.
    from google.colab import drive
    drive.mount('/content/drive')
    project_path_colab = "/content/drive/MyDrive/Projeto ML/2025/AD/second_setup"
    repo_name = "adaptative-text-anomaly-detection"
    project_path = os.path.join(project_path_colab, repo_name)
    os.chdir(project_path)
else:
    raise ValueError("Invalid running environment. Choose 'local' or 'colab'.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
import os
import sys
import pandas as pd
# Register both candidate locations of the project's `src` directory on the
# import path. The entries are relative, so they resolve against the current
# working directory at import time.
sys.path.extend([r'../src', r'./src'])

import time
import os
from tqdm import tqdm
import os

import pandas as pd

In [8]:
from pipeline.anomaly_detection import label_normal_vs_anomaly,adjust_contamination, pretty_print_data_info, split_data
from pipeline.benchmark_runner import benchmark_unsupervised_models, benchmark_semisupervised_models
from models.MLP import MLP

In [9]:
from deepod.models import DeepSAD, DeepSVDD, DevNet
from pyod.models.xgbod import XGBOD
from pyod.models.lof import LOF
from pyod.models.iforest import IForest
from pyod.models.xgbod import XGBOD
from pyod.models.ocsvm import OCSVM
from pyod.models.auto_encoder import AutoEncoder
from pyod.models.vae import VAE
from pyod.models.pca import PCA
from pyod.models.kde import KDE
from pyod.models.hbos import HBOS
from sklearn.svm import OneClassSVM
from sklearn.neural_network import MLPClassifier

In [None]:
# Encoders evaluated for every language in the benchmark grid.
_SHARED_ENCODERS = [
    "sentence-transformers/distiluse-base-multilingual-cased-v1",
    "sentence-transformers/distiluse-base-multilingual-cased-v2",
    "FacebookAI/xlm-roberta-large",
]

# Benchmark grid: language code -> the datasets to evaluate and the encoders
# whose embeddings are benchmarked on each of those datasets.
config = dict(
    en=dict(
        datasets=[
            "tweets-hate-speech-detection/tweets_hate_speech_detection",
            "SetFit/20_newsgroups",
            "cardiffnlp/tweet_eval",
        ],
        # Copy so each language owns an independent list.
        encoders=list(_SHARED_ENCODERS),
    ),
    pt=dict(
        datasets=[
            "JAugusto97/told-br",
            "wikinews",
            "augustop/portuguese-tweets-for-sentiment-analysis",
        ],
        # Portuguese additionally gets the Portuguese-specific encoders.
        encoders=_SHARED_ENCODERS + [
            "neuralmind/bert-base-portuguese-cased",
            "neuralmind/bert-large-portuguese-cased",
            "PORTULAN/serafim-100m-portuguese-pt-sentence-encoder-ir",
        ],
    ),
)

def _wrap_semi_model(name, fn):
    """Package a model factory for the semi-supervised runner.

    Returns a one-entry dict mapping `name` to `(fn, False)`, where the second
    element is the uncertainty flag (always disabled here).
    """
    return {name: (fn, False)}


def _wrap_unsupervised_model(name, fn):
    """Package a model factory for the unsupervised runner as `{name: fn}`."""
    return {name: fn}


# Zero-argument factories, so a fresh model instance is built on every call.
_semi_models = {
    "DevNet": lambda: DevNet(),
    "DeepSAD": lambda: DeepSAD(epochs=100, rep_dim=128, device='cuda'),
    "XGBOD": lambda: XGBOD(estimator_list=[LOF(), IForest()]),
    "MLP": lambda: MLP(),
}

_unsupervised_models = {
    "IForest": lambda: IForest(),
    "LOF": lambda: LOF(),
    "DeepSVDD": lambda: DeepSVDD(epochs=100, rep_dim=128),
    "OCSVM": lambda: OCSVM(kernel='rbf', nu=0.05, gamma='scale'),
    # NOTE(review): this is scikit-learn's OneClassSVM, not a pyod detector; its
    # predict/decision_function conventions differ from pyod's. Confirm that
    # benchmark_unsupervised_models accounts for that.
    "OneClassSVM": lambda: OneClassSVM(kernel='rbf', nu=0.05, gamma='scale'),
    "AutoEncoder": lambda: AutoEncoder(),
    "VAE": lambda: VAE(),
    "HBOS": lambda: HBOS(),
}

# Per-group benchmark recipe: the model factories, the runner that evaluates
# them, extra keyword arguments for that runner, and the adapter that shapes a
# (name, factory) pair into the `model_constructor` argument the runner receives.
model_groups = {
    "semi": {
        "models": _semi_models,
        "benchmark_fn": benchmark_semisupervised_models,
        "extra_args": {
            "contamination_levels": [0.05],
            "n_rounds": 1,
        },
        "wrap_model": _wrap_semi_model,
    },
    "unsupervised": {
        "models": _unsupervised_models,
        "benchmark_fn": benchmark_unsupervised_models,
        "extra_args": {},
        "wrap_model": _wrap_unsupervised_model,
    },
}

In [19]:
# Benchmark every model in `model_groups` on every (dataset, encoder) pair
# declared in `config`, reading the pre-computed parquet files from
# <project_path>/data.

# Value forwarded to adjust_contamination(perc_anomalous=...).
# NOTE(review): the recorded run shows 1486 anomalies vs 29720 normals, i.e. 5%
# relative to the normal count (~4.76% of all rows) -- confirm that is intended.
PERC_ANOMALOUS = 0.05
# Seed forwarded to split_data(random_state=...).
SPLIT_RANDOM_STATE = 42

data_dir = os.path.join(project_path, "data")

# One progress-bar tick per dataset/encoder pair.
total_combinations = sum(
    len(cfg["datasets"]) * len(cfg["encoders"])
    for cfg in config.values()
)

with tqdm(total=total_combinations, desc="Benchmarking dataset/encoder pairs") as pbar:
    for lang, lang_config in config.items():
        for dataset_name in lang_config["datasets"]:
            # Files on disk are keyed by the last path component of the hub id.
            dataset_short = dataset_name.split("/")[-1]

            for encoder_name in lang_config["encoders"]:
                encoder_short = encoder_name.split("/")[-1]
                combo_name = f"{dataset_short}_{encoder_short}"

                # Stage 1: load and prepare the data. A failure here (e.g. a
                # missing parquet file) skips this pair but not the whole run.
                try:
                    print(f"\nProcessing: {combo_name}")
                    texts_df = pd.read_parquet(os.path.join(data_dir, f"texts_{dataset_short}.parquet"))
                    labels_df = pd.read_parquet(os.path.join(data_dir, f"labels_{dataset_short}.parquet"))
                    embeddings_df = pd.read_parquet(os.path.join(data_dir, f"embeddings_{dataset_short}_{encoder_short}.parquet"))

                    labeled_anomalies_df = label_normal_vs_anomaly(labels_df, as_df=True)

                    texts, labels, embeddings = adjust_contamination(
                        texts=texts_df.squeeze().tolist(),
                        labels=labeled_anomalies_df.squeeze().values,
                        embeddings=embeddings_df.values,
                        perc_anomalous=PERC_ANOMALOUS
                    )

                    pretty_print_data_info(texts, labels, embeddings)
                    x_train, x_test, y_train, y_test = split_data(
                        embeddings, labels, random_state=SPLIT_RANDOM_STATE
                    )
                except Exception as e:
                    print(f"[❌ ERROR] Failed processing {combo_name}: {e}")
                else:
                    # Stage 2: run every model group (semi + unsupervised).
                    for group_name, group_info in model_groups.items():
                        models = group_info["models"]
                        benchmark_fn = group_info["benchmark_fn"]
                        extra_args = group_info["extra_args"]
                        wrap_model = group_info["wrap_model"]

                        for model_name, model_fn in models.items():
                            print(f"Running {group_name} model: {model_name} on {combo_name}")
                            # perf_counter is monotonic, so the elapsed time is
                            # not affected by system clock adjustments.
                            start_time = time.perf_counter()

                            # Errors are isolated per model: one failing model
                            # must not skip the remaining models for this pair.
                            try:
                                # The return value is discarded; presumably the
                                # benchmark function persists its own results.
                                benchmark_fn(
                                    x_train, y_train, x_test, y_test,
                                    model_constructor=wrap_model(model_name, model_fn),
                                    dataset_name=combo_name,
                                    **extra_args
                                )
                            except Exception as e:
                                print(f"[❌ ERROR] {model_name} failed on {combo_name}: {type(e).__name__}: {e}")
                            else:
                                print(f"✅ Finished {model_name} in {time.perf_counter() - start_time:.2f} seconds")

                # Tick once per pair, whether it succeeded or failed.
                pbar.update(1)


Benchmarking dataset/encoder pairs:   0%|          | 0/3 [00:00<?, ?it/s]


Processing: tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
✅ Selected normal label (inlier → 0): 0
⚠️  Anomaly labels (outlier → 1): [-1, 1]
Shape of embeddings: (31206, 512)
Shape of labels: (31206,)
Number of anomalies: 1486
Number of normal: 29720
Percentage of anomalies: 0.047619047619047616
Shape of texts: 31206
Train set: (24964, 512) (with 1189 outliers and 23775 inliers)
Test  set: (6242, 512) (with 297 outliers and 5945 inliers)
Running semi model: DevNet on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running DevNet (uncertainty_model=False)
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=1, bias=Fa


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  40%|████      | 157/391 [00:00<00:00, 1564.09it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1500.57it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  32%|███▏      | 125/391 [00:00<00:00, 1245.53it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1335.84it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1528.56it/s]


✅ Finished DevNet in 104.47 seconds
Running semi model: DeepSAD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running DeepSAD (uncertainty_model=False)
Start Training...
ensemble size: 1
training data counter: Counter({0: 23775, -1: 1189})
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.854045, time: 0.9s
epoch 10, training loss: 0.307587, time: 0.9s
epoch 20, training loss: 0.184367, time: 0.9s
epoch 30, training loss: 0.265096, time: 0.9s
epoch 40, training loss: 0.160461, time: 0.9s
epoch 50, training loss: 0.103714, time: 0.9s
epoch 60, training loss: 0.204419, time: 1.1s



testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  39%|███▉      | 153/391 [00:00<00:00, 1525.32it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1460.64it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  31%|███▏      | 123/391 [00:00<00:00, 1225.71it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1369.03it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1254.16it/s]


✅ Finished DeepSAD in 95.50 seconds
Running semi model: XGBOD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running XGBOD (uncertainty_model=False)


Parameters: { "silent" } are not used.



✅ Finished XGBOD in 54.02 seconds
Running semi model: MLP on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running MLP (uncertainty_model=False)




✅ Finished MLP in 67.52 seconds
Running unsupervised model: IForest on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running IForest [unsupervised]




✅ Finished IForest in 1.14 seconds
Running unsupervised model: LOF on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running LOF [unsupervised]




✅ Finished LOF in 41.61 seconds
Running unsupervised model: DeepSVDD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running DeepSVDD [unsupervised]
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.046177, time: 0.8s
epoch 10, training loss: 0.001147, time: 0.8s
epoch 20, training loss: 0.000806, time: 0.8s
epoch 30, training loss: 0.000752, time: 1.0s
epoch 40, training loss: 0.000652, time: 0.7s
epoch 50, training loss: 0.000656, time: 0.7s
epoch 60, training loss: 0.000633, time: 0.7s
epoch 70, training loss: 0.000531, time: 0.7s
epoch 80, tr


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  31%|███       | 120/391 [00:00<00:00, 1194.00it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1328.52it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  38%|███▊      | 150/391 [00:00<00:00, 1496.19it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1455.63it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1440.78it/s]


✅ Finished DeepSVDD in 77.76 seconds
Running unsupervised model: OCSVM on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running OCSVM [unsupervised]




✅ Finished OCSVM in 55.40 seconds
Running unsupervised model: OneClassSVM on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running OneClassSVM [unsupervised]




✅ Finished OneClassSVM in 42.35 seconds
Running unsupervised model: AutoEncoder on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running AutoEncoder [unsupervised]



Training:   0%|          | 0/10 [00:00<?, ?it/s][A
Training:  10%|█         | 1/10 [00:02<00:26,  2.90s/it][A
Training:  20%|██        | 2/10 [00:05<00:23,  2.89s/it][A
Training:  30%|███       | 3/10 [00:08<00:20,  2.89s/it][A
Training:  40%|████      | 4/10 [00:12<00:19,  3.18s/it][A
Training:  50%|█████     | 5/10 [00:15<00:15,  3.07s/it][A
Training:  60%|██████    | 6/10 [00:18<00:12,  3.01s/it][A
Training:  70%|███████   | 7/10 [00:20<00:08,  2.97s/it][A
Training:  80%|████████  | 8/10 [00:24<00:06,  3.09s/it][A
Training:  90%|█████████ | 9/10 [00:27<00:03,  3.11s/it][A
Training: 100%|██████████| 10/10 [00:30<00:00,  3.03s/it]


✅ Finished AutoEncoder in 35.05 seconds
Running unsupervised model: VAE on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running VAE [unsupervised]



Training:   0%|          | 0/30 [00:00<?, ?it/s][A
Training:   3%|▎         | 1/30 [00:05<02:37,  5.43s/it][A
Training:   7%|▋         | 2/30 [00:10<02:18,  4.95s/it][A
Training:  10%|█         | 3/30 [00:14<02:11,  4.88s/it][A
Training:  13%|█▎        | 4/30 [00:19<02:09,  4.96s/it][A
Training:  17%|█▋        | 5/30 [00:24<01:59,  4.76s/it][A
Training:  20%|██        | 6/30 [00:29<01:56,  4.83s/it][A
Training:  23%|██▎       | 7/30 [00:34<01:50,  4.81s/it][A
Training:  27%|██▋       | 8/30 [00:38<01:43,  4.69s/it][A
Training:  30%|███       | 9/30 [00:43<01:42,  4.88s/it][A
Training:  33%|███▎      | 10/30 [00:48<01:34,  4.74s/it][A
Training:  37%|███▋      | 11/30 [00:52<01:28,  4.64s/it][A
Training:  40%|████      | 12/30 [00:57<01:27,  4.85s/it][A
Training:  43%|████▎     | 13/30 [01:02<01:20,  4.72s/it][A
Training:  47%|████▋     | 14/30 [01:06<01:13,  4.62s/it][A
Training:  50%|█████     | 15/30 [01:12<01:12,  4.83s/it][A
Training:  53%|█████▎    | 16/30 [01:16<0

✅ Finished VAE in 145.49 seconds
Running unsupervised model: HBOS on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v1
→ Running HBOS [unsupervised]


Benchmarking dataset/encoder pairs:  33%|███▎      | 1/3 [12:54<25:48, 774.48s/it]

✅ Finished HBOS in 6.68 seconds

Processing: tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
✅ Selected normal label (inlier → 0): 0
⚠️  Anomaly labels (outlier → 1): [-1, 1]
Shape of embeddings: (31206, 512)
Shape of labels: (31206,)
Number of anomalies: 1486
Number of normal: 29720
Percentage of anomalies: 0.047619047619047616
Shape of texts: 31206
Train set: (24964, 512) (with 1189 outliers and 23775 inliers)
Test  set: (6242, 512) (with 297 outliers and 5945 inliers)
Running semi model: DevNet on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running DevNet (uncertainty_model=False)
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_feat


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  40%|████      | 157/391 [00:00<00:00, 1569.86it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1527.23it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  40%|███▉      | 155/391 [00:00<00:00, 1545.80it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1400.77it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1532.31it/s]


✅ Finished DevNet in 104.68 seconds
Running semi model: DeepSAD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running DeepSAD (uncertainty_model=False)
Start Training...
ensemble size: 1
training data counter: Counter({0: 23775, -1: 1189})
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.871954, time: 1.0s
epoch 10, training loss: 0.202095, time: 0.9s
epoch 20, training loss: 0.147886, time: 0.9s
epoch 30, training loss: 0.110564, time: 1.2s
epoch 40, training loss: 0.094747, time: 0.9s
epoch 50, training loss: 0.136676, time: 1.0s
epoch 60, training loss: 0.092802, time: 0.9s



testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  37%|███▋      | 145/391 [00:00<00:00, 1441.79it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1424.50it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  36%|███▌      | 140/391 [00:00<00:00, 1391.39it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1325.46it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1381.95it/s]


✅ Finished DeepSAD in 100.95 seconds
Running semi model: XGBOD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running XGBOD (uncertainty_model=False)


Parameters: { "silent" } are not used.



✅ Finished XGBOD in 52.86 seconds
Running semi model: MLP on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running MLP (uncertainty_model=False)




✅ Finished MLP in 72.21 seconds
Running unsupervised model: IForest on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running IForest [unsupervised]




✅ Finished IForest in 1.10 seconds
Running unsupervised model: LOF on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running LOF [unsupervised]




✅ Finished LOF in 41.15 seconds
Running unsupervised model: DeepSVDD on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running DeepSVDD [unsupervised]
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=512, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.046767, time: 0.8s
epoch 10, training loss: 0.001636, time: 0.7s
epoch 20, training loss: 0.001078, time: 0.8s
epoch 30, training loss: 0.001007, time: 0.9s
epoch 40, training loss: 0.000918, time: 0.7s
epoch 50, training loss: 0.000842, time: 0.7s
epoch 60, training loss: 0.000762, time: 0.7s
epoch 70, training loss: 0.000790, time: 0.7s
epoch 80, tr


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  38%|███▊      | 150/391 [00:00<00:00, 1498.82it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1405.84it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  36%|███▋      | 142/391 [00:00<00:00, 1412.44it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1370.30it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1417.71it/s]


✅ Finished DeepSVDD in 79.92 seconds
Running unsupervised model: OCSVM on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running OCSVM [unsupervised]




✅ Finished OCSVM in 55.62 seconds
Running unsupervised model: OneClassSVM on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running OneClassSVM [unsupervised]




✅ Finished OneClassSVM in 42.28 seconds
Running unsupervised model: AutoEncoder on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running AutoEncoder [unsupervised]



Training:   0%|          | 0/10 [00:00<?, ?it/s][A
Training:  10%|█         | 1/10 [00:02<00:26,  2.99s/it][A
Training:  20%|██        | 2/10 [00:05<00:23,  2.99s/it][A
Training:  30%|███       | 3/10 [00:08<00:20,  2.98s/it][A
Training:  40%|████      | 4/10 [00:12<00:19,  3.29s/it][A
Training:  50%|█████     | 5/10 [00:15<00:15,  3.18s/it][A
Training:  60%|██████    | 6/10 [00:18<00:12,  3.11s/it][A
Training:  70%|███████   | 7/10 [00:21<00:09,  3.07s/it][A
Training:  80%|████████  | 8/10 [00:25<00:06,  3.27s/it][A
Training:  90%|█████████ | 9/10 [00:28<00:03,  3.21s/it][A
Training: 100%|██████████| 10/10 [00:31<00:00,  3.14s/it]


✅ Finished AutoEncoder in 33.86 seconds
Running unsupervised model: VAE on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running VAE [unsupervised]



Training:   0%|          | 0/30 [00:00<?, ?it/s][A
Training:   3%|▎         | 1/30 [00:05<02:31,  5.23s/it][A
Training:   7%|▋         | 2/30 [00:09<02:12,  4.74s/it][A
Training:  10%|█         | 3/30 [00:13<02:03,  4.57s/it][A
Training:  13%|█▎        | 4/30 [00:19<02:05,  4.83s/it][A
Training:  17%|█▋        | 5/30 [00:23<01:56,  4.66s/it][A
Training:  20%|██        | 6/30 [00:27<01:49,  4.57s/it][A
Training:  23%|██▎       | 7/30 [00:33<01:49,  4.78s/it][A
Training:  27%|██▋       | 8/30 [00:37<01:42,  4.64s/it][A
Training:  30%|███       | 9/30 [00:42<01:36,  4.61s/it][A
Training:  33%|███▎      | 10/30 [00:47<01:35,  4.77s/it][A
Training:  37%|███▋      | 11/30 [00:51<01:28,  4.66s/it][A
Training:  40%|████      | 12/30 [00:56<01:24,  4.72s/it][A
Training:  43%|████▎     | 13/30 [01:01<01:20,  4.74s/it][A
Training:  47%|████▋     | 14/30 [01:05<01:14,  4.63s/it][A
Training:  50%|█████     | 15/30 [01:10<01:11,  4.74s/it][A
Training:  53%|█████▎    | 16/30 [01:15<0

✅ Finished VAE in 143.78 seconds
Running unsupervised model: HBOS on tweets_hate_speech_detection_distiluse-base-multilingual-cased-v2
→ Running HBOS [unsupervised]


Benchmarking dataset/encoder pairs:  67%|██████▋   | 2/3 [25:14<12:34, 754.03s/it]

✅ Finished HBOS in 3.08 seconds

Processing: tweets_hate_speech_detection_xlm-roberta-large
✅ Selected normal label (inlier → 0): 0
⚠️  Anomaly labels (outlier → 1): [-1, 1]
Shape of embeddings: (31206, 1024)
Shape of labels: (31206,)
Number of anomalies: 1486
Number of normal: 29720
Percentage of anomalies: 0.047619047619047616
Shape of texts: 31206
Train set: (24964, 1024) (with 1189 outliers and 23775 inliers)
Test  set: (6242, 1024) (with 297 outliers and 5945 inliers)
Running semi model: DevNet on tweets_hate_speech_detection_xlm-roberta-large
→ Running DevNet (uncertainty_model=False)
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=1024, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=1, bias=Fals


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  38%|███▊      | 147/391 [00:00<00:00, 1465.91it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1403.64it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  37%|███▋      | 145/391 [00:00<00:00, 1440.32it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 1306.45it/s]

testing: 100%|██████████| 98/98 [00:00<00:00, 1428.49it/s]


✅ Finished DevNet in 106.42 seconds
Running semi model: DeepSAD on tweets_hate_speech_detection_xlm-roberta-large
→ Running DeepSAD (uncertainty_model=False)
Start Training...
ensemble size: 1
training data counter: Counter({0: 23775, -1: 1189})
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=1024, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.982176, time: 0.9s
epoch 10, training loss: 0.479647, time: 1.0s
epoch 20, training loss: 0.571948, time: 1.1s
epoch 30, training loss: 0.598953, time: 0.9s
epoch 40, training loss: 1.154663, time: 0.9s
epoch 50, training loss: 0.508224, time: 0.9s
epoch 60, training loss: 0.903054, time: 1.2s
epoch 70, training


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  26%|██▌       | 100/391 [00:00<00:00, 995.98it/s][A
testing:  52%|█████▏    | 204/391 [00:00<00:00, 1019.66it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 964.02it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  22%|██▏       | 86/391 [00:00<00:00, 859.22it/s][A
testing:  44%|████▍     | 172/391 [00:00<00:00, 845.68it/s][A
testing:  69%|██████▉   | 270/391 [00:00<00:00, 903.06it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 873.81it/s]

testing:   0%|          | 0/98 [00:00<?, ?it/s][A
testing: 100%|██████████| 98/98 [00:00<00:00, 826.73it/s]


✅ Finished DeepSAD in 102.45 seconds
Running semi model: XGBOD on tweets_hate_speech_detection_xlm-roberta-large
→ Running XGBOD (uncertainty_model=False)


Parameters: { "silent" } are not used.



✅ Finished XGBOD in 97.75 seconds
Running semi model: MLP on tweets_hate_speech_detection_xlm-roberta-large
→ Running MLP (uncertainty_model=False)




✅ Finished MLP in 315.85 seconds
Running unsupervised model: IForest on tweets_hate_speech_detection_xlm-roberta-large
→ Running IForest [unsupervised]




✅ Finished IForest in 1.38 seconds
Running unsupervised model: LOF on tweets_hate_speech_detection_xlm-roberta-large
→ Running LOF [unsupervised]




✅ Finished LOF in 76.86 seconds
Running unsupervised model: DeepSVDD on tweets_hate_speech_detection_xlm-roberta-large
→ Running DeepSVDD [unsupervised]
Start Training...
ensemble size: 1
MLPnet(
  (network): Sequential(
    (0): LinearBlock(
      (linear): Linear(in_features=1024, out_features=100, bias=False)
      (act_layer): ReLU()
    )
    (1): LinearBlock(
      (linear): Linear(in_features=100, out_features=50, bias=False)
      (act_layer): ReLU()
    )
    (2): LinearBlock(
      (linear): Linear(in_features=50, out_features=128, bias=False)
      (act_layer): Identity()
    )
  )
)
epoch  1, training loss: 0.003749, time: 0.8s
epoch 10, training loss: 0.000119, time: 0.8s
epoch 20, training loss: 0.000095, time: 1.0s
epoch 30, training loss: 0.000043, time: 0.8s
epoch 40, training loss: 0.000037, time: 0.8s
epoch 50, training loss: 0.000043, time: 0.8s
epoch 60, training loss: 0.000049, time: 0.8s
epoch 70, training loss: 0.000032, time: 0.9s
epoch 80, training loss: 0.000


testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  23%|██▎       | 88/391 [00:00<00:00, 873.00it/s][A
testing:  46%|████▌     | 180/391 [00:00<00:00, 899.58it/s][A
testing:  70%|██████▉   | 272/391 [00:00<00:00, 905.54it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 889.92it/s]

testing:   0%|          | 0/391 [00:00<?, ?it/s][A
testing:  26%|██▌       | 101/391 [00:00<00:00, 1007.90it/s][A
testing:  52%|█████▏    | 202/391 [00:00<00:00, 926.45it/s] [A
testing:  76%|███████▌  | 296/391 [00:00<00:00, 910.94it/s][A
testing: 100%|██████████| 391/391 [00:00<00:00, 890.14it/s]

testing:   0%|          | 0/98 [00:00<?, ?it/s][A
testing: 100%|██████████| 98/98 [00:00<00:00, 799.45it/s]


✅ Finished DeepSVDD in 83.98 seconds
Running unsupervised model: OCSVM on tweets_hate_speech_detection_xlm-roberta-large
→ Running OCSVM [unsupervised]




✅ Finished OCSVM in 106.46 seconds
Running unsupervised model: OneClassSVM on tweets_hate_speech_detection_xlm-roberta-large
→ Running OneClassSVM [unsupervised]




✅ Finished OneClassSVM in 81.22 seconds
Running unsupervised model: AutoEncoder on tweets_hate_speech_detection_xlm-roberta-large
→ Running AutoEncoder [unsupervised]



Training:   0%|          | 0/10 [00:00<?, ?it/s][A
Training:  10%|█         | 1/10 [00:03<00:31,  3.46s/it][A
Training:  20%|██        | 2/10 [00:06<00:27,  3.47s/it][A
Training:  30%|███       | 3/10 [00:10<00:25,  3.68s/it][A
Training:  40%|████      | 4/10 [00:14<00:22,  3.74s/it][A
Training:  50%|█████     | 5/10 [00:18<00:18,  3.62s/it][A
Training:  60%|██████    | 6/10 [00:21<00:14,  3.56s/it][A
Training:  70%|███████   | 7/10 [00:25<00:11,  3.80s/it][A
Training:  80%|████████  | 8/10 [00:29<00:07,  3.66s/it][A
Training:  90%|█████████ | 9/10 [00:32<00:03,  3.57s/it][A
Training: 100%|██████████| 10/10 [00:36<00:00,  3.61s/it]


✅ Finished AutoEncoder in 40.00 seconds
Running unsupervised model: VAE on tweets_hate_speech_detection_xlm-roberta-large
→ Running VAE [unsupervised]



Training:   0%|          | 0/30 [00:00<?, ?it/s][A
Training:   3%|▎         | 1/30 [00:04<02:17,  4.73s/it][A
Training:   7%|▋         | 2/30 [00:09<02:12,  4.73s/it][A
Training:  10%|█         | 3/30 [00:15<02:19,  5.17s/it][A
Training:  13%|█▎        | 4/30 [00:19<02:09,  4.98s/it][A
Training:  17%|█▋        | 5/30 [00:24<02:05,  5.01s/it][A
Training:  20%|██        | 6/30 [00:30<02:01,  5.06s/it][A
Training:  23%|██▎       | 7/30 [00:34<01:53,  4.93s/it][A
Training:  27%|██▋       | 8/30 [00:40<01:52,  5.12s/it][A
Training:  30%|███       | 9/30 [00:44<01:43,  4.95s/it][A
Training:  33%|███▎      | 10/30 [00:49<01:37,  4.85s/it][A
Training:  37%|███▋      | 11/30 [00:54<01:36,  5.06s/it][A
Training:  40%|████      | 12/30 [00:59<01:28,  4.94s/it][A
Training:  43%|████▎     | 13/30 [01:04<01:23,  4.91s/it][A
Training:  47%|████▋     | 14/30 [01:09<01:20,  5.02s/it][A
Training:  50%|█████     | 15/30 [01:14<01:13,  4.92s/it][A
Training:  53%|█████▎    | 16/30 [01:19<0

✅ Finished VAE in 152.82 seconds
Running unsupervised model: HBOS on tweets_hate_speech_detection_xlm-roberta-large
→ Running HBOS [unsupervised]


Benchmarking dataset/encoder pairs: 100%|██████████| 3/3 [44:52<00:00, 897.35s/it]

✅ Finished HBOS in 6.36 seconds



