In [2]:
import adbench
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

# Load data

In [3]:
basepath = "./data"

train_csvs = [
    # "memory_limit_merge_label",
    # "io_saturation_merge_ob_2024-03-0706_11_36UTC_label",
    # "io_saturation_merge_ob_2024-03-0717_44_47UTC_label",
    # "memory_limit_merge_ob_2024-03-0508_09_42UTC_label",
    "memory_limit_merge_ob_2024-03-0508_56_01UTC_label",
    "memory_limit_merge_ob_2024-03-0509_42_19UTC_label",
    # "memory_limit_merge_ob_2024-03-0510_28_34UTC_label",
    # "memory_limit_merge_ob_2024-03-0511_14_49UTC_label",
    # "memory_limit_4_16_merge_ob_2024-03-1016_04_40UTC_label",
    # "memory_limit_4_16_merge_ob_2024-03-1017_05_55UTC_label",
]

test_csvs = [
    # "memory_limit_merge_label",
    "io_saturation_merge_ob_2024-03-0706_11_36UTC_label",
    "io_saturation_merge_ob_2024-03-0717_44_47UTC_label",
    # "memory_limit_merge_ob_2024-03-0508_09_42UTC_label",
    # "memory_limit_merge_ob_2024-03-0508_56_01UTC_label",
    # "memory_limit_merge_ob_2024-03-0509_42_19UTC_label",
    "memory_limit_merge_ob_2024-03-0510_28_34UTC_label",
    "memory_limit_merge_ob_2024-03-0511_14_49UTC_label",
    "memory_limit_4_16_merge_ob_2024-03-1016_04_40UTC_label",
    "memory_limit_4_16_merge_ob_2024-03-1017_05_55UTC_label",
]

train_tasks = [
    # "io_saturation_merge",
    "memory_limit_merge",
    # "memory_limit_4_16_merge",
]

test_tasks = [
    "io_saturation_merge",
    "memory_limit_merge",
    "memory_limit_4_16_merge",
]

dfs_train = [[] for _ in range(len(train_tasks))]
dfs_test = [[] for _ in range(len(test_tasks))]

f = lambda x: os.path.join(basepath, x + ".csv")
for csv in train_csvs:
    df = pd.read_csv(f(csv))
    for i, task in enumerate(train_tasks):
        if task in csv:
            dfs_train[i].append(df)
            break

for csv in test_csvs:
    df = pd.read_csv(f(csv))
    for i, t in enumerate(test_tasks):
        if t in csv:
            dfs_test[i].append(df)
            break

dfs_train_all = [pd.concat(dfs, axis=0) for dfs in dfs_train]
dfs_test_all = [pd.concat(dfs, axis=0) for dfs in dfs_test]

num_norm = lambda x: len(x[x["label"] == 0])
num_anom = lambda x: len(x[x["label"] != 0])
print(
    f"dfs_train_all[{[df.shape for df in dfs_train_all]}], normal[{[num_norm(df) for df in dfs_train_all]}], anomaly[{[num_anom(df) for df in dfs_train_all]}]"
)
print(
    f"dfs_test_all[{[df.shape for df in dfs_test_all]}], normal[{[num_norm(df) for df in dfs_test_all]}], anomaly[{[num_anom(df) for df in dfs_test_all]}]"
)

dfs_train_all[[(1107, 511)]], normal[[378]], anomaly[[729]]
dfs_test_all[[(2552, 511), (1106, 511), (1466, 511)]], normal[[363, 378, 738]], anomaly[[2189, 728, 728]]


# Data cleaning
## Remove irrelevant data
Remove those columns whose data never change

In [4]:
# Find those columns in df_all that are always zero
zero_cols = dfs_train_all[0].columns[(dfs_train_all[0] == 0).all()]
print(len(zero_cols))

# Drop those cols
dfs_train_all = [df.drop(zero_cols, axis=1) for df in dfs_train_all]
dfs_test_all = [df.drop(zero_cols, axis=1) for df in dfs_test_all]

272


# Dataset construction

Reconstruct dataset with respect to anomaly ratio

In [5]:
def construct_dataset(dfs, anomaly_ratio=0.1):
    new_dfs = []
    for df in dfs:
        num_normal = num_anom(df)
        required_num_anomaly = int(num_normal * (anomaly_ratio / (1 - anomaly_ratio)))

        normal_samples = df[df["label"] == 0]
        anomaly_samples = df[df["label"] != 0].sample(
            n=required_num_anomaly, random_state=42
        )

        new_dfs.append(
            pd.concat([normal_samples, anomaly_samples]).sample(
                frac=1, random_state=42, axis=0
            )
        )

    return new_dfs


dfs_train_all = construct_dataset(dfs_train_all, anomaly_ratio=0.2)
dfs_test_all = construct_dataset(dfs_test_all, anomaly_ratio=0.5)

print(
    f"dfs_train_all[{[df.shape for df in dfs_train_all]}], normal[{[num_norm(df) for df in dfs_train_all]}], anomaly[{[num_anom(df) for df in dfs_train_all]}]"
)
print(
    f"dfs_test_all[{[df.shape for df in dfs_test_all]}], normal[{[num_norm(df) for df in dfs_test_all]}], anomaly[{[num_anom(df) for df in dfs_test_all]}]"
)

dfs_train_all[[(560, 239)]], normal[[378]], anomaly[[182]]
dfs_test_all[[(2552, 239), (1106, 239), (1466, 239)]], normal[[363, 378, 738]], anomaly[[2189, 728, 728]]


In [6]:
from torch.utils.data import DataLoader, Dataset
import random

batch_size = 32
class OBDataset(Dataset):
    def __init__(self, dfs):
        self.data = []
        for df in dfs:
            X = df.drop(columns=['label']).to_numpy()
            y=df['label'].to_numpy()
            self.data.append((X,y))
            
    def __getitem__(self, index):
        task_index = random.randint(0, len(self.data)-1)
        X, y = self.data[task_index]

        sample_index = random.randint(0, len(X)-1)
        return X[sample_index], y[sample_index]

    def __len__(self):
        return sum([len(x) for x, _ in self.data])
    
train_dataset = OBDataset(dfs_train_all)
test_dataset = OBDataset(dfs_test_all)
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
print(len(train_data_loader.dataset.data))
print([len(x) for x, _ in train_data_loader.dataset.data])
print(sum([len(x) for x, _ in train_data_loader.dataset.data]))

1
[560]
560


# Without meta-learning
First test those models without meta-learning

In [7]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, recall_score, f1_score
from adbench.baseline.PyOD import PYOD
from adbench.baseline.PReNet.run import PReNet

epochs = 10
seed=42

total_train = pd.concat(dfs_train_all)

X = total_train.drop(columns=['label']).to_numpy()
y=total_train['label'].to_numpy()
X_train, X_val, y_train, y_val=train_test_split(X,y,test_size=0.1,random_state=seed,shuffle=True)

total_test = pd.concat(dfs_test_all)
X_test=total_test.drop(columns=['label']).to_numpy()
y_test=total_test['label'].to_numpy()

model = PReNet(seed=seed)

print(f"X[{X.shape}] y[{y.shape}]")
# X_train=X_train.numpy()
# y_train=y_train.numpy()
# X_val=X_val.numpy()
# y_val=y_val.numpy()

print(f"X_train[{X_train.shape}] y_train[{y_train.shape}] X_test[{X_test.shape}] y_test[{y_test.shape}]")

predict_labels = lambda scores, threshold=0.5: (scores > threshold).astype(int)

model.fit(X_train, y_train)
score = model.predict_score(X_val)
y_pred = predict_labels(score)
val_auc = roc_auc_score(y_val, score)
val_acc = accuracy_score(y_val, y_pred)
val_recall = recall_score(y_val, y_pred, average="macro")
val_f1 = f1_score(y_val, y_pred, average="macro")
print(f"PReNet AUC:{val_auc} Acc:{val_acc} Recall:{val_recall} f1:{val_f1}")

2024-03-17 00:33:31.072272: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-03-17 00:33:31.118610: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-17 00:33:31.118644: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-17 00:33:31.118652: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-17 00:33:31.126642: I tensorflow/core/platform/cpu_feature_g

X[(560, 238)] y[(560,)]
X_train[(504, 238)] y_train[(504,)] X_test[(5124, 238)] y_test[(5124,)]
PReNet AUC:0.5666666666666667 Acc:0.7321428571428571 Recall:0.5 f1:0.42268041237113396


# Compare all baselines
Compare all baselines, including unsupervised, semi-supervised and full-supervised models

In [8]:
import time


def evaluate_model(name: str, scores, y_truth, threshold=0.5):
    predict_labels = lambda scores, threshold=0.5: (scores > threshold).astype(int)
    y_pred = predict_labels(scores, threshold)
    auc = roc_auc_score(y_truth, scores)
    acc = accuracy_score(y_truth, y_pred)
    recall = recall_score(y_truth, y_pred)
    f1 = f1_score(y_truth, y_pred)
    # print(f"{name}: AUC:{auc} Acc:{acc} Recall:{recall} f1:{f1}")
    return (auc, acc, recall, f1)


seed=42
def model_fit(model_name: str, model_class, seed):
    try:
        # fit
        start_time = time.time()
        model = model_class(model_name=model_name, seed=42)
        model.fit(X_train, y_train)
        end_time = time.time()
        fit_time = end_time - start_time

        # predict
        # val
        start_time = time.time()
        if model_name in ['DAGMM']:
            score = model.predict_score(X_test=X_val, X_train=X_train)
        else:
            score = model.predict_score(X_val)
        end_time = time.time()
        val_predict_time = end_time - start_time
        val_auc, val_acc, val_recall, val_f1 = evaluate_model(model_name, score, y_val)

        # test
        start_time = time.time()
        score = model.predict_score(X_test)
        end_time = time.time()
        test_predict_time = end_time - start_time
        test_auc, test_acc, test_recall, test_f1 = evaluate_model(
            model_name, score, y_test
        )

        return (
            fit_time,
            val_predict_time,
            test_predict_time,
            {
                "val_auc": val_auc,
                "val_acc": val_acc,
                "val_recall": val_recall,
                "val_f1": val_f1,
                "test_auc": test_auc,
                "test_acc": test_acc,
                "test_recall": test_recall,
                "test_f1": test_f1,
            },
        )
    except Exception as e:
        print(f"Error running {model_name}: {e}")

In [9]:
model_dict = {}

# Unsupervised algorithms
from adbench.baseline.PyOD import PYOD
from adbench.baseline.DAGMM.run import DAGMM

# from pyod
for _ in [
    "IForest",
    "OCSVM",
    "CBLOF",
    "COF",
    "COPOD",
    "ECOD",
    "FeatureBagging",
    "HBOS",
    "KNN",
    "LODA",
    "LOF",
    "LSCP",
    "MCD",
    "PCA",
    "SOD",
    "SOGAAL",
    "MOGAAL",
    "DeepSVDD",
]:
    model_dict[_] = PYOD
# from dagmm
model_dict["DAGMM"] = DAGMM

# Semi-supervised algorithms
from adbench.baseline.PyOD import PYOD
from adbench.baseline.GANomaly.run import GANomaly
from adbench.baseline.DeepSAD.src.run import DeepSAD
from adbench.baseline.REPEN.run import REPEN
from adbench.baseline.DevNet.run import DevNet
from adbench.baseline.PReNet.run import PReNet
from adbench.baseline.FEAWAD.run import FEAWAD

model_dict.update(
    {
        "GANomaly": GANomaly,
        "DeepSAD": DeepSAD,
        "REPEN": REPEN,
        "DevNet": DevNet,
        "PReNet": PReNet,
        "FEAWAD": FEAWAD,
        'XGBOD':PYOD,
    }
)

# Full-supervised algorithms
from adbench.baseline.Supervised import supervised
from adbench.baseline.FTTransformer.run import FTTransformer

# from sklearn
for _ in ['LR','NB','SVM','MLP','RF','LGB','XGB','CatB']:
    model_dict[_]=supervised
# ResNet and FTTransformer for tabular data
for _ in ['ResNet','FTTransformer']:
    model_dict[_]=FTTransformer
    
# Remove the computational-expensive models
for _ in ['SOGAAL','MOGAAL','LSCP','MCD','FeatureBagging']:
    if _ in model_dict.keys():
        model_dict.pop(_)


In [10]:
from tqdm import tqdm
import gc

results=[]
# Model fitting
for model_name, model_class in tqdm(model_dict.items()):
    try:
        fit_time, val_predict_time, test_predict_time, metrics = model_fit(model_name,model_class,seed)
        results.append([model_name, metrics, fit_time,val_predict_time,test_predict_time])
        print(f'{model_name}: {metrics}, ', f'fitting time: {fit_time}, val inference time: {val_predict_time}, test inference time: {test_predict_time}')
        gc.collect()
    except Exception as e:
        print(e)

  0%|          | 0/31 [00:00<?, ?it/s]

best param: None


  3%|▎         | 1/31 [00:00<00:26,  1.14it/s]

IForest: {'val_auc': 0.9544715447154472, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.4511637548449207, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 0.2879786491394043, val inference time: 0.03004312515258789, test inference time: 0.3106415271759033
best param: None
Error running OCSVM: Input contains NaN, infinity or a value too large for dtype('float64').
cannot unpack non-iterable NoneType object
best param: None


 10%|▉         | 3/31 [00:03<00:37,  1.32s/it]

CBLOF: {'val_auc': 0.5967479674796747, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.3832513905235715, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 2.4187216758728027, val inference time: 0.13985180854797363, test inference time: 0.10178685188293457
best param: None


 19%|█▉        | 6/31 [00:53<03:49,  9.18s/it]

COF: {'val_auc': 0.9853658536585366, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.5146041842308089, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 0.2505061626434326, val inference time: 0.007873773574829102, test inference time: 49.445401191711426
best param: None
Error running COPOD: Input contains NaN, infinity or a value too large for dtype('float64').
cannot unpack non-iterable NoneType object
best param: None
Error running ECOD: Input contains NaN, infinity or a value too large for dtype('float64').
cannot unpack non-iterable NoneType object
best param: None
Error running HBOS: index -9223372036854775808 is out of bounds for axis 0 with size 11
cannot unpack non-iterable NoneType object
best param: None


 26%|██▌       | 8/31 [00:54<02:06,  5.49s/it]

KNN: {'val_auc': 0.975609756097561, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.37878613344017903, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 0.010478496551513672, val inference time: 0.004376649856567383, test inference time: 0.35323190689086914
best param: None
Error running LODA: module 'numpy' has no attribute 'int'.
`np.int` was a deprecated alias for the builtin `int`. To avoid this error in existing code, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.
The aliases was originally deprecated in NumPy 1.20; for more details and guidance see the original release note at:
    https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
cannot unpack non-it

 32%|███▏      | 10/31 [00:54<01:13,  3.50s/it]

best param: None


 35%|███▌      | 11/31 [00:55<00:57,  2.88s/it]

PCA: {'val_auc': 0.6666666666666666, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.3606809925143133, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 0.07253646850585938, val inference time: 0.004468441009521484, test inference time: 0.2754065990447998
best param: None


 39%|███▊      | 12/31 [01:17<02:18,  7.30s/it]

SOD: {'val_auc': 0.45528455284552843, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.542866338153444, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 4.366870641708374, val inference time: 0.023936033248901367, test inference time: 17.30292582511902
best param: None


2024-03-17 00:34:59.629737: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22344 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:af:00.0, compute capability: 7.5
2024-03-17 00:34:59.635470: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:382] MLIR V1 optimization pass is not enabled




2024-03-17 00:35:00.122754: W tensorflow/c/c_api.cc:305] Operation '{name:'net_output/kernel/Assign' id:42 op device:{requested: '', assigned: ''} def:{{{node net_output/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](net_output/kernel, net_output/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-03-17 00:35:00.143216: W tensorflow/c/c_api.cc:305] Operation '{name:'pow/y' id:59 op device:{requested: '', assigned: ''} def:{{{node pow/y}} = Const[_has_manual_control_dependencies=true, dtype=DT_FLOAT, value=Tensor<type: float shape: [] values: 2>]()}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes a

Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 238)]             0         
                                                                 
 dense_1 (Dense)             (None, 64)                15232     
                                                                 
 net_output (Dense)          (None, 32)                2048      
                                                                 
 tf_op_layer_sub_1 (TensorF  [(None, 32)]              0         
 lowOpLayer)                                                     
                                                                 
 tf_op_layer_pow_1 (TensorF  [(None, 32)]              0         
 lowOpLayer)                                                     
                                                                 
 tf_op_layer_Sum_1 (TensorF  [(None,)]                 0   

2024-03-17 00:35:00.887544: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2024-03-17 00:35:01.024632: W tensorflow/c/c_api.cc:305] Operation '{name:'dense_1/kernel/Assign' id:91 op device:{requested: '', assigned: ''} def:{{{node dense_1/kernel/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](dense_1/kernel, dense_1/kernel/Initializer/stateless_random_uniform)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-03-17 00:35:01.036042: W tensorflow/c/c_api.cc:305] Operation '{name:'pow_1/y' id:135 op device:{requested: '', assigned: ''} def:{{{node pow_1/y}} = Const[_has_manual_control_dependencies=true, dtype=DT_FLOAT, value=Tensor<type: float shape: [] values: 2>]()}}' was changed by setting attribute 

None
Error running DeepSVDD: ('Error when checking model target: expected no data, but got:', array([[-1.81418593,  1.69324837,  0.76783903, ...,  0.        ,
         0.        , 13.25      ],
       [-0.66209305, -0.59568198, -0.20727599, ...,  0.        ,
         0.        , 13.25      ],
       [ 1.06326641, -0.65101579, -1.13247046, ...,  0.        ,
         0.        , 13.25      ],
       ...,
       [-0.48294724, -0.64561737,  0.17544833, ...,  0.        ,
         0.        , 13.25      ],
       [-0.53854422, -0.5228033 ,  0.08725534, ...,  0.        ,
         0.        , 13.25      ],
       [-0.27291423, -0.16515793,  0.14050394, ...,  0.        ,
         0.        , 13.25      ]]))
cannot unpack non-iterable NoneType object
using the params: 4


 45%|████▌     | 14/31 [01:36<02:26,  8.63s/it]

Error running DAGMM: predict_score() missing 1 required positional argument: 'X_train'
cannot unpack non-iterable NoneType object
GANomaly: {'val_auc': 0.5, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.5, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 1.3235163688659668, val inference time: 0.0006148815155029297, test inference time: 0.08360123634338379


 52%|█████▏    | 16/31 [02:23<04:26, 17.80s/it]

DeepSAD: {'val_auc': 0.5, 'val_acc': 0.26785714285714285, 'val_recall': 1.0, 'val_f1': 0.4225352112676056, 'test_auc': 0.380265648665218, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 45.09444212913513, val inference time: 0.008061885833740234, test inference time: 0.8699290752410889


2024-03-17 00:36:04.220570: W tensorflow/c/c_api.cc:305] Operation '{name:'training/Adadelta/hidden_layer/bias/accum_var/Assign' id:357 op device:{requested: '', assigned: ''} def:{{{node training/Adadelta/hidden_layer/bias/accum_var/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](training/Adadelta/hidden_layer/bias/accum_var, training/Adadelta/hidden_layer/bias/accum_var/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-03-17 00:38:47.162871: W tensorflow/c/c_api.cc:305] Operation '{name:'hidden_layer/Relu' id:179 op device:{requested: '', assigned: ''} def:{{{node hidden_layer/Relu}} = Relu[T=DT_FLOAT, _has_manual_control_dependencies=true](hidden_layer/BiasAdd)}}' was changed by setting attribute after it was run by a session. This mutation wi

REPEN: {'val_auc': 0.502439024390244, 'val_acc': 0.5535714285714286, 'val_recall': 0.4666666666666667, 'val_f1': 0.35897435897435903, 'test_auc': 0.40068179014664373, 'test_acc': 0.7113583138173302, 'test_recall': 1.0, 'test_f1': 0.8313376667807048},  fitting time: 163.17646646499634, val inference time: 0.06448006629943848, test inference time: 0.25226879119873047
Training size: 504, No. outliers: 167
Epoch 1/50


2024-03-17 00:38:47.748771: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22344 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:af:00.0, compute capability: 7.5
 58%|█████▊    | 18/31 [05:07<09:09, 42.29s/it]

Error running DevNet: 'RMSprop' object has no attribute 'get_updates'
cannot unpack non-iterable NoneType object


 61%|██████▏   | 19/31 [05:36<07:37, 38.15s/it]2024-03-17 00:39:16.020270: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22344 MB memory:  -> device: 0, name: Quadro RTX 6000, pci bus id: 0000:af:00.0, compute capability: 7.5


PReNet: {'val_auc': 0.5666666666666667, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.6495479928880875, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 11.811742067337036, val inference time: 0.1325216293334961, test inference time: 15.899823427200317
autoencoder pre-training start....
Epoch 1/100
Error running FEAWAD: 'Adam' object has no attribute 'get_updates'
cannot unpack non-iterable NoneType object
best param: None


 68%|██████▊   | 21/31 [05:36<03:28, 20.85s/it]

Error running XGBOD: index -9223372036854775808 is out of bounds for axis 0 with size 6
cannot unpack non-iterable NoneType object
LR: {'val_auc': 0.7723577235772358, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.646689408462879, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 0.011340618133544922, val inference time: 0.0002846717834472656, test inference time: 0.003363370895385742


 74%|███████▍  | 23/31 [05:36<01:34, 11.75s/it]

NB: {'val_auc': 0.9878048780487805, 'val_acc': 0.6964285714285714, 'val_recall': 1.0, 'val_f1': 0.6382978723404256, 'test_auc': 0.49945130315500685, 'test_acc': 0.7105776736924277, 'test_recall': 0.9989026063100137, 'test_f1': 0.8308043354249858},  fitting time: 0.0023207664489746094, val inference time: 0.0004165172576904297, test inference time: 0.01479649543762207


 77%|███████▋  | 24/31 [05:37<01:01,  8.77s/it]

SVM: {'val_auc': 0.5, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.5, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 0.09906172752380371, val inference time: 0.003880023956298828, test inference time: 0.40140557289123535
MLP: {'val_auc': 0.5, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.5, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 0.06413507461547852, val inference time: 0.0004048347473144531, test inference time: 0.004105091094970703


 84%|████████▍ | 26/31 [05:38<00:23,  4.69s/it]

RF: {'val_auc': 1.0, 'val_acc': 1.0, 'val_recall': 1.0, 'val_f1': 1.0, 'test_auc': 0.9990833720556006, 'test_acc': 0.9957064793130367, 'test_recall': 0.9939643347050754, 'test_f1': 0.9969730324711062},  fitting time: 0.13283753395080566, val inference time: 0.0037343502044677734, test inference time: 0.023192405700683594
[LightGBM] [Info] Number of positive: 167, number of negative: 337
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.004616 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 20517
[LightGBM] [Info] Number of data points in the train set: 504, number of used features: 169
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.331349 -> initscore=-0.702089
[LightGBM] [Info] Start training from score -0.702089
LGB: {'val_auc': 1.0, 'val_acc': 1.0, 'val_recall': 1.0, 'val_f1': 1.0, 'test_auc': 0.9670660393195639, 'test_acc': 0.8530444964871194, 'test_recall': 0.9950617283950617, 'test_f1': 0.905

 87%|████████▋ | 27/31 [05:39<00:14,  3.55s/it]

XGB: {'val_auc': 0.9999999999999999, 'val_acc': 1.0, 'val_recall': 1.0, 'val_f1': 1.0, 'test_auc': 0.9625321487565747, 'test_acc': 0.8458235753317721, 'test_recall': 0.7840877914951989, 'test_f1': 0.8785736243467569},  fitting time: 0.40828728675842285, val inference time: 0.0018970966339111328, test inference time: 0.015877485275268555


 90%|█████████ | 28/31 [05:39<00:08,  2.71s/it]

Learning rate set to 0.007689
0:	learn: 0.6754943	total: 62.8ms	remaining: 1m 2s
1:	learn: 0.6577670	total: 69.3ms	remaining: 34.6s
2:	learn: 0.6383564	total: 74.4ms	remaining: 24.7s
3:	learn: 0.6217418	total: 83ms	remaining: 20.7s
4:	learn: 0.6062866	total: 89.5ms	remaining: 17.8s
5:	learn: 0.5905259	total: 95.2ms	remaining: 15.8s
6:	learn: 0.5737338	total: 102ms	remaining: 14.4s
7:	learn: 0.5573959	total: 109ms	remaining: 13.5s
8:	learn: 0.5410778	total: 115ms	remaining: 12.7s
9:	learn: 0.5267518	total: 121ms	remaining: 12s
10:	learn: 0.5119136	total: 127ms	remaining: 11.4s
11:	learn: 0.4985890	total: 133ms	remaining: 11s
12:	learn: 0.4836728	total: 138ms	remaining: 10.5s
13:	learn: 0.4714160	total: 143ms	remaining: 10.1s
14:	learn: 0.4595632	total: 149ms	remaining: 9.8s
15:	learn: 0.4471629	total: 156ms	remaining: 9.57s
16:	learn: 0.4348376	total: 161ms	remaining: 9.3s
17:	learn: 0.4237034	total: 167ms	remaining: 9.08s
18:	learn: 0.4136131	total: 173ms	remaining: 8.95s
19:	learn: 0.

 94%|█████████▎| 29/31 [05:47<00:08,  4.12s/it]

CatB: {'val_auc': 1.0, 'val_acc': 1.0, 'val_recall': 1.0, 'val_f1': 1.0, 'test_auc': 0.9988093018769401, 'test_acc': 0.9968774395003903, 'test_recall': 0.9956104252400548, 'test_f1': 0.9978003849326368},  fitting time: 7.19886589050293, val inference time: 0.004919528961181641, test inference time: 0.01023411750793457
Epoch 001 | Validation metric: 0.3313 <<< BEST VALIDATION EPOCH
Epoch 002 | Validation metric: 0.3313
Epoch 003 | Validation metric: 0.3313
Epoch 004 | Validation metric: 0.3313
Epoch 005 | Validation metric: 0.3313
Epoch 006 | Validation metric: 0.3313
Epoch 007 | Validation metric: 0.3313
Epoch 008 | Validation metric: 0.3313
Epoch 009 | Validation metric: 0.3313
Epoch 010 | Validation metric: 0.3313
Epoch 011 | Validation metric: 0.3313
Epoch 012 | Validation metric: 0.3313
Epoch 013 | Validation metric: 0.3313
Epoch 014 | Validation metric: 0.3313
Epoch 015 | Validation metric: 0.3313
Epoch 016 | Validation metric: 0.3313
Epoch 017 | Validation metric: 0.3313
Epoch 01

100%|██████████| 31/31 [09:43<00:00, 18.82s/it]

ResNet: {'val_auc': 0.5, 'val_acc': 0.7321428571428571, 'val_recall': 0.0, 'val_f1': 0.0, 'test_auc': 0.5, 'test_acc': 0.2886416861826698, 'test_recall': 0.0, 'test_f1': 0.0},  fitting time: 231.16221380233765, val inference time: 0.0596921443939209, test inference time: 4.457643747329712
number of gpu: 1
cuda name: Quadro RTX 6000
GPU is on
Error running FTTransformer: CUDA out of memory. Tried to allocate 28.00 MiB (GPU 0; 23.65 GiB total capacity; 799.79 MiB already allocated; 8.69 MiB free; 816.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF
cannot unpack non-iterable NoneType object





In [11]:
results

[['IForest',
  {'val_auc': 0.9544715447154472,
   'val_acc': 0.7321428571428571,
   'val_recall': 0.0,
   'val_f1': 0.0,
   'test_auc': 0.4511637548449207,
   'test_acc': 0.2886416861826698,
   'test_recall': 0.0,
   'test_f1': 0.0},
  0.2879786491394043,
  0.03004312515258789,
  0.3106415271759033],
 ['CBLOF',
  {'val_auc': 0.5967479674796747,
   'val_acc': 0.26785714285714285,
   'val_recall': 1.0,
   'val_f1': 0.4225352112676056,
   'test_auc': 0.3832513905235715,
   'test_acc': 0.7113583138173302,
   'test_recall': 1.0,
   'test_f1': 0.8313376667807048},
  2.4187216758728027,
  0.13985180854797363,
  0.10178685188293457],
 ['COF',
  {'val_auc': 0.9853658536585366,
   'val_acc': 0.26785714285714285,
   'val_recall': 1.0,
   'val_f1': 0.4225352112676056,
   'test_auc': 0.5146041842308089,
   'test_acc': 0.7113583138173302,
   'test_recall': 1.0,
   'test_f1': 0.8313376667807048},
  0.2505061626434326,
  0.007873773574829102,
  49.445401191711426],
 ['KNN',
  {'val_auc': 0.97560975609

In [12]:
results

[['IForest',
  {'val_auc': 0.9544715447154472,
   'val_acc': 0.7321428571428571,
   'val_recall': 0.0,
   'val_f1': 0.0,
   'test_auc': 0.4511637548449207,
   'test_acc': 0.2886416861826698,
   'test_recall': 0.0,
   'test_f1': 0.0},
  0.2879786491394043,
  0.03004312515258789,
  0.3106415271759033],
 ['CBLOF',
  {'val_auc': 0.5967479674796747,
   'val_acc': 0.26785714285714285,
   'val_recall': 1.0,
   'val_f1': 0.4225352112676056,
   'test_auc': 0.3832513905235715,
   'test_acc': 0.7113583138173302,
   'test_recall': 1.0,
   'test_f1': 0.8313376667807048},
  2.4187216758728027,
  0.13985180854797363,
  0.10178685188293457],
 ['COF',
  {'val_auc': 0.9853658536585366,
   'val_acc': 0.26785714285714285,
   'val_recall': 1.0,
   'val_f1': 0.4225352112676056,
   'test_auc': 0.5146041842308089,
   'test_acc': 0.7113583138173302,
   'test_recall': 1.0,
   'test_f1': 0.8313376667807048},
  0.2505061626434326,
  0.007873773574829102,
  49.445401191711426],
 ['KNN',
  {'val_auc': 0.97560975609

: 