In [1]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, average_precision_score
from pyod.utils.data import precision_n_scores
from pyod.models.iforest import IForest
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
# Per l'uso della memoria degli algoritmi
from memory_profiler import memory_usage
# Per la metrica sul tempo di Addestramento e Inferenza
import time

In [2]:
def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {"Accuracy": round(accuracy_score(y_test, y_pred), digits),
           "Precision": precision_score(y_test, y_pred).round(digits),
           "Recall": recall_score(y_test, y_pred).round(digits),
           "F1": f1_score(y_test, y_pred).round(digits),
           "MCC": round(matthews_corrcoef(y_test, y_pred), ndigits=digits)}
    if y_proba is not None:
        res["AUC_PR"] = average_precision_score(y_test, y_proba).round(digits)
        res["AUC_ROC"] = roc_auc_score(y_test, y_proba).round(digits)
        res["PREC_N_SCORES"] = precision_n_scores(y_test, y_proba).round(digits)
    return res


def set_seed_numpy(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

In [3]:
features = [
    "mean", "var", "std", "len", "duration", "len_weighted", "gaps_squared", "n_peaks",
    "smooth10_n_peaks", "smooth20_n_peaks", "var_div_duration", "var_div_len",
    "diff_peaks", "diff2_peaks", "diff_var", "diff2_var", "kurtosis", "skew",
]
SEED = 2137

In [5]:
df = pd.read_csv("data/dataset.csv", index_col="segment")

X_train, y_train = df.loc[df.train==1, features], df.loc[df.train==1, "anomaly"]
X_test, y_test = df.loc[df.train==0, features], df.loc[df.train==0, "anomaly"]
X_train_nominal = df.loc[(df.anomaly==0)&(df.train==1), features]

prep = StandardScaler()
X_train_nominal2 = prep.fit_transform(X_train_nominal)
X_train2 = prep.transform(X_train)
X_test2 = prep.transform(X_test)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
set_seed_numpy(SEED) 

# Supervised Model

In [6]:
model = AdaBoostClassifier(random_state=SEED)
model.fit(X_train2, y_train)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

AdaBoostClassifier(random_state=2137) 
 {'Accuracy': 0.934, 'Precision': 0.89, 'Recall': 0.788, 'F1': 0.836, 'MCC': 0.797, 'AUC_PR': 0.923, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.841}


In [7]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
y_predicted_score = model.predict_proba(X_test)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.957, 'Precision': 0.959, 'Recall': 0.832, 'F1': 0.891, 'MCC': 0.867, 'AUC_PR': 0.961, 'AUC_ROC': 0.986, 'PREC_N_SCORES': 0.876}


In [8]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train_scaled, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test_scaled)
y_predicted_score = model.predict_proba(X_test_scaled)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.953, 'Precision': 0.94, 'Recall': 0.832, 'F1': 0.883, 'MCC': 0.856, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


In [None]:
from sklearn.svm import LinearSVC

# Inizializza e addestra il modello
model = LinearSVC()
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.926, 'Precision': 0.911, 'Recall': 0.726, 'F1': 0.808, 'MCC': 0.771, 'AUC_PR': 0.73, 'AUC_ROC': 0.806, 'PREC_N_SCORES': 0.628}




In [16]:
from sklearn.linear_model import LogisticRegression

# Inizializza e addestra il modello
model = LogisticRegression(max_iter=500)
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.924, 'Precision': 0.92, 'Recall': 0.708, 'F1': 0.8, 'MCC': 0.764, 'AUC_PR': 0.73, 'AUC_ROC': 0.806, 'PREC_N_SCORES': 0.628}


## Unsupervised Model

MO_GAAL

In [10]:
from pyod.models.mo_gaal import MO_GAAL
import os
os.environ['TF_USE_LEGACY_KERAS'] = 'True'

model = MO_GAAL(k=10, stop_epochs=20, lr_d=0.01, lr_g=0.0001, momentum=0.9, contamination=0.1)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Epoch 1 of 60
Epoch 2 of 60
Epoch 3 of 60
Epoch 4 of 60
Epoch 5 of 60
Epoch 6 of 60
Epoch 7 of 60
Epoch 8 of 60
Epoch 9 of 60
Epoch 10 of 60
Epoch 11 of 60
Epoch 12 of 60
Epoch 13 of 60
Epoch 14 of 60
Epoch 15 of 60
Epoch 16 of 60
Epoch 17 of 60
Epoch 18 of 60
Epoch 19 of 60
Epoch 20 of 60
Epoch 21 of 60
Epoch 22 of 60
Epoch 23 of 60
Epoch 24 of 60
Epoch 25 of 60
Epoch 26 of 60
Epoch 27 of 60
Epoch 28 of 60
Epoch 29 of 60
Epoch 30 of 60
Epoch 31 of 60
Epoch 32 of 60
Epoch 33 of 60
Epoch 34 of 60
Epoch 35 of 60
Epoch 36 of 60
Epoch 37 of 60
Epoch 38 of 60
Epoch 39 of 60
Epoch 40 of 60
Epoch 41 of 60
Epoch 42 of 60
Epoch 43 of 60
Epoch 44 of 60
Epoch 45 of 60
Epoch 46 of 60
Epoch 47 of 60
Epoch 48 of 60
Epoch 49 of 60
Epoch 50 of 60
Epoch 51 of 60
Epoch 52 of 60
Epoch 53 of 60
Epoch 54 of 60
Epoch 55 of 60
Epoch 56 of 60
Epoch 57 of 60
Epoch 58 of 60
Epoch 59 of 60
Epoch 60 of 60
MO_GAAL(contamination=0.1, k=10, lr_d=0.01, lr_g=0.0001, momentum=0.9,
    stop_epochs=20) 
 {'Accuracy': 0.9

ANO-GAAL

Non funzionante

In [28]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "True"

# Ora importa PyOD e usa AnoGAN come prima
from pyod.models.anogan import AnoGAN
import tensorflow as tf

model = AnoGAN()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


KeyboardInterrupt: 

SO_GAAL

Non funzionante

In [40]:
from pyod.models.so_gaal import SO_GAAL

# Verifica le dimensioni dei dati generati
print("Dimensione X_train:", X_train.shape)
print("Dimensione y_train:", y_train.shape)
print("Dimensione X_test:", X_test.shape)
print("Dimensione y_test:", y_test.shape)

model = SO_GAAL()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

# Valutazione del modello
print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


Dimensione X_train: (500, 2)
Dimensione y_train: (500,)
Dimensione X_test: (100, 2)
Dimensione y_test: (100,)
Epoch 1 of 60
Epoch 2 of 60
Epoch 3 of 60
Epoch 4 of 60
Epoch 5 of 60
Epoch 6 of 60
Epoch 7 of 60
Epoch 8 of 60
Epoch 9 of 60
Epoch 10 of 60
Epoch 11 of 60
Epoch 12 of 60
Epoch 13 of 60
Epoch 14 of 60
Epoch 15 of 60
Epoch 16 of 60
Epoch 17 of 60
Epoch 18 of 60
Epoch 19 of 60
Epoch 20 of 60
Epoch 21 of 60
Epoch 22 of 60


ValueError: Using a target size (torch.Size([94, 1])) that is different to the input size (torch.Size([500, 1])) is deprecated. Please ensure they have the same size.

RF+ICCS

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Inizializza e addestra il modello
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
# Predizione
y_test_scores = model.predict_proba(X_test)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.964, 'Precision': 0.98, 'Recall': 0.85, 'F1': 0.91, 'MCC': 0.891, 'AUC_PR': 0.73, 'AUC_ROC': 0.806, 'PREC_N_SCORES': 0.628}


Linear+L2

In [None]:
from sklearn.linear_model import RidgeClassifier

# Inizializza e addestra il modello Ridge Classifier (Linear + L2)
model = RidgeClassifier(alpha=1.0)  # 'alpha' è il parametro di regolarizzazione L2
model.fit(X_train, y_train)

# Predizione delle etichette di classe
y_predicted = model.predict(X_test)

# Ottieni le probabilità della classe positiva per AUC (si utilizza decision_function per ottenere punteggi di decisione)
y_test_scores = model.decision_function(X_test)

# Calcola e stampa le metriche
metrics = evaluate_metrics(y_test, y_predicted, y_test_scores)
print(metrics)


{'Accuracy': 0.902, 'Precision': 0.969, 'Recall': 0.558, 'F1': 0.708, 'MCC': 0.69, 'AUC_PR': 0.889, 'AUC_ROC': 0.95, 'PREC_N_SCORES': 0.814}


Isolation Forest

In [15]:
model = IForest(random_state=SEED, contamination=.2)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

IForest(behaviour='old', bootstrap=False, contamination=0.2, max_features=1.0,
    max_samples='auto', n_estimators=100, n_jobs=1, random_state=2137,
    verbose=0) 
 {'Accuracy': 0.701, 'Precision': 0.297, 'Recall': 0.292, 'F1': 0.295, 'MCC': 0.105, 'AUC_PR': 0.347, 'AUC_ROC': 0.635, 'PREC_N_SCORES': 0.301}


KNN

In [16]:
from pyod.models.knn import KNN

model = KNN()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0) 
 {'Accuracy': 0.849, 'Precision': 0.78, 'Recall': 0.407, 'F1': 0.535, 'MCC': 0.489, 'AUC_PR': 0.658, 'AUC_ROC': 0.852, 'PREC_N_SCORES': 0.593}


OCSVM

In [17]:
from pyod.models.ocsvm import OCSVM

model = OCSVM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False) 
 {'Accuracy': 0.837, 'Precision': 0.721, 'Recall': 0.389, 'F1': 0.506, 'MCC': 0.447, 'AUC_PR': 0.659, 'AUC_ROC': 0.788, 'PREC_N_SCORES': 0.655}


ABOD

In [18]:
from pyod.models.abod import ABOD

model = ABOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ABOD(contamination=0.1, method='fast', n_neighbors=5) 
 {'Accuracy': 0.845, 'Precision': 0.782, 'Recall': 0.381, 'F1': 0.512, 'MCC': 0.472, 'AUC_PR': 0.644, 'AUC_ROC': 0.843, 'PREC_N_SCORES': 0.584}


INNE

In [19]:
from pyod.models.inne import INNE

model = INNE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

INNE(contamination=0.1, max_samples='auto', n_estimators=200,
   random_state=None) 
 {'Accuracy': 0.832, 'Precision': 0.694, 'Recall': 0.381, 'F1': 0.491, 'MCC': 0.427, 'AUC_PR': 0.636, 'AUC_ROC': 0.805, 'PREC_N_SCORES': 0.655}


ALAD

In [20]:
from pyod.models.alad import ALAD

model = ALAD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ALAD(activation_hidden_disc='tanh', activation_hidden_gen='tanh',
   add_disc_zz_loss=True, add_recon_loss=False, batch_size=32,
   contamination=0.1, dec_layers=[5, 10, 25], device=device(type='cpu'),
   disc_xx_layers=[25, 10, 5], disc_xz_layers=[25, 10, 5],
   disc_zz_layers=[25, 10, 5], dropout_rate=0.2, enc_layers=[25, 10, 5],
   epochs=200, lambda_recon_loss=0.1, latent_dim=2,
   learning_rate_disc=0.0001, learning_rate_gen=0.0001,
   output_activation=None, preprocessing=False,
   spectral_normalization=False, verbose=0) 
 {'Accuracy': 0.83, 'Precision': 0.725, 'Recall': 0.327, 'F1': 0.451, 'MCC': 0.408, 'AUC_PR': 0.633, 'AUC_ROC': 0.759, 'PREC_N_SCORES': 0.549}


LMDD

In [21]:
from pyod.models.lmdd import LMDD

model = LMDD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LMDD(contamination=0.1, dis_measure='aad', n_iter=50, random_state=None) 
 {'Accuracy': 0.822, 'Precision': 1.0, 'Recall': 0.168, 'F1': 0.288, 'MCC': 0.37, 'AUC_PR': 0.624, 'AUC_ROC': 0.765, 'PREC_N_SCORES': 0.663}


SOD

In [22]:
from pyod.models.sod import SOD

model = SOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOD(alpha=0.8, contamination=0.1, n_neighbors=20, ref_set=10) 
 {'Accuracy': 0.826, 'Precision': 0.611, 'Recall': 0.513, 'F1': 0.558, 'MCC': 0.453, 'AUC_PR': 0.621, 'AUC_ROC': 0.797, 'PREC_N_SCORES': 0.549}


COF

In [23]:
from pyod.models.cof import COF

model = COF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COF(contamination=0.1, method='fast', n_neighbors=20) 
 {'Accuracy': 0.834, 'Precision': 0.667, 'Recall': 0.442, 'F1': 0.532, 'MCC': 0.449, 'AUC_PR': 0.603, 'AUC_ROC': 0.774, 'PREC_N_SCORES': 0.593}


LODA

In [24]:
from pyod.models.loda import LODA

model = LODA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LODA(contamination=0.1, n_bins=10, n_random_cuts=100) 
 {'Accuracy': 0.83, 'Precision': 0.689, 'Recall': 0.372, 'F1': 0.483, 'MCC': 0.418, 'AUC_PR': 0.549, 'AUC_ROC': 0.692, 'PREC_N_SCORES': 0.522}


LUNAR

In [25]:
from pyod.models.lunar import LUNAR

model = LUNAR()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LUNAR(contamination=0.1, epsilon=0.1, lr=0.001, model_type='WEIGHT',
   n_epochs=200, n_neighbours=5, negative_sampling='MIXED', proportion=1.0,
   scaler=MinMaxScaler(), val_size=0.1, verbose=0, wd=0.1) 
 {'Accuracy': 0.813, 'Precision': 0.719, 'Recall': 0.204, 'F1': 0.317, 'MCC': 0.313, 'AUC_PR': 0.542, 'AUC_ROC': 0.799, 'PREC_N_SCORES': 0.46}


CBLOF

In [26]:
from pyod.models.cblof import CBLOF

model = CBLOF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

  super()._check_params_vs_input(X, default_n_init=10)


CBLOF(alpha=0.9, beta=5, check_estimator=False, clustering_estimator=None,
   contamination=0.1, n_clusters=8, n_jobs=None, random_state=None,
   use_weights=False) 
 {'Accuracy': 0.8, 'Precision': 0.556, 'Recall': 0.31, 'F1': 0.398, 'MCC': 0.307, 'AUC_PR': 0.493, 'AUC_ROC': 0.642, 'PREC_N_SCORES': 0.425}


DIF

In [27]:
from pyod.models.dif import DIF

model = DIF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

DIF(batch_size=1000, contamination=0.1, device=device(type='cpu'),
  hidden_activation='tanh', hidden_neurons=[500, 100], max_samples=256,
  n_ensemble=50, n_estimators=6, random_state=None, representation_dim=20,
  skip_connection=False) 
 {'Accuracy': 0.788, 'Precision': 1.0, 'Recall': 0.009, 'F1': 0.018, 'MCC': 0.084, 'AUC_PR': 0.512, 'AUC_ROC': 0.826, 'PREC_N_SCORES': 0.513}


VAE

In [28]:
from pyod.models.vae import VAE

model = VAE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Training:   0%|          | 0/30 [00:00<?, ?it/s]

Training: 100%|██████████| 30/30 [00:22<00:00,  1.35it/s]


VAE(batch_norm=False, batch_size=32, beta=1.0, capacity=0.0,
  compile_mode='default', contamination=0.1,
  decoder_neuron_list=[32, 64, 128], device=device(type='cpu'),
  dropout_rate=0.2, encoder_neuron_list=[128, 64, 32], epoch_num=30,
  hidden_activation_name='relu', latent_dim=2, lr=0.001,
  optimizer_name='adam', optimizer_params={'weight_decay': 1e-05},
  output_activation_name='sigmoid', preprocessing=True, random_state=42,
  use_compile=False, verbose=1) 
 {'Accuracy': 0.794, 'Precision': 0.532, 'Recall': 0.292, 'F1': 0.377, 'MCC': 0.283, 'AUC_PR': 0.446, 'AUC_ROC': 0.687, 'PREC_N_SCORES': 0.513}


GMM

In [29]:
from pyod.models.gmm import GMM

model = GMM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

GMM(contamination=0.1, covariance_type='full', init_params='kmeans',
  max_iter=100, means_init=None, n_components=1, n_init=1,
  precisions_init=None, random_state=None, reg_covar=1e-06, tol=0.001,
  warm_start=False, weights_init=None) 
 {'Accuracy': 0.783, 'Precision': 0.482, 'Recall': 0.239, 'F1': 0.32, 'MCC': 0.225, 'AUC_PR': 0.426, 'AUC_ROC': 0.713, 'PREC_N_SCORES': 0.389}


DeepSVDD

In [30]:
from pyod.models.deep_svdd import DeepSVDD

# Determina il numero di feature
n_features = X_train2.shape[1]

model = DeepSVDD(n_features=n_features)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Epoch 1/100, Loss: 36.17359483242035
Epoch 2/100, Loss: 36.19166633486748
Epoch 3/100, Loss: 36.2466336786747
Epoch 4/100, Loss: 36.13528761267662
Epoch 5/100, Loss: 36.165921211242676
Epoch 6/100, Loss: 36.13916572928429
Epoch 7/100, Loss: 36.189294904470444
Epoch 8/100, Loss: 36.17238187789917
Epoch 9/100, Loss: 36.2117395401001
Epoch 10/100, Loss: 36.185857594013214
Epoch 11/100, Loss: 36.13321906328201
Epoch 12/100, Loss: 36.1584706902504
Epoch 13/100, Loss: 36.17630282044411
Epoch 14/100, Loss: 36.17380636930466
Epoch 15/100, Loss: 36.25334322452545
Epoch 16/100, Loss: 36.1712027490139
Epoch 17/100, Loss: 36.12485006451607
Epoch 18/100, Loss: 36.4436274766922
Epoch 19/100, Loss: 36.22374951839447
Epoch 20/100, Loss: 36.2115415930748
Epoch 21/100, Loss: 36.16678577661514
Epoch 22/100, Loss: 36.20809951424599
Epoch 23/100, Loss: 36.228652626276016
Epoch 24/100, Loss: 36.154085248708725
Epoch 25/100, Loss: 36.138443648815155
Epoch 26/100, Loss: 36.5161928832531
Epoch 27/100, Loss: 36

PCA

In [31]:
from pyod.models.pca import PCA

model = PCA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

PCA(contamination=0.1, copy=True, iterated_power='auto', n_components=None,
  n_selected_components=None, random_state=None, standardization=True,
  svd_solver='auto', tol=0.0, weighted=True, whiten=False) 
 {'Accuracy': 0.777, 'Precision': 0.456, 'Recall': 0.23, 'F1': 0.306, 'MCC': 0.206, 'AUC_PR': 0.373, 'AUC_ROC': 0.612, 'PREC_N_SCORES': 0.363}


COPOD

In [32]:
from pyod.models.copod import COPOD

model = COPOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COPOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.4, 'Recall': 0.177, 'F1': 0.245, 'MCC': 0.147, 'AUC_PR': 0.328, 'AUC_ROC': 0.627, 'PREC_N_SCORES': 0.257}


SOS

In [33]:
from pyod.models.sos import SOS

model = SOS()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOS(contamination=0.1, eps=1e-05, metric='euclidean', perplexity=4.5) 
 {'Accuracy': 0.758, 'Precision': 0.364, 'Recall': 0.177, 'F1': 0.238, 'MCC': 0.125, 'AUC_PR': 0.308, 'AUC_ROC': 0.524, 'PREC_N_SCORES': 0.274}


ECOD

In [34]:
from pyod.models.ecod import ECOD

model = ECOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ECOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.396, 'Recall': 0.168, 'F1': 0.236, 'MCC': 0.14, 'AUC_PR': 0.34, 'AUC_ROC': 0.637, 'PREC_N_SCORES': 0.345}


## Prove ESN

In [35]:
import numpy as np
from reservoirpy.nodes import Reservoir, Ridge

# Creazione del reservoir
reservoir = Reservoir(units=1000, sr=0.95)  # sr: raggio spettrale

# Creazione del nodo di output per il readout
readout = Ridge(ridge=1e-5)

# Connessione del reservoir al readout per creare l'ESN
reservoir >> readout

# Addestramento del modello
readout.fit(reservoir.run(X_train_scaled), X_train_scaled)  # Si allena il readout sugli stati del reservoir

# Predizione per il rilevamento di anomalie
reservoir_states = reservoir.run(X_test_scaled)
predictions = readout.run(reservoir_states)
errors = np.abs(predictions - X_test_scaled)

# Definisci una soglia per identificare le anomalie
threshold = np.percentile(errors, 95)  # Prendi il 95° percentile degli errori come soglia
anomalies = errors > threshold

print("ESN", '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))



Running Reservoir-0: 100%|██████████| 1594/1594 [00:00<00:00, 2797.32it/s]
Running Reservoir-0: 100%|██████████| 529/529 [00:00<00:00, 2798.52it/s]
Running Ridge-0: 100%|██████████| 529/529 [00:00<00:00, 6901.57it/s]

ESN 
 {'Accuracy': 0.767, 'Precision': 0.396, 'Recall': 0.168, 'F1': 0.236, 'MCC': 0.14, 'AUC_PR': 0.34, 'AUC_ROC': 0.637, 'PREC_N_SCORES': 0.345}





In [36]:
from reservoirpy.nodes import ESN

# Caricamento del dataset
# Esempio: X_train, X_test, y_train, y_test sono già disponibili

# Creazione dell'ESN
#model = ESN(n_inputs=X_train.shape[1], n_outputs=1, units=100, random_state=42)
model = ESN(n_inputs=X_train_scaled.shape[1], n_outputs=X_train_scaled.shape[1], units=500, spectral_radius=0.95, sparsity=0.5, leaking_rate=0.5, random_state=42)


# Addestramento dell'ESN
model.fit(reservoir.run(X_train_scaled), X_train_scaled)


# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_predicted, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Running Reservoir-0: 100%|██████████| 1594/1594 [00:00<00:00, 2742.70it/s]
Running ESN-0: 100%|██████████| 1/1 [00:00<00:00,  2.15it/s]


Fitting node ESN-0...
'ESN-0': ESN('Reservoir-1', 'Ridge-1') {'Accuracy': 0.767, 'Precision': 0.396, 'Recall': 0.168, 'F1': 0.236, 'MCC': 0.14, 'AUC_PR': 0.34, 'AUC_ROC': 0.637, 'PREC_N_SCORES': 0.345}


### ESN con "grid"

In [37]:
from itertools import product
from sklearn.metrics import f1_score
from sklearn.linear_model import Ridge
import numpy as np

# Funzione per creare sequenze temporali dai dati
def create_sequences(data, seq_length):
    sequences = []
    for i in range(len(data) - seq_length + 1):
        seq = data[i:i + seq_length]
        sequences.append(seq)
    return np.array(sequences)

# Parametri da ottimizzare manualmente
param_grid = {
    'units': [50, 100, 150, 500],
    'spectral_radius': [0.8, 0.9, 0.95],
    'leaking_rate': [ 0.1, 0.3, 0.5]
}

best_score = -1
best_params = {}
best_model = None

# Imposta la lunghezza delle sequenze
seq_length = 100  # Modifica la lunghezza della sequenza in base alle tue esigenze

# Creare le sequenze per X_train e X_test
X_train_sequences = create_sequences(X_train_scaled, seq_length)
X_test_sequences = create_sequences(X_test_scaled, seq_length)

# Ciclo su tutte le combinazioni di parametri
for units, spectral_radius, leaking_rate in product(param_grid['units'], 
                                                    param_grid['spectral_radius'], 
                                                    param_grid['leaking_rate']):
    # Inizializza il modello ESN con i parametri correnti
    model = ESN(n_inputs=X_train_sequences.shape[2], n_outputs=1, units=units, 
                spectral_radius=spectral_radius, leaking_rate=leaking_rate, random_state=42)
    
    readout = Ridge()

    print(f"X_train_sequences shape: {X_train_sequences.shape}")  # Verifica che la forma dei dati di training sia corretta
    print(f"X_test_sequences shape: {X_test_sequences.shape}")    # Verifica che la forma dei dati di test sia corretta
    
    try:
        # Trasponi X_train_sequences se necessario per avere (num_samples, num_features, seq_length)
        X_train_seq_transposed = X_train_sequences.reshape(X_train_sequences.shape[0], -1)  # Forma: (num_samples, seq_length * num_features)
        X_test_seq_transposed = X_test_sequences.reshape(X_test_sequences.shape[0], -1)    # Forma: (num_samples, seq_length * num_features)
        
        # Addestra il modello con un warmup esteso
        model.fit(X_train_seq_transposed, X_train_scaled[:X_train_seq_transposed.shape[0]])

        # Predizione per il rilevamento di anomalie (stati del serbatoio sui dati di test)
        reservoir_states_train = model.run(X_train_seq_transposed)
        readout.fit(reservoir_states_train, y_train[:X_train_seq_transposed.shape[0]])

        # Predizione sui dati di test
        reservoir_states_test = model.run(X_test_seq_transposed)
        predictions = readout.predict(reservoir_states_test)

        # Calcola la metrica F1
        score = f1_score(y_test[:X_test_seq_transposed.shape[0]], predictions.round())  # Arrotonda le predizioni per il rilevamento di anomalie

        # Aggiorna i migliori parametri e modello se il punteggio è migliore
        if score > best_score:
            best_score = score
            best_params = {'units': units, 'spectral_radius': spectral_radius, 'leaking_rate': leaking_rate}
            best_model = model

    except ValueError as e:
        print(f"Errore con parametri {units}, {spectral_radius}, {leaking_rate}: {e}")

# Stampa i migliori parametri e punteggio
print("Best parameters:", best_params)
print("Best F1 score:", best_score)

# Ora, con il miglior modello, possiamo fare una predizione finale sui dati di test

# Utilizza il miglior modello per fare una predizione finale sui dati di test
X_test_seq_transposed = X_test_sequences.reshape(X_test_sequences.shape[0], -1)  # Trasponi X_test per il modello
reservoir_states_test = best_model.run(X_test_seq_transposed)  # Ottieni gli stati del serbatoio sui dati di test

# Predizione finale
predictions = readout.predict(reservoir_states_test)  # Predizione finale con il modello readout

# Calcola le metriche
metrics = evaluate_metrics(y_test[:X_test_seq_transposed.shape[0]], predictions.round(), predictions)

# Stampa i risultati
print("Best model:", best_model)
print("Metrics:", metrics)

X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-1: 100%|██████████| 1/1 [00:00<00:00,  3.70it/s]


Fitting node ESN-1...


Running ESN-1: 100%|██████████| 1/1 [00:00<00:00,  3.01it/s]
Running ESN-1: 100%|██████████| 1/1 [00:00<00:00, 11.11it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-2: 100%|██████████| 1/1 [00:00<00:00,  3.74it/s]


Fitting node ESN-2...


Running ESN-2: 100%|██████████| 1/1 [00:00<00:00,  2.38it/s]
Running ESN-2: 100%|██████████| 1/1 [00:00<00:00,  8.13it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-3: 100%|██████████| 1/1 [00:00<00:00,  3.31it/s]


Fitting node ESN-3...


Running ESN-3: 100%|██████████| 1/1 [00:00<00:00,  2.59it/s]
Running ESN-3: 100%|██████████| 1/1 [00:00<00:00,  9.29it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-4: 100%|██████████| 1/1 [00:00<00:00,  1.70it/s]


Fitting node ESN-4...


Running ESN-4: 100%|██████████| 1/1 [00:00<00:00,  2.10it/s]
Running ESN-4: 100%|██████████| 1/1 [00:00<00:00, 10.47it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-5: 100%|██████████| 1/1 [00:00<00:00,  1.60it/s]


Fitting node ESN-5...


Running ESN-5: 100%|██████████| 1/1 [00:00<00:00,  2.08it/s]
Running ESN-5: 100%|██████████| 1/1 [00:00<00:00,  9.09it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-6: 100%|██████████| 1/1 [00:00<00:00,  3.44it/s]


Fitting node ESN-6...


Running ESN-6: 100%|██████████| 1/1 [00:00<00:00,  2.77it/s]
Running ESN-6: 100%|██████████| 1/1 [00:00<00:00,  9.43it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-7: 100%|██████████| 1/1 [00:00<00:00,  3.35it/s]


Fitting node ESN-7...


Running ESN-7: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]
Running ESN-7: 100%|██████████| 1/1 [00:00<00:00,  4.85it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-8: 100%|██████████| 1/1 [00:00<00:00,  1.57it/s]


Fitting node ESN-8...


Running ESN-8: 100%|██████████| 1/1 [00:01<00:00,  1.03s/it]
Running ESN-8: 100%|██████████| 1/1 [00:00<00:00,  3.27it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-9: 100%|██████████| 1/1 [00:00<00:00,  1.13it/s]


Fitting node ESN-9...


Running ESN-9: 100%|██████████| 1/1 [00:01<00:00,  1.22s/it]
Running ESN-9: 100%|██████████| 1/1 [00:00<00:00,  3.76it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-10: 100%|██████████| 1/1 [00:00<00:00,  1.51it/s]


Fitting node ESN-10...


Running ESN-10: 100%|██████████| 1/1 [00:01<00:00,  1.01s/it]
Running ESN-10: 100%|██████████| 1/1 [00:00<00:00,  3.91it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-11: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]


Fitting node ESN-11...


Running ESN-11: 100%|██████████| 1/1 [00:00<00:00,  1.72it/s]
Running ESN-11: 100%|██████████| 1/1 [00:00<00:00,  5.35it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-12: 100%|██████████| 1/1 [00:00<00:00,  2.33it/s]


Fitting node ESN-12...


Running ESN-12: 100%|██████████| 1/1 [00:00<00:00,  1.97it/s]
Running ESN-12: 100%|██████████| 1/1 [00:00<00:00,  6.69it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-13: 100%|██████████| 1/1 [00:00<00:00,  1.98it/s]


Fitting node ESN-13...


Running ESN-13: 100%|██████████| 1/1 [00:00<00:00,  1.69it/s]
Running ESN-13: 100%|██████████| 1/1 [00:00<00:00,  6.00it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-14: 100%|██████████| 1/1 [00:00<00:00,  2.02it/s]


Fitting node ESN-14...


Running ESN-14: 100%|██████████| 1/1 [00:00<00:00,  1.99it/s]
Running ESN-14: 100%|██████████| 1/1 [00:00<00:00,  9.01it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-15: 100%|██████████| 1/1 [00:00<00:00,  3.53it/s]


Fitting node ESN-15...


Running ESN-15: 100%|██████████| 1/1 [00:00<00:00,  2.82it/s]
Running ESN-15: 100%|██████████| 1/1 [00:00<00:00,  9.03it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-16: 100%|██████████| 1/1 [00:00<00:00,  3.36it/s]


Fitting node ESN-16...


Running ESN-16: 100%|██████████| 1/1 [00:00<00:00,  2.75it/s]
Running ESN-16: 100%|██████████| 1/1 [00:00<00:00,  9.01it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-17: 100%|██████████| 1/1 [00:00<00:00,  3.44it/s]


Fitting node ESN-17...


Running ESN-17: 100%|██████████| 1/1 [00:00<00:00,  2.68it/s]
Running ESN-17: 100%|██████████| 1/1 [00:00<00:00,  8.96it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-18: 100%|██████████| 1/1 [00:00<00:00,  3.45it/s]


Fitting node ESN-18...


Running ESN-18: 100%|██████████| 1/1 [00:00<00:00,  2.92it/s]
Running ESN-18: 100%|██████████| 1/1 [00:00<00:00,  9.16it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-19: 100%|██████████| 1/1 [00:00<00:00,  3.11it/s]


Fitting node ESN-19...


Running ESN-19: 100%|██████████| 1/1 [00:00<00:00,  2.55it/s]
Running ESN-19: 100%|██████████| 1/1 [00:00<00:00,  7.76it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-20: 100%|██████████| 1/1 [00:00<00:00,  3.09it/s]


Fitting node ESN-20...


Running ESN-20: 100%|██████████| 1/1 [00:00<00:00,  2.60it/s]
Running ESN-20: 100%|██████████| 1/1 [00:00<00:00,  8.88it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-21: 100%|██████████| 1/1 [00:00<00:00,  3.11it/s]


Fitting node ESN-21...


Running ESN-21: 100%|██████████| 1/1 [00:00<00:00,  2.57it/s]
Running ESN-21: 100%|██████████| 1/1 [00:00<00:00,  7.50it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-22: 100%|██████████| 1/1 [00:00<00:00,  3.08it/s]


Fitting node ESN-22...


Running ESN-22: 100%|██████████| 1/1 [00:00<00:00,  1.67it/s]
Running ESN-22: 100%|██████████| 1/1 [00:00<00:00,  7.16it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-23: 100%|██████████| 1/1 [00:00<00:00,  2.56it/s]


Fitting node ESN-23...


Running ESN-23: 100%|██████████| 1/1 [00:00<00:00,  2.29it/s]
Running ESN-23: 100%|██████████| 1/1 [00:00<00:00,  7.94it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-24: 100%|██████████| 1/1 [00:00<00:00,  2.79it/s]


Fitting node ESN-24...


Running ESN-24: 100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
Running ESN-24: 100%|██████████| 1/1 [00:00<00:00,  7.87it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-25: 100%|██████████| 1/1 [00:00<00:00,  2.76it/s]


Fitting node ESN-25...


Running ESN-25: 100%|██████████| 1/1 [00:00<00:00,  2.19it/s]
Running ESN-25: 100%|██████████| 1/1 [00:00<00:00,  8.04it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-26: 100%|██████████| 1/1 [00:00<00:00,  2.73it/s]


Fitting node ESN-26...


Running ESN-26: 100%|██████████| 1/1 [00:00<00:00,  2.37it/s]
Running ESN-26: 100%|██████████| 1/1 [00:00<00:00,  7.72it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-27: 100%|██████████| 1/1 [00:00<00:00,  2.68it/s]


Fitting node ESN-27...


Running ESN-27: 100%|██████████| 1/1 [00:00<00:00,  1.95it/s]
Running ESN-27: 100%|██████████| 1/1 [00:00<00:00,  7.30it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-28: 100%|██████████| 1/1 [00:00<00:00,  1.49it/s]


Fitting node ESN-28...


Running ESN-28: 100%|██████████| 1/1 [00:00<00:00,  1.27it/s]
Running ESN-28: 100%|██████████| 1/1 [00:00<00:00,  4.53it/s]


Errore con parametri 500, 0.8, 0.1: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-29: 100%|██████████| 1/1 [00:00<00:00,  1.42it/s]


Fitting node ESN-29...


Running ESN-29: 100%|██████████| 1/1 [00:00<00:00,  1.20it/s]
Running ESN-29: 100%|██████████| 1/1 [00:00<00:00,  4.57it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-30: 100%|██████████| 1/1 [00:00<00:00,  1.46it/s]


Fitting node ESN-30...


Running ESN-30: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
Running ESN-30: 100%|██████████| 1/1 [00:00<00:00,  4.52it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-31: 100%|██████████| 1/1 [00:00<00:00,  1.43it/s]


Fitting node ESN-31...


Running ESN-31: 100%|██████████| 1/1 [00:00<00:00,  1.29it/s]
Running ESN-31: 100%|██████████| 1/1 [00:00<00:00,  5.11it/s]


Errore con parametri 500, 0.9, 0.1: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-32: 100%|██████████| 1/1 [00:00<00:00,  1.68it/s]


Fitting node ESN-32...


Running ESN-32: 100%|██████████| 1/1 [00:00<00:00,  1.33it/s]
Running ESN-32: 100%|██████████| 1/1 [00:00<00:00,  4.59it/s]


X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-33: 100%|██████████| 1/1 [00:00<00:00,  1.51it/s]


Fitting node ESN-33...


Running ESN-33: 100%|██████████| 1/1 [00:00<00:00,  1.36it/s]
Running ESN-33: 100%|██████████| 1/1 [00:00<00:00,  4.45it/s]


Errore con parametri 500, 0.9, 0.5: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-34: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]


Fitting node ESN-34...


Running ESN-34: 100%|██████████| 1/1 [00:00<00:00,  1.45it/s]
Running ESN-34: 100%|██████████| 1/1 [00:00<00:00,  5.48it/s]


Errore con parametri 500, 0.95, 0.1: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-35: 100%|██████████| 1/1 [00:00<00:00,  1.54it/s]


Fitting node ESN-35...


Running ESN-35: 100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
Running ESN-35: 100%|██████████| 1/1 [00:00<00:00,  5.47it/s]


Errore con parametri 500, 0.95, 0.3: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].
X_train_sequences shape: (1495, 100, 18)
X_test_sequences shape: (430, 100, 18)


Running ESN-36: 100%|██████████| 1/1 [00:00<00:00,  1.59it/s]


Fitting node ESN-36...


Running ESN-36: 100%|██████████| 1/1 [00:00<00:00,  1.52it/s]
Running ESN-36: 100%|██████████| 1/1 [00:00<00:00,  4.95it/s]


Best parameters: {'units': 500, 'spectral_radius': 0.9, 'leaking_rate': 0.3}
Best F1 score: 0.406015037593985


Running ESN-32: 100%|██████████| 1/1 [00:00<00:00,  5.18it/s]

Best model: 'ESN-32': ESN('Reservoir-33', 'Ridge-33')
Metrics: {'Accuracy': 0.814, 'Precision': 0.676, 'Recall': 0.269, 'F1': 0.385, 'MCC': 0.342, 'AUC_PR': 0.526, 'AUC_ROC': 0.731, 'PREC_N_SCORES': 0.452}





## XGBOD

In [38]:
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD()
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

#n_estimators=50,
#max_depth=3,
#learning_rate=0.1,
#random_state=SEED

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

#### Con metiche di Memoria e Tempo

In [39]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD(n_estimators=50, max_depth=3, learning_rate=0.1, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference



### XGBOD più modelli unsupervised

In [40]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], subsample=1) {'Accuracy': 0.968, 'Precision': 0.944, 'Recall': 0.903, 'F1': 0.923, 'MCC': 0.903, 'AUC_PR': 0.974, 'AUC_ROC': 0.991, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [41]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.184173345565796 secondi
Uso della memoria durante l'addestramento: 611.49609375 MiB

 Tempo di inferenza: 1.5959651470184326 secondi
Uso della memoria durante l'inferenza: 602.36328125 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True]

### XGBOD più modelli unsupervised e Parametri

In [42]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models,
              n_estimators=100,
              max_depth=3,
              learning_rate=0.2,
              n_jobs=-1,
              random_state=SEED
            )

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)

Parameters: { "silent" } are not used.




{'Accuracy': 0.97, 'Precision': 0.945, 'Recall': 0.912, 'F1': 0.928, 'MCC': 0.909, 'AUC_PR': 0.973, 'AUC_ROC': 0.992, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [43]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models, n_estimators=100, max_depth=3, learning_rate=0.2, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.3739891052246094 secondi
Uso della memoria durante l'addestramento: 611.7890625 MiB

 Tempo di inferenza: 1.7508814334869385 secondi
Uso della memoria durante l'inferenza: 603.37890625 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.2, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=2137, reg_alpha=0,
   reg_lambda=1, scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, Tr

### Early Stopping
Termina l'esecuzione anticipatamente se per un numero prestabilito di round non migliorano più i parametri

In [44]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Divisione del dataset di allenamento per avere un set di validazione
X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=SEED)

# Inizializzazione del modello
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.2, n_jobs=-1, random_state=SEED)

best_score = -np.inf
patience = 10       # Numero di volte che il modello cercherà di migliorarsi
patience_counter = 0
n_iterations = 100      # Numero massimo di cicli del'allenamento

for i in range(n_iterations):  # Numero massimo di iterazioni
    model.fit(X_train_sub, y_train_sub)
    
    # Predizione sul set di validazione
    y_val_pred = model.predict(X_val)
    val_score = accuracy_score(y_val, y_val_pred)
    
    # Controllo early stopping
    if val_score > best_score:
        best_score = val_score
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at iteration {i}")
            break
    model.n_estimators += 1  # Incrementa il numero di stimatori per la prossima iterazione

# Predizione sul set di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)


Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.



Early stopping at iteration 12

{'Accuracy': 0.97, 'Precision': 0.971, 'Recall': 0.885, 'F1': 0.926, 'MCC': 0.909, 'AUC_PR': 0.969, 'AUC_ROC': 0.99, 'PREC_N_SCORES': 0.912}


In [45]:
import time
from memory_profiler import memory_usage
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [KNN(), LOF(), ABOD(), OCSVM()]

# Divisione del dataset di allenamento per avere un set di validazione
X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=SEED)

# Inizializzazione del modello
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.2, n_jobs=-1, random_state=SEED)

best_score = -np.inf
patience = 10       # Numero di volte che il modello cercherà di migliorarsi
patience_counter = 0
n_iterations = 100  # Numero massimo di cicli dell'allenamento

for i in range(n_iterations):  # Numero massimo di iterazioni
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_sub, y_train_sub)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    
    # Predizione sul set di validazione
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_val,)))
    inference_time = time.time() - start_time
    y_val_pred = model.predict(X_val)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")

    val_score = accuracy_score(y_val, y_val_pred)
    
    # Controllo early stopping
    if val_score > best_score:
        best_score = val_score
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at iteration {i}")
            break
    model.n_estimators += 1  # Incrementa il numero di stimatori per la prossima iterazione

# Predizione sul set di test
start_time = time.time()
mem_usage_inference_test = memory_usage((model.predict, (X_test_scaled,)))
inference_time_test = time.time() - start_time
y_pred = model.predict(X_test_scaled)
print(f"\n Tempo di inferenza sul test: {inference_time_test} secondi")
print(f"Uso della memoria durante l'inferenza sul test: {max(mem_usage_inference_test)} MiB")

y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score,)
print(metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 1.9182665348052979 secondi
Uso della memoria durante l'addestramento: 613.16796875 MiB

 Tempo di inferenza: 1.47544527053833 secondi
Uso della memoria durante l'inferenza: 613.171875 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 1.9029278755187988 secondi
Uso della memoria durante l'addestramento: 613.0390625 MiB

 Tempo di inferenza: 1.5100088119506836 secondi
Uso della memoria durante l'inferenza: 613.046875 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.14433217048645 secondi
Uso della memoria durante l'addestramento: 612.80859375 MiB

 Tempo di inferenza: 1.6270020008087158 secondi
Uso della memoria durante l'inferenza: 612.82421875 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.1460025310516357 secondi
Uso della memoria durante l'addestramento: 613.1796875 MiB

 Tempo di inferenza: 1.6065373420715332 secondi
Uso della memoria durante l'inferenza: 613.18359375 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.1586568355560303 secondi
Uso della memoria durante l'addestramento: 613.46875 MiB

 Tempo di inferenza: 1.68288254737854 secondi
Uso della memoria durante l'inferenza: 613.4765625 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.3993046283721924 secondi
Uso della memoria durante l'addestramento: 613.5703125 MiB

 Tempo di inferenza: 1.4842274188995361 secondi
Uso della memoria durante l'inferenza: 613.5703125 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.049354076385498 secondi
Uso della memoria durante l'addestramento: 613.578125 MiB

 Tempo di inferenza: 1.5631005764007568 secondi
Uso della memoria durante l'inferenza: 613.5859375 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 1.8952484130859375 secondi
Uso della memoria durante l'addestramento: 614.625 MiB

 Tempo di inferenza: 1.4641797542572021 secondi
Uso della memoria durante l'inferenza: 614.6328125 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 1.9484944343566895 secondi
Uso della memoria durante l'addestramento: 614.5625 MiB

 Tempo di inferenza: 2.032728433609009 secondi
Uso della memoria durante l'inferenza: 614.56640625 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 4.249924182891846 secondi
Uso della memoria durante l'addestramento: 611.890625 MiB

 Tempo di inferenza: 2.6989939212799072 secondi
Uso della memoria durante l'inferenza: 607.4921875 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.9948065280914307 secondi
Uso della memoria durante l'addestramento: 614.16015625 MiB

 Tempo di inferenza: 1.9658229351043701 secondi
Uso della memoria durante l'inferenza: 607.90234375 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.2505972385406494 secondi
Uso della memoria durante l'addestramento: 615.23046875 MiB

 Tempo di inferenza: 1.774336338043213 secondi
Uso della memoria durante l'inferenza: 615.234375 MiB


Parameters: { "silent" } are not used.




 Tempo di addestramento: 1.9607925415039062 secondi
Uso della memoria durante l'addestramento: 615.19921875 MiB

 Tempo di inferenza: 1.441871166229248 secondi
Uso della memoria durante l'inferenza: 615.20703125 MiB
Early stopping at iteration 12

 Tempo di inferenza sul test: 1.5843160152435303 secondi
Uso della memoria durante l'inferenza sul test: 608.421875 MiB
{'Accuracy': 0.97, 'Precision': 0.971, 'Recall': 0.885, 'F1': 0.926, 'MCC': 0.909, 'AUC_PR': 0.969, 'AUC_ROC': 0.99, 'PREC_N_SCORES': 0.912}


### XGBOD + ESN

In [46]:
import numpy as np
from reservoirpy.nodes import Reservoir, Ridge
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Definizione dei modelli unsupervised
unsupervised_models = [
    KNN(),
    LOF(),
    ABOD(),
    OCSVM()
]

# Creazione del reservoir
reservoir = Reservoir(units=1000, sr=0.95)  # sr: raggio spettrale
# Creazione del nodo di output per il readout
readout = Ridge(ridge=1e-5)
# Connessione del reservoir al readout per creare l'ESN
reservoir >> readout

# Pipeline di preprocessing
preprocessor = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('reservoir', reservoir)
])

# Trasformazione dei dati di addestramento e test con ESN
# Addestramento del modello
readout.fit(reservoir.run(X_train_scaled), X_train_scaled)  # Si allena il readout sugli stati del reservoir

# Predizione per il rilevamento di anomalie
X_train_transformed = reservoir.run(X_train_scaled)
X_test_transformed = reservoir.run(X_test_scaled)

# Creazione del modello XGBOD con parametri specificati
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.1, n_jobs=-1, random_state=42)
# Uso le trasformazioni di ESN con il modello XGBOD
model.fit(X_train_transformed, y_train)

# Predizione sui dati di test
y_pred = model.predict(X_test_transformed)
y_predicted_score = model.decision_function(X_test_transformed)

# Valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(f"Model: {model}")
print(f"Metrics: {metrics}")


Running Reservoir-38: 100%|██████████| 1594/1594 [00:00<00:00, 2836.66it/s]
Running Reservoir-38: 100%|██████████| 1594/1594 [00:00<00:00, 2651.93it/s]
Running Reservoir-38: 100%|██████████| 529/529 [00:00<00:00, 2925.92it/s]
Parameters: { "silent" } are not used.



Model: XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=-1, nthread=None,
   objective='binary:logistic', random_state=42, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], subsample=1)
Metrics: {'Accuracy': 0.951, 'Precision': 0.922, 'Recall': 0.841, 'F1': 0.88, 'MCC': 0.85, 'AUC_PR': 0.958, 'AUC_ROC': 0.986, 'PREC_N_SCORES': 0.876}


### Batch Processing
Addestra il modello su piccole porzioni migliorando il carico sulla memoria e la velocità

In [47]:
import numpy as np
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Dividi il dataset in batch
n_batches = 10  # Specifica il numero di batch che vuoi
X_train_batches = np.array_split(X_train_scaled, n_batches)
y_train_batches = np.array_split(y_train, n_batches)

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Inizializza i modelli per ciascun batch
models = []
for X_batch, y_batch in zip(X_train_batches, y_train_batches):
    # Inizializza e addestra il modello
    model = XGBOD(estimator_list=unsupervised_models,
                  n_estimators=100,
                  max_depth=3,
                  learning_rate=0.2,
                  n_jobs=-1,
                  random_state=SEED
                )
    model.fit(X_batch, y_batch)
    models.append(model)

# Prevedi gli outlier nel dataset di test e combinalo
y_pred_scores = np.zeros_like(X_test_scaled[:, 0], dtype=float)
for model in models:
    y_pred_scores += model.decision_function(X_test_scaled)

# Media dei punteggi di decisione
y_pred_scores /= n_batches
y_pred = (y_pred_scores > np.mean(y_pred_scores)).astype(int)

# Esegui la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_pred_scores)
print("")
print(metrics)


  return bound(*args, **kwds)
Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.




{'Accuracy': 0.749, 'Precision': 0.455, 'Recall': 0.885, 'F1': 0.601, 'MCC': 0.496, 'AUC_PR': 0.878, 'AUC_ROC': 0.918, 'PREC_N_SCORES': 0.796}


#### Cross Validation

In [48]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectKBest, f_classif
from pyod.models.xgbod import XGBOD
from sklearn.model_selection import cross_val_score
import numpy as np

# Preprocessing and model pipeline
preprocessor = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('feature_selection', SelectKBest(score_func=f_classif, k=10)),
    ('classifier', XGBOD(n_estimators=50, max_depth=3, learning_rate=0.1))
])

# Cross-validation with pipeline
scores = cross_val_score(pipeline, X_train_scaled, y_train, cv=5, scoring='roc_auc', n_jobs=-1)
print(f"Cross-validation scores: {scores}")
print(f"Mean ROC AUC score: {np.mean(scores)}")

# Train and evaluate model
pipeline.fit(X_train_scaled, y_train)
y_pred = pipeline.predict(X_test_scaled)
y_predicted_score = pipeline.decision_function(X_test_scaled)

metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print(pipeline.named_steps['classifier'], metrics)


Cross-validation scores: [nan nan nan nan nan]
Mean ROC AUC score: nan


Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, Fa

#### XGBOD con ricerca iperparametri con "grid"

In [49]:
from sklearn.model_selection import RandomizedSearchCV
from pyod.models.xgbod import XGBOD
import numpy as np

# Definizione della griglia di parametri
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}

# Inizializza il modello
model = XGBOD()

# Randomized search con meno iterazioni e parallelizzazione
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, scoring='roc_auc', random_state=42, n_jobs=-1)
random_search.fit(X_train_scaled, y_train)

# Migliori parametri trovati
best_params = random_search.best_params_
print(f"Best parameters found: {best_params}")

# Riaddestramento del modello con i migliori parametri
model = XGBOD(**best_params)
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.



Best parameters found: {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.01}


Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.01, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

### FCNN

In [50]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Definisci il modello FCNN
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Poiché si tratta di una classificazione binaria
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Addestra il modello
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Prevedi gli outlier nel dataset di test
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")
y_predicted_score = model.predict(X_test_scaled)

metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

Using TensorFlow backend.


ImportError: cannot import name '_v1_name_scope' from 'keras.backend' (c:\Users\franc\OneDrive - University of Pisa\Università\Tesi\OPS-SAT-AD\OPS-SAT-AD-main\.venv\Lib\site-packages\keras\backend\__init__.py)

# Rocket

In [51]:
from sktime.transformations.panel.rocket import Rocket
from pyod.models.xgbod import XGBOD
import numpy as np


# 2. Applica ROCKET
rocket = Rocket(num_kernels=10000)
rocket.fit(X_train_scaled, y_train)
features = rocket.transform(X_train_scaled)
# Verifica che il numero di campioni di features e y_test sia lo stesso
assert features.shape[0] == y_test.shape[0], "Il numero di campioni non corrisponde!"


# 3. Rilevamento delle anomalie
model = XGBOD(contamination=0.01, random_state=42)  # Modello non supervisionato
anomaly_scores = model.fit_predict(features, y_test)


AssertionError: Il numero di campioni non corrisponde!

## Rilevamento di anomalie UNSUPERVISED

In [None]:
import numpy as np
import pandas as pd
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

# Funzioni già definite in precedenza
def generate_kernels(input_length, num_kernels):
    candidate_lengths = np.array((7, 9, 11), dtype=np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype=np.float64)
    biases = np.zeros(num_kernels, dtype=np.float64)
    dilations = np.zeros(num_kernels, dtype=np.int32)
    paddings = np.zeros(num_kernels, dtype=np.int32)

    a1 = 0
    for i in range(num_kernels):
        _length = lengths[i]
        _weights = np.random.normal(0, 1, _length)
        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()
        biases[i] = np.random.uniform(-1, 1)
        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation
        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding
        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath=True)
def apply_kernel(X, weights, length, bias, dilation, padding):
    input_length = len(X)
    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)
    _ppv = 0
    _max = np.NINF
    _mean_sum = 0  # Per calcolare la media
    end = (input_length + padding) - ((length - 1) * dilation)
    for i in range(-padding, end):
        _sum = bias
        index = i
        for j in range(length):
            if index > -1 and index < input_length:
                _sum += weights[j] * X[index]
            index += dilation
        _mean_sum += _sum  # Aggiungi al totale per la media
        if _sum > _max:
            _max = _sum
        if _sum > 0:
            _ppv += 1
    mean_response = _mean_sum / output_length  # Calcola la media
    return _ppv / output_length, _max, mean_response

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel=True, fastmath=True)
def apply_kernels(X, kernels):
    weights, lengths, biases, dilations, paddings = kernels
    num_examples, _ = X.shape
    num_kernels = len(lengths)
    _X = np.zeros((num_examples, num_kernels * 3), dtype=np.float64)  # 3 features per kernel
    for i in prange(num_examples):
        a1 = 0  # Per i pesi
        a2 = 0  # Per le caratteristiche
        for j in range(num_kernels):
            b1 = a1 + lengths[j]
            b2 = a2 + 3
            _X[i, a2:b2] = apply_kernel(
                X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j]
            )
            a1 = b1
            a2 = b2
    return _X

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 10000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)

# Sintesi delle caratteristiche per esempio
anomaly_scores_train = np.mean(features_train, axis=1)  # Media
anomaly_scores_test = np.mean(features_test, axis=1)  # Media

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Visualizzazione dei risultati
print("Anomalie rilevate nel training set:", anomaly_labels_train)
print("Anomalie rilevate nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test, y_proba=anomaly_scores_test)
print("Metriche di valutazione sul test set:\n", metrics)
# {'Accuracy': 0.832, 'Precision': 0.962, 'Recall': 0.221, 'F1': 0.36, 'MCC': 0.415, 'AUC_PR': 0.726, 'AUC_ROC': 0.772, 'PREC_N_SCORES': 0.646}

Anomalie rilevate nel training set: [1 0 0 ... 0 0 0]
Anomalie rilevate nel test set: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0
 0 0 0 0 0 0 1

## Rilevamento di anomalie SUPERVISED

In [24]:
import numpy as np
import pandas as pd
from pyod.models.xgbod import XGBOD
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X
    weights, lengths, biases, dilations, paddings = kernels
    num_examples, _ = X.shape
    num_kernels = len(lengths)
    _X = np.zeros((num_examples, num_kernels * 3), dtype=np.float64)  # 3 features per kernel
    for i in prange(num_examples):
        a1 = 0  # Per i pesi
        a2 = 0  # Per le caratteristiche
        for j in range(num_kernels):
            b1 = a1 + lengths[j]
            b2 = a2 + 3
            _X[i, a2:b2] = apply_kernel(
                X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j]
            )
            a1 = b1
            a2 = b2
    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000 # Valore standard
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train_scaled, kernels)
features_test = apply_kernels(X_test_scaled, kernels)

# Addestramento del modello supervisionato
model = XGBOD(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=SEED)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.predict_proba(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba=y_proba)
print("Metriche di valutazione:\n", metrics)

# Scaled -> {'Accuracy': 0.6, 'Precision': 0.7, 'Recall': 0.583, 'F1': 0.636, 'MCC': 0.204, 'AUC_PR': 0.632, 'AUC_ROC': 0.542}
# Non Scaled -> {'Accuracy': 0.7, 'Precision': 0.75, 'Recall': 0.75, 'F1': 0.75, 'MCC': 0.375, 'AUC_PR': 0.712, 'AUC_ROC': 0.656}



KeyboardInterrupt: 

Con standard Scaler

In [None]:
import numpy as np
import pandas as pd
from pyod.models.iforest import IsolationForest
from pyod.models.knn import KNN
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = KNN()
model.fit(features_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.845, 'Precision': 0.763, 'Recall': 0.398, 'F1': 0.523, 'MCC': 0.475, 'AUC_PR': 0.619, 'AUC_ROC': 0.811, 'PREC_N_SCORES': 0.54}

Predizioni nel test set: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0
 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1
 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


### Regressione Logistica -> Classificatore lineare

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from pyod.models.knn import KNN
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = LogisticRegression(max_iter=1000)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.977, 'Precision': 0.972, 'Recall': 0.92, 'F1': 0.945, 'MCC': 0.932, 'AUC_PR': 0.962, 'AUC_ROC': 0.984, 'PREC_N_SCORES': 0.929}

Predizioni nel test set: [0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 1 1 0 1 0 0 1
 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 1 0 1 0 0 1 0 1 1 1 0 1 1 1 1 1
 0 1 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 0 1 0 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 1 0 1 1 0 0 0
 0 0 1 0 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0
 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0
 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


### Prova con Dettagli dal GitHub del Paper

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)


# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = Ridge(alpha=1.0)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
anomaly_scores_test = model.predict(features_test)
anomaly_scores_train = model.predict(features_train)

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Visualizzazione dei risultati
print("Predizioni nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test, y_proba=anomaly_scores_test)
print("Metriche di valutazione:\n", metrics)
#  {'Accuracy': 0.888, 'Precision': 0.966, 'Recall': 0.496, 'F1': 0.655, 'MCC': 0.644, 'AUC_PR': 0.922, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.912}

Predizioni nel test set: [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 1 0 0 0 1 1 1
 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 0 0
 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 1 0 0 0 0 0 1 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
