In [5]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, average_precision_score
from pyod.utils.data import precision_n_scores
from pyod.models.iforest import IForest
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
# Per l'uso della memoria degli algoritmi
from memory_profiler import memory_usage
# Per la metrica sul tempo di Addestramento e Inferenza
import time

In [3]:
def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {"Accuracy": round(accuracy_score(y_test, y_pred), digits),
           "Precision": precision_score(y_test, y_pred).round(digits),
           "Recall": recall_score(y_test, y_pred).round(digits),
           "F1": f1_score(y_test, y_pred).round(digits),
           "MCC": round(matthews_corrcoef(y_test, y_pred), ndigits=digits)}
    if y_proba is not None:
        res["AUC_PR"] = average_precision_score(y_test, y_proba).round(digits)
        res["AUC_ROC"] = roc_auc_score(y_test, y_proba).round(digits)
        res["PREC_N_SCORES"] = precision_n_scores(y_test, y_proba).round(digits)
    return res


def set_seed_numpy(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

In [10]:
features = [
    "mean", "var", "std", "len", "duration", "len_weighted", "gaps_squared", "n_peaks",
    "smooth10_n_peaks", "smooth20_n_peaks", "var_div_duration", "var_div_len",
    "diff_peaks", "diff2_peaks", "diff_var", "diff2_var", "kurtosis", "skew",
]
SEED = 2137

In [11]:
df = pd.read_csv("data/dataset.csv", index_col="segment")

X_train, y_train = df.loc[df.train==1, features], df.loc[df.train==1, "anomaly"]
print(y_train)
X_test, y_test = df.loc[df.train==0, features], df.loc[df.train==0, "anomaly"]
X_train_nominal = df.loc[(df.anomaly==0)&(df.train==1), features]

prep = StandardScaler()
X_train_nominal2 = prep.fit_transform(X_train_nominal)
X_train2 = prep.transform(X_train)
X_test2 = prep.transform(X_test)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print("X_train", X_train.shape)
print("X_test", X_test.shape)
print("X_train2", X_train2.shape)
print("X_test2", X_test2.shape)


segment
1       1
2       1
3       1
4       1
6       0
       ..
2118    0
2120    0
2121    0
2122    0
2123    1
Name: anomaly, Length: 1594, dtype: int64
X_train (1594, 18)
X_test (529, 18)
X_train2 (1594, 18)
X_test2 (529, 18)


In [12]:
set_seed_numpy(SEED) 

# Supervised Model

In [12]:
model = AdaBoostClassifier(random_state=SEED)
model.fit(X_train2, y_train)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))



AdaBoostClassifier(random_state=2137) 
 {'Accuracy': 0.934, 'Precision': 0.89, 'Recall': 0.788, 'F1': 0.836, 'MCC': 0.797, 'AUC_PR': 0.923, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.841}


In [13]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
y_predicted_score = model.predict_proba(X_test)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.957, 'Precision': 0.959, 'Recall': 0.832, 'F1': 0.891, 'MCC': 0.867, 'AUC_PR': 0.961, 'AUC_ROC': 0.986, 'PREC_N_SCORES': 0.876}


In [14]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train_scaled, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test_scaled)
y_predicted_score = model.predict_proba(X_test_scaled)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.953, 'Precision': 0.94, 'Recall': 0.832, 'F1': 0.883, 'MCC': 0.856, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


In [15]:
from sklearn.svm import LinearSVC

# Inizializza e addestra il modello
model = LinearSVC()
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.928, 'Precision': 0.921, 'Recall': 0.726, 'F1': 0.812, 'MCC': 0.777, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


In [16]:
from sklearn.linear_model import LogisticRegression

# Inizializza e addestra il modello
model = LogisticRegression(max_iter=500)
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.924, 'Precision': 0.92, 'Recall': 0.708, 'F1': 0.8, 'MCC': 0.764, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


# Unsupervised Model

MO_GAAL

In [12]:
from pyod.models.mo_gaal import MO_GAAL
import os
os.environ['TF_USE_LEGACY_KERAS'] = 'True'

model = MO_GAAL()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))
 # {'Accuracy': 0.896, 'Precision': 0.939, 'Recall': 0.549, 'F1': 0.693, 'MCC': 0.669, 'AUC_PR': 0.771, 'AUC_ROC': 0.849, 'PREC_N_SCORES': 0.699}

KeyboardInterrupt: 

ANO-GAAL

In [None]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "True"

# Ora importa PyOD e usa AnoGAN come prima
from pyod.models.anogan import AnoGAN
import tensorflow as tf

model = AnoGAN(verbose=1) # per stampare più cose
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


Train iter: 1
Train iter: 2
Train iter: 3
Train iter: 4
Train iter: 5
Train iter: 6
Train iter: 7
Train iter: 8
Train iter: 9
Train iter: 10
Train iter: 11
Train iter: 12
Train iter: 13
Train iter: 14
Train iter: 15
Train iter: 16
Train iter: 17
Train iter: 18
Train iter: 19
Train iter: 20
Train iter: 21
Train iter: 22
Train iter: 23
Train iter: 24
Train iter: 25
Train iter: 26
Train iter: 27
Train iter: 28
Train iter: 29
Train iter: 30
Train iter: 31
Train iter: 32
Train iter: 33
Train iter: 34
Train iter: 35
Train iter: 36
Train iter: 37
Train iter: 38
Train iter: 39
Train iter: 40
Train iter: 41
Train iter: 42
Train iter: 43
Train iter: 44
Train iter: 45
Train iter: 46
Train iter: 47
Train iter: 48
Train iter: 49
Train iter: 50
Train iter: 51
Train iter: 52
Train iter: 53
Train iter: 54
Train iter: 55
Train iter: 56
Train iter: 57
Train iter: 58
Train iter: 59
Train iter: 60
Train iter: 61
Train iter: 62
Train iter: 63
Train iter: 64
Train iter: 65
Train iter: 66
Train iter: 67
Trai

KeyboardInterrupt: 

SO_GAAL

In [None]:
from pyod.models.so_gaal import SO_GAAL

# Verifica le dimensioni dei dati generati
print("Dimensione X_train:", X_train.shape)
print("Dimensione y_train:", y_train.shape)
print("Dimensione X_test:", X_test.shape)
print("Dimensione y_test:", y_test.shape)

model = SO_GAAL()
model.fit(X_train2[:len(X_train2) // 500 * 500])

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

# Valutazione del modello
print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


Dimensione X_train: (1594, 18)
Dimensione y_train: (1594,)
Dimensione X_test: (529, 18)
Dimensione y_test: (529,)
Epoch 1 of 60
Epoch 2 of 60
Epoch 3 of 60
Epoch 4 of 60
Epoch 5 of 60
Epoch 6 of 60
Epoch 7 of 60
Epoch 8 of 60
Epoch 9 of 60
Epoch 10 of 60
Epoch 11 of 60
Epoch 12 of 60
Epoch 13 of 60
Epoch 14 of 60
Epoch 15 of 60
Epoch 16 of 60
Epoch 17 of 60
Epoch 18 of 60
Epoch 19 of 60
Epoch 20 of 60
Epoch 21 of 60
Epoch 22 of 60
Epoch 23 of 60
Epoch 24 of 60
Epoch 25 of 60
Epoch 26 of 60
Epoch 27 of 60
Epoch 28 of 60
Epoch 29 of 60
Epoch 30 of 60
Epoch 31 of 60
Epoch 32 of 60
Epoch 33 of 60
Epoch 34 of 60
Epoch 35 of 60
Epoch 36 of 60
Epoch 37 of 60
Epoch 38 of 60
Epoch 39 of 60
Epoch 40 of 60
Epoch 41 of 60
Epoch 42 of 60
Epoch 43 of 60
Epoch 44 of 60
Epoch 45 of 60
Epoch 46 of 60
Epoch 47 of 60
Epoch 48 of 60
Epoch 49 of 60
Epoch 50 of 60
Epoch 51 of 60
Epoch 52 of 60
Epoch 53 of 60
Epoch 54 of 60
Epoch 55 of 60
Epoch 56 of 60
Epoch 57 of 60
Epoch 58 of 60
Epoch 59 of 60
Epoch 60 o

RF+ICCS

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Inizializza e addestra il modello
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
# Predizione
y_test_scores = model.predict_proba(X_test)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

NameError: name 'X_train' is not defined

Linear+L2

In [None]:
from sklearn.linear_model import RidgeClassifier

# Inizializza e addestra il modello Ridge Classifier (Linear + L2)
model = RidgeClassifier(alpha=1.0)  # 'alpha' è il parametro di regolarizzazione L2
model.fit(X_train, y_train)

# Predizione delle etichette di classe
y_predicted = model.predict(X_test)

# Ottieni le probabilità della classe positiva per AUC (si utilizza decision_function per ottenere punteggi di decisione)
y_test_scores = model.decision_function(X_test)

# Calcola e stampa le metriche
metrics = evaluate_metrics(y_test, y_predicted, y_test_scores)
print(metrics)


{'Accuracy': 0.902, 'Precision': 0.969, 'Recall': 0.558, 'F1': 0.708, 'MCC': 0.69, 'AUC_PR': 0.889, 'AUC_ROC': 0.95, 'PREC_N_SCORES': 0.814}


Isolation Forest

In [None]:
model = IForest(random_state=SEED, contamination=.2)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

IForest(behaviour='old', bootstrap=False, contamination=0.2, max_features=1.0,
    max_samples='auto', n_estimators=100, n_jobs=1, random_state=2137,
    verbose=0) 
 {'Accuracy': 0.701, 'Precision': 0.297, 'Recall': 0.292, 'F1': 0.295, 'MCC': 0.105, 'AUC_PR': 0.347, 'AUC_ROC': 0.635, 'PREC_N_SCORES': 0.301}


KNN

In [None]:
from pyod.models.knn import KNN

model = KNN()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0) 
 {'Accuracy': 0.849, 'Precision': 0.78, 'Recall': 0.407, 'F1': 0.535, 'MCC': 0.489, 'AUC_PR': 0.658, 'AUC_ROC': 0.852, 'PREC_N_SCORES': 0.593}


OCSVM

In [None]:
from pyod.models.ocsvm import OCSVM

model = OCSVM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False) 
 {'Accuracy': 0.837, 'Precision': 0.721, 'Recall': 0.389, 'F1': 0.506, 'MCC': 0.447, 'AUC_PR': 0.659, 'AUC_ROC': 0.788, 'PREC_N_SCORES': 0.655}


ABOD

In [None]:
from pyod.models.abod import ABOD

model = ABOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ABOD(contamination=0.1, method='fast', n_neighbors=5) 
 {'Accuracy': 0.845, 'Precision': 0.782, 'Recall': 0.381, 'F1': 0.512, 'MCC': 0.472, 'AUC_PR': 0.644, 'AUC_ROC': 0.843, 'PREC_N_SCORES': 0.584}


INNE

In [None]:
from pyod.models.inne import INNE

model = INNE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

INNE(contamination=0.1, max_samples='auto', n_estimators=200,
   random_state=None) 
 {'Accuracy': 0.832, 'Precision': 0.694, 'Recall': 0.381, 'F1': 0.491, 'MCC': 0.427, 'AUC_PR': 0.636, 'AUC_ROC': 0.805, 'PREC_N_SCORES': 0.655}


ALAD

In [None]:
from pyod.models.alad import ALAD

model = ALAD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ALAD(activation_hidden_disc='tanh', activation_hidden_gen='tanh',
   add_disc_zz_loss=True, add_recon_loss=False, batch_size=32,
   contamination=0.1, dec_layers=[5, 10, 25], device=device(type='cpu'),
   disc_xx_layers=[25, 10, 5], disc_xz_layers=[25, 10, 5],
   disc_zz_layers=[25, 10, 5], dropout_rate=0.2, enc_layers=[25, 10, 5],
   epochs=200, lambda_recon_loss=0.1, latent_dim=2,
   learning_rate_disc=0.0001, learning_rate_gen=0.0001,
   output_activation=None, preprocessing=False,
   spectral_normalization=False, verbose=0) 
 {'Accuracy': 0.783, 'Precision': 0.485, 'Recall': 0.283, 'F1': 0.358, 'MCC': 0.25, 'AUC_PR': 0.426, 'AUC_ROC': 0.626, 'PREC_N_SCORES': 0.407}


LMDD

In [None]:
from pyod.models.lmdd import LMDD

model = LMDD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LMDD(contamination=0.1, dis_measure='aad', n_iter=50, random_state=None) 
 {'Accuracy': 0.822, 'Precision': 1.0, 'Recall': 0.168, 'F1': 0.288, 'MCC': 0.37, 'AUC_PR': 0.624, 'AUC_ROC': 0.765, 'PREC_N_SCORES': 0.663}


SOD

In [None]:
from pyod.models.sod import SOD

model = SOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOD(alpha=0.8, contamination=0.1, n_neighbors=20, ref_set=10) 
 {'Accuracy': 0.826, 'Precision': 0.611, 'Recall': 0.513, 'F1': 0.558, 'MCC': 0.453, 'AUC_PR': 0.621, 'AUC_ROC': 0.797, 'PREC_N_SCORES': 0.549}


COF

In [None]:
from pyod.models.cof import COF

model = COF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COF(contamination=0.1, method='fast', n_neighbors=20) 
 {'Accuracy': 0.834, 'Precision': 0.667, 'Recall': 0.442, 'F1': 0.532, 'MCC': 0.449, 'AUC_PR': 0.603, 'AUC_ROC': 0.774, 'PREC_N_SCORES': 0.593}


LODA

In [None]:
from pyod.models.loda import LODA

model = LODA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LODA(contamination=0.1, n_bins=10, n_random_cuts=100) 
 {'Accuracy': 0.83, 'Precision': 0.689, 'Recall': 0.372, 'F1': 0.483, 'MCC': 0.418, 'AUC_PR': 0.549, 'AUC_ROC': 0.692, 'PREC_N_SCORES': 0.522}


LUNAR

In [None]:
from pyod.models.lunar import LUNAR

model = LUNAR()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LUNAR(contamination=0.1, epsilon=0.1, lr=0.001, model_type='WEIGHT',
   n_epochs=200, n_neighbours=5, negative_sampling='MIXED', proportion=1.0,
   scaler=MinMaxScaler(), val_size=0.1, verbose=0, wd=0.1) 
 {'Accuracy': 0.815, 'Precision': 0.742, 'Recall': 0.204, 'F1': 0.319, 'MCC': 0.322, 'AUC_PR': 0.539, 'AUC_ROC': 0.796, 'PREC_N_SCORES': 0.451}


CBLOF

In [None]:
from pyod.models.cblof import CBLOF

model = CBLOF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

CBLOF(alpha=0.9, beta=5, check_estimator=False, clustering_estimator=None,
   contamination=0.1, n_clusters=8, n_jobs=None, random_state=None,
   use_weights=False) 
 {'Accuracy': 0.802, 'Precision': 0.569, 'Recall': 0.292, 'F1': 0.386, 'MCC': 0.304, 'AUC_PR': 0.45, 'AUC_ROC': 0.574, 'PREC_N_SCORES': 0.372}


DIF

In [None]:
from pyod.models.dif import DIF

model = DIF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.predict_proba(X_test2)[:,1]

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

DIF(batch_size=1000, contamination=0.1, device=device(type='cpu'),
  hidden_activation='tanh', hidden_neurons=[500, 100], max_samples=256,
  n_ensemble=50, n_estimators=6, random_state=None, representation_dim=20,
  skip_connection=False) 
 {'Accuracy': 0.786, 'Precision': 0.5, 'Recall': 0.009, 'F1': 0.017, 'MCC': 0.043, 'AUC_PR': 0.541, 'AUC_ROC': 0.836, 'PREC_N_SCORES': 0.584}


VAE

In [None]:
from pyod.models.vae import VAE

model = VAE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Training:   0%|          | 0/30 [00:00<?, ?it/s]

Training: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


VAE(batch_norm=False, batch_size=32, beta=1.0, capacity=0.0,
  compile_mode='default', contamination=0.1,
  decoder_neuron_list=[32, 64, 128], device=device(type='cpu'),
  dropout_rate=0.2, encoder_neuron_list=[128, 64, 32], epoch_num=30,
  hidden_activation_name='relu', latent_dim=2, lr=0.001,
  optimizer_name='adam', optimizer_params={'weight_decay': 1e-05},
  output_activation_name='sigmoid', preprocessing=True, random_state=42,
  use_compile=False, verbose=1) 
 {'Accuracy': 0.794, 'Precision': 0.532, 'Recall': 0.292, 'F1': 0.377, 'MCC': 0.283, 'AUC_PR': 0.446, 'AUC_ROC': 0.687, 'PREC_N_SCORES': 0.513}


GMM

In [None]:
from pyod.models.gmm import GMM

model = GMM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

GMM(contamination=0.1, covariance_type='full', init_params='kmeans',
  max_iter=100, means_init=None, n_components=1, n_init=1,
  precisions_init=None, random_state=None, reg_covar=1e-06, tol=0.001,
  warm_start=False, weights_init=None) 
 {'Accuracy': 0.783, 'Precision': 0.482, 'Recall': 0.239, 'F1': 0.32, 'MCC': 0.225, 'AUC_PR': 0.426, 'AUC_ROC': 0.713, 'PREC_N_SCORES': 0.389}


DeepSVDD

In [None]:
from pyod.models.deep_svdd import DeepSVDD

# Determina il numero di feature
n_features = X_train2.shape[1]

model = DeepSVDD(n_features=n_features)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Epoch 1/100, Loss: 36.17359483242035
Epoch 2/100, Loss: 36.19166633486748
Epoch 3/100, Loss: 36.2466336786747
Epoch 4/100, Loss: 36.13528761267662
Epoch 5/100, Loss: 36.165921211242676
Epoch 6/100, Loss: 36.13916572928429
Epoch 7/100, Loss: 36.189294904470444
Epoch 8/100, Loss: 36.17238187789917
Epoch 9/100, Loss: 36.2117395401001
Epoch 10/100, Loss: 36.185857594013214
Epoch 11/100, Loss: 36.13321906328201
Epoch 12/100, Loss: 36.1584706902504
Epoch 13/100, Loss: 36.17630282044411
Epoch 14/100, Loss: 36.17380636930466
Epoch 15/100, Loss: 36.25334322452545
Epoch 16/100, Loss: 36.1712027490139
Epoch 17/100, Loss: 36.12485006451607
Epoch 18/100, Loss: 36.4436274766922
Epoch 19/100, Loss: 36.22374951839447
Epoch 20/100, Loss: 36.2115415930748
Epoch 21/100, Loss: 36.16678577661514
Epoch 22/100, Loss: 36.20809951424599
Epoch 23/100, Loss: 36.228652626276016
Epoch 24/100, Loss: 36.154085248708725
Epoch 25/100, Loss: 36.138443648815155
Epoch 26/100, Loss: 36.5161928832531
Epoch 27/100, Loss: 36

PCA

In [None]:
from pyod.models.pca import PCA

model = PCA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

PCA(contamination=0.1, copy=True, iterated_power='auto', n_components=None,
  n_selected_components=None, random_state=None, standardization=True,
  svd_solver='auto', tol=0.0, weighted=True, whiten=False) 
 {'Accuracy': 0.779, 'Precision': 0.464, 'Recall': 0.23, 'F1': 0.308, 'MCC': 0.21, 'AUC_PR': 0.373, 'AUC_ROC': 0.612, 'PREC_N_SCORES': 0.363}


COPOD

In [None]:
from pyod.models.copod import COPOD

model = COPOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COPOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.4, 'Recall': 0.177, 'F1': 0.245, 'MCC': 0.147, 'AUC_PR': 0.328, 'AUC_ROC': 0.627, 'PREC_N_SCORES': 0.257}


SOS

In [None]:
from pyod.models.sos import SOS

model = SOS()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOS(contamination=0.1, eps=1e-05, metric='euclidean', perplexity=4.5) 
 {'Accuracy': 0.758, 'Precision': 0.364, 'Recall': 0.177, 'F1': 0.238, 'MCC': 0.125, 'AUC_PR': 0.308, 'AUC_ROC': 0.524, 'PREC_N_SCORES': 0.274}


ECOD

In [None]:
from pyod.models.ecod import ECOD

model = ECOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ECOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.396, 'Recall': 0.168, 'F1': 0.236, 'MCC': 0.14, 'AUC_PR': 0.34, 'AUC_ROC': 0.637, 'PREC_N_SCORES': 0.345}


# XGBOD

In [None]:
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD()
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

#n_estimators=50,
#max_depth=3,
#learning_rate=0.1,
#random_state=SEED

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

#### Con metiche di Memoria e Tempo

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD(n_estimators=50, max_depth=3, learning_rate=0.1, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference



### XGBOD più modelli unsupervised

In [None]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], subsample=1) {'Accuracy': 0.968, 'Precision': 0.944, 'Recall': 0.903, 'F1': 0.923, 'MCC': 0.903, 'AUC_PR': 0.974, 'AUC_ROC': 0.991, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.3419463634490967 secondi
Uso della memoria durante l'addestramento: 815.8125 MiB

 Tempo di inferenza: 1.605494499206543 secondi
Uso della memoria durante l'inferenza: 815.79296875 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], su

### XGBOD più modelli unsupervised e Parametri

In [None]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models,
              n_estimators=100,
              max_depth=3,
              learning_rate=0.2,
              n_jobs=-1,
              random_state=SEED
            )

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)

Parameters: { "silent" } are not used.




{'Accuracy': 0.97, 'Precision': 0.945, 'Recall': 0.912, 'F1': 0.928, 'MCC': 0.909, 'AUC_PR': 0.973, 'AUC_ROC': 0.992, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models, n_estimators=100, max_depth=3, learning_rate=0.2, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.611022472381592 secondi
Uso della memoria durante l'addestramento: 816.11328125 MiB

 Tempo di inferenza: 1.9620587825775146 secondi
Uso della memoria durante l'inferenza: 816.078125 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.2, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=2137, reg_alpha=0,
   reg_lambda=1, scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True

### Early Stopping
Termina l'esecuzione anticipatamente se per un numero prestabilito di round non migliorano più i parametri

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Divisione del dataset di allenamento per avere un set di validazione
X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=SEED)

# Inizializzazione del modello
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.2, n_jobs=-1, random_state=SEED)

best_score = -np.inf
patience = 10       # Numero di volte che il modello cercherà di migliorarsi
patience_counter = 0
n_iterations = 100      # Numero massimo di cicli del'allenamento

for i in range(n_iterations):  # Numero massimo di iterazioni
    model.fit(X_train_sub, y_train_sub)
    
    # Predizione sul set di validazione
    y_val_pred = model.predict(X_val)
    val_score = accuracy_score(y_val, y_val_pred)
    
    # Controllo early stopping
    if val_score > best_score:
        best_score = val_score
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at iteration {i}")
            break
    model.n_estimators += 1  # Incrementa il numero di stimatori per la prossima iterazione

# Predizione sul set di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)


Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.



Early stopping at iteration 12

{'Accuracy': 0.97, 'Precision': 0.971, 'Recall': 0.885, 'F1': 0.926, 'MCC': 0.909, 'AUC_PR': 0.969, 'AUC_ROC': 0.99, 'PREC_N_SCORES': 0.912}


### XGBOD con ricerca iperparametri con "grid"

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from pyod.models.xgbod import XGBOD
import numpy as np

# Definizione della griglia di parametri
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}

# Inizializza il modello
model = XGBOD()

# Randomized search con meno iterazioni e parallelizzazione
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, scoring='roc_auc', random_state=42, n_jobs=-1)
random_search.fit(X_train_scaled, y_train)

# Migliori parametri trovati
best_params = random_search.best_params_
print(f"Best parameters found: {best_params}")

# Riaddestramento del modello con i migliori parametri
model = XGBOD(**best_params)
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.



Best parameters found: {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.01}


Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.01, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

### FCNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Definisci il modello FCNN
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Poiché si tratta di una classificazione binaria
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Addestra il modello
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Prevedi gli outlier nel dataset di test
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")
y_predicted_score = model.predict(X_test_scaled)

metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.8006 - loss: 0.4877 - val_accuracy: 0.8885 - val_loss: 0.2546
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9154 - loss: 0.2390 - val_accuracy: 0.9244 - val_loss: 0.1969
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9334 - loss: 0.1862 - val_accuracy: 0.9168 - val_loss: 0.1949
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9408 - loss: 0.1831 - val_accuracy: 0.9452 - val_loss: 0.1793
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9474 - loss: 0.1629 - val_accuracy: 0.9471 - val_loss: 0.1570
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9424 - loss: 0.1595 - val_accuracy: 0.9546 - val_loss: 0.1572
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━

# Elaborazioni Dati OPS_SAT

In [1]:
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# ============ Valutazione Metriche ================
def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {"Accuracy": round(accuracy_score(y_test, y_pred), digits),
           "Precision": precision_score(y_test, y_pred).round(digits),
           "Recall": recall_score(y_test, y_pred).round(digits),
           "F1": f1_score(y_test, y_pred).round(digits),
           "MCC": round(matthews_corrcoef(y_test, y_pred), ndigits=digits)}
    if y_proba is not None:
        res["AUC_PR"] = average_precision_score(y_test, y_proba).round(digits)
        res["AUC_ROC"] = roc_auc_score(y_test, y_proba).round(digits)
        res["PREC_N_SCORES"] = precision_n_scores(y_test, y_proba).round(digits)
    return res


def set_seed_numpy(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

# ======================= ELABORAZIONE DATI TRAINING =============================
STEP = 250
# Lista per memorizzare i segmenti di training
X_train_final = []
y_train_final = []

# Leggi il file CSV
dfSegment = pd.read_csv("data/segments.csv", index_col="timestamp")

for channel in dfSegment["channel"].unique():
    # Itera su ogni segmento unico per il canale corrente
    for segment in dfSegment[dfSegment["channel"] == channel]["segment"].unique():
        mask = (dfSegment["train"] == 1) & (dfSegment["channel"] == channel) & (dfSegment["segment"] == segment)

        # Filtra i dati in base alla maschera
        X_trainS = dfSegment.loc[mask, "value"] #.reset_index(drop=True).values  # Estrarre solo 'value'
        y_trainS = dfSegment.loc[mask, "anomaly"] #.reset_index(drop=True).values  # Estrarre solo 'value'
        # print(X_trainS.shape)
        # Suddividi in sottoliste di STEP elementi
        for i in range(0, len(X_trainS) - STEP + 1, STEP):
            X_train_final.append(X_trainS[i:i + STEP])
            y_train_final.append(y_trainS[i])
        

# Converti la lista in un numpy array
X_train = np.array(X_train_final)
y_train = np.array(y_train_final)
# Reshape per ottenere la shape desiderata
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_train = X_train.transpose(0, 2, 1)
# print(X_train_final.shape)


# ======================= ELABORAZIONE DATI TEST =============================

# Predisposizione del test set
test_data = dfSegment[dfSegment["train"] == 0]
# Predisposizione del test set
X_test_final = []
y_test_final = []

for channel in dfSegment["channel"].unique():
    for segment in test_data[test_data["channel"] == channel]["segment"].unique():

        mask = (test_data["channel"] == channel) & (test_data["segment"] == segment)
        X_testS = test_data.loc[mask, "value"]#.reset_index(drop=True).values
        y_testS = test_data.loc[mask, "anomaly"]#.reset_index(drop=True).values
        
        for i in range(0, len(X_testS) - STEP + 1, STEP):
            X_test_final.append(X_testS[i:i + STEP])
            y_test_final.append(y_testS[i])


X_test = np.array(X_test_final)

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
X_test = X_test.transpose(0, 2, 1)
# print("X_test: ",X_test)
# X_test = np.array(X_test_final).reshape(len(X_test_final), STEP, 1)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)


y_test = np.array(y_test_final)
print("y_test: ",y_test)

# ======================= PRE-PROCESSING =============================

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

  y_train_final.append(y_trainS[i])
  y_test_final.append(y_testS[i])


X_train shape: (347, 1, 250)
X_test shape: (130, 1, 250)
y_test:  [0 0 1 1 1 1 0 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 1 1 1 0 0 1 1 0 1 1 1 0 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 0 0
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 0 0
 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0]


# Rockad

### 2° Prova un canale -> miglioramento predizione

In [6]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler

from RockadFunction import ROCKAD, NearestNeighborOCC

RANDOM_STATE = 42
STEP = 250

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, matthews_corrcoef, average_precision_score

def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {
        "Accuracy": round(accuracy_score(y_test, y_pred), digits),
        "Precision": precision_score(y_test, y_pred, average='weighted').round(digits),
        "Recall": recall_score(y_test, y_pred, average='weighted').round(digits),
        "F1": f1_score(y_test, y_pred, average='weighted').round(digits),
        "MCC": round(matthews_corrcoef(y_test, y_pred), ndigits=digits)
    }
    if y_proba is not None:
        res["AUC_PR"] = average_precision_score(y_test, y_proba, average='weighted').round(digits)
        res["AUC_ROC"] = roc_auc_score(y_test, y_proba).round(digits)
        res["PREC_N_SCORES"] = precision_n_scores(y_test, y_proba).round(digits)
    return res


# ======================= ELABORAZIONE DATI TRAINING =============================

# Lista per memorizzare i segmenti di training
X_train_final = []

# Leggi il file CSV
dfSegment = pd.read_csv("data/segments.csv", index_col="timestamp")
channelFix = "CADC0872"

# Itera su ogni segmento unico per il canale corrente
for segment in dfSegment[dfSegment["channel"] == channelFix]["segment"].unique():
    mask = (dfSegment["train"] == 1) & (dfSegment["channel"] == channelFix) & (dfSegment["segment"] == segment)

    # Filtra i dati in base alla maschera
    X_trainS = dfSegment.loc[mask, "value"] #.reset_index(drop=True).values  # Estrarre solo 'value'
    # print(X_trainS.shape)
    # Suddividi in sottoliste di STEP elementi
    for i in range(0, len(X_trainS) - STEP + 1, STEP):
        sublist = X_trainS[i:i + STEP]  # Estrarre una finestra di STEP elementi
        X_train_final.append(sublist)

# Converti la lista in un numpy array
X_train = np.array(X_train_final)
# Reshape per ottenere la shape desiderata
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_train = X_train.transpose(0, 2, 1)
# print(X_train_final.shape)


# ======================= ELABORAZIONE DATI TEST =============================

# Predisposizione del test set
test_data = dfSegment[dfSegment["train"] == 0]
# Predisposizione del test set
X_test_final = []
y_test_final = []

for segment in test_data[test_data["channel"] == channelFix]["segment"].unique():

    mask = (test_data["channel"] == channelFix) & (test_data["segment"] == segment)
    X_testS = test_data.loc[mask, "value"]#.reset_index(drop=True).values
    y_testS = test_data.loc[mask, "anomaly"]#.reset_index(drop=True).values
    
    for i in range(0, len(X_testS) - STEP + 1, STEP):
        X_test_final.append(X_testS[i:i + STEP])
        y_test_final.append(y_testS[i])


X_test = np.array(X_test_final)

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
X_test = X_test.transpose(0, 2, 1)
# print("X_test: ",X_test)
# X_test = np.array(X_test_final).reshape(len(X_test_final), STEP, 1)

print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)


y_test = np.array(y_test_final)
print("y_test: ",y_test)

# ======================= PRE-PROCESSING =============================

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# ======================= FIT e PREDICT e SCORE =============================


# Inizializza e addestra il modello ROCKAD
rockad = ROCKAD(n_neighbors=5 , n_jobs=-1, n_estimators=10, n_kernels=10000, random_state=RANDOM_STATE, power_transform=False)
rockad.fit(X_train)
print("End Train")


# print("mean_train", mean_train)
# print("std_train", std_train)
# Predict anomaly scores
score_train = rockad.predict_proba(X_train)

# print("Score:", scores)

# Initialize and fit NearestNeigbor One Class Classifier

decision_func = NearestNeighborOCC().fit(score_train)
score_test = rockad.predict_proba(X_test)
print("score_test: ", score_test.shape)
print(score_test)

result = decision_func.predict(score_test)
result_binary = np.where(result == -1, 0, 1)

# result2 = knn.predict(score_test)
print("RISULTATI: ", result_binary)
#print("RISULTATI: ", result2)




metrics = evaluate_metrics(y_test, result_binary, score_test)
print("Metriche di valutazione:\n", metrics)



  y_test_final.append(y_testS[i])


X_train shape: (53, 1, 250)
X_test shape: (15, 1, 250)
y_test:  [0 0 1 1 1 1 0 1 1 1 1 0 1 0 0]
End Train
score_test:  (15,)
[ 59.52569593  97.91287411 104.37438939 306.12021652 117.83811987
 206.00498512  96.27331365 112.26971517  85.6070828  154.73770951
  68.09927101  68.40500052  69.69060948  70.24381485  62.94952254]
RISULTATI:  [0 0 1 1 1 0 0 1 1 1 0 0 0 1 0]
Metriche di valutazione:
 {'Accuracy': 0.733, 'Precision': 0.764, 'Recall': 0.733, 'F1': 0.736, 'MCC': 0.491, 'AUC_PR': 0.911, 'AUC_ROC': 0.833, 'PREC_N_SCORES': 0.778}


#### NORMALE

In [7]:
import numpy as np
import pandas as pd

from RockadFunction import ROCKAD, NearestNeighborOCC

features = ["channel", "segment", "value", "anomaly"]
RANDOM_STATE = 42
STEP = 250

# Lista per memorizzare i segmenti di training
X_train_final = []

# Leggi il file CSV
dfSegment = pd.read_csv("data/segments.csv", index_col="timestamp")
channelFix = "CADC0872"

# Itera su ogni segmento unico per il canale corrente
for segment in dfSegment[dfSegment["channel"] == channelFix]["segment"].unique():
    mask = (dfSegment["train"] == 1) & (dfSegment["channel"] == channelFix) & (dfSegment["segment"] == segment)

    # Filtra i dati in base alla maschera
    X_trainS = dfSegment.loc[mask, "value"] #.reset_index(drop=True).values  # Estrarre solo 'value'
    # print(X_trainS.shape)
    # Suddividi in sottoliste di STEP elementi
    for i in range(0, len(X_trainS) - STEP + 1, STEP):
        sublist = X_trainS[i:i + STEP]  # Estrarre una finestra di STEP elementi
        X_train_final.append(sublist)

# Converti la lista in un numpy array
X_train = np.array(X_train_final)
# print(X_train_final)

# Reshape per ottenere la shape desiderata
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
# print(X_train_final.shape)
print("X_train:", X_train.shape)

# y_train = dfSegment[dfSegment["train"] == 1]["anomaly"].values[:X_train_final.shape[0]]

# # Senza non torna perchè richiede che tutti abbiano una shape>0
# X_train_filtered, y_train_filtered = zip(*[
#     (x, y) for x, y in zip(X_train_final, y_train) if not np.any(x == 0)
# ])
# X_train_filtered = np.array(X_train_filtered)



# X_normal_train = X_train_final[y_train == 0]
#  print("Shape X_normal_train:", X_normal_train.shape)

# Inizializza e addestra il modello ROCKAD
rockad = ROCKAD(n_estimators=100, n_kernels=100, random_state=RANDOM_STATE)
rockad.fit(X_train)
print("End Train")

# Predisposizione del test set
test_data = dfSegment[dfSegment["train"] == 0]

# Predisposizione del test set
X_test_final = []
y_test_final = []

for segment in test_data[test_data["channel"] == channelFix]["segment"].unique():

    mask = (test_data["channel"] == channelFix) & (test_data["segment"] == segment)
    X_testS = test_data.loc[mask, "value"]#.reset_index(drop=True).values
    y_testS = test_data.loc[mask, "anomaly"]#.reset_index(drop=True).values
    
    for i in range(0, len(X_testS) - STEP + 1, STEP):
        X_test_final.append(X_testS[i:i + STEP])
        y_test_final.append(y_testS[i])


X_test = np.array(X_test_final)

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
# print("X_test: ",X_test)
# X_test = np.array(X_test_final).reshape(len(X_test_final), STEP, 1)

y_test = np.array(y_test_final)
print("y_test: ",y_test)


# Predict anomaly scores
score_train = rockad.predict_proba(X_train)
# print("Score:", scores)

# Initialize and fit NearestNeigbor One Class Classifier

decision_func = NearestNeighborOCC().fit(score_train)
score_test = rockad.predict_proba(X_test)
print("score_test: ", score_test.shape)
print(score_test)

result = decision_func.predict(score_test)
# result2 = knn.predict(score_test)
print("RISULTATI: ", result)
#print("RISULTATI: ", result2)

metrics = evaluate_metrics(y_test, result, score_test)
print("Metriche di valutazione:\n", metrics)

X_train: (53, 250, 1)
End Train


  y_test_final.append(y_testS[i])


y_test:  [0 0 1 1 1 1 0 1 1 1 1 0 1 0 0]
score_test:  (15,)
[3.68062169e-07 3.68062169e-07 3.68062169e-07 3.68062169e-07
 3.68062169e-07 3.68062169e-07 3.68062169e-07 3.68062169e-07
 3.68062169e-07 3.68062169e-07 3.68062169e-07 3.68062169e-07
 3.68062169e-07 3.68062169e-07 3.68062169e-07]
RISULTATI:  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.6, 'Precision': 0.6, 'Recall': 1.0, 'F1': 0.75, 'MCC': 0.0, 'AUC_PR': 0.6, 'AUC_ROC': 0.5, 'PREC_N_SCORES': 0.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


### Più Canali e Miglioramento

In [None]:
import numpy as np
import pandas as pd

from pyod.models.knn import KNN

from RockadFunction import ROCKAD, NearestNeighborOCC

features = ["channel", "segment", "value", "anomaly"]
RANDOM_STATE = 42
STEP = 250

# ======================= ELABORAZIONE DATI TRAINING =============================

# Lista per memorizzare i segmenti di training
X_train_final = []

# Leggi il file CSV
dfSegment = pd.read_csv("data/segments.csv", index_col="timestamp")

for channel in dfSegment["channel"].unique():
    # Itera su ogni segmento unico per il canale corrente
    for segment in dfSegment[dfSegment["channel"] == channel]["segment"].unique():
        mask = (dfSegment["train"] == 1) & (dfSegment["channel"] == channel) & (dfSegment["segment"] == segment)

        # Filtra i dati in base alla maschera
        X_trainS = dfSegment.loc[mask, "value"] #.reset_index(drop=True).values  # Estrarre solo 'value'
        # print(X_trainS.shape)
        # Suddividi in sottoliste di STEP elementi
        for i in range(0, len(X_trainS) - STEP + 1, STEP):
            sublist = X_trainS[i:i + STEP]  # Estrarre una finestra di STEP elementi
            X_train_final.append(sublist)

# Converti la lista in un numpy array
X_train = np.array(X_train_final)
# print(X_train_final)

# Reshape per ottenere la shape desiderata
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_train = X_train.transpose(0, 2, 1)
print(X_train.shape)
# print("X_train_final:", X_train_final)

# ======================= ELABORAZIONE DATI TEST =============================

# Predisposizione del test set
test_data = dfSegment[dfSegment["train"] == 0]

# Predisposizione del test set
X_test_final = []
y_test_final = []

for channel in dfSegment["channel"].unique():
    for segment in test_data[test_data["channel"] == channel]["segment"].unique():

        mask = (test_data["channel"] == channel) & (test_data["segment"] == segment)
        X_testS = test_data.loc[mask, "value"]#.reset_index(drop=True).values
        y_testS = test_data.loc[mask, "anomaly"]#.reset_index(drop=True).values
        
        for i in range(0, len(X_testS) - STEP + 1, STEP):
            X_test_final.append(X_testS[i:i + STEP])
            y_test_final.append(y_testS[i])


X_test = np.array(X_test_final)

X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
X_test = X_test.transpose(0, 2, 1)
# print("X_test: ",X_test)
# X_test = np.array(X_test_final).reshape(len(X_test_final), STEP, 1)

y_test = np.array(y_test_final)
print("y_test: ",y_test.shape)

# ======================= PRE-PROCESSING =============================

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# ======================= FIT e PREDICT e SCORE =============================

# Inizializza e addestra il modello ROCKAD
rockad = ROCKAD(n_estimators=10, n_kernels=1000, n_jobs=-1, random_state=RANDOM_STATE, power_transform=False)
rockad.fit(X_train)
print("End Train")

# Predict anomaly scores
score_train = rockad.predict_proba(X_train)
# print("Score:", scores)

# Initialize and fit NearestNeigbor One Class Classifier

decision_func = NearestNeighborOCC().fit(score_train)
score_test = rockad.predict_proba(X_test)
# print("score_test: ", score_test.shape)
# print(score_test)

result = decision_func.predict(score_test)
# result2 = knn.predict(score_test)
result_binary = np.where(result == -1, 0, 1)
print("RISULTATI: ", result_binary)
#print("RISULTATI: ", result2)

metrics = evaluate_metrics(y_test, result_binary, score_test)
print("Metriche di valutazione:\n", metrics)
# Senza parametri con standard scalar: {'Accuracy': 0.454, 'Precision': 0.447, 'Recall': 0.613, 'F1': 0.517, 'MCC': -0.082, 'AUC_PR': 0.407, 'AUC_ROC': 0.297, 'PREC_N_SCORES': 0.29}
# Senza standard scalar e parametri: {'Accuracy': 0.523, 'Precision': 0.5, 'Recall': 0.758, 'F1': 0.603, 'MCC': 0.075, 'AUC_PR': 0.753, 'AUC_ROC': 0.707, 'PREC_N_SCORES': 0.677}
# con parametri: {'Accuracy': 0.546, 'Precision': 0.515, 'Recall': 0.823, 'F1': 0.634, 'MCC': 0.137, 'AUC_PR': 0.757, 'AUC_ROC': 0.704, 'PREC_N_SCORES': 0.677}
# con standard scaler e parametri: {'Accuracy': 0.492, 'Precision': 0.476, 'Recall': 0.645, 'F1': 0.548, 'MCC': -0.002, 'AUC_PR': 0.407, 'AUC_ROC': 0.305, 'PREC_N_SCORES': 0.29} (10,10000)

# scaler (20, 10000): {'Accuracy': 0.531, 'Precision': 0.506, 'Recall': 0.694, 'F1': 0.585, 'MCC': 0.08, 'AUC_PR': 0.409, 'AUC_ROC': 0.305, 'PREC_N_SCORES': 0.29}
# solo param: {'Accuracy': 0.454, 'Precision': 0.447, 'Recall': 0.613, 'F1': 0.517, 'MCC': -0.082, 'AUC_PR': 0.758, 'AUC_ROC': 0.705, 'PREC_N_SCORES': 0.677}
# (30, 10000): {'Accuracy': 0.5, 'Precision': 0.481, 'Recall': 0.613, 'F1': 0.539, 'MCC': 0.01, 'AUC_PR': 0.406, 'AUC_ROC': 0.304, 'PREC_N_SCORES': 0.29}
# (35, 10000): {'Accuracy': 0.523, 'Precision': 0.5, 'Recall': 0.597, 'F1': 0.544, 'MCC': 0.053, 'AUC_PR': 0.407, 'AUC_ROC': 0.304, 'PREC_N_SCORES': 0.29}
# (40, 10000): {'Accuracy': 0.454, 'Precision': 0.443, 'Recall': 0.565, 'F1': 0.496, 'MCC': -0.084, 'AUC_PR': 0.409, 'AUC_ROC': 0.305, 'PREC_N_SCORES': 0.29}

# (10, 20000): 
# (10, 20000) + no StandardScalar:

(347, 1, 250)


  y_test_final.append(y_testS[i])


y_test:  (130,)


NameError: name 'StandardScaler' is not defined

## ROCKAD su NASA

In [28]:
import numpy as np
import pandas as pd

from pyod.models.knn import KNN

from RockadFunction import ROCKAD, NearestNeighborOCC
from NASA.nasa import NASA
from valutazione_metriche import evaluate_metrics

RANDOM_STATE = 42
STEP = 250
OFFSET = 50
OUTPUT_FILE = "risultatiNASA_ROCKAD.csv"


results_df = pd.DataFrame(columns=["Channel", "Accuracy", "Precision", "Recall", "F1", "MCC", "AUC_ROC", "AUC_PR"])

# ======================= ELABORAZIONE DATI TRAINING =============================
# Itera su tutti i canali del dataset
for channel_id in NASA.channel_ids:
    # if channel_id == "D-12" or channel_id == "T-10" or channel_id == "T-9":   # Non uso questi perchè NeirestNeigthbor dato che necessita di avere più 
    #     continue
    if channel_id == "T-10":
        continue
    print(f"Processing channel: {channel_id}")

    # Lista per memorizzare i segmenti di training
    X_train_final = []

    # Uso del dataset NASA per tutti i canali
    dataset = NASA("./datasets", channel_id, mode="anomaly")
    # print(dataset.data.shape)
    data = dataset.data
    train = []
    for i in range(0, data.shape[0] - STEP +1, OFFSET): 
        train.append(data[i:i+STEP])

    train = np.stack(train)
    # print("train: ", train.shape)  # Mostra le prime 5 righe dell'array

    # ======================= FIT e PREDICT e SCORE =============================

    # Inizializza e addestra il modello ROCKAD
    rockad = ROCKAD(n_neighbors=1, n_estimators=10, n_kernels=1000, n_jobs=-1, random_state=RANDOM_STATE, power_transform=False)
    rockad.fit(train)
    print("==== End Train ====")

    # Predict anomaly scores
    score_train = rockad.predict_proba(train)
    # print("Score:", scores)

    dataset = NASA("./datasets", channel_id, mode="anomaly", train=False)
    data = dataset.data
    Test = []
    output = []
    o = np.zeros(data.shape[0])
    for start,end in dataset.anomalies:
        o[start:end] = 1
    for i in range(0, data.shape[0] - STEP +1, STEP): 
        Test.append(data[i:i+STEP])
        output.append(o[i:i+STEP])

    output = np.stack(output)
    Test = np.stack(Test)
    # print("TEST: ", Test.shape)  # Mostra le prime 5 righe dell'array

    # Initialize and fit NearestNeigbor One Class Classifier

    decision_func = NearestNeighborOCC().fit(score_train)
    
    print("Train: ", train.shape)
    print("Test: ", Test.shape)
    print("output: ", output.shape)
    score_test = rockad.predict_proba(Test)
    # print("score_test: ", score_test.shape)
    # print(score_test)

    result = decision_func.predict(score_test)
    result_binary = np.where(result == -1, 0, 1)


    # Scegliere se una sequenda è un anomalia o no -> 10%
    threshold = 1 # -> 10%
    # Conta il numero di 1 in ogni lista
    counts = np.sum(output, axis=1)
    output = np.where(counts >= threshold, 1, 0)
    print("counts: ", counts)
    print("output: ", output)
    metrics = evaluate_metrics(output, result_binary, score_test)
    print("Metriche di valutazione:\n", metrics)

        # Calcolo di VP, VN, FP, FN
    TP = ((output == 1) & (result_binary == 1)).sum()
    TN = ((output == 0) & (result_binary == 0)).sum()
    FP = ((output == 0) & (result_binary == 1)).sum()
    FN = ((output == 1) & (result_binary == 0)).sum()

    results_df = pd.concat([results_df, pd.DataFrame([{
        "Channel": channel_id,
        "Accuracy": metrics.get("Accuracy", 0),
        "Precision": metrics.get("Precision", 0),
        "Recall": metrics.get("Recall", 0),
        "MCC": metrics.get("MCC", 0),
        "AUC_PR": metrics.get("AUC_PR", 0),
        "AUC_ROC": metrics.get("AUC_ROC", 0),
        "F1": metrics.get("F1", 0),
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
    }])], ignore_index=True)

    print("=========================FINE CHANNEL=============================")
# ======================= SALVATAGGIO RISULTATI =============================
results_df.to_csv(OUTPUT_FILE, index=False)
print(f"Risultati salvati in {OUTPUT_FILE}")

Processing channel: A-1
==== End Train ====
Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 60. 24.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.471, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.236, 'AUC_PR': 0.75, 'AUC_ROC': 0.969, 'PREC_N_SCORES': 0.5}
Processing channel: A-2
==== End Train ====


  results_df = pd.concat([results_df, pd.DataFrame([{


Train:  (48, 250, 25)
Test:  (31, 250, 25)
output:  (31, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 50.
 60.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.806, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.101, 'AUC_PR': 0.625, 'AUC_ROC': 0.897, 'PREC_N_SCORES': 0.5}
Processing channel: A-3
==== End Train ====
Train:  (50, 250, 25)
Test:  (32, 250, 25)
output:  (32, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0. 175.  10.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.25, 'Precision': 0.042, 'Recall': 0.5, 'F1': 0.077, 'MCC': -0.149, 'AUC_PR': 0.533, 'AUC_ROC': 0.533, 'PREC_N_SCORES': 0.5}
Processing channel: A-4
==== End Train ====
Trai

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (10, 250, 25)
Test:  (18, 250, 25)
output:  (18, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 50.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.944, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: A-6
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (9, 250, 25)
Test:  (17, 250, 25)
output:  (17, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0. 40.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.0, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -1.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: A-7
==== End Train ====
Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  50. 250. 250. 250.
 250. 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.971, 'Precision': 1.0, 'Recall': 0.9, 'F1': 0.947, 'MCC': 0.93, 'AUC_PR': 0.187, 'AUC_ROC': 0.071, 'PREC_N_SCORES': 0.1}
Processing channel: A-8
==== End Train ====
Train:  (11, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0. 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (39, 250, 55)
Test:  (9, 250, 55)
output:  (9, 250)
counts:  [  0.   0. 200.   0.   0.   0.   0.   0. 110.]
output:  [0 0 1 0 0 0 0 0 1]
Metriche di valutazione:
 {'Accuracy': 0.778, 'Precision': 0.5, 'Recall': 0.5, 'F1': 0.5, 'MCC': 0.357, 'AUC_PR': 0.361, 'AUC_ROC': 0.429, 'PREC_N_SCORES': 0.5}
Processing channel: C-2
==== End Train ====
Train:  (11, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0. 100.   0.   0.   0.   0.  35.   0.]
output:  [0 1 0 0 0 0 1 0]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.643, 'AUC_ROC': 0.583, 'PREC_N_SCORES': 0.5}
Processing channel: D-1
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (52, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0. 250. 250. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.559, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.197, 'AUC_PR': 0.956, 'AUC_ROC': 0.938, 'PREC_N_SCORES': 0.923}
Processing channel: D-11
==== End Train ====
Train:  (48, 250, 25)
Test:  (29, 250, 25)
output:  (29, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. 60.
  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.448, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.196, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: D-12
==== End Train ====
Train

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


==== End Train ====
Train:  (69, 250, 55)
Test:  (10, 250, 55)
output:  (10, 250)
counts:  [  0.   0.   0.   0.   0.   0.  20. 200.   0.   0.]
output:  [0 0 0 0 0 0 1 1 0 0]
Metriche di valutazione:
 {'Accuracy': 0.8, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: D-15
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (37, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0.   0.   0.   0.   0.   0. 250. 250.]
output:  [0 0 0 0 0 0 1 1]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.583, 'AUC_ROC': 0.833, 'PREC_N_SCORES': 0.5}
Processing channel: D-16
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (25, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0.   0. 150. 250. 250.   0.   0.   0.]
output:  [0 0 1 1 1 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.625, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.383, 'AUC_ROC': 0.4, 'PREC_N_SCORES': 0.0}
Processing channel: D-2
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0. 181. 250. 250. 250. 250. 250. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.147, 'Precision': 0.071, 'Recall': 0.059, 'F1': 0.065, 'MCC': -0.717, 'AUC_PR': 0.983, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.941}
Processing channel: D-3
==== End Train ====
Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.  25. 250. 250. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.471, 'Precision': 0.167, 'Recall': 0.071, 'F1': 0.1, 'MCC': -0.231, 'AUC_PR': 0.849, 'AUC_ROC': 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (47, 250, 25)
Test:  (31, 250, 25)
output:  (31, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0. 80.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.419, 'Precision': 0.053, 'Recall': 1.0, 'F1': 0.1, 'MCC': 0.145, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: D-7
==== End Train ====
Train:  (47, 250, 25)
Test:  (30, 250, 25)
output:  (30, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.  60. 250. 250. 250. 250. 250. 250. 250. 250.
 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.167, 'Precision': 0.208, 'Recall': 0.455, 'F1': 0.286, 'MCC': -0.657, 'AUC_PR': 0.853, 'AUC_ROC': 0.861, 'PREC_N_SCORES': 0.727}
Processing channel: D-8
==== End Train ====
Train:  (48, 250

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0. 156. 250. 250. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.485, 'Precision': 0.3, 'Recall': 0.231, 'F1': 0.261, 'MCC': -0.127, 'AUC_PR': 0.766, 'AUC_ROC': 0.742, 'PREC_N_SCORES': 0.615}
Processing channel: E-4
==== End Train ====
Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.  50. 250. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.636, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.277, 'AUC_ROC': 0.274, 'PREC_N_SCORES': 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0. 150. 170.   0.   0.   0.   0.
   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.879, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.065, 'AUC_PR': 0.167, 'AUC_ROC': 0.726, 'PREC_N_SCORES': 0.0}
Processing channel: E-6
==== End Train ====
Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
  0.  0.  0.  0. 65.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.939, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.031, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: E-7
==== End Tra

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0. 100. 250. 250.  22.   0.   0.   0.
   0.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.824, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.091, 'AUC_PR': 0.589, 'AUC_ROC': 0.725, 'PREC_N_SCORES': 0.5}
Processing channel: E-9
==== End Train ====
Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0. 200. 150.   0.   0.   0.   0.
   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.939, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.361, 'AUC_ROC': 0.871, 'PREC_N_SCORES':

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0. 100.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.941, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.03, 'AUC_PR': 0.111, 'AUC_ROC': 0.758, 'PREC_N_SCORES': 0.0}
Processing channel: F-2
==== End Train ====
Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.   0.  81. 250. 250. 250. 250. 250.
 250. 250. 250. 250. 250. 250.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.471, 'Precision': 0.35, 'Recall': 0.583, 'F1': 0.438, 'MCC': -0.007, 'AUC_PR': 0.388, 'AUC_ROC': 0.439, 'P

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (36, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0.   0.   0.   0.   0. 250.   0.   0.]
output:  [0 0 0 0 0 1 0 0]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.143, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: M-4
==== End Train ====
Train:  (37, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0.   0.   0.   0.   0. 250.   0.   0.]
output:  [0 0 0 0 0 1 0 0]
Metriche di valutazione:
 {'Accuracy': 0.875, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: M-5
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (36, 250, 55)
Test:  (9, 250, 55)
output:  (9, 250)
counts:  [  0.   0.   0.   0.   0. 250.  50.   0.   0.]
output:  [0 0 0 0 0 1 1 0 0]
Metriche di valutazione:
 {'Accuracy': 0.778, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.375, 'AUC_ROC': 0.5, 'PREC_N_SCORES': 0.5}
Processing channel: M-6
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (27, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0. 150.]
output:  [0 0 0 0 0 0 0 1]
Metriche di valutazione:
 {'Accuracy': 0.875, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: M-7
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (27, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [ 0.  0.  0. 60. 40.  0.  0.  0.]
output:  [0 0 0 1 1 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.833, 'AUC_ROC': 0.917, 'PREC_N_SCORES': 0.5}
Processing channel: P-1
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0. 101.  99.   0.   0.   0.   0.
 211.  29.   0.   0. 214.  94.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.824, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.123, 'AUC_ROC': 0.173, 'PREC_N_SCORES': 0.0}
Processing channel: P-10


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


==== End Train ====
Train:  (82, 250, 55)
Test:  (24, 250, 55)
output:  (24, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0. 130.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.958, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.167, 'AUC_ROC': 0.783, 'PREC_N_SCORES': 0.0}
Processing channel: P-11


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


==== End Train ====
Train:  (75, 250, 55)
Test:  (14, 250, 55)
output:  (14, 250)
counts:  [  0.   0.   0.   0.  12.  94.   0. 120.   0.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 1 1 0 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.643, 'Precision': 0.25, 'Recall': 0.333, 'F1': 0.286, 'MCC': 0.055, 'AUC_PR': 0.369, 'AUC_ROC': 0.606, 'PREC_N_SCORES': 0.333}
Processing channel: P-14
==== End Train ====
Train:  (53, 250, 55)
Test:  (24, 250, 55)
output:  (24, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0. 175.   5.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.792, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.114, 'AUC_PR': 0.267, 'AUC_ROC': 0.818, 'PREC_N_SCORES': 0.0}
Processing channel: P-15
==== End Train ====
Train:  (69, 250, 55)
Test:  (11, 250, 55)
output:  (11, 250)
counts:  [ 0.  0.  0.  0.  0. 20.  0.  0.  0.  0.  0.]
output: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (33, 250, 25)
output:  (33, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.  99. 250. 250. 250. 250. 236.   0.
   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.818, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.931, 'AUC_ROC': 0.981, 'PREC_N_SCORES': 0.833}
Processing channel: P-4
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (48, 250, 25)
Test:  (31, 250, 25)
output:  (31, 250)
counts:  [  0.   0.   0.  50.  80.   0.   0.   0. 100. 100.   0.   0.   0.   0.
   0.   0.   0.   0.   0. 110.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.]
output:  [0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.871, 'Precision': 1.0, 'Recall': 0.2, 'F1': 0.333, 'MCC': 0.416, 'AUC_PR': 0.647, 'AUC_ROC': 0.754, 'PREC_N_SCORES': 0.4}
Processing channel: P-7
==== End Train ====
Train:  (53, 250, 25)
Test:  (32, 250, 25)
output:  (32, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.  50. 250. 250. 250. 250. 250. 250. 100.   0.
   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.646, 'AUC_ROC': 0.714, 'PREC_N_SCORES': 0.625}
Processing channel: 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (28, 250, 25)
output:  (28, 250)
counts:  [ 0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.
 80.  0.  0.  0.  0.  0.  0.  0.  0.  0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.964, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.333, 'AUC_ROC': 0.926, 'PREC_N_SCORES': 0.0}
Processing channel: S-1
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (52, 250, 25)
Test:  (29, 250, 25)
output:  (29, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0. 200. 247.   0.   0.   0.   0.   0.
   0.]
output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.276, 'Precision': 0.087, 'Recall': 1.0, 'F1': 0.16, 'MCC': 0.139, 'AUC_PR': 0.067, 'AUC_ROC': 0.241, 'PREC_N_SCORES': 0.0}
Processing channel: S-2
==== End Train ====
Train:  (14, 250, 55)
Test:  (7, 250, 55)
output:  (7, 250)
counts:  [ 0.  0.  0. 10.  0.  0.  0.]
output:  [0 0 0 1 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.857, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.5, 'AUC_ROC': 0.833, 'PREC_N_SCORES': 0.0}
Processing channel: T-1
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (53, 250, 25)
Test:  (34, 250, 25)
output:  (34, 250)
counts:  [  0.   0.   0.   0.   0.   0.   0.   0.   0. 101. 250. 250. 250. 250.
 250. 148.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.  35.   0.
   0.   0.   0.   0.   0.   0.]
output:  [0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.647, 'Precision': 0.375, 'Recall': 0.75, 'F1': 0.5, 'MCC': 0.311, 'AUC_PR': 0.292, 'AUC_ROC': 0.322, 'PREC_N_SCORES': 0.125}
Processing channel: T-12
==== End Train ====
Train:  (18, 250, 55)
Test:  (9, 250, 55)
output:  (9, 250)
counts:  [  0.   0. 120.   0.   0.   0.   0.   0.   0.]
output:  [0 0 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.444, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': -0.316, 'AUC_PR': 0.25, 'AUC_ROC': 0.625, 'PREC_N_SCORES': 0.0}
Processing channel: T-13
==== End Train ====
Train:  (18, 250, 55)
Test:  (9, 250, 55)
output:  (9, 250)
counts:  [  0.   0.  60.  40.   0.   0.   0. 100.  50.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (41, 250, 55)
Test:  (8, 250, 55)
output:  (8, 250)
counts:  [ 0.  0.  0.  0. 25.  0.  0.  0.]
output:  [0 0 0 0 1 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.875, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Processing channel: T-8
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (10, 250, 55)
Test:  (6, 250, 55)
output:  (6, 250)
counts:  [ 0.  0.  0. 60.  0. 40.]
output:  [0 0 0 1 0 1]
Metriche di valutazione:
 {'Accuracy': 0.667, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 0.583, 'AUC_ROC': 0.75, 'PREC_N_SCORES': 0.5}
Processing channel: T-9
==== End Train ====


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train:  (4, 250, 55)
Test:  (4, 250, 55)
output:  (4, 250)
counts:  [  0.   0.   0. 110.]
output:  [0 0 0 1]
Metriche di valutazione:
 {'Accuracy': 0.75, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0, 'AUC_PR': 1.0, 'AUC_ROC': 1.0, 'PREC_N_SCORES': 1.0}
Risultati salvati in risultatiNASA_ROCKAD.csv


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [44]:
from io import StringIO
import math
file_path = "risultatiNASA_ROCKAD.csv"
df = pd.read_csv(file_path)

# Calcola la media delle colonne numeriche
column_means = df.mean(numeric_only=True)

# Stampa le medie
print("Medie delle colonne numeriche:")
print(column_means)

print("========= CALCOLI CON TP...")
df = pd.read_csv(file_path)
TP = df["TP"].sum()
TN = df["TN"].sum()
FP = df["FP"].sum()
FN = df["FN"].sum()

accuracy = (TP + TN) / (TP + TN + FP + FN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)
mcc = (TP*TN-FP*FN)/(math.sqrt((TP + FP)*(TP+FN)*(TN+TP)*(TN+FN)))


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1_score)
print("MCC:", mcc)

Medie delle colonne numeriche:
Accuracy      0.616765
Precision     0.093679
Recall        0.145272
F1            0.086543
MCC          -0.099062
AUC_ROC       0.745765
AUC_PR        0.589877
TP            0.716049
TN           14.123457
FP            6.555556
FN            3.320988
dtype: float64
Accuracy: 0.6003996003996004
Precision: 0.09847198641765705
Recall: 0.17737003058103976
F1 Score: 0.12663755458515283
MCC: -0.13373121449680916


---

# ROCKET

In [6]:
from sktime.transformations.panel.rocket import Rocket
import numpy as np

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)

# ======================= FIT e PREDICT e SCORE =============================

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 10000

rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)


# Applica i kernel alle serie temporali
features_train = rocket_transformer.fit_transform(X_train)
features_test = rocket_transformer.transform(X_test)
print("features_train: ", features_train.shape)
print("features_test: ", features_test.shape)
# Sintesi delle caratteristiche per esempio
anomaly_scores_train = np.mean(features_train, axis=1)  
anomaly_scores_test = np.mean(features_test, axis=1)  

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

print(np.unique(anomaly_labels_test, return_counts=True))

# Visualizzazione dei risultati
print("Anomalie rilevate nel training set:", anomaly_labels_train)
print("Anomalie rilevate nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test)
print("Metriche di valutazione sul test set:\n", metrics)
# {'Accuracy': 0.546, 'Precision': 1.0, 'Recall': 0.048, 'F1': 0.092, 'MCC': 0.161}

features_train:  (347, 20000)
features_test:  (130, 20000)
(array([0, 1]), array([127,   3], dtype=int64))
Anomalie rilevate nel training set: 0      0
1      0
2      0
3      0
4      0
      ..
342    1
343    0
344    0
345    0
346    0
Length: 347, dtype: int32
Anomalie rilevate nel test set: 0      0
1      0
2      0
3      0
4      0
      ..
125    0
126    0
127    0
128    0
129    0
Length: 130, dtype: int32
Metriche di valutazione sul test set:
 {'Accuracy': 0.546, 'Precision': 1.0, 'Recall': 0.048, 'F1': 0.092, 'MCC': 0.161}


### KNN ( UNSUPERVISED )

In [None]:
from pyod.models.knn import KNN
from sktime.transformations.panel.rocket import Rocket

# Genera kernel convoluzionali casuali
num_kernels = 10000
rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)

# Applica i kernel alle serie temporali
features_train = rocket_transformer.fit_transform(X_train)
features_test = rocket_transformer.transform(X_test)


# Addestramento del modello supervisionato
model = KNN()
model.fit(features_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred.shape)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.531, 'Precision': 0.6, 'Recall': 0.048, 'F1': 0.09, 'MCC': 0.049, 'AUC_PR': 0.407, 'AUC_ROC': 0.294, 'PREC_N_SCORES': 0.29}

Predizioni nel test set: (130,)
Metriche di valutazione:
 {'Accuracy': 0.531, 'Precision': 0.6, 'Recall': 0.048, 'F1': 0.09, 'MCC': 0.049, 'AUC_PR': 0.407, 'AUC_ROC': 0.294, 'PREC_N_SCORES': 0.29}


## Rilevamento di anomalie ROCKET SUPERVISED
Utilizzo di vari algoritmi unsupervised e non con kernel ROCKET

### Regressione Logistica -> Classificatore lineare ( SUPERVISED )

In [8]:
from sklearn.linear_model import LogisticRegression

from rocket_functions import generate_kernels, apply_kernels

# Genera kernel convoluzionali casuali
num_kernels = 10000
rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)

# Applica i kernel alle serie temporali
features_train = rocket_transformer.fit_transform(X_train)
features_test = rocket_transformer.transform(X_test)


# Addestramento del modello supervisionato
model = LogisticRegression(max_iter=1000)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.977, 'Precision': 0.972, 'Recall': 0.92, 'F1': 0.945, 'MCC': 0.932, 'AUC_PR': 0.962, 'AUC_ROC': 0.984, 'PREC_N_SCORES': 0.929}

Predizioni nel test set: [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.777, 'Precision': 0.704, 'Recall': 0.919, 'F1': 0.797, 'MCC': 0.584, 'AUC_PR': 0.773, 'AUC_ROC': 0.837, 'PREC_N_SCORES': 0.774}


### Prova con Dettagli dal GitHub del Paper

In [9]:
from sklearn.linear_model import Ridge

from rocket_functions import generate_kernels, apply_kernels

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)


# Genera kernel convoluzionali casuali
num_kernels = 10000
rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)

# Applica i kernel alle serie temporali
features_train = rocket_transformer.fit_transform(X_train)
features_test = rocket_transformer.transform(X_test)


# Addestramento del modello supervisionato
model = Ridge(alpha=1.0)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
anomaly_scores_test = model.predict(features_test)
anomaly_scores_train = model.predict(features_train)

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Visualizzazione dei risultati
print("Predizioni nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test, y_proba=anomaly_scores_test)
print("Metriche di valutazione:\n", metrics)
#  {'Accuracy': 0.888, 'Precision': 0.966, 'Recall': 0.496, 'F1': 0.655, 'MCC': 0.644, 'AUC_PR': 0.922, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.912}

Predizioni nel test set: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.538, 'Precision': 1.0, 'Recall': 0.032, 'F1': 0.062, 'MCC': 0.131, 'AUC_PR': 0.779, 'AUC_ROC': 0.844, 'PREC_N_SCORES': 0.758}


## LogisticClassifierCV ( SUPERVISED )

In [None]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from scipy.special import softmax

# ======================= FIT e PREDICT e SCORE =============================

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 10000
rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)

# Applica i kernel alle serie temporali
features_train = rocket_transformer.fit_transform(X_train)
features_test = rocket_transformer.transform(X_test)
print("features_train: ", features_train)

# Addestramento del modello supervisionato
model = RidgeClassifierCV(alphas = np.logspace(-3, 3, 10))
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)

# Per separare multiclasse o monoclasse
if  len(np.unique(y_test)) > 2:
    y_proba = softmax(model.decision_function(features_test), axis=1)
else:
    y_proba = softmax(model.decision_function(features_test), axis=0)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)

features_train:          0         1      2         3      4         5      6         7      \
0    0.000000 -0.506472  0.116  3.032165  0.264  1.902368  0.605  2.077979   
1    0.000000 -0.506838  0.116  3.042692  0.264  1.903324  0.605  2.079800   
2    0.000000 -0.505849  0.116  3.041053  0.264  1.904561  0.605  2.081079   
3    0.000000 -0.506395  0.116  3.044194  0.264  1.903279  0.605  2.078549   
4    0.000000 -0.506292  0.116  3.041560  0.264  1.905488  0.605  2.078733   
..        ...       ...    ...       ...    ...       ...    ...       ...   
342  0.070833  1.137780  0.192  5.092749  0.384  9.027847  1.000  5.501685   
343  0.004167  0.001192  0.480  1.313994  0.432  4.937717  0.700  4.773400   
344  0.000000 -0.817197  0.060  3.949451  0.220  1.828851  0.580  0.880673   
345  0.016667  0.545645  0.236  4.010215  0.436  8.576636  1.000  4.302028   
346  0.000000 -0.620669  0.060  2.056262  0.236  1.762436  0.640  1.574257   

        8         9      ...  19990     19991 

In [31]:
print("features_train: ", features_train.shape)
print("y_train: ", y_train.shape)

features_train:  (53, 20000)
y_train:  (0,)


# Test Rocket su NASA

### ROCKET con NASA -> Unsupervised

In [None]:
import numpy as np
import pandas as pd
from sktime.transformations.panel.rocket import Rocket

from pyod.models.knn import KNN

from NASA.nasa import NASA
from valutazione_metriche import evaluate_metrics
from sklearn.metrics import roc_auc_score, average_precision_score, precision_recall_fscore_support

RANDOM_STATE = 42
STEP = 250
OFFSET = 20
OUTPUT_FILE = "risultatiNASA_ROCKET.csv"

# ======================= ELABORAZIONE DATI TRAINING =============================

def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {}
    res["Accuracy"] = (y_test == y_pred).mean().round(digits)
    precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred, average="binary", zero_division=0)
    res["Precision"] = round(precision, digits)
    res["Recall"] = round(recall, digits)
    res["F1"] = round(f1, digits)

    # Verifica per AUC solo se ci sono entrambe le classi
    if y_proba is not None:
        unique_classes = np.unique(y_test)
        if len(unique_classes) > 1:
            res["AUC_PR"] = round(average_precision_score(y_test, y_proba), digits)
            res["AUC_ROC"] = round(roc_auc_score(y_test, y_proba), digits)
        else:
            res["AUC_PR"] = None
            res["AUC_ROC"] = None
    return res

results_df = pd.DataFrame(columns=["Channel", "Accuracy", "Precision", "Recall", "F1", "MCC", "AUC_ROC", "AUC_PR"])

# ======================= ELABORAZIONE DATI TRAINING =============================
# Itera su tutti i canali del dataset
for channel_id in NASA.channel_ids:
    # if channel_id == "D-12" or channel_id == "T-10" or channel_id == "T-9":   # Non uso questi perchè NeirestNeigthbor dato che necessita di avere più 
    #     continue
    if channel_id == "T-10":
        continue
    print(f"Processing channel: {channel_id}")

    # Lista per memorizzare i segmenti di training
    X_train_final = []

    # Uso del dataset NASA per tutti i canali
    dataset = NASA("./datasets", channel_id, mode="anomaly")
    # print(dataset.data.shape)
    data = dataset.data
    train = []
    for i in range(0, data.shape[0] - STEP +1, OFFSET): 
        train.append(data[i:i+STEP])

    train = np.stack(train)
    # print("train: ", train.shape)  # Mostra le prime 5 righe dell'array

    # ======================= FIT e PREDICT e SCORE =============================
    dataset = NASA("./datasets", channel_id, mode="anomaly", train=False)
    data = dataset.data
    Test = []
    output = []
    o = np.zeros(data.shape[0])
    for start,end in dataset.anomalies:
        o[start:end] = 1
    for i in range(0, data.shape[0] - STEP +1, STEP): 
        Test.append(data[i:i+STEP])
        output.append(o[i:i+STEP])

    output = np.stack(output)
    Test = np.stack(Test)

    num_kernels = 10000
    rocket_transformer = Rocket(num_kernels = num_kernels, n_jobs=-1)


    # Applica i kernel alle serie temporali
    features_train = rocket_transformer.fit_transform(train)
    features_test = rocket_transformer.transform(Test)

    # Addestramento del modello supervisionato
    # Addestramento del modello supervisionato
    model = KNN(n_neighbors=2)
    model.fit(features_train)

    # Predizione delle anomalie nei dati di test
    y_pred = model.predict(features_test)
    y_proba = model.decision_function(features_test)

    # Visualizzazione dei risultati
    # print("Predizioni nel test set:", y_pred)
    # print("Predizioni nel test set:", y_proba)

    # Scegliere se una sequenda è un anomalia o no -> 10%
    threshold = 25 # -> 10%
    # Conta il numero di 1 in ogni lista
    counts = np.sum(output, axis=1)
    output = np.where(counts >= threshold, 1, 0)


    print("Output: ", output)
    print("predicted_anomalies: ", y_pred)
    metrics = evaluate_metrics(output, y_pred, y_proba)
    print("Metriche di valutazione:\n", metrics)

    # Calcolo di VP, VN, FP, FN
    TP = ((output == 1) & (y_pred == 1)).sum()
    TN = ((output == 0) & (y_pred == 0)).sum()
    FP = ((output == 0) & (y_pred == 1)).sum()
    FN = ((output == 1) & (y_pred == 0)).sum()

    results_df = pd.concat([results_df, pd.DataFrame([{
        "Channel": channel_id,
        "Accuracy": metrics.get("Accuracy", 0),
        "Precision": metrics.get("Precision", 0),
        "Recall": metrics.get("Recall", 0),
        "MCC": metrics.get("MCC", 0),
        "AUC_PR": metrics.get("AUC_PR", 0),
        "AUC_ROC": metrics.get("AUC_ROC", 0),
        "F1": metrics.get("F1", 0),
        "TP": TP,
        "TN": TN,
        "FP": FP,
        "FN": FN,
    }])], ignore_index=True)

    print("=========================FINE CHANNEL=============================")
# ======================= SALVATAGGIO RISULTATI =============================
results_df.to_csv(OUTPUT_FILE, index=False)
print(f"Risultati salvati in {OUTPUT_FILE}")

Processing channel: A-1
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
predicted_anomalies:  [0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 1 0 0 0 0 1]
Metriche di valutazione:
 {'Accuracy': 0.735, 'Precision': 0.1, 'Recall': 1.0, 'F1': 0.182, 'AUC_PR': 0.333, 'AUC_ROC': 0.939}
Processing channel: A-2


  results_df = pd.concat([results_df, pd.DataFrame([{


Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
predicted_anomalies:  [1 1 1 1 1 0 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.161, 'Precision': 0.071, 'Recall': 1.0, 'F1': 0.133, 'AUC_PR': 0.571, 'AUC_ROC': 0.793}
Processing channel: A-3
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
predicted_anomalies:  [0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.938, 'Precision': 0.333, 'Recall': 1.0, 'F1': 0.5, 'AUC_PR': 1.0, 'AUC_ROC': 1.0}
Processing channel: A-4
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]
predicted_anomalies:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.969, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'AUC_PR': 0.5, 'AUC_ROC': 0.968}
Processing channel: A-5
Output:  [0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]
predicted_anomalies: 

  results_df = pd.concat([results_df, pd.DataFrame([{


Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0]
predicted_anomalies:  [0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.938, 'Precision': 0.833, 'Recall': 0.833, 'F1': 0.833, 'AUC_PR': 0.866, 'AUC_ROC': 0.84}
Processing channel: P-3
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 0 0]
predicted_anomalies:  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Metriche di valutazione:
 {'Accuracy': 0.182, 'Precision': 0.182, 'Recall': 1.0, 'F1': 0.308, 'AUC_PR': 0.958, 'AUC_ROC': 0.988}
Processing channel: P-4
Output:  [0 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
predicted_anomalies:  [0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.871, 'Precision': 0.667, 'Recall': 0.4, 'F1': 0.5, 'AUC_PR': 0.673, 'AUC_ROC': 0.923}
Processing channel: P-7
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1

  results_df = pd.concat([results_df, pd.DataFrame([{


Output:  [0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
predicted_anomalies:  [1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.735, 'Precision': 0.333, 'Recall': 0.125, 'F1': 0.182, 'AUC_PR': 0.331, 'AUC_ROC': 0.481}
Processing channel: T-12
Output:  [0 0 1 0 0 0 0 0 0]
predicted_anomalies:  [0 1 0 0 0 1 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.667, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'AUC_PR': 0.25, 'AUC_ROC': 0.625}
Processing channel: T-13
Output:  [0 0 1 1 0 0 0 1 1]
predicted_anomalies:  [0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.556, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'AUC_PR': 0.51, 'AUC_ROC': 0.45}
Processing channel: T-2
Output:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1]
predicted_anomalies:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.794, 'Precision': 0.0, 

In [45]:
from io import StringIO
df = pd.read_csv("risultatiNASA_ROCKET.csv")

# Calcola la media delle colonne numeriche
column_means = df.mean(numeric_only=True)

# Stampa le medie
print("Medie delle colonne numeriche:")
print(column_means)

print("========= CALCOLI CON TP...")
df = pd.read_csv(file_path)
TP = df["TP"].sum()
TN = df["TN"].sum()
FP = df["FP"].sum()
FN = df["FN"].sum()

accuracy = (TP + TN) / (TP + TN + FP + FN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)
f1_score = 2 * (precision * recall) / (precision + recall)
mcc = (TP*TN-FP*FN)/(math.sqrt((TP + FP)*(TP+FN)*(TN+TP)*(TN+FN)))


print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1_score)
print("MCC:", mcc)

Medie delle colonne numeriche:
Accuracy      0.725074
Precision     0.307284
Recall        0.548975
F1            0.338012
MCC           0.000000
AUC_ROC       0.751304
AUC_PR        0.575190
TP            2.024691
TN           15.987654
FP            4.802469
FN            1.901235
dtype: float64
Accuracy: 0.6003996003996004
Precision: 0.09847198641765705
Recall: 0.17737003058103976
F1 Score: 0.12663755458515283
MCC: -0.13373121449680916


#### Senza KNN

In [15]:
import numpy as np
import pandas as pd

from pyod.models.knn import KNN

from NASA.nasa import NASA
from valutazione_metriche import evaluate_metrics
from rocket_functions import generate_kernels, apply_kernels

RANDOM_STATE = 42
STEP = 250

# ======================= ELABORAZIONE DATI TRAINING =============================

# Lista per memorizzare i segmenti di training
X_train_final = []

# Uso del dataset NASA per tutti i canali
dataset = NASA("./datasets", NASA.channel_ids[1], mode="anomaly")
print(dataset.data.shape)
data = dataset.data
train = []
for i in range(0, data.shape[0] - STEP +1, STEP): 
    train.append(data[i:i+STEP])

train = np.stack(train)


# ======================= ELABORAZIONE DATI TEST =============================

dataset = NASA("./datasets", NASA.channel_ids[1], mode="anomaly", train=False)
data = dataset.data
Test = []
output = []
o = np.zeros(data.shape[0])
for start,end in dataset.anomalies:
    o[start:end] = 1
for i in range(0, data.shape[0] - STEP +1, STEP): 
    Test.append(data[i:i+STEP])
    output.append(o[i:i+STEP])

output = np.stack(output)
Test = np.stack(Test)


# ======================= PRE-PROCESSING =============================

# scaler = StandardScaler()
# X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
# X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# ======================= FIT e PREDICT e SCORE =============================
# input_length = train.shape[0]
num_kernels = 10000

train = train.reshape(train.shape[0], -1)  # Da 3D a 2D
Test = Test.reshape(Test.shape[0], -1)
print("train: ", train.shape) 
print("TEST: ", Test.shape)

kernels = generate_kernels(STEP, num_kernels)

train = train.astype(np.float64)
features_train = apply_kernels(train, kernels)

Test = Test.astype(np.float64)
features_test = apply_kernels(Test, kernels)


# Applica i kernel alle serie temporali
features_train = apply_kernels(train, kernels)

features_test = apply_kernels(Test, kernels)



# RImozioni valori infiniti
features_train = np.nan_to_num(features_train, nan=0.0, posinf=np.finfo(np.float32).max, neginf=np.finfo(np.float32).min)
features_test= np.nan_to_num(features_test, nan=0.0, posinf=np.finfo(np.float32).max, neginf=np.finfo(np.float32).min)

# Sintesi delle caratteristiche per esempio
anomaly_scores_train = np.mean(features_train, axis=1)  
anomaly_scores_test = np.mean(features_test, axis=1)  

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Scegliere se una sequenda è un anomalia o no -> 10%
threshold = 25 # -> 10%
# Conta il numero di 1 in ogni lista
counts = np.sum(output, axis=1)
output = np.where(counts >= threshold, 1, 0)


print(output)
metrics = evaluate_metrics(output, anomaly_labels_test)
print("Metriche di valutazione:\n", metrics)


(2648, 25)
train:  (10, 6250)
TEST:  (31, 6250)
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0]
Metriche di valutazione:
 {'Accuracy': 0.935, 'Precision': 0.0, 'Recall': 0.0, 'F1': 0.0, 'MCC': 0.0}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


# Finale Prove ROCKAD -> ModelSelection

In [66]:
# ======================= ELABORAZIONE DATI TRAINING =============================
X_train_final = []
y_train_final = []

for channel in dfSegment["channel"].unique():
    for segment in dfSegment[dfSegment["channel"] == channel]["segment"].unique():
        mask = (dfSegment["train"] == 1) & (dfSegment["channel"] == channel) & (dfSegment["segment"] == segment)
        X_trainS = dfSegment.loc[mask, "value"]
        y_trainS = dfSegment.loc[mask, "anomaly"].reset_index(drop=True).values
        
        for i in range(0, len(X_trainS) - STEP + 1, STEP):
            X_train_final.append(X_trainS[i:i + STEP])
            y_train_final.append(y_trainS[i])

X_train = np.array(X_train_final).reshape(-1, STEP, 1).transpose(0, 2, 1)
y_train = np.array(y_train_final)

# ======================= ELABORAZIONE DATI TEST =============================
X_test_final = []
y_test_final = []

for channel in dfSegment["channel"].unique():
    for segment in test_data[test_data["channel"] == channel]["segment"].unique():
        mask = (test_data["channel"] == channel) & (test_data["segment"] == segment)
        X_testS = test_data.loc[mask, "value"]
        y_testS = test_data.loc[mask, "anomaly"]
        
        for i in range(0, len(X_testS) - STEP + 1, STEP):
            X_test_final.append(X_testS[i:i + STEP])
            y_test_final.append(y_testS[i])

X_test = np.array(X_test_final).reshape(-1, STEP, 1).transpose(0, 2, 1)
y_test = np.array(y_test_final)

# ======================= PRE-PROCESSING =============================
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[2])).reshape(X_train.shape)
X_test = scaler.transform(X_test.reshape(-1, X_test.shape[2])).reshape(X_test.shape)

# ======================= FIT e PREDICT e SCORE =============================
rockad = ROCKAD(n_estimators=10, n_kernels=20000, n_jobs=-1, random_state=RANDOM_STATE, power_transform=False)
rockad.fit(X_train)

score_train = rockad.predict_proba(X_train).reshape(-1, 1)
score_test = rockad.predict_proba(X_test).reshape(-1, 1)

decision_func = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
decision_func.fit(score_train, y_train)

result = decision_func.predict(score_test)
result_binary = np.where(result == -1, 0, 1)

metrics = evaluate_metrics(y_test, result_binary, score_test)
print("Metriche di valutazione:\n", metrics)


  y_test_final.append(y_testS[i])


Metriche di valutazione:
 {'Accuracy': 0.477, 'Precision': 0.477, 'Recall': 1.0, 'F1': 0.646, 'MCC': 0.0, 'AUC_PR': 0.405, 'AUC_ROC': 0.299, 'PREC_N_SCORES': 0.29}
