In [3]:
import os
import random
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, matthews_corrcoef, average_precision_score
from pyod.utils.data import precision_n_scores
from pyod.models.iforest import IForest
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
# Per l'uso della memoria degli algoritmi
from memory_profiler import memory_usage
# Per la metrica sul tempo di Addestramento e Inferenza
import time

In [4]:
def evaluate_metrics(y_test, y_pred, y_proba=None, digits=3):
    res = {"Accuracy": round(accuracy_score(y_test, y_pred), digits),
           "Precision": precision_score(y_test, y_pred).round(digits),
           "Recall": recall_score(y_test, y_pred).round(digits),
           "F1": f1_score(y_test, y_pred).round(digits),
           "MCC": round(matthews_corrcoef(y_test, y_pred), ndigits=digits)}
    if y_proba is not None:
        res["AUC_PR"] = average_precision_score(y_test, y_proba).round(digits)
        res["AUC_ROC"] = roc_auc_score(y_test, y_proba).round(digits)
        res["PREC_N_SCORES"] = precision_n_scores(y_test, y_proba).round(digits)
    return res


def set_seed_numpy(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)

In [5]:
features = [
    "mean", "var", "std", "len", "duration", "len_weighted", "gaps_squared", "n_peaks",
    "smooth10_n_peaks", "smooth20_n_peaks", "var_div_duration", "var_div_len",
    "diff_peaks", "diff2_peaks", "diff_var", "diff2_var", "kurtosis", "skew",
]
SEED = 2137

In [6]:
df = pd.read_csv("data/dataset.csv", index_col="segment")

X_train, y_train = df.loc[df.train==1, features], df.loc[df.train==1, "anomaly"]
X_test, y_test = df.loc[df.train==0, features], df.loc[df.train==0, "anomaly"]
X_train_nominal = df.loc[(df.anomaly==0)&(df.train==1), features]

prep = StandardScaler()
X_train_nominal2 = prep.fit_transform(X_train_nominal)
X_train2 = prep.transform(X_train)
X_test2 = prep.transform(X_test)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
set_seed_numpy(SEED) 

# Supervised Model

In [8]:
model = AdaBoostClassifier(random_state=SEED)
model.fit(X_train2, y_train)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))



AdaBoostClassifier(random_state=2137) 
 {'Accuracy': 0.934, 'Precision': 0.89, 'Recall': 0.788, 'F1': 0.836, 'MCC': 0.797, 'AUC_PR': 0.923, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.841}


In [9]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
y_predicted_score = model.predict_proba(X_test)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.957, 'Precision': 0.959, 'Recall': 0.832, 'F1': 0.891, 'MCC': 0.867, 'AUC_PR': 0.961, 'AUC_ROC': 0.986, 'PREC_N_SCORES': 0.876}


In [10]:
import xgboost as xgb

y_train_np = y_train

model = xgb.XGBClassifier (
    n_estimators=50,
    max_depth=3,
    learning_rate=0.1,
    random_state=SEED
)
model.fit(X_train_scaled, y_train)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test_scaled)
y_predicted_score = model.predict_proba(X_test_scaled)[:, 1]  # Probabilità per la classe positiva
# Questa è la probabilità che la classificazione sia corretta

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=3, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              multi_strategy=None, n_estimators=50, n_jobs=None,
              num_parallel_tree=None, random_state=2137, ...) 
 {'Accuracy': 0.953, 'Precision': 0.94, 'Recall': 0.832, 'F1': 0.883, 'MCC': 0.856, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


In [11]:
from sklearn.svm import LinearSVC

# Inizializza e addestra il modello
model = LinearSVC()
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.928, 'Precision': 0.921, 'Recall': 0.726, 'F1': 0.812, 'MCC': 0.777, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


In [12]:
from sklearn.linear_model import LogisticRegression

# Inizializza e addestra il modello
model = LogisticRegression(max_iter=500)
model.fit(X_train2, y_train)

# Predizione
y_test_scores = model.decision_function(X_test2)
# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test2)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

{'Accuracy': 0.924, 'Precision': 0.92, 'Recall': 0.708, 'F1': 0.8, 'MCC': 0.764, 'AUC_PR': 0.949, 'AUC_ROC': 0.976, 'PREC_N_SCORES': 0.867}


## Unsupervised Model

MO_GAAL

In [13]:
from pyod.models.mo_gaal import MO_GAAL
import os
os.environ['TF_USE_LEGACY_KERAS'] = 'True'

model = MO_GAAL()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))
 # {'Accuracy': 0.896, 'Precision': 0.939, 'Recall': 0.549, 'F1': 0.693, 'MCC': 0.669, 'AUC_PR': 0.771, 'AUC_ROC': 0.849, 'PREC_N_SCORES': 0.699}

KeyboardInterrupt: 

ANO-GAAL

Non funzionante

In [None]:
import os
os.environ["TF_USE_LEGACY_KERAS"] = "True"

# Ora importa PyOD e usa AnoGAN come prima
from pyod.models.anogan import AnoGAN
import tensorflow as tf

model = AnoGAN(verbose=1) # per stampare più cose
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


Train iter: 1
Train iter: 2
Train iter: 3
Train iter: 4
Train iter: 5
Train iter: 6
Train iter: 7
Train iter: 8
Train iter: 9
Train iter: 10
Train iter: 11
Train iter: 12
Train iter: 13
Train iter: 14
Train iter: 15
Train iter: 16
Train iter: 17
Train iter: 18
Train iter: 19
Train iter: 20
Train iter: 21
Train iter: 22
Train iter: 23
Train iter: 24
Train iter: 25
Train iter: 26
Train iter: 27
Train iter: 28
Train iter: 29
Train iter: 30
Train iter: 31
Train iter: 32
Train iter: 33
Train iter: 34
Train iter: 35
Train iter: 36
Train iter: 37
Train iter: 38
Train iter: 39
Train iter: 40
Train iter: 41
Train iter: 42
Train iter: 43
Train iter: 44
Train iter: 45
Train iter: 46
Train iter: 47
Train iter: 48
Train iter: 49
Train iter: 50
Train iter: 51
Train iter: 52
Train iter: 53
Train iter: 54
Train iter: 55
Train iter: 56
Train iter: 57
Train iter: 58
Train iter: 59
Train iter: 60
Train iter: 61
Train iter: 62
Train iter: 63
Train iter: 64
Train iter: 65
Train iter: 66
Train iter: 67
Trai

KeyboardInterrupt: 

SO_GAAL

Non funzionante

In [None]:
from pyod.models.so_gaal import SO_GAAL

# Verifica le dimensioni dei dati generati
print("Dimensione X_train:", X_train.shape)
print("Dimensione y_train:", y_train.shape)
print("Dimensione X_test:", X_test.shape)
print("Dimensione y_test:", y_test.shape)

model = SO_GAAL()
model.fit(X_train2[:len(X_train2) // 500 * 500])

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

# Valutazione del modello
print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))


Dimensione X_train: (1594, 18)
Dimensione y_train: (1594,)
Dimensione X_test: (529, 18)
Dimensione y_test: (529,)
Epoch 1 of 60
Epoch 2 of 60
Epoch 3 of 60
Epoch 4 of 60
Epoch 5 of 60
Epoch 6 of 60
Epoch 7 of 60
Epoch 8 of 60
Epoch 9 of 60
Epoch 10 of 60
Epoch 11 of 60
Epoch 12 of 60
Epoch 13 of 60
Epoch 14 of 60
Epoch 15 of 60
Epoch 16 of 60
Epoch 17 of 60
Epoch 18 of 60
Epoch 19 of 60
Epoch 20 of 60
Epoch 21 of 60
Epoch 22 of 60
Epoch 23 of 60
Epoch 24 of 60
Epoch 25 of 60
Epoch 26 of 60
Epoch 27 of 60
Epoch 28 of 60
Epoch 29 of 60
Epoch 30 of 60
Epoch 31 of 60
Epoch 32 of 60
Epoch 33 of 60
Epoch 34 of 60
Epoch 35 of 60
Epoch 36 of 60
Epoch 37 of 60
Epoch 38 of 60
Epoch 39 of 60
Epoch 40 of 60
Epoch 41 of 60
Epoch 42 of 60
Epoch 43 of 60
Epoch 44 of 60
Epoch 45 of 60
Epoch 46 of 60
Epoch 47 of 60
Epoch 48 of 60
Epoch 49 of 60
Epoch 50 of 60
Epoch 51 of 60
Epoch 52 of 60
Epoch 53 of 60
Epoch 54 of 60
Epoch 55 of 60
Epoch 56 of 60
Epoch 57 of 60
Epoch 58 of 60
Epoch 59 of 60
Epoch 60 o

RF+ICCS

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Inizializza e addestra il modello
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

# Previsioni e probabilità di previsione
y_predicted = model.predict(X_test)
# Predizione
y_test_scores = model.predict_proba(X_test)

# Questa è la probabilità che la classificazione sia corretta
print(evaluate_metrics(y_test, y_predicted, y_predicted_score))

NameError: name 'X_train' is not defined

Linear+L2

In [None]:
from sklearn.linear_model import RidgeClassifier

# Inizializza e addestra il modello Ridge Classifier (Linear + L2)
model = RidgeClassifier(alpha=1.0)  # 'alpha' è il parametro di regolarizzazione L2
model.fit(X_train, y_train)

# Predizione delle etichette di classe
y_predicted = model.predict(X_test)

# Ottieni le probabilità della classe positiva per AUC (si utilizza decision_function per ottenere punteggi di decisione)
y_test_scores = model.decision_function(X_test)

# Calcola e stampa le metriche
metrics = evaluate_metrics(y_test, y_predicted, y_test_scores)
print(metrics)


{'Accuracy': 0.902, 'Precision': 0.969, 'Recall': 0.558, 'F1': 0.708, 'MCC': 0.69, 'AUC_PR': 0.889, 'AUC_ROC': 0.95, 'PREC_N_SCORES': 0.814}


Isolation Forest

In [None]:
model = IForest(random_state=SEED, contamination=.2)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

IForest(behaviour='old', bootstrap=False, contamination=0.2, max_features=1.0,
    max_samples='auto', n_estimators=100, n_jobs=1, random_state=2137,
    verbose=0) 
 {'Accuracy': 0.701, 'Precision': 0.297, 'Recall': 0.292, 'F1': 0.295, 'MCC': 0.105, 'AUC_PR': 0.347, 'AUC_ROC': 0.635, 'PREC_N_SCORES': 0.301}


KNN

In [None]:
from pyod.models.knn import KNN

model = KNN()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0) 
 {'Accuracy': 0.849, 'Precision': 0.78, 'Recall': 0.407, 'F1': 0.535, 'MCC': 0.489, 'AUC_PR': 0.658, 'AUC_ROC': 0.852, 'PREC_N_SCORES': 0.593}


OCSVM

In [None]:
from pyod.models.ocsvm import OCSVM

model = OCSVM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

OCSVM(cache_size=200, coef0=0.0, contamination=0.1, degree=3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False) 
 {'Accuracy': 0.837, 'Precision': 0.721, 'Recall': 0.389, 'F1': 0.506, 'MCC': 0.447, 'AUC_PR': 0.659, 'AUC_ROC': 0.788, 'PREC_N_SCORES': 0.655}


ABOD

In [None]:
from pyod.models.abod import ABOD

model = ABOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ABOD(contamination=0.1, method='fast', n_neighbors=5) 
 {'Accuracy': 0.845, 'Precision': 0.782, 'Recall': 0.381, 'F1': 0.512, 'MCC': 0.472, 'AUC_PR': 0.644, 'AUC_ROC': 0.843, 'PREC_N_SCORES': 0.584}


INNE

In [None]:
from pyod.models.inne import INNE

model = INNE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

INNE(contamination=0.1, max_samples='auto', n_estimators=200,
   random_state=None) 
 {'Accuracy': 0.832, 'Precision': 0.694, 'Recall': 0.381, 'F1': 0.491, 'MCC': 0.427, 'AUC_PR': 0.636, 'AUC_ROC': 0.805, 'PREC_N_SCORES': 0.655}


ALAD

In [None]:
from pyod.models.alad import ALAD

model = ALAD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ALAD(activation_hidden_disc='tanh', activation_hidden_gen='tanh',
   add_disc_zz_loss=True, add_recon_loss=False, batch_size=32,
   contamination=0.1, dec_layers=[5, 10, 25], device=device(type='cpu'),
   disc_xx_layers=[25, 10, 5], disc_xz_layers=[25, 10, 5],
   disc_zz_layers=[25, 10, 5], dropout_rate=0.2, enc_layers=[25, 10, 5],
   epochs=200, lambda_recon_loss=0.1, latent_dim=2,
   learning_rate_disc=0.0001, learning_rate_gen=0.0001,
   output_activation=None, preprocessing=False,
   spectral_normalization=False, verbose=0) 
 {'Accuracy': 0.783, 'Precision': 0.485, 'Recall': 0.283, 'F1': 0.358, 'MCC': 0.25, 'AUC_PR': 0.426, 'AUC_ROC': 0.626, 'PREC_N_SCORES': 0.407}


LMDD

In [None]:
from pyod.models.lmdd import LMDD

model = LMDD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LMDD(contamination=0.1, dis_measure='aad', n_iter=50, random_state=None) 
 {'Accuracy': 0.822, 'Precision': 1.0, 'Recall': 0.168, 'F1': 0.288, 'MCC': 0.37, 'AUC_PR': 0.624, 'AUC_ROC': 0.765, 'PREC_N_SCORES': 0.663}


SOD

In [None]:
from pyod.models.sod import SOD

model = SOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOD(alpha=0.8, contamination=0.1, n_neighbors=20, ref_set=10) 
 {'Accuracy': 0.826, 'Precision': 0.611, 'Recall': 0.513, 'F1': 0.558, 'MCC': 0.453, 'AUC_PR': 0.621, 'AUC_ROC': 0.797, 'PREC_N_SCORES': 0.549}


COF

In [None]:
from pyod.models.cof import COF

model = COF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COF(contamination=0.1, method='fast', n_neighbors=20) 
 {'Accuracy': 0.834, 'Precision': 0.667, 'Recall': 0.442, 'F1': 0.532, 'MCC': 0.449, 'AUC_PR': 0.603, 'AUC_ROC': 0.774, 'PREC_N_SCORES': 0.593}


LODA

In [None]:
from pyod.models.loda import LODA

model = LODA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LODA(contamination=0.1, n_bins=10, n_random_cuts=100) 
 {'Accuracy': 0.83, 'Precision': 0.689, 'Recall': 0.372, 'F1': 0.483, 'MCC': 0.418, 'AUC_PR': 0.549, 'AUC_ROC': 0.692, 'PREC_N_SCORES': 0.522}


LUNAR

In [None]:
from pyod.models.lunar import LUNAR

model = LUNAR()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

LUNAR(contamination=0.1, epsilon=0.1, lr=0.001, model_type='WEIGHT',
   n_epochs=200, n_neighbours=5, negative_sampling='MIXED', proportion=1.0,
   scaler=MinMaxScaler(), val_size=0.1, verbose=0, wd=0.1) 
 {'Accuracy': 0.815, 'Precision': 0.742, 'Recall': 0.204, 'F1': 0.319, 'MCC': 0.322, 'AUC_PR': 0.539, 'AUC_ROC': 0.796, 'PREC_N_SCORES': 0.451}


CBLOF

In [None]:
from pyod.models.cblof import CBLOF

model = CBLOF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

CBLOF(alpha=0.9, beta=5, check_estimator=False, clustering_estimator=None,
   contamination=0.1, n_clusters=8, n_jobs=None, random_state=None,
   use_weights=False) 
 {'Accuracy': 0.802, 'Precision': 0.569, 'Recall': 0.292, 'F1': 0.386, 'MCC': 0.304, 'AUC_PR': 0.45, 'AUC_ROC': 0.574, 'PREC_N_SCORES': 0.372}


DIF

In [None]:
from pyod.models.dif import DIF

model = DIF()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.predict_proba(X_test2)[:,1]

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

DIF(batch_size=1000, contamination=0.1, device=device(type='cpu'),
  hidden_activation='tanh', hidden_neurons=[500, 100], max_samples=256,
  n_ensemble=50, n_estimators=6, random_state=None, representation_dim=20,
  skip_connection=False) 
 {'Accuracy': 0.786, 'Precision': 0.5, 'Recall': 0.009, 'F1': 0.017, 'MCC': 0.043, 'AUC_PR': 0.541, 'AUC_ROC': 0.836, 'PREC_N_SCORES': 0.584}


VAE

In [None]:
from pyod.models.vae import VAE

model = VAE()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Training:   0%|          | 0/30 [00:00<?, ?it/s]

Training: 100%|██████████| 30/30 [00:11<00:00,  2.57it/s]


VAE(batch_norm=False, batch_size=32, beta=1.0, capacity=0.0,
  compile_mode='default', contamination=0.1,
  decoder_neuron_list=[32, 64, 128], device=device(type='cpu'),
  dropout_rate=0.2, encoder_neuron_list=[128, 64, 32], epoch_num=30,
  hidden_activation_name='relu', latent_dim=2, lr=0.001,
  optimizer_name='adam', optimizer_params={'weight_decay': 1e-05},
  output_activation_name='sigmoid', preprocessing=True, random_state=42,
  use_compile=False, verbose=1) 
 {'Accuracy': 0.794, 'Precision': 0.532, 'Recall': 0.292, 'F1': 0.377, 'MCC': 0.283, 'AUC_PR': 0.446, 'AUC_ROC': 0.687, 'PREC_N_SCORES': 0.513}


GMM

In [None]:
from pyod.models.gmm import GMM

model = GMM()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

GMM(contamination=0.1, covariance_type='full', init_params='kmeans',
  max_iter=100, means_init=None, n_components=1, n_init=1,
  precisions_init=None, random_state=None, reg_covar=1e-06, tol=0.001,
  warm_start=False, weights_init=None) 
 {'Accuracy': 0.783, 'Precision': 0.482, 'Recall': 0.239, 'F1': 0.32, 'MCC': 0.225, 'AUC_PR': 0.426, 'AUC_ROC': 0.713, 'PREC_N_SCORES': 0.389}


DeepSVDD

In [None]:
from pyod.models.deep_svdd import DeepSVDD

# Determina il numero di feature
n_features = X_train2.shape[1]

model = DeepSVDD(n_features=n_features)
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

Epoch 1/100, Loss: 36.17359483242035
Epoch 2/100, Loss: 36.19166633486748
Epoch 3/100, Loss: 36.2466336786747
Epoch 4/100, Loss: 36.13528761267662
Epoch 5/100, Loss: 36.165921211242676
Epoch 6/100, Loss: 36.13916572928429
Epoch 7/100, Loss: 36.189294904470444
Epoch 8/100, Loss: 36.17238187789917
Epoch 9/100, Loss: 36.2117395401001
Epoch 10/100, Loss: 36.185857594013214
Epoch 11/100, Loss: 36.13321906328201
Epoch 12/100, Loss: 36.1584706902504
Epoch 13/100, Loss: 36.17630282044411
Epoch 14/100, Loss: 36.17380636930466
Epoch 15/100, Loss: 36.25334322452545
Epoch 16/100, Loss: 36.1712027490139
Epoch 17/100, Loss: 36.12485006451607
Epoch 18/100, Loss: 36.4436274766922
Epoch 19/100, Loss: 36.22374951839447
Epoch 20/100, Loss: 36.2115415930748
Epoch 21/100, Loss: 36.16678577661514
Epoch 22/100, Loss: 36.20809951424599
Epoch 23/100, Loss: 36.228652626276016
Epoch 24/100, Loss: 36.154085248708725
Epoch 25/100, Loss: 36.138443648815155
Epoch 26/100, Loss: 36.5161928832531
Epoch 27/100, Loss: 36

PCA

In [None]:
from pyod.models.pca import PCA

model = PCA()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

PCA(contamination=0.1, copy=True, iterated_power='auto', n_components=None,
  n_selected_components=None, random_state=None, standardization=True,
  svd_solver='auto', tol=0.0, weighted=True, whiten=False) 
 {'Accuracy': 0.779, 'Precision': 0.464, 'Recall': 0.23, 'F1': 0.308, 'MCC': 0.21, 'AUC_PR': 0.373, 'AUC_ROC': 0.612, 'PREC_N_SCORES': 0.363}


COPOD

In [None]:
from pyod.models.copod import COPOD

model = COPOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

COPOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.4, 'Recall': 0.177, 'F1': 0.245, 'MCC': 0.147, 'AUC_PR': 0.328, 'AUC_ROC': 0.627, 'PREC_N_SCORES': 0.257}


SOS

In [None]:
from pyod.models.sos import SOS

model = SOS()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

SOS(contamination=0.1, eps=1e-05, metric='euclidean', perplexity=4.5) 
 {'Accuracy': 0.758, 'Precision': 0.364, 'Recall': 0.177, 'F1': 0.238, 'MCC': 0.125, 'AUC_PR': 0.308, 'AUC_ROC': 0.524, 'PREC_N_SCORES': 0.274}


ECOD

In [None]:
from pyod.models.ecod import ECOD

model = ECOD()
model.fit(X_train2)

y_predicted = model.predict(X_test2)
y_predicted_score = model.decision_function(X_test2)

print(model, '\n', evaluate_metrics(y_test, y_predicted, y_predicted_score))

ECOD(contamination=0.1, n_jobs=1) 
 {'Accuracy': 0.767, 'Precision': 0.396, 'Recall': 0.168, 'F1': 0.236, 'MCC': 0.14, 'AUC_PR': 0.34, 'AUC_ROC': 0.637, 'PREC_N_SCORES': 0.345}


## XGBOD

In [None]:
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD()
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

#n_estimators=50,
#max_depth=3,
#learning_rate=0.1,
#random_state=SEED

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

#### Con metiche di Memoria e Tempo

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Inizializza e addestra XGBOD
model = XGBOD(n_estimators=50, max_depth=3, learning_rate=0.1, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference



### XGBOD più modelli unsupervised

In [None]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)
# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], subsample=1) {'Accuracy': 0.968, 'Precision': 0.944, 'Recall': 0.903, 'F1': 0.923, 'MCC': 0.903, 'AUC_PR': 0.974, 'AUC_ROC': 0.991, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.3419463634490967 secondi
Uso della memoria durante l'addestramento: 815.8125 MiB

 Tempo di inferenza: 1.605494499206543 secondi
Uso della memoria durante l'inferenza: 815.79296875 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True], su

### XGBOD più modelli unsupervised e Parametri

In [None]:
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models,
              n_estimators=100,
              max_depth=3,
              learning_rate=0.2,
              n_jobs=-1,
              random_state=SEED
            )

model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)

Parameters: { "silent" } are not used.




{'Accuracy': 0.97, 'Precision': 0.945, 'Recall': 0.912, 'F1': 0.928, 'MCC': 0.909, 'AUC_PR': 0.973, 'AUC_ROC': 0.992, 'PREC_N_SCORES': 0.92}


#### Con Metriche di Tempo e Memoria

In [None]:
import time
from memory_profiler import memory_usage
from pyod.models.xgbod import XGBOD

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]
# Inizializza e addestra XGBOD
model = XGBOD(estimator_list=unsupervised_models, n_estimators=100, max_depth=3, learning_rate=0.2, random_state=SEED)

def train_model():
    start_time = time.time()
    mem_usage = memory_usage((model.fit, (X_train_scaled, y_train)))
    training_time = time.time() - start_time
    print(f"\n Tempo di addestramento: {training_time} secondi")
    print(f"Uso della memoria durante l'addestramento: {max(mem_usage)} MiB")
    return training_time, mem_usage

def inference_model():
    start_time = time.time()
    mem_usage_inference = memory_usage((model.predict, (X_test_scaled,)))
    inference_time = time.time() - start_time
    y_pred = model.predict(X_test_scaled)
    print(f"\n Tempo di inferenza: {inference_time} secondi")
    print(f"Uso della memoria durante l'inferenza: {max(mem_usage_inference)} MiB")
    return y_pred, inference_time, mem_usage_inference

# Addestramento del modello e monitoraggio delle metriche di efficientamento
training_time, mem_usage = train_model()

# Inferenza del modello e monitoraggio delle metriche di efficientamento
y_pred, inference_time, mem_usage_inference = inference_model()

# Calcola i punteggi di decisione
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche con le nuove metriche di efficientamento
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.




 Tempo di addestramento: 2.611022472381592 secondi
Uso della memoria durante l'addestramento: 816.11328125 MiB

 Tempo di inferenza: 1.9620587825775146 secondi
Uso della memoria durante l'inferenza: 816.078125 MiB
XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=5, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=20, n...3, gamma='auto',
   kernel='rbf', max_iter=-1, nu=0.5, shrinking=True, tol=0.001,
   verbose=False)],
   gamma=0, learning_rate=0.2, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=100, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=2137, reg_alpha=0,
   reg_lambda=1, scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True

### Early Stopping
Termina l'esecuzione anticipatamente se per un numero prestabilito di round non migliorano più i parametri

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.iforest import IForest
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Divisione del dataset di allenamento per avere un set di validazione
X_train_sub, X_val, y_train_sub, y_val = train_test_split(X_train_scaled, y_train, test_size=0.2, random_state=SEED)

# Inizializzazione del modello
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.2, n_jobs=-1, random_state=SEED)

best_score = -np.inf
patience = 10       # Numero di volte che il modello cercherà di migliorarsi
patience_counter = 0
n_iterations = 100      # Numero massimo di cicli del'allenamento

for i in range(n_iterations):  # Numero massimo di iterazioni
    model.fit(X_train_sub, y_train_sub)
    
    # Predizione sul set di validazione
    y_val_pred = model.predict(X_val)
    val_score = accuracy_score(y_val, y_val_pred)
    
    # Controllo early stopping
    if val_score > best_score:
        best_score = val_score
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"Early stopping at iteration {i}")
            break
    model.n_estimators += 1  # Incrementa il numero di stimatori per la prossima iterazione

# Predizione sul set di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print("")
print(metrics)


Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.



Early stopping at iteration 12

{'Accuracy': 0.97, 'Precision': 0.971, 'Recall': 0.885, 'F1': 0.926, 'MCC': 0.909, 'AUC_PR': 0.969, 'AUC_ROC': 0.99, 'PREC_N_SCORES': 0.912}


### XGBOD + ESN

In [None]:
import numpy as np
from reservoirpy.nodes import Reservoir, Ridge
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Definizione dei modelli unsupervised
unsupervised_models = [
    KNN(),
    LOF(),
    ABOD(),
    OCSVM()
]

# Creazione del reservoir
reservoir = Reservoir(units=1000, sr=0.95)  # sr: raggio spettrale
# Creazione del nodo di output per il readout
readout = Ridge(ridge=1e-5)
# Connessione del reservoir al readout per creare l'ESN
reservoir >> readout

# Pipeline di preprocessing
preprocessor = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
    ('reservoir', reservoir)
])

# Trasformazione dei dati di addestramento e test con ESN
# Addestramento del modello
readout.fit(reservoir.run(X_train_scaled), X_train_scaled)  # Si allena il readout sugli stati del reservoir

# Predizione per il rilevamento di anomalie
X_train_transformed = reservoir.run(X_train_scaled)
X_test_transformed = reservoir.run(X_test_scaled)

# Creazione del modello XGBOD con parametri specificati
model = XGBOD(estimator_list=unsupervised_models, n_estimators=50, max_depth=3, learning_rate=0.1, n_jobs=-1, random_state=42)
# Uso le trasformazioni di ESN con il modello XGBOD
model.fit(X_train_transformed, y_train)

# Predizione sui dati di test
y_pred = model.predict(X_test_transformed)
y_predicted_score = model.decision_function(X_test_transformed)

# Valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(f"Model: {model}")
print(f"Metrics: {metrics}")


ModuleNotFoundError: No module named 'reservoirpy'

### Batch Processing
Addestra il modello su piccole porzioni migliorando il carico sulla memoria e la velocità

In [None]:
import numpy as np
from pyod.models.xgbod import XGBOD
from pyod.models.knn import KNN
from pyod.models.lof import LOF
from pyod.models.abod import ABOD
from pyod.models.ocsvm import OCSVM

# Dividi il dataset in batch
n_batches = 10  # Specifica il numero di batch che vuoi
X_train_batches = np.array_split(X_train_scaled, n_batches)
y_train_batches = np.array_split(y_train, n_batches)

# Definizione dei modelli unsupervised
unsupervised_models = [ KNN(),
                       LOF(),
                       ABOD(),
                        OCSVM()
                    ]

# Inizializza i modelli per ciascun batch
models = []
for X_batch, y_batch in zip(X_train_batches, y_train_batches):
    # Inizializza e addestra il modello
    model = XGBOD(estimator_list=unsupervised_models,
                  n_estimators=100,
                  max_depth=3,
                  learning_rate=0.2,
                  n_jobs=-1,
                  random_state=SEED
                )
    model.fit(X_batch, y_batch)
    models.append(model)

# Prevedi gli outlier nel dataset di test e combinalo
y_pred_scores = np.zeros_like(X_test_scaled[:, 0], dtype=float)
for model in models:
    y_pred_scores += model.decision_function(X_test_scaled)

# Media dei punteggi di decisione
y_pred_scores /= n_batches
y_pred = (y_pred_scores > np.mean(y_pred_scores)).astype(int)

# Esegui la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_pred_scores)
print("")
print(metrics)


  return bound(*args, **kwds)
Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.

Parameters: { "silent" } are not used.




{'Accuracy': 0.749, 'Precision': 0.455, 'Recall': 0.885, 'F1': 0.601, 'MCC': 0.496, 'AUC_PR': 0.878, 'AUC_ROC': 0.918, 'PREC_N_SCORES': 0.796}


#### Cross Validation

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.feature_selection import SelectKBest, f_classif
from pyod.models.xgbod import XGBOD
from sklearn.model_selection import cross_val_score
import numpy as np

# Preprocessing and model pipeline
preprocessor = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('feature_selection', SelectKBest(score_func=f_classif, k=10)),
    ('classifier', XGBOD(n_estimators=50, max_depth=3, learning_rate=0.1))
])

# Cross-validation with pipeline
scores = cross_val_score(pipeline, X_train_scaled, y_train, cv=5, scoring='roc_auc', n_jobs=-1)
print(f"Cross-validation scores: {scores}")
print(f"Mean ROC AUC score: {np.mean(scores)}")

# Train and evaluate model
pipeline.fit(X_train_scaled, y_train)
y_pred = pipeline.predict(X_test_scaled)
y_predicted_score = pipeline.decision_function(X_test_scaled)

metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)
print(pipeline.named_steps['classifier'], metrics)


Cross-validation scores: [nan nan nan nan nan]
Mean ROC AUC score: nan


Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, Fa

### XGBOD con ricerca iperparametri con "grid"

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from pyod.models.xgbod import XGBOD
import numpy as np

# Definizione della griglia di parametri
param_grid = {
    'n_estimators': [50, 100],
    'max_depth': [3, 5],
    'learning_rate': [0.01, 0.1]
}

# Inizializza il modello
model = XGBOD()

# Randomized search con meno iterazioni e parallelizzazione
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3, scoring='roc_auc', random_state=42, n_jobs=-1)
random_search.fit(X_train_scaled, y_train)

# Migliori parametri trovati
best_params = random_search.best_params_
print(f"Best parameters found: {best_params}")

# Riaddestramento del modello con i migliori parametri
model = XGBOD(**best_params)
model.fit(X_train_scaled, y_train)

# Prevedi gli outlier nel dataset di test
y_pred = model.predict(X_test_scaled)
y_predicted_score = model.decision_function(X_test_scaled)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)


Parameters: { "silent" } are not used.



Best parameters found: {'n_estimators': 50, 'max_depth': 3, 'learning_rate': 0.01}


Parameters: { "silent" } are not used.



XGBOD(base_score=0.5, booster='gbtree', colsample_bylevel=1,
   colsample_bytree=1,
   estimator_list=[KNN(algorithm='auto', contamination=0.1, leaf_size=30, method='largest',
  metric='minkowski', metric_params=None, n_jobs=1, n_neighbors=1, p=2,
  radius=1.0), LOF(algorithm='auto', contamination=0.1, leaf_size=30, metric='minkowski',
  metric_params=None, n_jobs=1, n_neighbors=1, no...ax_features=1.0,
    max_samples='auto', n_estimators=200, n_jobs=1, random_state=0,
    verbose=0)],
   gamma=0, learning_rate=0.01, max_delta_step=0, max_depth=3,
   min_child_weight=1, n_estimators=50, n_jobs=1, nthread=None,
   objective='binary:logistic', random_state=0, reg_alpha=0, reg_lambda=1,
   scale_pos_weight=1, silent=True,
   standardization_flag_list=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, False, False, False, True, True, True, True, True, True, True, True, True, True, True, False, False, False, False, F

### FCNN

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt

# Definisci il modello FCNN
model = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_train_scaled.shape[1], 1)),
    MaxPooling1D(2),
    Conv1D(128, 3, activation='relu'),
    MaxPooling1D(2),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Poiché si tratta di una classificazione binaria
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Addestra il modello
model.fit(X_train_scaled, y_train, epochs=10, batch_size=32, validation_data=(X_test_scaled, y_test))

# Prevedi gli outlier nel dataset di test
y_pred = (model.predict(X_test_scaled) > 0.5).astype("int32")
y_predicted_score = model.predict(X_test_scaled)

metrics = evaluate_metrics(y_test, y_pred, y_predicted_score)

# Stampa i risultati
print(model, metrics)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 14ms/step - accuracy: 0.8006 - loss: 0.4877 - val_accuracy: 0.8885 - val_loss: 0.2546
Epoch 2/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9154 - loss: 0.2390 - val_accuracy: 0.9244 - val_loss: 0.1969
Epoch 3/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.9334 - loss: 0.1862 - val_accuracy: 0.9168 - val_loss: 0.1949
Epoch 4/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9408 - loss: 0.1831 - val_accuracy: 0.9452 - val_loss: 0.1793
Epoch 5/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.9474 - loss: 0.1629 - val_accuracy: 0.9471 - val_loss: 0.1570
Epoch 6/10
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.9424 - loss: 0.1595 - val_accuracy: 0.9546 - val_loss: 0.1572
Epoch 7/10
[1m50/50[0m [32m━━━━━━━━━

# Rocket

In [None]:
from sktime.transformations.panel.rocket import Rocket
from pyod.models.xgbod import XGBOD
import numpy as np


# 2. Applica ROCKET
rocket = Rocket(num_kernels=10000)
rocket.fit(X_train_scaled, y_train)
features = rocket.transform(X_train_scaled)
# Verifica che il numero di campioni di features e y_test sia lo stesso
assert features.shape[0] == y_test.shape[0], "Il numero di campioni non corrisponde!"


# 3. Rilevamento delle anomalie
model = XGBOD(contamination=0.01, random_state=42)  # Modello non supervisionato
anomaly_scores = model.fit_predict(features, y_test)


ModuleNotFoundError: No module named 'sktime'

## Rocket Normale

In [None]:
import numpy as np
import pandas as pd
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

# Funzioni già definite in precedenza
def generate_kernels(input_length, num_kernels):
    candidate_lengths = np.array((7, 9, 11), dtype=np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype=np.float64)
    biases = np.zeros(num_kernels, dtype=np.float64)
    dilations = np.zeros(num_kernels, dtype=np.int32)
    paddings = np.zeros(num_kernels, dtype=np.int32)

    a1 = 0
    for i in range(num_kernels):
        _length = lengths[i]
        _weights = np.random.normal(0, 1, _length)
        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()
        biases[i] = np.random.uniform(-1, 1)
        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation
        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding
        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath=True)
def apply_kernel(X, weights, length, bias, dilation, padding):
    input_length = len(X)
    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)
    _ppv = 0
    _max = np.NINF
    _mean_sum = 0  # Per calcolare la media
    end = (input_length + padding) - ((length - 1) * dilation)
    for i in range(-padding, end):
        _sum = bias
        index = i
        for j in range(length):
            if index > -1 and index < input_length:
                _sum += weights[j] * X[index]
            index += dilation
        _mean_sum += _sum  # Aggiungi al totale per la media
        if _sum > _max:
            _max = _sum
        if _sum > 0:
            _ppv += 1
    mean_response = _mean_sum / output_length  # Calcola la media
    return _ppv / output_length, _max, mean_response

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel=True, fastmath=True)
def apply_kernels(X, kernels):
    weights, lengths, biases, dilations, paddings = kernels
    num_examples, _ = X.shape
    num_kernels = len(lengths)
    _X = np.zeros((num_examples, num_kernels * 3), dtype=np.float64)  # 3 features per kernel
    for i in prange(num_examples):
        a1 = 0  # Per i pesi
        a2 = 0  # Per le caratteristiche
        for j in range(num_kernels):
            b1 = a1 + lengths[j]
            b2 = a2 + 3
            _X[i, a2:b2] = apply_kernel(
                X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j]
            )
            a1 = b1
            a2 = b2
    return _X

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 10000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)

# Sintesi delle caratteristiche per esempio
anomaly_scores_train = np.mean(features_train, axis=1)  # Media
anomaly_scores_test = np.mean(features_test, axis=1)  # Media

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Visualizzazione dei risultati
print("Anomalie rilevate nel training set:", anomaly_labels_train)
print("Anomalie rilevate nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test, y_proba=anomaly_scores_test)
print("Metriche di valutazione sul test set:\n", metrics)
# {'Accuracy': 0.832, 'Precision': 0.962, 'Recall': 0.221, 'F1': 0.36, 'MCC': 0.415, 'AUC_PR': 0.726, 'AUC_ROC': 0.772, 'PREC_N_SCORES': 0.646}

Anomalie rilevate nel training set: [1 0 0 ... 0 0 0]
Anomalie rilevate nel test set: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0
 0 0 0 0 0 0 1

# Rockad

In [None]:
import numpy as np
import pandas as pd

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import PowerTransformer
from sklearn.utils import resample
from sktime.transformations.panel.rocket import Rocket
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import distance_metrics


class NearestNeighborOCC():
    
    def __init__(self, dist="euclidean"):
        self.scores_train = None
        self.dist = None
        
        metrics = distance_metrics()
        
        if type(dist) is str and dist in metrics.keys():
            self.dist = metrics[dist]
        elif dist in metrics.values():
            self.dist = dist
        elif False:
            # TODO: allow time series distance measures such as DTW or Matrix Profile
            pass
        else:
            raise Exception("Distance metric not supported.")
    
    
    def fit(self, scores_train):
        _scores_train = scores_train
        
        if type(_scores_train) is not np.array:
            _scores_train = np.array(scores_train.copy())
        
        if len(_scores_train.shape) == 1:
            _scores_train = _scores_train.reshape(-1, 1)
        
        self.scores_train = _scores_train
        
        return self
    
        
    def predict(self, scores_test):
        """
        Per definition (see [1]): 0 indicates an anomaly, 1 indicates normal.
        Here : -1 indicates an anomaly, 1 indicates normal. 
        """
        
        predictions = []
        for score in scores_test:
            predictions.append(self.predict_score(score))
        return np.array(predictions)
    
    
    def predict_score(self, anomaly_score):
        prediction = None
        
        anomaly_score_arr = np.array([anomaly_score for i in range(len(self.scores_train))])
        
        _scores_train = self.scores_train.copy().reshape(-1, 1)
        anomaly_score_arr = anomaly_score_arr.reshape(-1, 1)
        nearest_neighbor_idx = np.argmin(self.dist(anomaly_score_arr, _scores_train))
        
        _scores_train = np.delete(_scores_train, nearest_neighbor_idx).reshape(-1, 1)
        
        nearest_neighbor_score = self.scores_train[nearest_neighbor_idx]
        neares_neighbot_score_arr = np.array([nearest_neighbor_score for i in range(len(_scores_train))])
        nearest_neighbor_score_arr = neares_neighbot_score_arr.reshape(-1, 1)
        
        nearest_nearest_neighbor_idx = np.argmin(self.dist(nearest_neighbor_score_arr, _scores_train))
        nearest_nearest_neighbor_score = _scores_train[nearest_nearest_neighbor_idx][0]
        
        prediction = self.indicator_function(
            anomaly_score, nearest_neighbor_score, nearest_nearest_neighbor_score)
        
        return prediction
    
    
    def indicator_function(self, z_score, nearest_score, nearest_of_nearest_score):
        
        # make it an array and reshape it to calculate the distance
        z_score_arr = np.array(z_score).reshape(1, -1)
        nearest_score_arr = np.array(nearest_score).reshape(1, -1)
        nearest_of_nearest_score_arr = np.array(nearest_of_nearest_score).reshape(1, -1)
        
        numerator = self.dist(z_score_arr, nearest_score_arr)
        denominator = self.dist(nearest_score_arr, nearest_of_nearest_score_arr)
        
        # error handling for corner cases
        if numerator == 0:
            return 1
        elif denominator == 0:
            return -1
        else:
            return 1 if (numerator/denominator) <= 1 else -1

class NN:
    
    def __init__(self, 
            n_neighbors = 5, 
            n_jobs = 1,
            dist = 'euclidean',
            random_state=42, 
        ) -> None:
        
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs
        self.dist = dist
        self.random_state = random_state


    def fit(self, X):
        self.nn = NearestNeighbors(
            n_neighbors = self.n_neighbors,
            n_jobs = self.n_jobs,
            metric = self.dist,
            algorithm = 'ball_tree',
            )
        
        self.nn.fit(X)


    def predict_proba(self, X, y=None):
        scores = self.nn.kneighbors(X)
        scores = scores[0].mean(axis=1).reshape(-1,1)
        
        return scores
    

class ROCKAD():
    
    def __init__(self,
            n_estimators=10,
            n_kernels = 100,
            n_neighbors = 5,
            n_jobs = 1,
            power_transform = True,
            random_state = 42,
        ) -> None:
        self.random_state = random_state
        self.power_transform = power_transform
        
        self.n_estimators = n_estimators
        self.n_kernels = n_kernels
        self.n_neighbors = n_neighbors
        self.n_jobs = n_jobs
        self.n_inf_cols = []
        
        self.estimator = NN
        self.rocket_transformer = Rocket(num_kernels = self.n_kernels, n_jobs = self.n_jobs, random_state = self.random_state)
        self.scaler = StandardScaler()
        self.power_transformer = PowerTransformer(standardize = False)


    def init(self, X):
        
        # Fit Rocket & Transform into rocket feature space
        Xt = self.rocket_transformer.fit_transform(X)

        self.Xtp = None # X: values, t: (rocket) transformed, p: power transformed
        
        if self.power_transform is True:
            
            Xtp = self.power_transformer.fit_transform(Xt)
            
            self.Xtp = pd.DataFrame(Xtp)
            
        else:
            self.Xtp = pd.DataFrame(Xt)


    def fit_estimators(self):
        
        Xtp_scaled = None
        
        if self.power_transform is True:
            # Check for infinite columns and get indices
            self._check_inf_values(self.Xtp)
            
            # Remove infinite columns
            self.Xtp = self.Xtp[self.Xtp.columns[~self.Xtp.columns.isin(self.n_inf_cols)]]
            
            # Fit Scaler
            Xtp_scaled = self.scaler.fit_transform(self.Xtp)
            
            Xtp_scaled = pd.DataFrame(Xtp_scaled, columns = self.Xtp.columns)
            
            self._check_inf_values(Xtp_scaled)
            
            Xtp_scaled = Xtp_scaled.astype(np.float32).to_numpy()
            
        else:
            Xtp_scaled = self.Xtp.astype(np.float32).to_numpy()
            
        
        self.list_baggers = []
        
        for idx_estimator in range(self.n_estimators):
            # Initialize estimator
            estimator = self.estimator(
                n_neighbors = self.n_neighbors,
                n_jobs = self.n_jobs,
            )
            
            # Bootstrap Aggregation
            Xtp_scaled_sample = resample(
                Xtp_scaled,
                replace = True,
                n_samples = None,
                random_state = self.random_state + idx_estimator,
                stratify = None,
            )

            # Fit estimator and append to estimator list
            estimator.fit(Xtp_scaled_sample)
            self.list_baggers.append(estimator)


    def fit(self, X):
        self.init(X)
        self.fit_estimators()
        
        return self
    
    
    def predict_proba(self, X):
        y_scores = np.zeros((len(X), self.n_estimators))
        
        # Transform into rocket feature space
        Xt = self.rocket_transformer.transform(X)
        
        Xtp_scaled = None
        
        if self.power_transform == True:
            # Power Transform using yeo-johnson
            Xtp = self.power_transformer.transform(Xt)
            Xtp = pd.DataFrame(Xtp)
            
            # Check for infinite columns and remove them
            self._check_inf_values(Xtp)
            Xtp = Xtp[Xtp.columns[~Xtp.columns.isin(self.n_inf_cols)]]
            Xtp_temp = Xtp.copy()
            
            # Scale the data
            Xtp_scaled = self.scaler.transform(Xtp_temp)
            Xtp_scaled = pd.DataFrame(Xtp_scaled, columns = Xtp_temp.columns)
            
            # Check for infinite columns and remove them
            self._check_inf_values(Xtp_scaled)
            Xtp_scaled = Xtp_scaled[Xtp_scaled.columns[~Xtp_scaled.columns.isin(self.n_inf_cols)]]
            Xtp_scaled = Xtp_scaled.astype(np.float32).to_numpy() 
        
        else:
            Xtp_scaled = Xt.astype(np.float32)
        
        
        for idx, bagger in enumerate(self.list_baggers):
            # Get scores from each estimator
            scores = bagger.predict_proba(Xtp_scaled).squeeze()
            
            y_scores[:, idx] = scores
            
        # Average the scores to get the final score for each time series
        y_scores = y_scores.mean(axis=1)
        
        return y_scores
    
    
    def _check_inf_values(self, X):
        if np.isinf(X[X.columns[~X.columns.isin(self.n_inf_cols)]]).any(axis=0).any() : 
            self.n_inf_cols.extend(X.columns.to_series()[np.isinf(X).any()])
            self.fit_estimators()
            return True
    
# Create the normal dataset (Normal class: Class 1)  
#        the anomaly dataset (Anomaly class: Class 2)

RANDOM_STATE = 42
# Initialize and fit ROCKAD

# X_train_array = np.array([x.to_numpy().flatten() for x in X_train.iloc[:, 0]])
# X_test_array = np.array([x.to_numpy().flatten() for x in X_test.iloc[:, 0]])

# Create the normal dataset (Normal class: Class 1)  
#        the anomaly dataset (Anomaly class: Class 2)
X_normal_train = X_train[y_train == '1'].dropna()
X_normal_test = X_test[y_test == '1'].dropna()
X_anomaly_test = X_test[y_test == '2'].dropna()

y_normal_test = y_test[y_test == '1']
y_anomaly_test = y_test[y_test == '2']

X_test = np.concatenate((X_normal_test, X_anomaly_test), axis=0)
y_test = np.concatenate((y_normal_test, y_anomaly_test), axis=0)

rockad = ROCKAD(random_state=RANDOM_STATE)
rockad.fit(X_normal_train) 

# Predict anomaly scores
scores = rockad.predict_proba(X_test)

print("Score: ",scores)

# Initialize and fit NearestNeigbor One Class Classifier
decision_func = NearestNeighborOCC().fit(scores)

# Predict anomalies
predictions = decision_func.predict(scores)

  X = (X - X.mean(axis=-1, keepdims=True)) / (
  ret = um.true_divide(
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = um.true_divide(


## Rilevamento di anomalie SUPERVISED

In [None]:
import numpy as np
import pandas as pd
from pyod.models.xgbod import XGBOD
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X
    weights, lengths, biases, dilations, paddings = kernels
    num_examples, _ = X.shape
    num_kernels = len(lengths)
    _X = np.zeros((num_examples, num_kernels * 3), dtype=np.float64)  # 3 features per kernel
    for i in prange(num_examples):
        a1 = 0  # Per i pesi
        a2 = 0  # Per le caratteristiche
        for j in range(num_kernels):
            b1 = a1 + lengths[j]
            b2 = a2 + 3
            _X[i, a2:b2] = apply_kernel(
                X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j]
            )
            a1 = b1
            a2 = b2
    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000 # Valore standard
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)

# Addestramento del modello supervisionato
model = XGBOD(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=SEED)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.predict_proba(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba=y_proba)
print("Metriche di valutazione:\n", metrics)

# Scaled -> {'Accuracy': 0.6, 'Precision': 0.7, 'Recall': 0.583, 'F1': 0.636, 'MCC': 0.204, 'AUC_PR': 0.632, 'AUC_ROC': 0.542}
# Non Scaled -> {'Accuracy': 0.7, 'Precision': 0.75, 'Recall': 0.75, 'F1': 0.75, 'MCC': 0.375, 'AUC_PR': 0.712, 'AUC_ROC': 0.656}



KeyboardInterrupt: 

KNN

In [None]:
import numpy as np
import pandas as pd
from pyod.models.iforest import IsolationForest
from pyod.models.knn import KNN
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = KNN()
model.fit(features_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.845, 'Precision': 0.763, 'Recall': 0.398, 'F1': 0.523, 'MCC': 0.475, 'AUC_PR': 0.619, 'AUC_ROC': 0.811, 'PREC_N_SCORES': 0.54}

Predizioni nel test set: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 1 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 1 1 0 1 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0
 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1
 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


### Regressione Logistica -> Classificatore lineare

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from pyod.models.knn import KNN
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = LogisticRegression(max_iter=1000)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)
y_proba = model.decision_function(features_test)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.977, 'Precision': 0.972, 'Recall': 0.92, 'F1': 0.945, 'MCC': 0.932, 'AUC_PR': 0.962, 'AUC_ROC': 0.984, 'PREC_N_SCORES': 0.929}

Predizioni nel test set: [0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 1
 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 1 1 1
 0 1 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 1 0 1 1 0 0 0
 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0
 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0
 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


### Prova con Dettagli dal GitHub del Paper

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X

def detect_anomalies_with_threshold(scores, threshold):
    return (scores > threshold).astype(int)


# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = Ridge(alpha=1.0)
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
anomaly_scores_test = model.predict(features_test)
anomaly_scores_train = model.predict(features_train)

# Rilevamento delle anomalie
threshold = np.percentile(anomaly_scores_train , 95)
anomaly_labels_train = detect_anomalies_with_threshold(anomaly_scores_train , threshold)
anomaly_labels_test = detect_anomalies_with_threshold(anomaly_scores_test , threshold)

# Visualizzazione dei risultati
print("Predizioni nel test set:", anomaly_labels_test)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, anomaly_labels_test, y_proba=anomaly_scores_test)
print("Metriche di valutazione:\n", metrics)
#  {'Accuracy': 0.888, 'Precision': 0.966, 'Recall': 0.496, 'F1': 0.655, 'MCC': 0.644, 'AUC_PR': 0.922, 'AUC_ROC': 0.962, 'PREC_N_SCORES': 0.912}

Predizioni nel test set: [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 1 0 0 0 1 1 0
 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


## LogisticClassifierCV

In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import RidgeClassifierCV
from numba import njit, prange
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, matthews_corrcoef, average_precision_score, roc_auc_score
from pyod.utils.data import precision_n_scores
from scipy.special import softmax

@njit("Tuple((float64[:],int32[:],float64[:],int32[:],int32[:]))(int64,int64)")
def generate_kernels(input_length, num_kernels):

    candidate_lengths = np.array((7, 9, 11), dtype = np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels)

    weights = np.zeros(lengths.sum(), dtype = np.float64)
    biases = np.zeros(num_kernels, dtype = np.float64)
    dilations = np.zeros(num_kernels, dtype = np.int32)
    paddings = np.zeros(num_kernels, dtype = np.int32)

    a1 = 0

    for i in range(num_kernels):

        _length = lengths[i]

        _weights = np.random.normal(0, 1, _length)

        b1 = a1 + _length
        weights[a1:b1] = _weights - _weights.mean()

        biases[i] = np.random.uniform(-1, 1)

        dilation = 2 ** np.random.uniform(0, np.log2((input_length - 1) / (_length - 1)))
        dilation = np.int32(dilation)
        dilations[i] = dilation

        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1

    return weights, lengths, biases, dilations, paddings

@njit(fastmath = True)
def apply_kernel(X, weights, length, bias, dilation, padding):

    input_length = len(X)

    output_length = (input_length + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    _max = np.NINF

    end = (input_length + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            if index > -1 and index < input_length:

                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return _ppv / output_length, _max

@njit("float64[:,:](float64[:,:],Tuple((float64[::1],int32[:],float64[:],int32[:],int32[:])))", parallel = True, fastmath = True)
def apply_kernels(X, kernels):

    weights, lengths, biases, dilations, paddings = kernels

    num_examples, _ = X.shape
    num_kernels = len(lengths)

    _X = np.zeros((num_examples, num_kernels * 2), dtype = np.float64) # 2 features per kernel

    for i in prange(num_examples):

        a1 = 0 # for weights
        a2 = 0 # for features

        for j in range(num_kernels):

            b1 = a1 + lengths[j]
            b2 = a2 + 2

            _X[i, a2:b2] = \
            apply_kernel(X[i], weights[a1:b1], lengths[j], biases[j], dilations[j], paddings[j])

            a1 = b1
            a2 = b2

    return _X


# Genera kernel convoluzionali casuali
input_length = X_train.shape[1]
num_kernels = 1000
kernels = generate_kernels(input_length, num_kernels)

# Applica i kernel alle serie temporali
features_train = apply_kernels(X_train2, kernels)
features_test = apply_kernels(X_test2, kernels)


# Addestramento del modello supervisionato
model = RidgeClassifierCV(alphas = np.logspace(-3, 3, 10))
model.fit(features_train, y_train)

# Predizione delle anomalie nei dati di test
y_pred = model.predict(features_test)

if  len(np.unique(y_test)) > 2:
    y_proba = softmax(model.decision_function(features_test), axis=1)
else:
    y_proba = softmax(model.decision_function(features_test), axis=0)

# Visualizzazione dei risultati
print("Predizioni nel test set:", y_pred)

# Eseguiamo la valutazione delle metriche
metrics = evaluate_metrics(y_test, y_pred, y_proba)
print("Metriche di valutazione:\n", metrics)
# {'Accuracy': 0.977, 'Precision': 0.972, 'Recall': 0.92, 'F1': 0.945, 'MCC': 0.932, 'AUC_PR': 0.962, 'AUC_ROC': 0.984, 'PREC_N_SCORES': 0.929}

Predizioni nel test set: [0 0 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 1 0 1 0 0 1
 0 0 1 0 0 1 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 1 1 0 0 0 1 0 1 1 1 0 1 1 1 1 1
 0 1 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 1 0 0 1 1 0 0 0 0 1 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 1 1 0 1 1 0 1 1 0 0 0
 0 0 1 0 1 0 0 1 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 1 0 0 0 0 0 0 0 1 0 1 0 0 0 1 0
 0 0 1 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 1 1 1 1 1 0 0 0 0 0 0
 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
