<a href="https://colab.research.google.com/github/RickyF404/Tesi/blob/main/Dataset_inclinometri.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import LocalOutlierFactor
from sklearn.svm import OneClassSVM

import numpy as np
import matplotlib.pyplot as plt
import gdown

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Layer

In [None]:
# Fetch the inclinometer CSV from Google Drive into the current working directory.
# The call is the last expression of the cell, so the saved path is echoed as output.
url_inc = "https://drive.google.com/uc?id=1jZZ7Oje34Rj_H0_R7SkHntEhu2KZv_It"
output_inc = "data_inc.csv"
gdown.download(url=url_inc, output=output_inc)

Downloading...
From: https://drive.google.com/uc?id=1jZZ7Oje34Rj_H0_R7SkHntEhu2KZv_It
To: /content/data_inc.csv
100%|██████████| 2.59M/2.59M [00:00<00:00, 74.7MB/s]


'data_inc.csv'

In [None]:
# Raw sensor channel names -> generic "Mode N" labels used as features below.
mapping = {
    "I_P01_01_C_X": "Mode 1",
    "I_P01_01_C_Y": "Mode 2",
    "I_P02_01_C_X": "Mode 3",
    "I_P02_01_C_Y": "Mode 4",
    "I_P03_01_C_X": "Mode 5",
    "I_P03_01_C_Y": "Mode 6",
}

# Load the export and apply the column renaming in one pass.
df_inc = pd.read_csv("/content/data_inc.csv", encoding="utf-8").rename(columns=mapping)

# Parse timestamps, drop any timezone information and order the rows chronologically.
df_inc["timestamp"] = pd.to_datetime(df_inc["timestamp"]).dt.tz_localize(None)
df_inc = df_inc.sort_values("timestamp")

# Report the share of missing values per column before they are filled in.
percentuale_nan = df_inc.isna().sum().div(len(df_inc)).mul(100)
print("Percentuale di NaN per colonna:")
print(percentuale_nan)

# Time-weighted interpolation needs a DatetimeIndex, so index by timestamp,
# interpolate, then restore "timestamp" as a regular column.
df_inc = (
    df_inc
    .set_index("timestamp")
    .interpolate(method="time")
    .reset_index()
)

# `df` holds only the rows strictly before the cut-off date.
end = "2025-01-01"
df = df_inc.loc[df_inc["timestamp"] < end]

Percentuale di NaN per colonna:
Unnamed: 0    0.000000
timestamp     0.000000
Mode 1        1.021038
Mode 2        1.025155
Mode 3        0.065873
Mode 4        0.065873
Mode 5        0.374655
Mode 6        0.374655
dtype: float64


In [None]:
def create_fake_month(df, frac_per_month, random_state):
  """Split ``df`` into train/validation sets by sampling whole days from every calendar month.

  For each year-month present in ``df["timestamp"]`` a fraction ``frac_per_month``
  of its distinct days (at least one day) is drawn without replacement; every row
  belonging to a drawn day goes to the training set, all other rows go to the
  validation set.

  Args:
    df: DataFrame with a datetime64 ``"timestamp"`` column. It is NOT modified.
    frac_per_month: fraction of the distinct days of each month to use for training.
    random_state: seed for ``numpy.random.default_rng``; makes the split reproducible.

  Returns:
    ``(train_df, valid_df)``: two new DataFrames with the same columns as ``df``,
    each sorted by ``"timestamp"``.
  """
  rng = np.random.default_rng(seed=random_state)

  # Build the grouping keys on a copy: the previous implementation wrote the
  # helper columns straight into the caller's frame, leaving "day" and
  # "year_month" behind in it after every call.
  keyed = df.assign(
      day=df["timestamp"].dt.floor("D"),
      year_month=df["timestamp"].dt.to_period("M"),
  )

  train_indices = []
  for _, group in keyed.groupby("year_month"):
    days = group["day"].unique()
    # Always take at least one day so that short months still contribute.
    n_take = max(1, int(len(days) * frac_per_month))
    sampled_days = rng.choice(days, size=n_take, replace=False)
    train_indices.extend(group.index[group["day"].isin(sampled_days)])

  helper_cols = ["day", "year_month"]
  train_df = keyed.loc[train_indices].drop(columns=helper_cols).sort_values("timestamp")
  valid_df = keyed.drop(index=train_indices).drop(columns=helper_cols).sort_values("timestamp")

  return train_df, valid_df

# PCA

In [None]:
def anomaly_rate_PCA(train_df, valid_df, features, n_components=0.95, threshold_percentile=98.5):
  """Fraction of validation rows whose PCA reconstruction error exceeds a train-derived threshold.

  The feature columns are standardised with statistics from ``train_df``, a PCA
  is fitted on the training rows, and the per-row mean squared reconstruction
  error is computed. The anomaly threshold is the ``threshold_percentile``-th
  percentile of the training errors.

  Args:
    train_df: DataFrame used to fit the scaler, the PCA and the threshold.
    valid_df: DataFrame to score.
    features: list of column names used as model input.
    n_components: forwarded to ``sklearn.decomposition.PCA``.
    threshold_percentile: percentile (0-100) of the training error used as threshold.

  Returns:
    Share of ``valid_df`` rows flagged as anomalous, as a float in [0, 1].
  """
  scaler = StandardScaler()
  X_train = scaler.fit_transform(train_df[features])
  X_val = scaler.transform(valid_df[features])

  pca = PCA(n_components=n_components)
  X_train_pca = pca.fit_transform(X_train)
  X_train_reconstructed = pca.inverse_transform(X_train_pca)
  train_err = np.mean((X_train - X_train_reconstructed) ** 2, axis=1)
  # Calibrate on training error only, so the validation data never influences the cut-off.
  threshold = np.percentile(train_err, threshold_percentile)

  X_val_pca = pca.transform(X_val)
  X_val_reconstructed = pca.inverse_transform(X_val_pca)
  val_err = np.mean((X_val - X_val_reconstructed) ** 2, axis=1)
  anomaly_rate = np.mean(val_err > threshold)

  return anomaly_rate

In [None]:
# Sliding-window PCA experiment: train on each consecutive 2-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 2
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_PCA(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_2m_PCA = pd.DataFrame(results)
print(results_df_2m_PCA)

          train_months  window_size  anomaly rate
0   [2023-01, 2023-02]            2      0.973144
1   [2023-02, 2023-03]            2      0.880097
2   [2023-03, 2023-04]            2      0.875854
3   [2023-04, 2023-05]            2      0.839577
4   [2023-05, 2023-06]            2      0.625249
5   [2023-06, 2023-07]            2      0.886005
6   [2023-07, 2023-08]            2      0.829417
7   [2023-08, 2023-09]            2      0.726599
8   [2023-09, 2023-10]            2      0.914651
9   [2023-10, 2023-11]            2      0.964980
10  [2023-11, 2023-12]            2      0.917455


In [None]:
# Sliding-window PCA experiment: train on each consecutive 4-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 4
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_PCA(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_4m_PCA = pd.DataFrame(results)
print(results_df_4m_PCA)

                           train_months  window_size  anomaly rate
0  [2023-01, 2023-02, 2023-03, 2023-04]            4      0.979952
1  [2023-02, 2023-03, 2023-04, 2023-05]            4      0.921442
2  [2023-03, 2023-04, 2023-05, 2023-06]            4      0.889820
3  [2023-04, 2023-05, 2023-06, 2023-07]            4      0.802832
4  [2023-05, 2023-06, 2023-07, 2023-08]            4      0.573937
5  [2023-06, 2023-07, 2023-08, 2023-09]            4      0.645421
6  [2023-07, 2023-08, 2023-09, 2023-10]            4      0.879128
7  [2023-08, 2023-09, 2023-10, 2023-11]            4      0.925893
8  [2023-09, 2023-10, 2023-11, 2023-12]            4      0.847549


In [None]:
# Sliding-window PCA experiment: train on each consecutive 6-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 6
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_PCA(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_6m_PCA = pd.DataFrame(results)
print(results_df_6m_PCA)

                                        train_months  window_size  \
0  [2023-01, 2023-02, 2023-03, 2023-04, 2023-05, ...            6   
1  [2023-02, 2023-03, 2023-04, 2023-05, 2023-06, ...            6   
2  [2023-03, 2023-04, 2023-05, 2023-06, 2023-07, ...            6   
3  [2023-04, 2023-05, 2023-06, 2023-07, 2023-08, ...            6   
4  [2023-05, 2023-06, 2023-07, 2023-08, 2023-09, ...            6   
5  [2023-06, 2023-07, 2023-08, 2023-09, 2023-10, ...            6   
6  [2023-07, 2023-08, 2023-09, 2023-10, 2023-11, ...            6   

   anomaly rate  
0      0.983343  
1      0.908405  
2      0.855312  
3      0.806559  
4      0.718390  
5      0.874790  
6      0.910440  


In [None]:
# Day-sampled PCA run: train on 10% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=2.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.1, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_PCA(train_df, valid_df, features)
results_2m_fake = [{"window_size": 2, "anomaly": anomaly}]

results_df_2m_fake_PCA = pd.DataFrame(results_2m_fake)
print(results_df_2m_fake_PCA)

Giorni training: 86
Giorni validation: 833
   window_size   anomaly
0            2  0.027422


In [None]:
# Day-sampled PCA run: train on 20% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=4.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.2, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_PCA(train_df, valid_df, features)
results_4m_fake = [{"window_size": 4, "anomaly": anomaly}]

results_df_4m_fake_PCA = pd.DataFrame(results_4m_fake)
print(results_df_4m_fake_PCA)

Giorni training: 177
Giorni validation: 742
   window_size   anomaly
0            4  0.010725


In [None]:
# Day-sampled PCA run: train on 30% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=6.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.3, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_PCA(train_df, valid_df, features)
results_6m_fake = [{"window_size": 6, "anomaly": anomaly}]

results_df_6m_fake_PCA = pd.DataFrame(results_6m_fake)
print(results_df_6m_fake_PCA)

Giorni training: 268
Giorni validation: 651
   window_size   anomaly
0            6  0.008764


In [None]:
# salvataggio dei risultati PCA
# real_results_df_PCA = pd.concat([results_df_2m_PCA, results_df_4m_PCA, results_df_6m_PCA])
# real_results_df_PCA.to_csv("/content/drive/MyDrive/real_inclinometri_PCA.csv", index=False)

# fake_results_df_PCA = pd.concat([results_df_2m_fake_PCA, results_df_4m_fake_PCA, results_df_6m_fake_PCA])
# fake_results_df_PCA.to_csv("/content/drive/MyDrive/fake_inclinometri_PCA.csv", index=False)

# SVM

In [None]:
def anomaly_rate_SVM(train_df, valid_df, features, nu=0.01, gamma=0.01, kernel="rbf"):
  """Fraction of validation rows that a One-Class SVM fitted on the training rows rejects.

  The feature columns are standardised with statistics from ``train_df`` before
  the model is fitted and before ``valid_df`` is scored.

  Args:
    train_df: DataFrame used to fit the scaler and the One-Class SVM.
    valid_df: DataFrame to score.
    features: list of column names used as model input.
    nu: forwarded to ``sklearn.svm.OneClassSVM``.
    gamma: forwarded to ``sklearn.svm.OneClassSVM``.
    kernel: forwarded to ``sklearn.svm.OneClassSVM``.

  Returns:
    Share of ``valid_df`` rows predicted as outliers (label -1), as a float in [0, 1].
  """
  scaler = StandardScaler()
  X_train = scaler.fit_transform(train_df[features])
  X_val = scaler.transform(valid_df[features])

  ocsvm = OneClassSVM(kernel=kernel, nu=nu, gamma=gamma)
  ocsvm.fit(X_train)

  # predict() labels inliers +1 and outliers -1.
  preds = ocsvm.predict(X_val)
  anomaly_rate = np.mean(preds == -1)

  return anomaly_rate

In [None]:
# Sliding-window One-Class SVM experiment: train on each consecutive 2-month window
# taken from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 2
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_SVM(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_2m_SVM = pd.DataFrame(results)
print(results_df_2m_SVM)

          train_months  window_size  anomaly rate
0   [2023-01, 2023-02]            2      0.935520
1   [2023-02, 2023-03]            2      0.920559
2   [2023-03, 2023-04]            2      0.931890
3   [2023-04, 2023-05]            2      0.940693
4   [2023-05, 2023-06]            2      0.987146
5   [2023-06, 2023-07]            2      0.948760
6   [2023-07, 2023-08]            2      0.964297
7   [2023-08, 2023-09]            2      0.980950
8   [2023-09, 2023-10]            2      0.942378
9   [2023-10, 2023-11]            2      0.975728
10  [2023-11, 2023-12]            2      0.990483


In [None]:
# Sliding-window One-Class SVM experiment: train on each consecutive 4-month window
# taken from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 4
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_SVM(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_4m_SVM = pd.DataFrame(results)
print(results_df_4m_SVM)

                           train_months  window_size  anomaly rate
0  [2023-01, 2023-02, 2023-03, 2023-04]            4      0.993937
1  [2023-02, 2023-03, 2023-04, 2023-05]            4      0.945603
2  [2023-03, 2023-04, 2023-05, 2023-06]            4      0.896966
3  [2023-04, 2023-05, 2023-06, 2023-07]            4      0.831537
4  [2023-05, 2023-06, 2023-07, 2023-08]            4      0.968461
5  [2023-06, 2023-07, 2023-08, 2023-09]            4      0.988293
6  [2023-07, 2023-08, 2023-09, 2023-10]            4      0.931371
7  [2023-08, 2023-09, 2023-10, 2023-11]            4      0.918030
8  [2023-09, 2023-10, 2023-11, 2023-12]            4      0.995464


In [None]:
# Sliding-window One-Class SVM experiment: train on each consecutive 6-month window
# taken from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 6
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_SVM(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_6m_SVM = pd.DataFrame(results)
print(results_df_6m_SVM)

                                        train_months  window_size  \
0  [2023-01, 2023-02, 2023-03, 2023-04, 2023-05, ...            6   
1  [2023-02, 2023-03, 2023-04, 2023-05, 2023-06, ...            6   
2  [2023-03, 2023-04, 2023-05, 2023-06, 2023-07, ...            6   
3  [2023-04, 2023-05, 2023-06, 2023-07, 2023-08, ...            6   
4  [2023-05, 2023-06, 2023-07, 2023-08, 2023-09, ...            6   
5  [2023-06, 2023-07, 2023-08, 2023-09, 2023-10, ...            6   
6  [2023-07, 2023-08, 2023-09, 2023-10, 2023-11, ...            6   

   anomaly rate  
0      0.997113  
1      0.883471  
2      0.848613  
3      0.852796  
4      0.926990  
5      0.912998  
6      0.938652  


In [None]:
# Day-sampled One-Class SVM run: train on 10% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=2.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.1, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_SVM(train_df, valid_df, features)
results_2m_fake = [{"window_size": 2, "anomaly": anomaly}]

results_df_2m_fake_SVM = pd.DataFrame(results_2m_fake)
print(results_df_2m_fake_SVM)

Giorni training: 86
Giorni validation: 833
   window_size   anomaly
0            2  0.010823


In [None]:
# Day-sampled One-Class SVM run: train on 20% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=4.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.2, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_SVM(train_df, valid_df, features)
results_4m_fake = [{"window_size": 4, "anomaly": anomaly}]

results_df_4m_fake_SVM = pd.DataFrame(results_4m_fake)
print(results_df_4m_fake_SVM)

Giorni training: 177
Giorni validation: 742
   window_size   anomaly
0            4  0.006252


In [None]:
# Day-sampled One-Class SVM run: train on 30% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=6.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.3, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_SVM(train_df, valid_df, features)
results_6m_fake = [{"window_size": 6, "anomaly": anomaly}]

results_df_6m_fake_SVM = pd.DataFrame(results_6m_fake)
print(results_df_6m_fake_SVM)

Giorni training: 268
Giorni validation: 651
   window_size   anomaly
0            6  0.011782


In [None]:
# Save the SVM results: one CSV for the sliding-window runs, one for the day-sampled runs.
real_results_df_SVM = pd.concat(
    [results_df_2m_SVM, results_df_4m_SVM, results_df_6m_SVM]
)
fake_results_df_SVM = pd.concat(
    [results_df_2m_fake_SVM, results_df_4m_fake_SVM, results_df_6m_fake_SVM]
)

real_results_df_SVM.to_csv("/content/drive/MyDrive/real_inclinometri_SVM.csv", index=False)
fake_results_df_SVM.to_csv("/content/drive/MyDrive/fake_inclinometri_SVM.csv", index=False)

# LOF

In [None]:
def anomaly_rate_LOF(train_df, valid_df, features, n_neighbors=20, contamination=0.01):
  """Fraction of validation rows that a novelty-mode Local Outlier Factor flags as outliers.

  The feature columns are standardised with statistics from ``train_df``; the
  LOF model is fitted on the training rows only and then used to label the
  validation rows.

  Args:
    train_df: DataFrame used to fit the scaler and the LOF model.
    valid_df: DataFrame to score.
    features: list of column names used as model input.
    n_neighbors: forwarded to ``LocalOutlierFactor``.
    contamination: forwarded to ``LocalOutlierFactor``.

  Returns:
    Share of ``valid_df`` rows labelled -1 (outlier), as a float in [0, 1].
  """
  standardizer = StandardScaler().fit(train_df[features])
  train_scaled = standardizer.transform(train_df[features])
  valid_scaled = standardizer.transform(valid_df[features])

  # novelty=True is what allows predict() to be called on rows not seen during fit().
  detector = LocalOutlierFactor(
      n_neighbors=n_neighbors,
      contamination=contamination,
      novelty=True,
  ).fit(train_scaled)

  outlier_flags = detector.predict(valid_scaled) == -1
  return np.mean(outlier_flags)

In [None]:
# Sliding-window LOF experiment: train on each consecutive 2-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 2
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_LOF(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_2m_LOF = pd.DataFrame(results)
print(results_df_2m_LOF)

          train_months  window_size  anomaly rate
0   [2023-01, 2023-02]            2      0.975743
1   [2023-02, 2023-03]            2      0.930830
2   [2023-03, 2023-04]            2      0.956951
3   [2023-04, 2023-05]            2      0.941339
4   [2023-05, 2023-06]            2      0.981864
5   [2023-06, 2023-07]            2      0.975995
6   [2023-07, 2023-08]            2      0.980897
7   [2023-08, 2023-09]            2      0.993415
8   [2023-09, 2023-10]            2      0.979690
9   [2023-10, 2023-11]            2      0.981929
10  [2023-11, 2023-12]            2      0.989628


In [None]:
# Sliding-window LOF experiment: train on each consecutive 4-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 4
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_LOF(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_4m_LOF = pd.DataFrame(results)
print(results_df_4m_LOF)

                           train_months  window_size  anomaly rate
0  [2023-01, 2023-02, 2023-03, 2023-04]            4      0.992559
1  [2023-02, 2023-03, 2023-04, 2023-05]            4      0.935701
2  [2023-03, 2023-04, 2023-05, 2023-06]            4      0.922367
3  [2023-04, 2023-05, 2023-06, 2023-07]            4      0.923107
4  [2023-05, 2023-06, 2023-07, 2023-08]            4      0.974054
5  [2023-06, 2023-07, 2023-08, 2023-09]            4      0.993561
6  [2023-07, 2023-08, 2023-09, 2023-10]            4      0.973532
7  [2023-08, 2023-09, 2023-10, 2023-11]            4      0.977982
8  [2023-09, 2023-10, 2023-11, 2023-12]            4      0.996220


In [None]:
# Sliding-window LOF experiment: train on each consecutive 6-month window taken
# from the first 12 months of `df`, score every row outside the window.
features = [f"Mode {i}" for i in range(1, 7)]
window_size = 6
n_months = 12
start_date = df["timestamp"].min()
results = []

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_df = df[in_window]
  valid_df = df[~in_window]

  anomaly = anomaly_rate_LOF(train_df, valid_df, features)

  train_months = sorted(train_df["timestamp"].dt.to_period("M").unique())
  results.append({
      "train_months": [str(period) for period in train_months],
      "window_size": len(train_months),
      "anomaly rate": anomaly,
  })

results_df_6m_LOF = pd.DataFrame(results)
print(results_df_6m_LOF)

                                        train_months  window_size  \
0  [2023-01, 2023-02, 2023-03, 2023-04, 2023-05, ...            6   
1  [2023-02, 2023-03, 2023-04, 2023-05, 2023-06, ...            6   
2  [2023-03, 2023-04, 2023-05, 2023-06, 2023-07, ...            6   
3  [2023-04, 2023-05, 2023-06, 2023-07, 2023-08, ...            6   
4  [2023-05, 2023-06, 2023-07, 2023-08, 2023-09, ...            6   
5  [2023-06, 2023-07, 2023-08, 2023-09, 2023-10, ...            6   
6  [2023-07, 2023-08, 2023-09, 2023-10, 2023-11, ...            6   

   anomaly rate  
0      0.997853  
1      0.934004  
2      0.927037  
3      0.936912  
4      0.964538  
5      0.974411  
6      0.992402  


In [None]:
# Day-sampled LOF run: train on 10% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=2.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.1, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_LOF(train_df, valid_df, features)
results_2m_fake = [{"window_size": 2, "anomaly": anomaly}]

results_df_2m_fake_LOF = pd.DataFrame(results_2m_fake)
print(results_df_2m_fake_LOF)

Giorni training: 86
Giorni validation: 833
   window_size   anomaly
0            2  0.076489


In [None]:
# Day-sampled LOF run: train on 20% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=4.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.2, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_LOF(train_df, valid_df, features)
results_4m_fake = [{"window_size": 4, "anomaly": anomaly}]

results_df_4m_fake_LOF = pd.DataFrame(results_4m_fake)
print(results_df_4m_fake_LOF)

Giorni training: 177
Giorni validation: 742
   window_size   anomaly
0            4  0.078428


In [None]:
# Day-sampled LOF run: train on 30% of the days of every month in `df_inc`,
# validate on the remaining days. The result is recorded under window_size=6.
features = [f"Mode {i}" for i in range(1, 7)]
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.3, random_state=42)

n_train_days = train_df["timestamp"].dt.date.nunique()
n_valid_days = valid_df["timestamp"].dt.date.nunique()
print("Giorni training:", n_train_days)
print("Giorni validation:", n_valid_days)

anomaly = anomaly_rate_LOF(train_df, valid_df, features)
results_6m_fake = [{"window_size": 6, "anomaly": anomaly}]

results_df_6m_fake_LOF = pd.DataFrame(results_6m_fake)
print(results_df_6m_fake_LOF)

Giorni training: 268
Giorni validation: 651
   window_size   anomaly
0            6  0.066976


In [None]:
# Save the LOF results: one CSV for the sliding-window runs, one for the day-sampled runs.
real_results_df_LOF = pd.concat(
    [results_df_2m_LOF, results_df_4m_LOF, results_df_6m_LOF]
)
fake_results_df_LOF = pd.concat(
    [results_df_2m_fake_LOF, results_df_4m_fake_LOF, results_df_6m_fake_LOF]
)

real_results_df_LOF.to_csv("/content/drive/MyDrive/real_inclinometri_LOF.csv", index=False)
fake_results_df_LOF.to_csv("/content/drive/MyDrive/fake_inclinometri_LOF.csv", index=False)

# VAE senza temperatura

In [None]:
class Sampling(Layer):
  """Reparameterisation layer: returns z = mean + exp(0.5 * log_var) * eps, eps ~ N(0, 1)."""

  def call(self, inputs):
    # `inputs` is the pair (z_mean, z_log_var).
    mean, log_var = inputs
    noise = tf.random.normal(shape=tf.shape(mean))
    sigma = tf.exp(0.5 * log_var)
    return mean + sigma * noise


class VAE(keras.Model):
  """Fully connected variational autoencoder with a custom training step.

  The encoder maps an ``input_dim``-wide vector to the mean and log-variance of
  a ``latent_dim``-dimensional Gaussian and to a sample drawn from it; the
  decoder maps a latent vector back to ``input_dim`` values. The training loss
  is ``reconstruction_loss + beta * kl_loss``.
  """

  def __init__(self, input_dim, latent_dim = 2, beta = 0.01, **kwargs):
    """Build the encoder and decoder sub-models.

    Args:
      input_dim: number of input features (also the decoder output width).
      latent_dim: size of the latent space.
      beta: weight of the KL term in the total loss.
      **kwargs: forwarded to ``keras.Model.__init__``.
    """
    super(VAE, self).__init__(**kwargs)
    self.beta = beta

    # Encoder: Dense 48 -> 24 -> 12 (ReLU), followed by two linear heads that
    # produce the latent mean and the latent log-variance.
    x_input = keras.Input(shape=(input_dim,))
    x = layers.Dense(48, activation="relu")(x_input)
    x = layers.Dense(24, activation="relu")(x)
    x = layers.Dense(12, activation="relu")(x)
    z_mean = layers.Dense(latent_dim)(x)
    z_log_var = layers.Dense(latent_dim)(x)

    # The encoder model exposes the mean, the log-variance and a sampled z.
    z = Sampling()([z_mean, z_log_var])
    self.encoder = keras.Model(x_input, [z_mean, z_log_var, z])

    # Decoder: mirror of the encoder, Dense 12 -> 24 -> 48 (ReLU), then a
    # linear layer back to input_dim values.
    latent_inputs = keras.Input(shape=(latent_dim,))
    x = layers.Dense(12, activation="relu")(latent_inputs)
    x = layers.Dense(24, activation="relu")(x)
    x = layers.Dense(48, activation="relu")(x)
    outputs = layers.Dense(input_dim, activation="linear")(x)

    self.decoder = keras.Model(latent_inputs, outputs)

  def train_step(self, data):
    """Run one optimisation step and return the loss components.

    Args:
      data: an ``(x, y)`` pair; only ``x`` is used, ``y`` is ignored.

    Returns:
      Dict with keys ``"loss"``, ``"recon_loss"`` and ``"kl_loss"``.
    """
    # The target y is unpacked but never read: the model reconstructs x itself.
    x, y = data

    with tf.GradientTape() as tape:
      z_mean, z_log_var, z = self.encoder(x, training = True)
      reconstruction = self.decoder(z, training = True)

      # Mean squared error averaged over every element of the batch.
      reconstruction_loss = tf.reduce_mean(tf.square(x - reconstruction))
      # KL divergence term, averaged (not summed) over batch and latent dimensions.
      kl_loss = -0.5 * tf.reduce_mean(1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
      total_loss = reconstruction_loss + self.beta * kl_loss

    grads = tape.gradient(total_loss, self.trainable_weights)
    self.optimizer.apply_gradients(zip(grads, self.trainable_weights))

    return {"loss": total_loss, "recon_loss": reconstruction_loss, "kl_loss": kl_loss}

  def call(self, inputs):
    """Encode ``inputs`` and decode the sampled latent vector.

    The decoder is fed the sampled ``z`` rather than ``z_mean``, and Sampling
    draws fresh random noise on every call, so repeated calls on the same
    input can return different reconstructions.
    """
    z_mean, _,  z = self.encoder(inputs)
    return self.decoder(z)

In [None]:
# Sliding-window VAE experiment: train a fresh VAE on each consecutive 1-month
# window taken from the first 12 months of `df`, score every row outside the window.
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 1
n_months = 12
start_date = df["timestamp"].min()

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  train_data = df[(df["timestamp"] >= train_start) & (df["timestamp"] < train_end)]

  val_data = df[~df["timestamp"].isin(train_data["timestamp"])]

  # Standardise with training statistics only (the unused `scaler_T` was removed).
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
  vae.build(input_shape=(None, X_train.shape[1]))

  vae.compile(optimizer=keras.optimizers.Adam())

  vae.fit(
      X_train,
      X_train,
      epochs = 50,
      batch_size = 64,
      verbose = 0
  )

  vae.save_weights(f"/content/drive/MyDrive/VAE_no_temp_1month_inclinometro_{m}.weights.h5")

  # Calibrate the threshold on the TRAINING reconstruction error. Deriving
  # mean + 3*std from the validation errors themselves caps the flagged share
  # at 10% by construction (Cantelli's inequality) and makes the rate
  # insensitive to how far the validation data drifts from the training data.
  X_train_pred = vae.predict(X_train, verbose=0)
  train_mse = np.mean(np.square(X_train - X_train_pred), axis=1)
  mean_mse = np.mean(train_mse)
  std_mse = np.std(train_mse)
  threshold = mean_mse + 3 * std_mse

  X_pred = vae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)

  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_1m_VAE = pd.DataFrame(results)
print(results_df_1m_VAE)

[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m555/555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   train_months  window_size  anomaly rate
0     [2023-01]            1      0.000000
1     [2023-02

In [None]:
# Sliding-window VAE experiment: train a fresh VAE on each consecutive 2-month
# window taken from the first 12 months of `df`, score every row outside the window.
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 2
n_months = 12
start_date = df["timestamp"].min()

for m in range(1, n_months - window_size + 2):
  # Window m spans [start + (m-1) months, start + (m-1+window_size) months).
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  train_data = df[(df["timestamp"] >= train_start) & (df["timestamp"] < train_end)]

  val_data = df[~df["timestamp"].isin(train_data["timestamp"])]

  # Standardise with training statistics only (the unused `scaler_T` was removed).
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
  vae.build(input_shape=(None, X_train.shape[1]))

  vae.compile(optimizer=keras.optimizers.Adam())

  vae.fit(
      X_train,
      X_train,
      epochs = 50,
      batch_size = 64,
      verbose = 0
  )

  vae.save_weights(f"/content/drive/MyDrive/VAE_no_temp_2month_inclinometro_{m}.weights.h5")

  # Calibrate the threshold on the TRAINING reconstruction error. Deriving
  # mean + 3*std from the validation errors themselves caps the flagged share
  # at 10% by construction (Cantelli's inequality) and makes the rate
  # insensitive to how far the validation data drifts from the training data.
  X_train_pred = vae.predict(X_train, verbose=0)
  train_mse = np.mean(np.square(X_train - X_train_pred), axis=1)
  mean_mse = np.mean(train_mse)
  std_mse = np.std(train_mse)
  threshold = mean_mse + 3 * std_mse

  X_pred = vae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)

  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_2m_VAE = pd.DataFrame(results)
print(results_df_2m_VAE)

[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
          train_months  window_size  anomaly rate
0   [2023-01, 2023-02]            2      0.000000
1   [2023-02, 2023-03]            2      0.000186
2   [2023-03, 2023-04]   

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 4  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one VAE per sliding window (shifted by one month per iteration) and
# score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
  vae.build(input_shape=(None, X_train.shape[1]))
  vae.compile(optimizer=keras.optimizers.Adam())
  vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  vae.save_weights(f"/content/drive/MyDrive/VAE_no_temp_4month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = vae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_4m_VAE = pd.DataFrame(results)
print(results_df_4m_VAE)

[1m454/454[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
                           train_months  window_size  anomaly rate
0  [2023-01, 2023-02, 2023-03, 2023-04]            4      0.000000
1  [2023-02, 2023-03, 2023-04, 2023-05]            4      0.009572
2  [2023-03, 2023-04, 2023-05, 2023-06]            4      0.015981
3  [2023-04, 2023-05, 2023-06, 2023-07]            4     

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 6  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one VAE per sliding window (shifted by one month per iteration) and
# score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
  vae.build(input_shape=(None, X_train.shape[1]))
  vae.compile(optimizer=keras.optimizers.Adam())
  vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  vae.save_weights(f"/content/drive/MyDrive/VAE_no_temp_6month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = vae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction (0-1) of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_6m_VAE = pd.DataFrame(results)
print(results_df_6m_VAE)

[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m430/430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m445/445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
                                        train_months  window_size  \
0  [2023-01, 2023-02, 2023-03, 2023-04, 2023-05, ...            6   
1  [2023-02, 2023-03, 2023-04, 2023-05, 2023-06, ...            6   
2  [2023-03, 2023-04, 2023-05, 2023-06, 2023-07, ...            6   
3  [2023-04, 2023-05, 2023-06, 2023-07, 2023-08, ...            6   
4  [2023-05, 2023-06, 2023-07, 2023-08, 2023-09, ...            6   
5  [2023-06, 2023-07, 2023-08, 2023-09, 2023-10, ...         

In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.05, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = vae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_1m_fake = [{"window_size": 1, "anomaly": anomaly_rate}]
results_df_1m_fake_VAE = pd.DataFrame(results_1m_fake)
print(results_df_1m_fake_VAE)

Giorni training: 32
Giorni validation: 887
[1m734/734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            1  0.017685


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.1, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = vae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_2m_fake = [{"window_size": 2, "anomaly": anomaly_rate}]
results_df_2m_fake_VAE = pd.DataFrame(results_2m_fake)
print(results_df_2m_fake_VAE)

Giorni training: 86
Giorni validation: 833
[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size  anomaly
0            2  0.02201


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.2, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = vae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_4m_fake = [{"window_size": 4, "anomaly": anomaly_rate}]
results_df_4m_fake_VAE = pd.DataFrame(results_4m_fake)
print(results_df_4m_fake_VAE)

Giorni training: 177
Giorni validation: 742
[1m615/615[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            4  0.014029


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.3, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

vae = VAE(input_dim=X_train.shape[1], latent_dim=2)
vae.compile(optimizer=keras.optimizers.Adam())
vae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = vae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_6m_fake = [{"window_size": 6, "anomaly": anomaly_rate}]
results_df_6m_fake_VAE = pd.DataFrame(results_6m_fake)
print(results_df_6m_fake_VAE)

Giorni training: 268
Giorni validation: 651
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            6  0.019095


In [None]:
# Save the VAE results (no temperature): the per-window-size result frames are
# stacked and written to one CSV for the "real" runs and one for the "fake" runs.
real_results_df_VAE = pd.concat([
    results_df_1m_VAE,
    results_df_2m_VAE,
    results_df_4m_VAE,
    results_df_6m_VAE,
])
real_results_df_VAE.to_csv(
    "/content/drive/MyDrive/real_inclinometri_VAE.csv",
    index=False,
)

fake_results_df_VAE = pd.concat([
    results_df_1m_fake_VAE,
    results_df_2m_fake_VAE,
    results_df_4m_fake_VAE,
    results_df_6m_fake_VAE,
])
fake_results_df_VAE.to_csv(
    "/content/drive/MyDrive/fake_inclinometri_VAE.csv",
    index=False,
)

# AE senza temperatura

In [None]:
def build_ae(input_dim, latent_dim=2):
  """Build and compile a fully connected autoencoder.

  The encoder stacks ReLU dense layers of 64, 32 and 16 units followed by a
  ReLU bottleneck of `latent_dim` units (layer name "latent"); the decoder
  mirrors it (16, 32, 64) and ends with a linear layer of `input_dim` units.

  Args:
    input_dim: number of input (and reconstructed output) features.
    latent_dim: width of the bottleneck layer.

  Returns:
    A keras.Model named "autoencoder", compiled with the "adam" optimizer
    and "mse" loss.
  """
  hidden_units = (64, 32, 16)

  # Encoder: progressively narrower dense layers down to the bottleneck.
  input_enc = layers.Input(shape=(input_dim,))
  x = input_enc
  for units in hidden_units:
    x = layers.Dense(units, activation="relu")(x)
  z = layers.Dense(latent_dim, activation="relu", name="latent")(x)

  # Decoder: the same widths in reverse order, then a linear reconstruction.
  x = z
  for units in reversed(hidden_units):
    x = layers.Dense(units, activation="relu")(x)
  output_dec = layers.Dense(input_dim, activation="linear")(x)

  # Full autoencoder, from raw input to reconstruction.
  ae = keras.Model(input_enc, output_dec, name="autoencoder")
  ae.compile(optimizer="adam", loss="mse")
  return ae

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 1  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one autoencoder per sliding window (shifted by one month per
# iteration) and score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
  ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  ae.save_weights(f"/content/drive/MyDrive/AE_no_temp_1month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = ae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_1m_AE = pd.DataFrame(results)
print(results_df_1m_AE)

[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m558/558[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m555/555[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m538/538[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
   train_months  window_size  anomaly rate
0     [2023-01]            1      0.000000
1     [2023-02

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 2  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one autoencoder per sliding window (shifted by one month per
# iteration) and score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
  ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  ae.save_weights(f"/content/drive/MyDrive/AE_no_temp_2month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = ae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction (0-1) of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_2m_AE = pd.DataFrame(results)
print(results_df_2m_AE)

[1m505/505[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m506/506[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m533/533[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m532/532[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m548/548[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m530/530[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m513/513[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
          train_months  window_size  anomaly rate
0   [2023-01, 2023-02]            2      0.000000
1   [2023-02, 2023-03]            2      0.000000
2   [2023-03, 2023-04]   

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 4  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one autoencoder per sliding window (shifted by one month per
# iteration) and score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
  ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  ae.save_weights(f"/content/drive/MyDrive/AE_no_temp_4month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = ae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_4m_AE = pd.DataFrame(results)
print(results_df_4m_AE)

[1m454/454[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m474/474[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m482/482[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m481/481[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m496/496[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m497/497[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
                           train_months  window_size  anomaly rate
0  [2023-01, 2023-02, 2023-03, 2023-04]            4      0.000000
1  [2023-02, 2023-03, 2023-04, 2023-05]            4      0.010364
2  [2023-03, 2023-04, 2023-05, 2023-06]            4      0.001169
3  [2023-04, 2023-05, 2023-06, 2023-07]            4     

In [None]:
# Parameters
features = ["Mode 1", "Mode 2", "Mode 3", "Mode 4", "Mode 5", "Mode 6"]
results = []
window_size = 6  # length of each training window, in months
n_months = 12    # the windows slide over the first n_months months after start_date
start_date = df["timestamp"].min()

# Train one autoencoder per sliding window (shifted by one month per
# iteration) and score it on every row that falls outside that window.
for m in range(1, n_months - window_size + 2):
  train_start = start_date + pd.DateOffset(months=m-1)
  train_end = start_date + pd.DateOffset(months=m-1+window_size)
  in_window = (df["timestamp"] >= train_start) & (df["timestamp"] < train_end)
  train_data = df[in_window]

  # Window membership depends only on the timestamp, so the complement of the
  # mask selects exactly the rows whose timestamp is not a training timestamp.
  val_data = df[~in_window]

  # Standardize with statistics fitted on the training window only.
  scaler_X = StandardScaler()
  X_train = scaler_X.fit_transform(train_data[features])
  X_val = scaler_X.transform(val_data[features])

  ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
  ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

  # Keep the weights of each window's model, indexed by the window number m.
  ae.save_weights(f"/content/drive/MyDrive/AE_no_temp_6month_inclinometro_{m}.weights.h5")

  # Per-row reconstruction error on the validation rows.
  X_pred = ae.predict(X_val)
  mse = np.mean(np.square(X_val - X_pred), axis=1)

  # Flag rows whose error exceeds mean + 3 standard deviations.
  # NOTE(review): the threshold is computed from the validation errors
  # themselves, not from the training errors -- confirm this is intended.
  mean_mse = np.mean(mse)
  std_mse = np.std(mse)
  threshold = mean_mse + 3 * std_mse
  anomalies = mse > threshold
  anomaly_rate = np.mean(anomalies)  # fraction (0-1) of flagged validation rows

  # "window_size" stores the number of distinct calendar months found in the
  # training rows, not the configured window_size variable.
  train_months = sorted(train_data["timestamp"].dt.to_period("M").unique())
  results.append({"train_months": [str(x) for x in train_months], "window_size": len(train_months), "anomaly rate": anomaly_rate})

results_df_6m_AE = pd.DataFrame(results)
print(results_df_6m_AE)

[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m423/423[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m430/430[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m449/449[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[1m465/465[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m446/446[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[1m445/445[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
                                        train_months  window_size  \
0  [2023-01, 2023-02, 2023-03, 2023-04, 2023-05, ...            6   
1  [2023-02, 2023-03, 2023-04, 2023-05, 2023-06, ...            6   
2  [2023-03, 2023-04, 2023-05, 2023-06, 2023-07, ...            6   
3  [2023-04, 2023-05, 2023-06, 2023-07, 2023-08, ...            6   
4  [2023-05, 2023-06, 2023-07, 2023-08, 2023-09, ...            6   
5  [2023-06, 2023-07, 2023-08, 2023-09, 2023-10, ...         

In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.05, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = ae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_1m_fake = [{"window_size": 1, "anomaly": anomaly_rate}]
results_df_1m_fake_AE = pd.DataFrame(results_1m_fake)
print(results_df_1m_fake_AE)

Giorni training: 32
Giorni validation: 887
[1m734/734[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            1  0.028893


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.1, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = ae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_2m_fake = [{"window_size": 2, "anomaly": anomaly_rate}]
results_df_2m_fake_AE = pd.DataFrame(results_2m_fake)
print(results_df_2m_fake_AE)

Giorni training: 86
Giorni validation: 833
[1m688/688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
   window_size   anomaly
0            2  0.019918


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.2, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = ae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_4m_fake = [{"window_size": 4, "anomaly": anomaly_rate}]
results_df_4m_fake_AE = pd.DataFrame(results_4m_fake)
print(results_df_4m_fake_AE)

Giorni training: 177
Giorni validation: 742
[1m615/615[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            4  0.018247


In [None]:
# Parameters
features = [
    "Mode 1", "Mode 2", "Mode 3",
    "Mode 4", "Mode 5", "Mode 6",
]

# create_fake_month is defined elsewhere in the notebook; frac_per_month is
# presumably the share of each month assigned to training -- TODO confirm.
train_df, valid_df = create_fake_month(df_inc, frac_per_month=0.3, random_state=42)

# Report how many distinct calendar days ended up in each split.
print("Giorni training:", train_df["timestamp"].dt.date.nunique())
print("Giorni validation:", valid_df["timestamp"].dt.date.nunique())

# Fit the scaler on the training rows only, then apply it to both splits.
scaler_X = StandardScaler().fit(train_df[features])
X_train = scaler_X.transform(train_df[features])
X_val = scaler_X.transform(valid_df[features])

ae = build_ae(input_dim=X_train.shape[1], latent_dim=2)
ae.fit(X_train, X_train, epochs=50, batch_size=64, verbose=0)

# Per-row reconstruction error on the validation rows.
X_pred = ae.predict(X_val)
mse = np.square(X_val - X_pred).mean(axis=1)

# Flag rows whose error exceeds mean + 3 standard deviations of the
# validation errors; anomaly_rate is the flagged fraction.
mean_mse = mse.mean()
std_mse = mse.std()
threshold = mean_mse + 3 * std_mse
anomalies = mse > threshold
anomaly_rate = anomalies.mean()

results_6m_fake = [{"window_size": 6, "anomaly": anomaly_rate}]
results_df_6m_fake_AE = pd.DataFrame(results_6m_fake)
print(results_df_6m_fake_AE)

Giorni training: 268
Giorni validation: 651
[1m539/539[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
   window_size   anomaly
0            6  0.015786


In [None]:
# Save the AE results (no temperature): the per-window-size result frames are
# stacked and written to one CSV for the "real" runs and one for the "fake" runs.
real_results_df_AE = pd.concat([
    results_df_1m_AE,
    results_df_2m_AE,
    results_df_4m_AE,
    results_df_6m_AE,
])
real_results_df_AE.to_csv(
    "/content/drive/MyDrive/real_inclinometri_AE.csv",
    index=False,
)

fake_results_df_AE = pd.concat([
    results_df_1m_fake_AE,
    results_df_2m_fake_AE,
    results_df_4m_fake_AE,
    results_df_6m_fake_AE,
])
fake_results_df_AE.to_csv(
    "/content/drive/MyDrive/fake_inclinometri_AE.csv",
    index=False,
)