Connected to .venv (Python 3.10.16)

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 20),
        "batch_size": trial.suggest_int("batch_size", 4, 16)
    }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True):
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37,
            encoder_sizes=(128,128),
            decoder_sizes=(256,256),
            kernel="cauchy",
            beta=0.2,
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=7.0, #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)

        mae_list, rmse_list = [], []
        for i in range(n_features):
            mask_i = test_mask[:, :, i]
            pred = test_imputation_denorm[:, :, i][mask_i]
            true = test_ori_denorm[:, :, i][mask_i]
            if len(true) == 0: continue
            mae = np.mean(np.abs(pred - true))
            rmse = np.sqrt(mean_squared_error(true, pred))
            mae_list.append(mae)
            rmse_list.append(rmse)
            mlflow.log_metric(f"MAE_{i}", mae)
            mlflow.log_metric(f"RMSE_{i}", rmse)

        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        return np.mean(mae_list), np.mean(rmse_list)

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study") as parent_run:
    study = optuna.create_study(directions=["minimize","minimize"])
    study.optimize(objective, n_trials=10)

    best_params = study.best_trial.params
    best_value = study.best_trial.value # or best_trial.values if multi-objective

    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

  from .autonotebook import tqdm as notebook_tqdm


[34m
████████╗██╗███╗   ███╗███████╗    ███████╗███████╗██████╗ ██╗███████╗███████╗    █████╗ ██╗
╚══██╔══╝██║████╗ ████║██╔════╝    ██╔════╝██╔════╝██╔══██╗██║██╔════╝██╔════╝   ██╔══██╗██║
   ██║   ██║██╔████╔██║█████╗█████╗███████╗█████╗  ██████╔╝██║█████╗  ███████╗   ███████║██║
   ██║   ██║██║╚██╔╝██║██╔══╝╚════╝╚════██║██╔══╝  ██╔══██╗██║██╔══╝  ╚════██║   ██╔══██║██║
   ██║   ██║██║ ╚═╝ ██║███████╗    ███████║███████╗██║  ██║██║███████╗███████║██╗██║  ██║██║
   ╚═╝   ╚═╝╚═╝     ╚═╝╚══════╝    ╚══════╝╚══════╝╚═╝  ╚═╝╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═╝╚═╝
ai4ts v0.0.3 - building AI for unified time-series analysis, https://time-series.ai [0m

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Colu

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))


CUDA available: True
Using CUDA


2025/05/19 22:50:13 INFO mlflow.tracking.fluent: Experiment with name 'GP_VAE_2' does not exist. Creating a new experiment.
2025/05/19 22:50:13 INFO mlflow.tracking.fluent: Experiment with name 'GP-VAE-2' does not exist. Creating a new experiment.
[I 2025-05-19 22:50:13,818] A new study created in memory with name: no-name-80320a2b-4c2d-4a98-b708-5ed8770305df
2025-05-19 22:50:13 [INFO]: Using the given device: cuda
2025-05-19 22:50:13 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225013
2025-05-19 22:50:13 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225013/tensorboard
2025-05-19 22:50:13 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678
2025-05-19 22:50:16 [INFO]: Epoch 001 - training loss (default): 11669.4683, validatio

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:50:49,927] Trial 0 finished with values: [316478.3729327418, 406613.69210850203] and parameters: {'lr': 0.0001704281313940993, 'epochs': 16, 'batch_size': 14}.
2025-05-19 22:50:49 [INFO]: Using the given device: cuda
2025-05-19 22:50:49 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225049
2025-05-19 22:50:49 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225049/tensorboard
2025-05-19 22:50:49 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/b25851d697854fdf8c6719ae17207083
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:50:52 [INFO]: Epoch 001 - training loss (default): 9978.8988, validation loss: 21890.1545
2025-05-19 22:50:54 [INFO]: Epoch 002 - training loss (default): 9128.3903, validation loss: 21643.0596
2025-05-19 22:50:57 [INFO]: Epoch 003 - training loss (default): 9126.3512, validation loss: 21473.5833
2025-05-19 22:50:59 [INFO]: Epoch 004 - training loss (default): 9125.5432, validation loss: 21339.3894
2025-05-19 22:51:02 [INFO]: Epoch 005 - training loss (default): 9125.0234, validation loss: 21238.3811
2025-05-19 22:51:04 [INFO]: Epoch 006 - training loss (default): 9124.7301, validation loss: 21150.8241
2025-05-19 22:51:07 [INFO]: Epoch 007 - training loss (default): 9124.5835, validation loss: 21077.1386
2025-05-19 22:51:10 [INFO]: Epoch 008 - training loss (default): 9124.3002, validation loss: 21011.0777
2025-05-19 22:51:12 [INFO]: Epoch 009 - training loss (default): 9124.1059, validation loss: 20958.1911
2025-05-19 22:51:15 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:51:39,196] Trial 1 finished with values: [321771.8614608287, 461946.163265407] and parameters: {'lr': 0.00046604823908184394, 'epochs': 19, 'batch_size': 12}.
2025-05-19 22:51:39 [INFO]: Using the given device: cuda
2025-05-19 22:51:39 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225139
2025-05-19 22:51:39 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225139/tensorboard
2025-05-19 22:51:39 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/cadd370fecc7461f956ed204e7e4b7fb
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:51:41 [INFO]: Epoch 001 - training loss (default): 10540.1574, validation loss: 22315.8322
2025-05-19 22:51:43 [INFO]: Epoch 002 - training loss (default): 9130.4513, validation loss: 22008.8104
2025-05-19 22:51:46 [INFO]: Epoch 003 - training loss (default): 9127.7234, validation loss: 21812.1921
2025-05-19 22:51:48 [INFO]: Epoch 004 - training loss (default): 9126.4930, validation loss: 21664.0030
2025-05-19 22:51:50 [INFO]: Epoch 005 - training loss (default): 9125.8423, validation loss: 21545.2151
2025-05-19 22:51:53 [INFO]: Epoch 006 - training loss (default): 9125.2495, validation loss: 21442.2666
2025-05-19 22:51:55 [INFO]: Epoch 007 - training loss (default): 9124.9736, validation loss: 21353.0749
2025-05-19 22:51:57 [INFO]: Epoch 008 - training loss (default): 9124.6476, validation loss: 21274.8237
2025-05-19 22:52:00 [INFO]: Epoch 009 - training loss (default): 9124.5043, validation loss: 21205.5013
2025-05-19 22:52:02 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:52:05,572] Trial 2 finished with values: [314569.3917306948, 409205.8877926546] and parameters: {'lr': 0.0003162564228356125, 'epochs': 11, 'batch_size': 13}.
2025-05-19 22:52:05 [INFO]: Using the given device: cuda
2025-05-19 22:52:05 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225205
2025-05-19 22:52:05 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225205/tensorboard
2025-05-19 22:52:05 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/9f6d1369581641ef88f8c0d1179fa32f
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:52:09 [INFO]: Epoch 001 - training loss (default): 10407.9615, validation loss: 22344.2183
2025-05-19 22:52:12 [INFO]: Epoch 002 - training loss (default): 9130.7042, validation loss: 22093.7795
2025-05-19 22:52:16 [INFO]: Epoch 003 - training loss (default): 9127.8214, validation loss: 21943.8718
2025-05-19 22:52:19 [INFO]: Epoch 004 - training loss (default): 9126.6722, validation loss: 21822.1376
2025-05-19 22:52:22 [INFO]: Epoch 005 - training loss (default): 9125.9316, validation loss: 21719.6457
2025-05-19 22:52:26 [INFO]: Epoch 006 - training loss (default): 9125.3987, validation loss: 21630.8705
2025-05-19 22:52:29 [INFO]: Epoch 007 - training loss (default): 9124.9800, validation loss: 21553.6136
2025-05-19 22:52:33 [INFO]: Epoch 008 - training loss (default): 9124.7287, validation loss: 21485.1410
2025-05-19 22:52:36 [INFO]: Epoch 009 - training loss (default): 9124.4651, validation loss: 21426.8873
2025-05-19 22:52:40 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:52:55,643] Trial 3 finished with values: [312021.7489440709, 436930.24347313656] and parameters: {'lr': 0.00020943290721424612, 'epochs': 14, 'batch_size': 8}.
2025-05-19 22:52:55 [INFO]: Using the given device: cuda
2025-05-19 22:52:55 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225255
2025-05-19 22:52:55 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225255/tensorboard
2025-05-19 22:52:55 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/efc8c5993ebe49b79510e3677132fa09
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:52:58 [INFO]: Epoch 001 - training loss (default): 9846.5052, validation loss: 21664.2411
2025-05-19 22:53:01 [INFO]: Epoch 002 - training loss (default): 9127.2023, validation loss: 21355.1006
2025-05-19 22:53:04 [INFO]: Epoch 003 - training loss (default): 9125.5760, validation loss: 21164.5458
2025-05-19 22:53:07 [INFO]: Epoch 004 - training loss (default): 9125.0163, validation loss: 21026.7654
2025-05-19 22:53:10 [INFO]: Epoch 005 - training loss (default): 9124.5555, validation loss: 20914.7993
2025-05-19 22:53:13 [INFO]: Epoch 006 - training loss (default): 9124.2636, validation loss: 20823.2091
2025-05-19 22:53:16 [INFO]: Epoch 007 - training loss (default): 9124.1494, validation loss: 20753.4097
2025-05-19 22:53:19 [INFO]: Epoch 008 - training loss (default): 9123.7953, validation loss: 20683.0910
2025-05-19 22:53:22 [INFO]: Epoch 009 - training loss (default): 9123.6037, validation loss: 20616.2844
2025-05-19 22:53:25 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:53:34,532] Trial 4 finished with values: [333494.30425831483, 477358.01158874435] and parameters: {'lr': 0.0005086467435686872, 'epochs': 13, 'batch_size': 10}.
2025-05-19 22:53:34 [INFO]: Using the given device: cuda
2025-05-19 22:53:34 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225334
2025-05-19 22:53:34 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225334/tensorboard
2025-05-19 22:53:34 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/b947c52674404f7cb6c53f363e7db200
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:53:39 [INFO]: Epoch 001 - training loss (default): 9353.9612, validation loss: 20731.1173
2025-05-19 22:53:45 [INFO]: Epoch 002 - training loss (default): 9125.3628, validation loss: 20563.5664
2025-05-19 22:53:50 [INFO]: Epoch 003 - training loss (default): 9124.8708, validation loss: 20508.9881
2025-05-19 22:53:55 [INFO]: Epoch 004 - training loss (default): 9124.2663, validation loss: 20457.7714
2025-05-19 22:54:01 [INFO]: Epoch 005 - training loss (default): 9123.9203, validation loss: 20315.8302
2025-05-19 22:54:06 [INFO]: Epoch 006 - training loss (default): 9123.6946, validation loss: 20307.4837
2025-05-19 22:54:11 [INFO]: Epoch 007 - training loss (default): 9123.6612, validation loss: 20105.3309
2025-05-19 22:54:17 [INFO]: Epoch 008 - training loss (default): 9123.5234, validation loss: 20040.2857
2025-05-19 22:54:22 [INFO]: Epoch 009 - training loss (default): 9123.3788, validation loss: 19852.8430
2025-05-19 22:54:27 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:55:18,273] Trial 5 finished with values: [317963.1586264935, 455580.2292209908] and parameters: {'lr': 0.0008752538034088159, 'epochs': 19, 'batch_size': 5}.
2025-05-19 22:55:18 [INFO]: Using the given device: cuda
2025-05-19 22:55:18 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225518
2025-05-19 22:55:18 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225518/tensorboard
2025-05-19 22:55:18 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/bafea9273bd94abeb22ef19756d290ed
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:55:22 [INFO]: Epoch 001 - training loss (default): 9609.6060, validation loss: 21450.8878
2025-05-19 22:55:26 [INFO]: Epoch 002 - training loss (default): 9125.9232, validation loss: 21185.1344
2025-05-19 22:55:30 [INFO]: Epoch 003 - training loss (default): 9124.9774, validation loss: 21028.2179
2025-05-19 22:55:34 [INFO]: Epoch 004 - training loss (default): 9124.7810, validation loss: 20908.9630
2025-05-19 22:55:38 [INFO]: Epoch 005 - training loss (default): 9124.4892, validation loss: 20820.0712
2025-05-19 22:55:42 [INFO]: Epoch 006 - training loss (default): 9124.3911, validation loss: 20762.5896
2025-05-19 22:55:46 [INFO]: Epoch 007 - training loss (default): 9123.9307, validation loss: 20687.1977
2025-05-19 22:55:51 [INFO]: Epoch 008 - training loss (default): 9123.5644, validation loss: 20612.4713
2025-05-19 22:55:56 [INFO]: Epoch 009 - training loss (default): 9123.4825, validation loss: 20533.0609
2025-05-19 22:56:00 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:56:00,967] Trial 6 finished with values: [332063.22014325875, 475107.3750530148] and parameters: {'lr': 0.0005305717913643639, 'epochs': 10, 'batch_size': 7}.
2025-05-19 22:56:00 [INFO]: Using the given device: cuda
2025-05-19 22:56:00 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225600
2025-05-19 22:56:00 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225600/tensorboard
2025-05-19 22:56:00 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/fa069e2a0d164595b5ccf431ca8f3da7
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:56:04 [INFO]: Epoch 001 - training loss (default): 9777.7843, validation loss: 21675.3546
2025-05-19 22:56:07 [INFO]: Epoch 002 - training loss (default): 9127.0944, validation loss: 21297.4761
2025-05-19 22:56:11 [INFO]: Epoch 003 - training loss (default): 9125.5157, validation loss: 21071.1036
2025-05-19 22:56:15 [INFO]: Epoch 004 - training loss (default): 9124.9748, validation loss: 20922.4289
2025-05-19 22:56:18 [INFO]: Epoch 005 - training loss (default): 9124.5767, validation loss: 20798.0840
2025-05-19 22:56:22 [INFO]: Epoch 006 - training loss (default): 9124.3178, validation loss: 20697.2786
2025-05-19 22:56:26 [INFO]: Epoch 007 - training loss (default): 9124.0242, validation loss: 20608.8538
2025-05-19 22:56:29 [INFO]: Epoch 008 - training loss (default): 9123.8435, validation loss: 20527.6828
2025-05-19 22:56:32 [INFO]: Epoch 009 - training loss (default): 9123.6265, validation loss: 20447.3202
2025-05-19 22:56:36 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:56:40,770] Trial 7 finished with values: [316703.6145334785, 451838.01813143794] and parameters: {'lr': 0.00040039036187399375, 'epochs': 11, 'batch_size': 8}.
2025-05-19 22:56:40 [INFO]: Using the given device: cuda
2025-05-19 22:56:40 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225640
2025-05-19 22:56:40 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225640/tensorboard
2025-05-19 22:56:40 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/0898f76fc65d4520855707a68e590156
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:56:43 [INFO]: Epoch 001 - training loss (default): 10068.6461, validation loss: 21907.9115
2025-05-19 22:56:46 [INFO]: Epoch 002 - training loss (default): 9127.5650, validation loss: 21673.9956
2025-05-19 22:56:49 [INFO]: Epoch 003 - training loss (default): 9125.7771, validation loss: 21522.0588
2025-05-19 22:56:52 [INFO]: Epoch 004 - training loss (default): 9125.0538, validation loss: 21410.1648
2025-05-19 22:56:55 [INFO]: Epoch 005 - training loss (default): 9124.7070, validation loss: 21320.1477
2025-05-19 22:56:58 [INFO]: Epoch 006 - training loss (default): 9124.5084, validation loss: 21242.3446
2025-05-19 22:57:01 [INFO]: Epoch 007 - training loss (default): 9124.2445, validation loss: 21166.5282
2025-05-19 22:57:04 [INFO]: Epoch 008 - training loss (default): 9124.2001, validation loss: 21119.5911
2025-05-19 22:57:07 [INFO]: Epoch 009 - training loss (default): 9123.9592, validation loss: 21055.5204
2025-05-19 22:57:10 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:57:14,538] Trial 8 finished with values: [317639.97061383846, 452181.89225430466] and parameters: {'lr': 0.00046353380456412536, 'epochs': 11, 'batch_size': 11}.
2025-05-19 22:57:14 [INFO]: Using the given device: cuda
2025-05-19 22:57:14 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225714
2025-05-19 22:57:14 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250519_T225714/tensorboard
2025-05-19 22:57:14 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/8cd65d77cf65497a8a31eea3ad1faa7c
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-19 22:57:17 [INFO]: Epoch 001 - training loss (default): 10024.7653, validation loss: 21260.5430
2025-05-19 22:57:21 [INFO]: Epoch 002 - training loss (default): 9128.0963, validation loss: 20834.3593
2025-05-19 22:57:24 [INFO]: Epoch 003 - training loss (default): 9126.5857, validation loss: 20615.0654
2025-05-19 22:57:27 [INFO]: Epoch 004 - training loss (default): 9125.7249, validation loss: 20464.5100
2025-05-19 22:57:30 [INFO]: Epoch 005 - training loss (default): 9125.1479, validation loss: 20347.4020
2025-05-19 22:57:33 [INFO]: Epoch 006 - training loss (default): 9124.7165, validation loss: 20242.6674
2025-05-19 22:57:36 [INFO]: Epoch 007 - training loss (default): 9124.4796, validation loss: 20153.9985
2025-05-19 22:57:41 [INFO]: Epoch 008 - training loss (default): 9124.2670, validation loss: 20077.9207
2025-05-19 22:57:45 [INFO]: Epoch 009 - training loss (default): 9124.0293, validation loss: 19998.7656
2025-05-19 22:57:49 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-19 22:58:03,570] Trial 9 finished with values: [331623.7726320681, 474579.2345077271] and parameters: {'lr': 0.0003217054036792562, 'epochs': 14, 'batch_size': 9}.


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/7060a3a2bca74d1cac7da85e22beed7b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study at: http://localhost:5000/#/experiments/832352739106302318/runs/2b73088bf5864b02ba1e8eef1c6d61e3
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


RuntimeError: A single best trial cannot be retrieved from a multi-objective study. Consider using Study.best_trials to retrieve a list containing the best trials.

In [None]:
feature_names

['fr_eng',
 'te_exh_cyl_out__0',
 'pd_air_ic__0',
 'pr_exh_turb_out__0',
 'te_air_ic_out__0',
 'te_seawater',
 'te_air_comp_in_a__0',
 'te_air_comp_in_b__0',
 'fr_tc__0',
 'pr_baro',
 'pd_air_ic__0_1',
 'pr_exh_rec',
 'te_exh_turb_in__0',
 'te_exh_turb_out__0',
 'bo_aux_blower_running',
 're_eng_load',
 'pr_air_scav_ecs',
 'pr_air_scav',
 'te_air_scav_rec',
 'te_air_ic_out__0_1',
 'pr_cyl_comp__0',
 'pr_cyl_max__0',
 'se_mip__0',
 'te_exh_cyl_out__0_1',
 'fr_eng_setpoint',
 'te_air_scav_rec_iso',
 'pr_cyl_max_mv_iso',
 'pr_cyl_comp_mv_iso',
 'fr_eng_ecs',
 'pr_air_scav_iso',
 'engine_type_G80ME-C9.5-GI-LPSCR']

In [None]:
df1["te_exh_cyl_out__0"].std()

39.82180184402368

In [None]:
df1["fr_eng"].std()

0.14288534662599145

In [None]:
df1["te_air_scav_rec"].std()

2.2921270072758015

In [None]:
df1.describe()

Unnamed: 0,time,fr_eng,te_exh_cyl_out__0,pd_air_ic__0,pr_exh_turb_out__0,te_air_ic_out__0,te_seawater,te_air_comp_in_a__0,te_air_comp_in_b__0,fr_tc__0,...,pr_cyl_max__0,se_mip__0,te_exh_cyl_out__0_1,fr_eng_setpoint,te_air_scav_rec_iso,pr_cyl_max_mv_iso,pr_cyl_comp_mv_iso,fr_eng_ecs,pr_air_scav_iso,engine_type_G80ME-C9.5-GI-LPSCR
count,105527,105527.0,105527.0,105527.0,0.0,105527.0,105527.0,0.0,0.0,0.0,...,105379.0,105379.0,105527.0,105527.0,63515.0,62682.0,62682.0,105527.0,62682.0,105527.0
mean,2024-03-04 03:38:36.214807296,0.832822,529.817308,3111.211728,,307.180888,273.15,,,,...,14519040.0,947342.1,529.817308,0.832486,304.498497,14560750.0,10775810.0,0.832822,99827.213871,1.0
min,2023-10-01 05:00:00,0.169932,319.15,0.0,,293.35,273.15,,,,...,4044394.0,-39909.92,319.15,0.0,299.030721,4031951.0,4143255.0,0.169932,970.760107,1.0
25%,2023-12-12 22:18:30,0.777478,513.15,1770.0,,305.85,273.15,,,,...,12822900.0,789936.1,513.15,0.778309,302.687664,13003700.0,9346764.0,0.777478,60163.341706,1.0
50%,2024-03-05 10:30:00,0.897176,542.15,3290.0,,307.15,273.15,,,,...,15668810.0,1045076.0,542.15,0.899969,303.826959,15515220.0,11576140.0,0.897176,106679.359693,1.0
75%,2024-05-13 19:55:30,0.917475,553.15,4370.0,,308.15,273.15,,,,...,16378850.0,1121633.0,553.15,0.91665,307.11844,16288800.0,12276500.0,0.917475,131557.217291,1.0
max,2024-07-31 16:36:00,1.022311,597.15,10000.0,,318.75,273.15,,,,...,18825220.0,1513627.0,597.15,1.016633,310.080785,18979770.0,15606240.0,1.022311,225486.996753,1.0
std,,0.142885,39.821802,1808.911991,,1.789162,5.684369e-14,,,,...,2560751.0,265828.1,39.821802,0.143879,2.474538,2467797.0,2251713.0,0.142885,52369.802338,0.0


In [None]:
df1["pr_baro"].std()

759.7741848141841

In [None]:
df1["pr_baro"].std()

759.7741848141841

In [None]:
df1["pr_cyl_max__0"].std()

2560750.9406238147

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0),
        "kernel":trial.suggest_categorical("kernel",["cauchy", "diffusion", "rbf", "matern"]),



    }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True):
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel=params["kernel"],
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)

        mae_list, rmse_list = [], []
        for i in range(n_features):
            mask_i = test_mask[:, :, i]
            pred = test_imputation_denorm[:, :, i][mask_i]
            true = test_ori_denorm[:, :, i][mask_i]
            if len(true) == 0: continue
            mae = np.mean(np.abs(pred - true))
            rmse = np.sqrt(mean_squared_error(true, pred))
            mae_list.append(mae)
            rmse_list.append(rmse)
            mlflow.log_metric(f"MAE_{i}", mae)
            mlflow.log_metric(f"RMSE_{i}", rmse)

         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("model_state_dict", GPVAE.state_dict())
        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:33:10,118] A new study created in memory with name: no-name-5f63202d-f249-4cf2-984e-5320fadef355
2025-05-20 00:33:10 [INFO]: Using the given device: cuda
2025-05-20 00:33:10 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T003310
2025-05-20 00:33:10 [INFO]: Tensorboard file will be saved to /home/ec2-use

🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/15c38ce0534446fcb51213a37a58a9b7
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/a1950ed8c0a04febb7e4910ed83d7038
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


RuntimeError: Training got interrupted. Model was not trained. Please investigate the error printed above.

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0),
        "kernel":trial.suggest_categorical("kernel",["cauchy", "diffusion", "rbf"]),



    }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True):
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel=params["kernel"],
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)

        mae_list, rmse_list = [], []
        for i in range(n_features):
            mask_i = test_mask[:, :, i]
            pred = test_imputation_denorm[:, :, i][mask_i]
            true = test_ori_denorm[:, :, i][mask_i]
            if len(true) == 0: continue
            mae = np.mean(np.abs(pred - true))
            rmse = np.sqrt(mean_squared_error(true, pred))
            mae_list.append(mae)
            rmse_list.append(rmse)
            mlflow.log_metric(f"MAE_{i}", mae)
            mlflow.log_metric(f"RMSE_{i}", rmse)

         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("model_state_dict", GPVAE.state_dict())
        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:37:21,647] A new study created in memory with name: no-name-f68d0a3f-5771-496e-8567-d324eeb96eab
2025-05-20 00:37:21 [INFO]: Using the given device: cuda
2025-05-20 00:37:21 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T003721
2025-05-20 00:37:21 [INFO]: Tensorboard file will be saved to /home/ec2-use

CUDA available: True
Using CUDA


2025-05-20 00:37:22 [INFO]: Epoch 001 - training loss (default): 29023.6647, validation loss: 36484.5052
2025-05-20 00:37:22 [INFO]: Epoch 002 - training loss (default): 12432.6056, validation loss: 35268.0334
2025-05-20 00:37:23 [INFO]: Epoch 003 - training loss (default): 10335.1963, validation loss: 34782.2183
2025-05-20 00:37:24 [INFO]: Epoch 004 - training loss (default): 10241.5189, validation loss: 34596.8290
2025-05-20 00:37:24 [INFO]: Epoch 005 - training loss (default): 10221.6641, validation loss: 34481.5977
2025-05-20 00:37:25 [INFO]: Epoch 006 - training loss (default): 10211.6875, validation loss: 34380.6163
2025-05-20 00:37:26 [INFO]: Epoch 007 - training loss (default): 10205.3616, validation loss: 34282.8368
2025-05-20 00:37:26 [INFO]: Epoch 008 - training loss (default): 10200.9662, validation loss: 34190.5156
2025-05-20 00:37:27 [INFO]: Epoch 009 - training loss (default): 10197.9365, validation loss: 34100.7474
2025-05-20 00:37:28 [INFO]: Epoch 010 - training loss (

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[W 2025-05-20 00:37:43,270] Trial 0 failed with parameters: {'lr': 0.0003174471955074421, 'epochs': 33, 'batch_size': 128, 'length_scale': 1.9734374565972177, 'beta': 0.9182668977722848, 'kernel': 'rbf'} because of the following error: AttributeError("type object 'GPVAE' has no attribute 'state_dict'").
Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/sensor-imputation-thesis/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-13-0ad50cddbc5f>", line 363, in objective
    trial.set_user_attr("model_state_dict", GPVAE.state_dict())
AttributeError: type object 'GPVAE' has no attribute 'state_dict'
[W 2025-05-20 00:37:43,272] Trial 0 failed with value None.


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/099574dbb9394265b2d09aab8318e34b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/63f9b9e323ea4559a36fb66b465aaa86
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


AttributeError: type object 'GPVAE' has no attribute 'state_dict'

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0),
        "kernel":trial.suggest_categorical("kernel",["cauchy", "diffusion", "rbf"]),



    }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True):
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel=params["kernel"],
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)

        mae_list, rmse_list = [], []
        for i in range(n_features):
            mask_i = test_mask[:, :, i]
            pred = test_imputation_denorm[:, :, i][mask_i]
            true = test_ori_denorm[:, :, i][mask_i]
            if len(true) == 0: continue
            mae = np.mean(np.abs(pred - true))
            rmse = np.sqrt(mean_squared_error(true, pred))
            mae_list.append(mae)
            rmse_list.append(rmse)
            mlflow.log_metric(f"MAE_{i}", mae)
            mlflow.log_metric(f"RMSE_{i}", rmse)

         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:38:31,085] A new study created in memory with name: no-name-6b973a1c-b40c-40eb-a08e-1ca5ab5381a4
2025-05-20 00:38:31 [INFO]: Using the given device: cuda
2025-05-20 00:38:31 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T003831
2025-05-20 00:38:31 [INFO]: Tensorboard file will be saved to /home/ec2-use

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[W 2025-05-20 00:38:42,179] Trial 0 failed with parameters: {'lr': 0.00014138309898143063, 'epochs': 13, 'batch_size': 64, 'length_scale': 2.0986642679908574, 'beta': 0.5303956187801935, 'kernel': 'rbf'} because of the following error: NameError("name 'run' is not defined").
Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/sensor-imputation-thesis/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-14-321dd7e3a26f>", line 363, in objective
    trial.set_user_attr("mlflow_run_id", run.info.run_id)
NameError: name 'run' is not defined
[W 2025-05-20 00:38:42,181] Trial 0 failed with value None.


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/0ab070eef2004f2992b99541db2825f4
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/13d977d4fd4a4eddba68fcd03986c20a
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


NameError: name 'run' is not defined

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0),
        "kernel":trial.suggest_categorical("kernel",["cauchy", "diffusion", "rbf"]),
 }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True) as run:
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel=params["kernel"],
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)

        mae_list, rmse_list = [], []
        for i in range(n_features):
            mask_i = test_mask[:, :, i]
            pred = test_imputation_denorm[:, :, i][mask_i]
            true = test_ori_denorm[:, :, i][mask_i]
            if len(true) == 0: continue
            mae = np.mean(np.abs(pred - true))
            rmse = np.sqrt(mean_squared_error(true, pred))
            mae_list.append(mae)
            rmse_list.append(rmse)
            mlflow.log_metric(f"MAE_{i}", mae)
            mlflow.log_metric(f"RMSE_{i}", rmse)

         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:40:02,727] A new study created in memory with name: no-name-61fc82e3-2735-47bf-99fe-ce81fd7afe95


Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

2025-05-20 00:40:02 [INFO]: Using the given device: cuda
2025-05-20 00:40:02 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004002
2025-05-20 00:40:02 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004002/tensorboard
2025-05-20 00:40:02 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678
2025-05-20 00:40:03 [INFO]: Epoch 001 - training loss (default): 722.8419, validation loss: 354.8317
2025-05-20 00:40:04 [INFO]: Epoch 002 - training loss (default): 565.1318, validation loss: 343.4969
2025-05-20 00:40:04 [INFO]: Epoch 003 - training loss (default): 552.5871, validation loss: 340.6999
2025-05-20 00:40:05 [INFO]: Epoch 004 - training loss (default): 550.3879, validation loss: 339.4278
2025-05-20 00:40:06 [INFO]: Epoch 005 - trai

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:40:13,677] Trial 0 finished with value: 7675.04213732518 and parameters: {'lr': 0.00048532276594338386, 'epochs': 16, 'batch_size': 128, 'length_scale': 1.6539034791803948, 'beta': 0.38234690972619034, 'kernel': 'cauchy'}. Best is trial 0 with value: 7675.04213732518.
2025-05-20 00:40:13 [INFO]: Using the given device: cuda
2025-05-20 00:40:13 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004013
2025-05-20 00:40:13 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004013/tensorboard
2025-05-20 00:40:13 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678
2025-05-20 00:40:13 [ERROR]: ❌ Exception: Expected parameter covariance_matrix (Tensor of shape (37, 20, 20)) of distribution MultivariateNormal(loc: torch.Size([

🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/f007d5e783e64b42a7bd6742a6301f95
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/2d67f3f8141842a58203908cd6ec7f57
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/c81e5b54f26545c38c34f7e47d9768cd
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


RuntimeError: Training got interrupted. Model was not trained. Please investigate the error printed above.

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0)
 }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True) as run:
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel="cauchy",
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)


         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:43:13,791] A new study created in memory with name: no-name-1e6f6177-6264-400c-b102-a2982cc10411


Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

2025-05-20 00:43:13 [INFO]: Using the given device: cuda
2025-05-20 00:43:13 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004313
2025-05-20 00:43:13 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004313/tensorboard
2025-05-20 00:43:13 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678
2025-05-20 00:43:15 [INFO]: Epoch 001 - training loss (default): 1974.3210, validation loss: 4101.7576
2025-05-20 00:43:16 [INFO]: Epoch 002 - training loss (default): 1702.0264, validation loss: 4097.6497
2025-05-20 00:43:17 [INFO]: Epoch 003 - training loss (default): 1700.6533, validation loss: 4094.6074
2025-05-20 00:43:18 [INFO]: Epoch 004 - training loss (default): 1699.8676, validation loss: 4090.9239
2025-05-20 00:43:19 [INFO]: Epoch 00

The shape of gp_vae_imputation is (1056, 2, 20, 31)
🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/d2cd207c3fbb495ca3e178b6c618899f
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/63c95a2dd80d44c28599e6f3d7c72d94
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


NameError: name 'rmse' is not defined

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0)
 }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True) as run:
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel="cauchy",
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)


         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse_i)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        #avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", np.mean(mae_list))
        mlflow.log_metric("avg_rmse", np.mean(rmse_list))

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:44:00,741] A new study created in memory with name: no-name-4a8855c3-887e-4d62-850b-d435bcacc905


Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

2025-05-20 00:44:00 [INFO]: Using the given device: cuda
2025-05-20 00:44:00 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004400
2025-05-20 00:44:00 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004400/tensorboard
2025-05-20 00:44:00 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678
2025-05-20 00:44:01 [INFO]: Epoch 001 - training loss (default): 869.0620, validation loss: 609.1907
2025-05-20 00:44:03 [INFO]: Epoch 002 - training loss (default): 632.1673, validation loss: 598.7252
2025-05-20 00:44:04 [INFO]: Epoch 003 - training loss (default): 622.1954, validation loss: 593.6872
2025-05-20 00:44:05 [INFO]: Epoch 004 - training loss (default): 619.6878, validation loss: 590.0230
2025-05-20 00:44:06 [INFO]: Epoch 005 - trai

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[W 2025-05-20 00:44:43,686] Trial 0 failed with parameters: {'lr': 0.0001126821627210784, 'epochs': 35, 'batch_size': 32, 'length_scale': 1.7300271829834157, 'beta': 0.5425219729252396} because of the following error: NameError("name 'mae_list' is not defined").
Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/sensor-imputation-thesis/.venv/lib/python3.10/site-packages/optuna/study/_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
  File "<ipython-input-17-6cab5f49d7d7>", line 344, in objective
    mlflow.log_metric("avg_mae", np.mean(mae_list))
NameError: name 'mae_list' is not defined
[W 2025-05-20 00:44:43,690] Trial 0 failed with value None.


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/dbb34cdee13246a899d1d963a34c5323
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/1c25fd28eb544ff5ab730d7c69e4c280
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


NameError: name 'mae_list' is not defined

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0)
 }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True) as run:
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel="cauchy",
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)


         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse_i)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", avg_mae)
        mlflow.log_metric("avg_rmse", avg_rmse)

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:46:12,028] A new study created in memory with name: no-name-4d0fbc41-c27f-47d5-a555-65015c7c158d
2025-05-20 00:46:12 [INFO]: Using the given device: cuda
2025-05-20 00:46:12 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004612
2025-05-20 00:46:12 [INFO]: Tensorboard file will be saved to /home/ec2-use

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:46:32,375] Trial 0 finished with value: 7681.086558596051 and parameters: {'lr': 0.00035745162270512613, 'epochs': 30, 'batch_size': 128, 'length_scale': 2.0077899412830975, 'beta': 0.7618304568241496}. Best is trial 0 with value: 7681.086558596051.
2025-05-20 00:46:32 [INFO]: Using the given device: cuda
2025-05-20 00:46:32 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004632
2025-05-20 00:46:32 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004632/tensorboard
2025-05-20 00:46:32 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/a0bb5ff8807f4059a8d3e3bff6fd3cb4
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:46:33 [INFO]: Epoch 001 - training loss (default): 729.5931, validation loss: 401.3451
2025-05-20 00:46:33 [INFO]: Epoch 002 - training loss (default): 625.8241, validation loss: 395.7056
2025-05-20 00:46:34 [INFO]: Epoch 003 - training loss (default): 617.7882, validation loss: 398.6986
2025-05-20 00:46:34 [INFO]: Epoch 004 - training loss (default): 614.0817, validation loss: 400.5087
2025-05-20 00:46:35 [INFO]: Epoch 005 - training loss (default): 611.5978, validation loss: 401.1664
2025-05-20 00:46:35 [INFO]: Exceeded the training patience. Terminating the training procedure...
2025-05-20 00:46:35 [INFO]: Finished training. The best model is from epoch#2.
2025-05-20 00:46:35 [INFO]: Saved the model to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004632/GPVAE.pypots


The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:46:36,037] Trial 1 finished with value: 7896.3234836858055 and parameters: {'lr': 0.0009253207967568482, 'epochs': 15, 'batch_size': 128, 'length_scale': 1.391635180263604, 'beta': 0.8536322035686299}. Best is trial 0 with value: 7681.086558596051.
2025-05-20 00:46:36 [INFO]: Using the given device: cuda
2025-05-20 00:46:36 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004636
2025-05-20 00:46:36 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004636/tensorboard
2025-05-20 00:46:36 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/e276d6114fb54f20ab88bd491f4f0e84
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:46:36 [INFO]: Epoch 001 - training loss (default): 35626.6787, validation loss: 54946.1786
2025-05-20 00:46:37 [INFO]: Epoch 002 - training loss (default): 18566.2784, validation loss: 53376.8956
2025-05-20 00:46:38 [INFO]: Epoch 003 - training loss (default): 18473.0830, validation loss: 53134.3871
2025-05-20 00:46:38 [INFO]: Epoch 004 - training loss (default): 18466.0904, validation loss: 52986.6506
2025-05-20 00:46:39 [INFO]: Epoch 005 - training loss (default): 18462.0286, validation loss: 52842.9027
2025-05-20 00:46:40 [INFO]: Epoch 006 - training loss (default): 18459.3339, validation loss: 52710.0497
2025-05-20 00:46:40 [INFO]: Epoch 007 - training loss (default): 18457.4564, validation loss: 52575.4563
2025-05-20 00:46:41 [INFO]: Epoch 008 - training loss (default): 18455.9654, validation loss: 52450.3782
2025-05-20 00:46:42 [INFO]: Epoch 009 - training loss (default): 18454.7935, validation loss: 52335.2809
2025-05-20 00:46:42 [INFO]: Epoch 010 - training loss (

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:46:52,895] Trial 2 finished with value: 7862.8214861212355 and parameters: {'lr': 0.00049221691738657, 'epochs': 24, 'batch_size': 96, 'length_scale': 4.5554361322447745, 'beta': 0.7088218812233893}. Best is trial 0 with value: 7681.086558596051.
2025-05-20 00:46:52 [INFO]: Using the given device: cuda
2025-05-20 00:46:52 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004652
2025-05-20 00:46:52 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004652/tensorboard
2025-05-20 00:46:52 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/642a52e8aff94c989f2896bd9bd20501
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:46:53 [INFO]: Epoch 001 - training loss (default): 740.3638, validation loss: 281.3164
2025-05-20 00:46:54 [INFO]: Epoch 002 - training loss (default): 593.7057, validation loss: 275.4472
2025-05-20 00:46:55 [INFO]: Epoch 003 - training loss (default): 553.5827, validation loss: 272.9328
2025-05-20 00:46:56 [INFO]: Epoch 004 - training loss (default): 547.3174, validation loss: 271.7761
2025-05-20 00:46:56 [INFO]: Epoch 005 - training loss (default): 545.1988, validation loss: 271.1328
2025-05-20 00:46:57 [INFO]: Epoch 006 - training loss (default): 543.6671, validation loss: 270.7908
2025-05-20 00:46:58 [INFO]: Epoch 007 - training loss (default): 542.7140, validation loss: 270.5605
2025-05-20 00:46:59 [INFO]: Epoch 008 - training loss (default): 542.0782, validation loss: 270.3504
2025-05-20 00:47:00 [INFO]: Epoch 009 - training loss (default): 541.5363, validation loss: 270.1222
2025-05-20 00:47:01 [INFO]: Epoch 010 - training loss (default): 541.1547, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:47:10,664] Trial 3 finished with value: 7742.100039130347 and parameters: {'lr': 0.0001096864767395611, 'epochs': 21, 'batch_size': 64, 'length_scale': 1.4772926481933586, 'beta': 0.45541287831529687}. Best is trial 0 with value: 7681.086558596051.
2025-05-20 00:47:10 [INFO]: Using the given device: cuda
2025-05-20 00:47:10 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004710
2025-05-20 00:47:10 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004710/tensorboard
2025-05-20 00:47:10 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/4af395aea1af42af82f40a40d0f9a076
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:47:11 [INFO]: Epoch 001 - training loss (default): 568.2737, validation loss: 109.5377
2025-05-20 00:47:12 [INFO]: Epoch 002 - training loss (default): 493.4547, validation loss: 107.7137
2025-05-20 00:47:13 [INFO]: Epoch 003 - training loss (default): 481.3378, validation loss: 107.1298
2025-05-20 00:47:14 [INFO]: Epoch 004 - training loss (default): 479.7130, validation loss: 106.7201
2025-05-20 00:47:14 [INFO]: Epoch 005 - training loss (default): 478.8173, validation loss: 106.3582
2025-05-20 00:47:15 [INFO]: Epoch 006 - training loss (default): 478.2687, validation loss: 106.0288
2025-05-20 00:47:16 [INFO]: Epoch 007 - training loss (default): 477.7586, validation loss: 105.7073
2025-05-20 00:47:17 [INFO]: Epoch 008 - training loss (default): 477.3618, validation loss: 105.3650
2025-05-20 00:47:18 [INFO]: Epoch 009 - training loss (default): 476.9641, validation loss: 105.0312
2025-05-20 00:47:18 [INFO]: Epoch 010 - training loss (default): 476.2958, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:47:31,342] Trial 4 finished with value: 7755.536186999624 and parameters: {'lr': 0.00017410500622537273, 'epochs': 25, 'batch_size': 64, 'length_scale': 0.73933055149302, 'beta': 0.6678557067755405}. Best is trial 0 with value: 7681.086558596051.
2025-05-20 00:47:31 [INFO]: Using the given device: cuda
2025-05-20 00:47:31 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004731
2025-05-20 00:47:31 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004731/tensorboard
2025-05-20 00:47:31 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/827b357822f6422192b115a4fca5ead4
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:47:32 [INFO]: Epoch 001 - training loss (default): 581.5353, validation loss: 165.5222
2025-05-20 00:47:32 [INFO]: Epoch 002 - training loss (default): 518.2326, validation loss: 163.1140
2025-05-20 00:47:33 [INFO]: Epoch 003 - training loss (default): 514.8347, validation loss: 162.1802
2025-05-20 00:47:34 [INFO]: Epoch 004 - training loss (default): 513.6145, validation loss: 161.4055
2025-05-20 00:47:34 [INFO]: Epoch 005 - training loss (default): 513.1113, validation loss: 160.7555
2025-05-20 00:47:35 [INFO]: Epoch 006 - training loss (default): 512.5496, validation loss: 160.0362
2025-05-20 00:47:36 [INFO]: Epoch 007 - training loss (default): 511.9527, validation loss: 159.2276
2025-05-20 00:47:36 [INFO]: Epoch 008 - training loss (default): 511.3680, validation loss: 158.3934
2025-05-20 00:47:37 [INFO]: Epoch 009 - training loss (default): 511.0814, validation loss: 157.7014
2025-05-20 00:47:38 [INFO]: Epoch 010 - training loss (default): 510.5827, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:47:48,877] Trial 5 finished with value: 7661.802608801183 and parameters: {'lr': 0.0007334929012456116, 'epochs': 25, 'batch_size': 96, 'length_scale': 0.9672766126928345, 'beta': 0.77408893354034}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:47:48 [INFO]: Using the given device: cuda
2025-05-20 00:47:48 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004748
2025-05-20 00:47:48 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004748/tensorboard
2025-05-20 00:47:48 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/c9fcc321d3b64eb79486182c9914367b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:47:50 [INFO]: Epoch 001 - training loss (default): 534.9839, validation loss: 132.9820
2025-05-20 00:47:51 [INFO]: Epoch 002 - training loss (default): 496.3154, validation loss: 131.8449
2025-05-20 00:47:52 [INFO]: Epoch 003 - training loss (default): 494.8530, validation loss: 131.0219
2025-05-20 00:47:53 [INFO]: Epoch 004 - training loss (default): 494.0383, validation loss: 130.3757
2025-05-20 00:47:54 [INFO]: Epoch 005 - training loss (default): 493.4082, validation loss: 129.9068
2025-05-20 00:47:56 [INFO]: Epoch 006 - training loss (default): 492.8038, validation loss: 129.4393
2025-05-20 00:47:57 [INFO]: Epoch 007 - training loss (default): 492.4928, validation loss: 129.0161
2025-05-20 00:47:58 [INFO]: Epoch 008 - training loss (default): 492.1489, validation loss: 128.7140
2025-05-20 00:47:59 [INFO]: Epoch 009 - training loss (default): 491.7445, validation loss: 128.3860
2025-05-20 00:48:00 [INFO]: Epoch 010 - training loss (default): 491.0345, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:48:16,161] Trial 6 finished with value: 7674.316824349503 and parameters: {'lr': 0.00032159696898646084, 'epochs': 22, 'batch_size': 32, 'length_scale': 0.866207336845062, 'beta': 0.7170308384584969}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:48:16 [INFO]: Using the given device: cuda
2025-05-20 00:48:16 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004816
2025-05-20 00:48:16 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004816/tensorboard
2025-05-20 00:48:16 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/c03faa9cfad34cd1a1651f347b97eb1e
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:48:16 [INFO]: Epoch 001 - training loss (default): 564.2214, validation loss: 82.5144
2025-05-20 00:48:17 [INFO]: Epoch 002 - training loss (default): 515.3027, validation loss: 81.7079
2025-05-20 00:48:18 [INFO]: Epoch 003 - training loss (default): 487.2850, validation loss: 80.8402
2025-05-20 00:48:18 [INFO]: Epoch 004 - training loss (default): 479.8074, validation loss: 80.3979
2025-05-20 00:48:19 [INFO]: Epoch 005 - training loss (default): 477.9726, validation loss: 80.1681
2025-05-20 00:48:20 [INFO]: Epoch 006 - training loss (default): 476.9682, validation loss: 79.9906
2025-05-20 00:48:20 [INFO]: Epoch 007 - training loss (default): 476.1901, validation loss: 79.8403
2025-05-20 00:48:21 [INFO]: Epoch 008 - training loss (default): 475.7935, validation loss: 79.7118
2025-05-20 00:48:22 [INFO]: Epoch 009 - training loss (default): 475.5791, validation loss: 79.5959
2025-05-20 00:48:22 [INFO]: Epoch 010 - training loss (default): 475.3203, validation loss: 79.4917


The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:48:29,680] Trial 7 finished with value: 7759.442389013492 and parameters: {'lr': 0.00022249164455507314, 'epochs': 20, 'batch_size': 128, 'length_scale': 0.9592040339838777, 'beta': 0.3767376894957277}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:48:29 [INFO]: Using the given device: cuda
2025-05-20 00:48:29 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004829
2025-05-20 00:48:29 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004829/tensorboard
2025-05-20 00:48:29 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/7f358a3698e740f2b579256bd6bb56be
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:48:30 [INFO]: Epoch 001 - training loss (default): 635.7738, validation loss: 206.5086
2025-05-20 00:48:31 [INFO]: Epoch 002 - training loss (default): 522.5095, validation loss: 200.7939
2025-05-20 00:48:32 [INFO]: Epoch 003 - training loss (default): 507.4184, validation loss: 198.7440
2025-05-20 00:48:32 [INFO]: Epoch 004 - training loss (default): 504.4792, validation loss: 197.5117
2025-05-20 00:48:33 [INFO]: Epoch 005 - training loss (default): 503.5044, validation loss: 196.5619
2025-05-20 00:48:34 [INFO]: Epoch 006 - training loss (default): 502.5220, validation loss: 195.8170
2025-05-20 00:48:35 [INFO]: Epoch 007 - training loss (default): 501.8651, validation loss: 195.2048
2025-05-20 00:48:36 [INFO]: Epoch 008 - training loss (default): 501.5599, validation loss: 194.6842
2025-05-20 00:48:36 [INFO]: Epoch 009 - training loss (default): 501.2877, validation loss: 194.2250
2025-05-20 00:48:37 [INFO]: Epoch 010 - training loss (default): 500.9291, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:48:59,020] Trial 8 finished with value: 7720.197863605609 and parameters: {'lr': 0.0001669510261491101, 'epochs': 36, 'batch_size': 64, 'length_scale': 1.6510148682787178, 'beta': 0.22503744769961148}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:48:59 [INFO]: Using the given device: cuda
2025-05-20 00:48:59 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004859
2025-05-20 00:48:59 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004859/tensorboard
2025-05-20 00:48:59 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/873d07d8ff154f7d935a18218765ad47
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:48:59 [INFO]: Epoch 001 - training loss (default): 1083.4903, validation loss: 649.3427
2025-05-20 00:49:00 [INFO]: Epoch 002 - training loss (default): 850.0285, validation loss: 638.8513
2025-05-20 00:49:00 [INFO]: Epoch 003 - training loss (default): 695.8480, validation loss: 629.3583
2025-05-20 00:49:01 [INFO]: Epoch 004 - training loss (default): 642.3141, validation loss: 623.4281
2025-05-20 00:49:02 [INFO]: Epoch 005 - training loss (default): 627.9804, validation loss: 619.8538
2025-05-20 00:49:02 [INFO]: Epoch 006 - training loss (default): 623.6435, validation loss: 617.3318
2025-05-20 00:49:03 [INFO]: Epoch 007 - training loss (default): 621.6120, validation loss: 615.3878
2025-05-20 00:49:04 [INFO]: Epoch 008 - training loss (default): 620.0744, validation loss: 613.7218
2025-05-20 00:49:04 [INFO]: Epoch 009 - training loss (default): 618.9038, validation loss: 611.8712
2025-05-20 00:49:05 [INFO]: Epoch 010 - training loss (default): 618.0272, validation loss

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:49:17,764] Trial 9 finished with value: 7701.9150700948885 and parameters: {'lr': 0.00015876258804683774, 'epochs': 28, 'batch_size': 128, 'length_scale': 1.846541739251264, 'beta': 0.4270146895828756}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:49:17 [INFO]: Using the given device: cuda
2025-05-20 00:49:17 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004917
2025-05-20 00:49:17 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004917/tensorboard
2025-05-20 00:49:17 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/3686f94e1386462d8938b93c7dae2d8b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:49:18 [INFO]: Epoch 001 - training loss (default): 13863.1958, validation loss: 26058.5918
2025-05-20 00:49:19 [INFO]: Epoch 002 - training loss (default): 8647.5544, validation loss: 25423.8794
2025-05-20 00:49:19 [INFO]: Epoch 003 - training loss (default): 8643.3694, validation loss: 25383.1621
2025-05-20 00:49:20 [INFO]: Epoch 004 - training loss (default): 8642.2802, validation loss: 25369.1401
2025-05-20 00:49:21 [INFO]: Epoch 005 - training loss (default): 8641.5062, validation loss: 25359.5989
2025-05-20 00:49:21 [INFO]: Epoch 006 - training loss (default): 8640.8433, validation loss: 25349.9904
2025-05-20 00:49:22 [INFO]: Epoch 007 - training loss (default): 8640.2585, validation loss: 25339.1475
2025-05-20 00:49:23 [INFO]: Epoch 008 - training loss (default): 8639.8814, validation loss: 25329.8809
2025-05-20 00:49:23 [INFO]: Epoch 009 - training loss (default): 8639.5593, validation loss: 25317.7351
2025-05-20 00:49:24 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:49:50,305] Trial 10 finished with value: 7702.747248157424 and parameters: {'lr': 0.0009881062146305788, 'epochs': 47, 'batch_size': 96, 'length_scale': 3.2403571323096902, 'beta': 0.9917468362947446}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:49:50 [INFO]: Using the given device: cuda
2025-05-20 00:49:50 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004950
2025-05-20 00:49:50 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T004950/tensorboard
2025-05-20 00:49:50 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/eb2c14bc505b41a2a558fff2232645fe
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:49:51 [INFO]: Epoch 001 - training loss (default): 4635.4631, validation loss: 10423.6187
2025-05-20 00:49:52 [INFO]: Epoch 002 - training loss (default): 3570.1056, validation loss: 10368.1017
2025-05-20 00:49:54 [INFO]: Epoch 003 - training loss (default): 3568.0122, validation loss: 10318.1197
2025-05-20 00:49:55 [INFO]: Epoch 004 - training loss (default): 3566.6923, validation loss: 10272.8298
2025-05-20 00:49:56 [INFO]: Epoch 005 - training loss (default): 3565.8259, validation loss: 10232.6030
2025-05-20 00:49:58 [INFO]: Epoch 006 - training loss (default): 3565.2266, validation loss: 10195.4661
2025-05-20 00:49:59 [INFO]: Epoch 007 - training loss (default): 3564.8014, validation loss: 10160.0702
2025-05-20 00:50:00 [INFO]: Epoch 008 - training loss (default): 3564.5367, validation loss: 10128.7408
2025-05-20 00:50:01 [INFO]: Epoch 009 - training loss (default): 3564.3861, validation loss: 10101.0913
2025-05-20 00:50:02 [INFO]: Epoch 010 - training loss (default):

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:50:03,476] Trial 11 finished with value: 7689.0795888830135 and parameters: {'lr': 0.0005748641475135172, 'epochs': 10, 'batch_size': 32, 'length_scale': 2.96713408813375, 'beta': 0.6041600669712559}. Best is trial 5 with value: 7661.802608801183.
2025-05-20 00:50:03 [INFO]: Using the given device: cuda
2025-05-20 00:50:03 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005003
2025-05-20 00:50:03 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005003/tensorboard
2025-05-20 00:50:03 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/894629cbddc54ec785154a8a0662012a
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:50:04 [INFO]: Epoch 001 - training loss (default): 502.2495, validation loss: 123.2171
2025-05-20 00:50:06 [INFO]: Epoch 002 - training loss (default): 473.7430, validation loss: 121.9633
2025-05-20 00:50:07 [INFO]: Epoch 003 - training loss (default): 472.7724, validation loss: 120.4752
2025-05-20 00:50:08 [INFO]: Epoch 004 - training loss (default): 471.6438, validation loss: 118.5636
2025-05-20 00:50:10 [INFO]: Epoch 005 - training loss (default): 470.1152, validation loss: 116.0891
2025-05-20 00:50:11 [INFO]: Epoch 006 - training loss (default): 469.1584, validation loss: 113.6763
2025-05-20 00:50:12 [INFO]: Epoch 007 - training loss (default): 468.8160, validation loss: 111.7024
2025-05-20 00:50:13 [INFO]: Epoch 008 - training loss (default): 468.5682, validation loss: 109.9479
2025-05-20 00:50:15 [INFO]: Epoch 009 - training loss (default): 468.4283, validation loss: 108.2748
2025-05-20 00:50:16 [INFO]: Epoch 010 - training loss (default): 468.3826, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[W 2025-05-20 00:52:53,386] Trial 12 failed with parameters: {'lr': 0.0005489003043475935, 'epochs': 37, 'batch_size': 32, 'length_scale': 0.5911738963397587, 'beta': 0.8838948069479122} because of the following error: MlflowException("API request to http://localhost:5000/api/2.0/mlflow/runs/get failed with exception HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/get?run_uuid=622e0a78641f42fdbfa5a25f0cd22ed4&run_id=622e0a78641f42fdbfa5a25f0cd22ed4 (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f1a967cfd60>: Failed to establish a new connection: [Errno 111] Connection refused'))").
Traceback (most recent call last):
  File "/home/ec2-user/SageMaker/sensor-imputation-thesis/.venv/lib/python3.10/site-packages/urllib3/connection.py", line 199, in _new_conn
    sock = connection.create_connection(
  File "/home/ec2-user/SageMaker/sensor-imputation-thesis/.venv/lib/python3.10/site-packages/urllib3/util/conn

MlflowException: API request to http://localhost:5000/api/2.0/mlflow/runs/get failed with exception HTTPConnectionPool(host='localhost', port=5000): Max retries exceeded with url: /api/2.0/mlflow/runs/get?run_uuid=886d8065455e497ab608c33e67318f7c&run_id=886d8065455e497ab608c33e67318f7c (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f1a966c33d0>: Failed to establish a new connection: [Errno 111] Connection refused'))

In [None]:
#Import Pypots Library
from pypots.optim import Adam
from pypots.imputation import GPVAE
#from pypots.utils.metrics import calc_mae
from pypots.nn.functional import calc_mae


import argparse
import hashlib
from pathlib import Path

import matplotlib.pyplot as plt
import mlflow
import mlflow.pytorch
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.utils.data
import data_insight
from data_insight import setup_duckdb
from duckdb import DuckDBPyConnection as DuckDB
from duckdb import DuckDBPyRelation as Relation
from pathlib import Path
import hashlib
from duckdb import DuckDBPyConnection as DuckDB
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import optuna 
from optuna.visualization import plot_optimization_history




from torch import nn, optim
from torch.nn import functional as F
from torch.utils.data import TensorDataset, Dataset
from pygrinder.missing_completely_at_random import mcar
from tqdm.auto import tqdm

import sensor_imputation_thesis.shared.load_data as load

torch.cuda.empty_cache()
#PatchTST might be an ideal choise if SAITS is too slow 

##Drop columns with different indexes while loading data.. Or the mean values 

df=pd.read_parquet("/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/ny_df_for_pypots.parquet")

len(df)

#current length of the dataframe is 119439

# Check nan values in each column
for col in df.columns:
    print(f"Column {col} has {df[col].isna().sum()} NaN values")
    missing_rate=df[col].isna().sum()/len(df[col])
    print(f"Column {col} has {missing_rate} Missing_rate")


#Try with smaller dataset, size 4000
##SAMPLE the percengtage of the dataset, df.sample (averagely pick samples)
#not df.sample cuz it will randomly select 
original_size=len(df)
desired_fraction=0.3 #Select data every 3 minutes 
step=int(1/desired_fraction) #step_size=10 (sample every 10th (3/10) minute)

#Systematic sampling: Start at a random offset to avoid bias 
start=np.random.randint(0,step) #Random start between 0-9
df1=df.iloc[start::step].reset_index(drop=True)

print(f"Original size:{len(df)}, Sampled size: {len(df1)}")



# Custom Dataset class
class Dataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

# Data processing code
sensor_cols = [col for col in df1.columns if col != "time"]
data = df1[sensor_cols].values

#¤get feature names for printing mae later 
feature_names=df1[sensor_cols].columns.tolist()

## Convert data to 3D arrays of shape n_samples, n_timesteps, n_features, X_ori refers to the original data without missing values 
## Reconstruct all columns simultaneously  #num_features: 119
n_features = data.shape[1]  # exclude the time column
n_steps = 20 #60 (was 60 previously) #(TRY TO CHANGE HERE)  # # window length, 1440 steps = 24 hours of 1-minute data, but here is revised to 60 again
#total_elements = data.shape[0] * data.shape[1]
n_samples = data.shape[0] // n_steps 



# Reshape to (n_samples // n_steps, n_steps, n_features)
#data_reshaped = data.reshape((n_samples, n_steps, n_features))
data_reshaped=data[:n_samples*n_steps].reshape(n_samples,n_steps,n_features)
print(f"Reshaped data:{data.shape}")

#Split into train, test, val, fit scaler only on the train set (prevent data leakage)

#train_size = int(0.6 * len(data))
#val_size = int(0.2 * len(data))
#test_size = len(data) - train_size - val_size

#train_data = data_reshaped[:train_size]
#val_data = data_reshaped[train_size:train_size + val_size]
#test_data= data_reshaped[train_size + val_size:]


#Apply time series split 
#Split into train(60%), val(20%), and test (20%)
train_data, temp_data=train_test_split(data_reshaped,test_size=0.4,shuffle=True)
val_data, test_data=train_test_split(temp_data, test_size=0.5, shuffle=False)

##Normalization is important because of the nature of mse calculation of saits, columns with large 
#values dominate the loss, making metrics meaningless. SAITS computes MSE/MAE column-wise and averages 
#them across all columns 
#  Apply minmax scaler here 
#normalize each feature independently
scalers={}


#train_scaled = np.zeros_like(data_reshaped[train_size])  # Initialize the normalized data array
#val_scaled=np.zeros_like(data_reshaped[train_size:train_size + val_size])
#test_scaled=np.zeros_like(data_reshaped[train_size + val_size:])

train_scaled = np.zeros_like(train_data)
val_scaled = np.zeros_like(val_data)
test_scaled = np.zeros_like(test_data)



for i in range(data_reshaped.shape[2]):
    scaler = MinMaxScaler(feature_range=(-1, 1)) #changed to -1,1
    # Flatten timesteps and samples for scaling
    train_scaled[:, :, i] = scaler.fit_transform(train_data[:, :, i].reshape(-1, 1)).reshape(train_data.shape[0], train_data.shape[1])
    val_scaled[:, :, i] = scaler.transform(val_data[:, :, i].reshape(-1, 1)).reshape(val_data.shape[0], val_data.shape[1])
    test_scaled[:, :, i] = scaler.transform(test_data[:, :, i].reshape(-1, 1)).reshape(test_data.shape[0], test_data.shape[1])
    scalers[i] = scaler  # Save scalers to inverse-transform later

#Inverse Scale
def inverse_scale(imputation, scalers):
    n_samples, n_timesteps, n_features = imputation.shape
    imputation_denorm = np.empty_like(imputation)
    
    for i in range(n_features):
        reshaped = imputation[:, :, i].reshape(-1, 1)
        inversed = scalers[i].inverse_transform(reshaped)
        imputation_denorm[:, :, i] = inversed.reshape(n_samples, n_timesteps)
    
    return imputation_denorm



#Optional: Artificially mask. Mask 20% of the data (MIT part). Try masking 30% here 
def mcar_f(X, mask_ratio=0.3):
    """Apply MCAR only to observed values."""
    observed_mask=~np.isnan(X) #find observed positions
    artificial_mask=mcar(X,mask_ratio).astype(bool) #generate MCAR mask, cast to boolean
    #combine masks 
    combined_mask=observed_mask & artificial_mask

    #Apply masking
    X_masked=X.copy()
    X_masked[combined_mask]=np.nan
    return X_masked,combined_mask


#Use mcar on validation data 
val_X_masked, val_mask =mcar_f(val_scaled)
val_X_ori=val_scaled.copy() 

test_X_masked, test_mask =mcar_f(test_scaled)
test_X_ori=test_scaled.copy() 


#?? Problem: Can't have the best input for testing
#1.Create synthetic test_data cuz if I drop nan values for test set, there's basically nothing left
#synthetic_data=np.random.randn(n_samples,n_steps,n_features)
#test_X_masked,test_mask=mcar_f(synthetic_data)
#test_X_ori=synthetic_data.copy() #Ground truth

# 2, Ensure no NaN values in synthetic data
#test_X_masked = np.nan_to_num(test_X_masked, nan=np.nanmean(test_X_masked))
#test_X_ori = np.nan_to_num(test_X_ori, nan=np.nanmean(test_X_ori))



class Config:
    no_cuda = False
    no_mps = False
    seed = 1

args=Config()

torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(args.seed)
np.random.seed(args.seed)


args.cuda = not args.no_cuda and torch.cuda.is_available()
use_mps = not args.no_mps and torch.backends.mps.is_available()

args.cuda = not args.no_cuda and torch.cuda.is_available()
print("CUDA available:", torch.cuda.is_available())


if args.cuda:
    device = torch.device("cuda")
    print("Using CUDA")
elif use_mps:
    device = torch.device("mps")
    print("Using MPS")
else:
    device = torch.device("cpu")
    print("Using CPU")

train_scaled = torch.tensor(train_scaled, dtype=torch.float32)
val_X_masked = torch.tensor(val_X_masked, dtype=torch.float32)
val_X_ori = torch.tensor(val_X_ori, dtype=torch.float32)

train_scaled = train_scaled.to(device)
val_X_masked = val_X_masked.to(device)
val_X_ori = val_X_ori.to(device)


#MLflow set up
mlflow.set_tracking_uri("http://localhost:5000")
client = mlflow.tracking.MlflowClient()
mlflow.set_experiment("GP_VAE_2")


# Optuna objective function
def objective(trial):
    params = {
        "lr": trial.suggest_float("lr", 1e-4, 1e-3, log=True),
        "epochs": trial.suggest_int("epochs", 10, 50),
        "batch_size": trial.suggest_int("batch_size", 32, 128, step=32),
        "length_scale": trial.suggest_float("length_scale",0.5,5.0),
        "beta": trial.suggest_float("beta",0.1,1.0)
 }

    with mlflow.start_run(run_name="GP-VAE-Trial", nested=True) as run:
        mlflow.log_params(params)

        gp_vae = GPVAE(
            n_steps=data_reshaped.shape[1],
            n_features=data_reshaped.shape[2],
            latent_size=37, #should be the latent dimensions 
            encoder_sizes=(128,128), #should I change it here too?
            decoder_sizes=(256,256), #should I change the model size?
            kernel="cauchy",
            beta=params["beta"], #The weight of KL divergence in ELBO
            M=1,  #The number of Monte Carlo samples for ELBO estimation during training.
            K=1,  #The number of importance weights for IWAE model training loss.
            sigma=1.005, # The scale parameter for a kernel function
            length_scale=params["length_scale"], #The length scale parameter for a kernel function
            kernel_scales=1, #The number of different length scales over latent space dimensions
            window_size=24,  # Window size for the inference CNN.
            batch_size=params["batch_size"],
            # here we set epochs=10 for a quick demo, you can set it to 100 or more for better performance
            epochs=params["epochs"],
            # here we set patience=3 to early stop the training if the evaluting loss doesn't decrease for 3 epoches.
            # You can leave it to defualt as None to disable early stopping.
            patience=3,
            # give the optimizer. Different from torch.optim.Optimizer, you don't have to specify model's parameters when
            # initializing pypots.optim.Optimizer. You can also leave it to default. It will initilize an Adam optimizer with lr=0.001.
            optimizer=Adam(lr=params["lr"]),
            # this num_workers argument is for torch.utils.data.Dataloader. It's the number of subprocesses to use for data loading.
            # Leaving it to default as 0 means data loading will be in the main process, i.e. there won't be subprocesses.
            # You can increase it to >1 if you think your dataloading is a bottleneck to your model training speed
            num_workers=0,
            # just leave it to default as None, PyPOTS will automatically assign the best device for you.
            # Set it as 'cpu' if you don't have CUDA devices. You can also set it to 'cuda:0' or 'cuda:1' if you have multiple CUDA devices, even parallelly on ['cuda:0', 'cuda:1']
            device=device,
            # set the path for saving tensorboard and trained model files 
            saving_path="/home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model",
            # only save the best model after training finished.
            # You can also set it as "better" to save models performing better ever during training.
            model_saving_strategy="best",
        )




        # train the model on the training set, and validate it on the validating set to select the best model for testing in the next step
        gp_vae.fit(train_set={"X": train_scaled}, val_set={"X": val_X_masked, "X_ori": val_X_ori})
        gp_vae_results = gp_vae.predict({"X": test_X_masked}, n_sampling_times=2)
        gp_vae_imputation = gp_vae_results["imputation"]

        print(f"The shape of gp_vae_imputation is {gp_vae_imputation.shape}")

        # for error calculation, we need to take the mean value of the multiple samplings for each data sample
        mean_gp_vae_imputation = gp_vae_imputation.mean(axis=1)

        test_imputation_denorm = inverse_scale(mean_gp_vae_imputation, scalers)
        test_ori_denorm = inverse_scale(test_X_ori, scalers)


         # Calculate metrics
        mae_per_feature = []
        rmse_per_feature=[]
        percentage_mae_per_feature = []

        for i in range(n_features):
            imputation_i = test_imputation_denorm[:, :, i]
            ground_truth_i = test_ori_denorm[:, :, i]
            mask_i = test_mask[:, :, i]
            if np.isnan(imputation_i).any() or np.isnan(ground_truth_i).any():
                continue
            mae_i = calc_mae(imputation_i, ground_truth_i, mask_i)
            mae_per_feature.append(mae_i)
            rmse_i = np.sqrt(mean_squared_error(imputation_i, ground_truth_i))
            rmse_per_feature.append(rmse_i)

            #Calculate the original standard deviation for the feature
            std_dev_i = np.std(ground_truth_i[mask_i == 1])
             # Calculate the percentage of MAE relative to the standard deviation   
            if std_dev_i != 0:
                percentage_mae_i = (mae_i / std_dev_i) * 100
                percentage_mae_per_feature.append(percentage_mae_i)
            else:
                 percentage_mae_i = float('inf')
            
            mlflow.log_metric(f"MAE_{feature_names[i]}", mae_i)
            mlflow.log_metric(f"RMSE_{feature_names[i]}",rmse_i)
            mlflow.log_metric(f"Percentage_MAE_{feature_names[i]}", percentage_mae_i)

        avg_mae = np.mean(mae_per_feature)
        avg_rmse=np.mean(rmse_per_feature)
       
        mlflow.log_metric("avg_mae", avg_mae)
        mlflow.log_metric("avg_rmse", avg_rmse)

        trial.set_user_attr("mlflow_run_id", run.info.run_id)

        return avg_mae

    print("MAE per feature:", mae_per_feature)
    print("RMSE per feature",rmse_per_feature)
    print("Percentage MAE per feature:", percentage_mae_per_feature)
   

# Run Optuna study
mlflow.set_experiment("GP-VAE-2")
with mlflow.start_run(run_name="GPVAE_Optuna_Study(2)") as parent_run:
    study = optuna.create_study(direction="minimize")
    study.optimize(objective, n_trials=20)

    best_params = study.best_trial.params
    best_value = study.best_trial.value
    best_run_id = study.best_trial.user_attrs["mlflow_run_id"]


    
    # Log best parameters
    mlflow.log_params(best_params)

    # Log best metric(s)
    mlflow.log_metric("best_objective_value", best_value)
    mlflow.log_param("best_run_id", best_run_id)

    print("Best Parameters:", best_params)
    print("Best Objective Value:", best_value)

Column time has 0 NaN values
Column time has 0.0 Missing_rate
Column fr_eng has 0 NaN values
Column fr_eng has 0.0 Missing_rate
Column te_exh_cyl_out__0 has 0 NaN values
Column te_exh_cyl_out__0 has 0.0 Missing_rate
Column pd_air_ic__0 has 0 NaN values
Column pd_air_ic__0 has 0.0 Missing_rate
Column pr_exh_turb_out__0 has 316581 NaN values
Column pr_exh_turb_out__0 has 1.0 Missing_rate
Column te_air_ic_out__0 has 0 NaN values
Column te_air_ic_out__0 has 0.0 Missing_rate
Column te_seawater has 0 NaN values
Column te_seawater has 0.0 Missing_rate
Column te_air_comp_in_a__0 has 316581 NaN values
Column te_air_comp_in_a__0 has 1.0 Missing_rate
Column te_air_comp_in_b__0 has 316581 NaN values
Column te_air_comp_in_b__0 has 1.0 Missing_rate
Column fr_tc__0 has 316581 NaN values
Column fr_tc__0 has 1.0 Missing_rate
Column pr_baro has 0 NaN values
Column pr_baro has 0.0 Missing_rate
Column pd_air_ic__0_1 has 0 NaN values
Column pd_air_ic__0_1 has 0.0 Missing_rate
Column pr_exh_rec has 0 NaN va

  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
  return xp.asarray(numpy.nanmin(X, axis=axis))
  return xp.asarray(numpy.nanmax(X, axis=axis))
[I 2025-05-20 00:57:43,021] A new study created in memory with name: no-name-3d6a6bf1-75a8-48eb-98d1-69ce229069ff
2025-05-20 00:57:43 [INFO]: Using the given device: cuda
2025-05-20 00:57:43 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005743
2025-05-20 00:57:43 [INFO]: Tensorboard file will be saved to /home/ec2-use

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:58:15,405] Trial 0 finished with value: 7669.867760159568 and parameters: {'lr': 0.00025886707779089533, 'epochs': 47, 'batch_size': 96, 'length_scale': 2.9032511643432954, 'beta': 0.6220489199186152}. Best is trial 0 with value: 7669.867760159568.
2025-05-20 00:58:15 [INFO]: Using the given device: cuda
2025-05-20 00:58:15 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005815
2025-05-20 00:58:15 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005815/tensorboard
2025-05-20 00:58:15 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/27e36dc1f0814678894742cae6a5e15c
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:58:16 [INFO]: Epoch 001 - training loss (default): 5549.1198, validation loss: 9613.2180
2025-05-20 00:58:16 [INFO]: Epoch 002 - training loss (default): 3382.6455, validation loss: 9385.7774
2025-05-20 00:58:17 [INFO]: Epoch 003 - training loss (default): 3378.0290, validation loss: 9369.0943
2025-05-20 00:58:18 [INFO]: Epoch 004 - training loss (default): 3376.5573, validation loss: 9360.9138
2025-05-20 00:58:18 [INFO]: Epoch 005 - training loss (default): 3375.8012, validation loss: 9353.0008
2025-05-20 00:58:19 [INFO]: Epoch 006 - training loss (default): 3375.1069, validation loss: 9345.0131
2025-05-20 00:58:20 [INFO]: Epoch 007 - training loss (default): 3374.4601, validation loss: 9335.8580
2025-05-20 00:58:20 [INFO]: Epoch 008 - training loss (default): 3374.0187, validation loss: 9327.1689
2025-05-20 00:58:21 [INFO]: Epoch 009 - training loss (default): 3373.6748, validation loss: 9317.7734
2025-05-20 00:58:22 [INFO]: Epoch 010 - training loss (default): 3373.579

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:58:33,507] Trial 1 finished with value: 7673.5243591065655 and parameters: {'lr': 0.0007837202581459632, 'epochs': 26, 'batch_size': 96, 'length_scale': 3.010983823446949, 'beta': 0.5227149508146103}. Best is trial 0 with value: 7669.867760159568.
2025-05-20 00:58:33 [INFO]: Using the given device: cuda
2025-05-20 00:58:33 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005833
2025-05-20 00:58:33 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005833/tensorboard
2025-05-20 00:58:33 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/7bcc647ef9c5494196337f197fa83866
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:58:34 [INFO]: Epoch 001 - training loss (default): 544.4703, validation loss: 128.1299
2025-05-20 00:58:35 [INFO]: Epoch 002 - training loss (default): 475.2225, validation loss: 127.5210
2025-05-20 00:58:35 [INFO]: Epoch 003 - training loss (default): 471.4964, validation loss: 127.5934
2025-05-20 00:58:36 [INFO]: Epoch 004 - training loss (default): 470.5382, validation loss: 127.2535
2025-05-20 00:58:37 [INFO]: Epoch 005 - training loss (default): 470.1760, validation loss: 126.8746
2025-05-20 00:58:38 [INFO]: Epoch 006 - training loss (default): 469.8465, validation loss: 126.4669
2025-05-20 00:58:39 [INFO]: Epoch 007 - training loss (default): 469.3890, validation loss: 126.0887
2025-05-20 00:58:39 [INFO]: Epoch 008 - training loss (default): 468.6625, validation loss: 125.7014
2025-05-20 00:58:40 [INFO]: Epoch 009 - training loss (default): 467.7936, validation loss: 125.3369
2025-05-20 00:58:41 [INFO]: Epoch 010 - training loss (default): 466.8862, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:59:10,574] Trial 2 finished with value: 7684.9697188667 and parameters: {'lr': 0.00036432831979090385, 'epochs': 45, 'batch_size': 64, 'length_scale': 0.5334382463286361, 'beta': 0.946294598906401}. Best is trial 0 with value: 7669.867760159568.
2025-05-20 00:59:10 [INFO]: Using the given device: cuda
2025-05-20 00:59:10 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005910
2025-05-20 00:59:10 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005910/tensorboard
2025-05-20 00:59:10 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/40fc11eeaa794325a190bb707f4b6790
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:59:11 [INFO]: Epoch 001 - training loss (default): 489.3693, validation loss: 36.9562
2025-05-20 00:59:12 [INFO]: Epoch 002 - training loss (default): 453.9106, validation loss: 36.6587
2025-05-20 00:59:13 [INFO]: Epoch 003 - training loss (default): 451.9676, validation loss: 36.4459
2025-05-20 00:59:13 [INFO]: Epoch 004 - training loss (default): 451.1696, validation loss: 36.2601
2025-05-20 00:59:14 [INFO]: Epoch 005 - training loss (default): 450.3631, validation loss: 36.0891
2025-05-20 00:59:15 [INFO]: Epoch 006 - training loss (default): 449.3554, validation loss: 35.9154
2025-05-20 00:59:16 [INFO]: Epoch 007 - training loss (default): 448.4563, validation loss: 35.7512
2025-05-20 00:59:17 [INFO]: Epoch 008 - training loss (default): 447.9314, validation loss: 35.6131
2025-05-20 00:59:17 [INFO]: Epoch 009 - training loss (default): 447.6712, validation loss: 35.4632
2025-05-20 00:59:18 [INFO]: Epoch 010 - training loss (default): 447.3082, validation loss: 35.3252


The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 00:59:47,495] Trial 3 finished with value: 7709.205387372332 and parameters: {'lr': 0.0003859200724579461, 'epochs': 45, 'batch_size': 64, 'length_scale': 0.6141147439985831, 'beta': 0.25315684009494477}. Best is trial 0 with value: 7669.867760159568.
2025-05-20 00:59:47 [INFO]: Using the given device: cuda
2025-05-20 00:59:47 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005947
2025-05-20 00:59:47 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T005947/tensorboard
2025-05-20 00:59:47 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/b6dc0dfd69394ffd995ebc87280bbb36
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 00:59:48 [INFO]: Epoch 001 - training loss (default): 2396.5461, validation loss: 2281.7020
2025-05-20 00:59:49 [INFO]: Epoch 002 - training loss (default): 1358.6825, validation loss: 2226.6020
2025-05-20 00:59:49 [INFO]: Epoch 003 - training loss (default): 1093.7280, validation loss: 2197.7651
2025-05-20 00:59:50 [INFO]: Epoch 004 - training loss (default): 1061.5229, validation loss: 2180.8514
2025-05-20 00:59:51 [INFO]: Epoch 005 - training loss (default): 1053.8107, validation loss: 2168.6415
2025-05-20 00:59:52 [INFO]: Epoch 006 - training loss (default): 1050.4967, validation loss: 2158.8166
2025-05-20 00:59:53 [INFO]: Epoch 007 - training loss (default): 1048.3797, validation loss: 2150.1668
2025-05-20 00:59:53 [INFO]: Epoch 008 - training loss (default): 1047.1401, validation loss: 2142.8459
2025-05-20 00:59:54 [INFO]: Epoch 009 - training loss (default): 1046.3984, validation loss: 2136.3121
2025-05-20 00:59:55 [INFO]: Epoch 010 - training loss (default): 1045.999

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:00:22,610] Trial 4 finished with value: 7664.664779482548 and parameters: {'lr': 0.00012021352690757769, 'epochs': 43, 'batch_size': 64, 'length_scale': 2.3487754392417886, 'beta': 0.45088192430534046}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:00:22 [INFO]: Using the given device: cuda
2025-05-20 01:00:22 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010022
2025-05-20 01:00:22 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010022/tensorboard
2025-05-20 01:00:22 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/259728ca815f4e7fa06ac124e62fa4f7
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:00:23 [INFO]: Epoch 001 - training loss (default): 496.0277, validation loss: 51.9901
2025-05-20 01:00:25 [INFO]: Epoch 002 - training loss (default): 458.2804, validation loss: 51.6145
2025-05-20 01:00:26 [INFO]: Epoch 003 - training loss (default): 456.7171, validation loss: 51.3118
2025-05-20 01:00:27 [INFO]: Epoch 004 - training loss (default): 455.9602, validation loss: 51.0407
2025-05-20 01:00:28 [INFO]: Epoch 005 - training loss (default): 455.4130, validation loss: 50.7801
2025-05-20 01:00:29 [INFO]: Epoch 006 - training loss (default): 454.7341, validation loss: 50.5429
2025-05-20 01:00:31 [INFO]: Epoch 007 - training loss (default): 453.7897, validation loss: 50.3125
2025-05-20 01:00:32 [INFO]: Epoch 008 - training loss (default): 453.0438, validation loss: 50.0887
2025-05-20 01:00:33 [INFO]: Epoch 009 - training loss (default): 452.5990, validation loss: 49.8736
2025-05-20 01:00:34 [INFO]: Epoch 010 - training loss (default): 452.2415, validation loss: 49.6548


The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:01:11,744] Trial 5 finished with value: 7764.636355451376 and parameters: {'lr': 0.00017871967272931443, 'epochs': 40, 'batch_size': 32, 'length_scale': 0.6219436362577234, 'beta': 0.355810074145431}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:01:11 [INFO]: Using the given device: cuda
2025-05-20 01:01:11 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010111
2025-05-20 01:01:11 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010111/tensorboard
2025-05-20 01:01:11 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/6e6cf13ce45f46eb9fad31a1dc83efaf
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:01:12 [INFO]: Epoch 001 - training loss (default): 989.4459, validation loss: 1266.3272
2025-05-20 01:01:14 [INFO]: Epoch 002 - training loss (default): 811.4540, validation loss: 1255.2776
2025-05-20 01:01:15 [INFO]: Epoch 003 - training loss (default): 809.6540, validation loss: 1247.7605
2025-05-20 01:01:16 [INFO]: Epoch 004 - training loss (default): 808.8941, validation loss: 1242.5047
2025-05-20 01:01:17 [INFO]: Epoch 005 - training loss (default): 808.1918, validation loss: 1237.4741
2025-05-20 01:01:19 [INFO]: Epoch 006 - training loss (default): 807.8758, validation loss: 1233.7165
2025-05-20 01:01:20 [INFO]: Epoch 007 - training loss (default): 807.4945, validation loss: 1230.7543
2025-05-20 01:01:21 [INFO]: Epoch 008 - training loss (default): 807.2998, validation loss: 1228.3886
2025-05-20 01:01:22 [INFO]: Epoch 009 - training loss (default): 806.9639, validation loss: 1225.5584
2025-05-20 01:01:23 [INFO]: Epoch 010 - training loss (default): 807.0220, validat

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:01:49,153] Trial 6 finished with value: 7981.207828600384 and parameters: {'lr': 0.0004555324343993705, 'epochs': 30, 'batch_size': 32, 'length_scale': 1.958614821449487, 'beta': 0.6962865502148616}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:01:49 [INFO]: Using the given device: cuda
2025-05-20 01:01:49 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010149
2025-05-20 01:01:49 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010149/tensorboard
2025-05-20 01:01:49 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/b2de9279e40c4c76b19bd81787f1ed51
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:01:49 [INFO]: Epoch 001 - training loss (default): 10581.4894, validation loss: 13670.1480
2025-05-20 01:01:50 [INFO]: Epoch 002 - training loss (default): 5040.5104, validation loss: 13221.5676
2025-05-20 01:01:51 [INFO]: Epoch 003 - training loss (default): 4649.4156, validation loss: 13099.0653
2025-05-20 01:01:51 [INFO]: Epoch 004 - training loss (default): 4632.8065, validation loss: 13056.2556
2025-05-20 01:01:52 [INFO]: Epoch 005 - training loss (default): 4627.8174, validation loss: 13026.5353
2025-05-20 01:01:53 [INFO]: Epoch 006 - training loss (default): 4624.8992, validation loss: 12998.2837
2025-05-20 01:01:53 [INFO]: Epoch 007 - training loss (default): 4622.7121, validation loss: 12972.6780
2025-05-20 01:01:54 [INFO]: Epoch 008 - training loss (default): 4621.2216, validation loss: 12948.2343
2025-05-20 01:01:54 [INFO]: Epoch 009 - training loss (default): 4620.1189, validation loss: 12925.8358
2025-05-20 01:01:55 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:02:09,799] Trial 7 finished with value: 7699.871230387868 and parameters: {'lr': 0.0003689571079285201, 'epochs': 31, 'batch_size': 128, 'length_scale': 3.8583632102331533, 'beta': 0.24865609459032972}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:02:09 [INFO]: Using the given device: cuda
2025-05-20 01:02:09 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010209
2025-05-20 01:02:09 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010209/tensorboard
2025-05-20 01:02:09 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/b44afb216df244f4833e6b06dbbaabd7
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:02:11 [INFO]: Epoch 001 - training loss (default): 22367.2716, validation loss: 32717.9982
2025-05-20 01:02:12 [INFO]: Epoch 002 - training loss (default): 11385.1933, validation loss: 32026.7265
2025-05-20 01:02:13 [INFO]: Epoch 003 - training loss (default): 10947.8143, validation loss: 31680.5275
2025-05-20 01:02:14 [INFO]: Epoch 004 - training loss (default): 10879.9530, validation loss: 31425.0185
2025-05-20 01:02:16 [INFO]: Epoch 005 - training loss (default): 10858.5507, validation loss: 31225.6940
2025-05-20 01:02:17 [INFO]: Epoch 006 - training loss (default): 10849.3779, validation loss: 31064.4976
2025-05-20 01:02:18 [INFO]: Epoch 007 - training loss (default): 10844.5407, validation loss: 30932.6919
2025-05-20 01:02:19 [INFO]: Epoch 008 - training loss (default): 10841.7055, validation loss: 30811.9387
2025-05-20 01:02:21 [INFO]: Epoch 009 - training loss (default): 10839.8575, validation loss: 30707.6836
2025-05-20 01:02:22 [INFO]: Epoch 010 - training loss (

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:02:54,453] Trial 8 finished with value: 7864.355183212267 and parameters: {'lr': 0.00010433242531794538, 'epochs': 36, 'batch_size': 32, 'length_scale': 4.345805014963809, 'beta': 0.45117333706517215}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:02:54 [INFO]: Using the given device: cuda
2025-05-20 01:02:54 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010254
2025-05-20 01:02:54 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010254/tensorboard
2025-05-20 01:02:54 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/a97c268b9cd649d381f0d97cfd1818ed
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:02:55 [INFO]: Epoch 001 - training loss (default): 476.0661, validation loss: 44.4445
2025-05-20 01:02:56 [INFO]: Epoch 002 - training loss (default): 456.7899, validation loss: 43.9670
2025-05-20 01:02:58 [INFO]: Epoch 003 - training loss (default): 455.6383, validation loss: 43.5770
2025-05-20 01:02:59 [INFO]: Epoch 004 - training loss (default): 454.8821, validation loss: 43.2697
2025-05-20 01:03:00 [INFO]: Epoch 005 - training loss (default): 454.1021, validation loss: 43.0018
2025-05-20 01:03:01 [INFO]: Epoch 006 - training loss (default): 453.1842, validation loss: 42.7158
2025-05-20 01:03:03 [INFO]: Epoch 007 - training loss (default): 452.5992, validation loss: 42.4859
2025-05-20 01:03:04 [INFO]: Epoch 008 - training loss (default): 452.3109, validation loss: 42.2584
2025-05-20 01:03:05 [INFO]: Epoch 009 - training loss (default): 452.1425, validation loss: 42.0828
2025-05-20 01:03:06 [INFO]: Epoch 010 - training loss (default): 452.0164, validation loss: 41.8983


The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:03:12,053] Trial 9 finished with value: 7724.759744442614 and parameters: {'lr': 0.0005285519017606626, 'epochs': 14, 'batch_size': 32, 'length_scale': 0.759798862029593, 'beta': 0.2714230654017563}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:03:12 [INFO]: Using the given device: cuda
2025-05-20 01:03:12 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010312
2025-05-20 01:03:12 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010312/tensorboard
2025-05-20 01:03:12 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/e538ff9c7a794766b67de28bf61209ac
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:03:12 [INFO]: Epoch 001 - training loss (default): 670.6750, validation loss: 182.3160
2025-05-20 01:03:13 [INFO]: Epoch 002 - training loss (default): 608.5812, validation loss: 180.3310
2025-05-20 01:03:14 [INFO]: Epoch 003 - training loss (default): 552.8417, validation loss: 178.2213
2025-05-20 01:03:14 [INFO]: Epoch 004 - training loss (default): 517.3310, validation loss: 176.3697
2025-05-20 01:03:15 [INFO]: Epoch 005 - training loss (default): 500.1164, validation loss: 175.0572
2025-05-20 01:03:15 [INFO]: Epoch 006 - training loss (default): 493.8599, validation loss: 174.1248
2025-05-20 01:03:16 [INFO]: Epoch 007 - training loss (default): 491.0825, validation loss: 173.4272
2025-05-20 01:03:17 [INFO]: Epoch 008 - training loss (default): 489.4451, validation loss: 172.8874
2025-05-20 01:03:18 [INFO]: Epoch 009 - training loss (default): 488.2642, validation loss: 172.4269
2025-05-20 01:03:19 [INFO]: Epoch 010 - training loss (default): 487.6115, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:03:25,985] Trial 10 finished with value: 7854.583647898926 and parameters: {'lr': 0.00010872555931990879, 'epochs': 20, 'batch_size': 128, 'length_scale': 1.887251258669379, 'beta': 0.1068312281901258}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:03:26 [INFO]: Using the given device: cuda
2025-05-20 01:03:26 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010326
2025-05-20 01:03:26 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010326/tensorboard
2025-05-20 01:03:26 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/885c5da4ad2e4e9cbe0eb8d2256fef82
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:03:26 [INFO]: Epoch 001 - training loss (default): 9271.4218, validation loss: 10704.8013
2025-05-20 01:03:27 [INFO]: Epoch 002 - training loss (default): 4581.7434, validation loss: 10409.1409
2025-05-20 01:03:28 [INFO]: Epoch 003 - training loss (default): 3544.2606, validation loss: 10257.9053
2025-05-20 01:03:28 [INFO]: Epoch 004 - training loss (default): 3459.6607, validation loss: 10172.1635
2025-05-20 01:03:29 [INFO]: Epoch 005 - training loss (default): 3441.5273, validation loss: 10108.2076
2025-05-20 01:03:30 [INFO]: Epoch 006 - training loss (default): 3433.9532, validation loss: 10057.5363
2025-05-20 01:03:30 [INFO]: Epoch 007 - training loss (default): 3429.9052, validation loss: 10015.7954
2025-05-20 01:03:31 [INFO]: Epoch 008 - training loss (default): 3427.4687, validation loss: 9978.2260
2025-05-20 01:03:32 [INFO]: Epoch 009 - training loss (default): 3425.8663, validation loss: 9945.6511
2025-05-20 01:03:33 [INFO]: Epoch 010 - training loss (default): 3

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:04:00,884] Trial 11 finished with value: 7731.685925499208 and parameters: {'lr': 0.00020666308989589628, 'epochs': 50, 'batch_size': 96, 'length_scale': 2.8693244504795192, 'beta': 0.6965711335224195}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:04:00 [INFO]: Using the given device: cuda
2025-05-20 01:04:00 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010400
2025-05-20 01:04:00 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010400/tensorboard
2025-05-20 01:04:00 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/4038f4670a344eaca00d15fbf8094028
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:04:01 [INFO]: Epoch 001 - training loss (default): 1745.7657, validation loss: 1533.1462
2025-05-20 01:04:02 [INFO]: Epoch 002 - training loss (default): 1035.1328, validation loss: 1492.9006
2025-05-20 01:04:02 [INFO]: Epoch 003 - training loss (default): 876.7581, validation loss: 1472.5517
2025-05-20 01:04:03 [INFO]: Epoch 004 - training loss (default): 862.2320, validation loss: 1462.5298
2025-05-20 01:04:04 [INFO]: Epoch 005 - training loss (default): 858.4526, validation loss: 1455.5197
2025-05-20 01:04:05 [INFO]: Epoch 006 - training loss (default): 856.2639, validation loss: 1449.9067
2025-05-20 01:04:05 [INFO]: Epoch 007 - training loss (default): 854.8336, validation loss: 1445.0288
2025-05-20 01:04:06 [INFO]: Epoch 008 - training loss (default): 853.9370, validation loss: 1440.9417
2025-05-20 01:04:07 [INFO]: Epoch 009 - training loss (default): 853.2285, validation loss: 1437.0120
2025-05-20 01:04:07 [INFO]: Epoch 010 - training loss (default): 852.7816, valid

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:04:34,776] Trial 12 finished with value: 7669.15731490494 and parameters: {'lr': 0.0002005559598063997, 'epochs': 49, 'batch_size': 96, 'length_scale': 2.023676863540397, 'beta': 0.667226308340292}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:04:34 [INFO]: Using the given device: cuda
2025-05-20 01:04:34 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010434
2025-05-20 01:04:34 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010434/tensorboard
2025-05-20 01:04:34 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/04e4c0e53a854d8cb1deac168cd57366
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:04:35 [INFO]: Epoch 001 - training loss (default): 1472.4955, validation loss: 1241.1260
2025-05-20 01:04:36 [INFO]: Epoch 002 - training loss (default): 903.3558, validation loss: 1206.6314
2025-05-20 01:04:37 [INFO]: Epoch 003 - training loss (default): 809.5294, validation loss: 1193.2080
2025-05-20 01:04:38 [INFO]: Epoch 004 - training loss (default): 799.6141, validation loss: 1185.9316
2025-05-20 01:04:38 [INFO]: Epoch 005 - training loss (default): 796.5939, validation loss: 1180.1626
2025-05-20 01:04:39 [INFO]: Epoch 006 - training loss (default): 794.8908, validation loss: 1175.4418
2025-05-20 01:04:40 [INFO]: Epoch 007 - training loss (default): 793.8057, validation loss: 1171.4098
2025-05-20 01:04:41 [INFO]: Epoch 008 - training loss (default): 793.2595, validation loss: 1167.9206
2025-05-20 01:04:42 [INFO]: Epoch 009 - training loss (default): 792.6180, validation loss: 1164.7672
2025-05-20 01:04:42 [INFO]: Epoch 010 - training loss (default): 792.2148, valida

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:05:06,593] Trial 13 finished with value: 7670.815126466501 and parameters: {'lr': 0.00015039393075808947, 'epochs': 39, 'batch_size': 64, 'length_scale': 1.8094513531125571, 'beta': 0.9150480155907227}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:05:06 [INFO]: Using the given device: cuda
2025-05-20 01:05:06 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010506
2025-05-20 01:05:06 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010506/tensorboard
2025-05-20 01:05:06 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/f38bbe1d526d4485bb23a0493fc7d0df
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:05:07 [INFO]: Epoch 001 - training loss (default): 2969.6740, validation loss: 2829.6861
2025-05-20 01:05:08 [INFO]: Epoch 002 - training loss (default): 1820.8667, validation loss: 2769.4239
2025-05-20 01:05:08 [INFO]: Epoch 003 - training loss (default): 1309.5530, validation loss: 2724.3452
2025-05-20 01:05:09 [INFO]: Epoch 004 - training loss (default): 1223.6584, validation loss: 2699.3177
2025-05-20 01:05:10 [INFO]: Epoch 005 - training loss (default): 1206.0836, validation loss: 2682.9581
2025-05-20 01:05:11 [INFO]: Epoch 006 - training loss (default): 1199.5232, validation loss: 2670.8293
2025-05-20 01:05:12 [INFO]: Epoch 007 - training loss (default): 1195.9972, validation loss: 2660.6425
2025-05-20 01:05:12 [INFO]: Epoch 008 - training loss (default): 1193.7717, validation loss: 2651.9840
2025-05-20 01:05:13 [INFO]: Epoch 009 - training loss (default): 1192.2481, validation loss: 2644.4151
2025-05-20 01:05:14 [INFO]: Epoch 010 - training loss (default): 1191.132

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:05:35,867] Trial 14 finished with value: 7665.744823679699 and parameters: {'lr': 0.0001503162230264827, 'epochs': 41, 'batch_size': 96, 'length_scale': 2.2121061893036917, 'beta': 0.7723305118047362}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:05:35 [INFO]: Using the given device: cuda
2025-05-20 01:05:35 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010535
2025-05-20 01:05:35 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010535/tensorboard
2025-05-20 01:05:35 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/75383959ecf14212b1dcc0519bc6d69b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:05:36 [INFO]: Epoch 001 - training loss (default): 23567.9265, validation loss: 28805.5323
2025-05-20 01:05:37 [INFO]: Epoch 002 - training loss (default): 11046.0269, validation loss: 28100.8187
2025-05-20 01:05:38 [INFO]: Epoch 003 - training loss (default): 9075.0433, validation loss: 27782.6474
2025-05-20 01:05:39 [INFO]: Epoch 004 - training loss (default): 8881.0312, validation loss: 27578.3022
2025-05-20 01:05:39 [INFO]: Epoch 005 - training loss (default): 8825.5529, validation loss: 27430.6219
2025-05-20 01:05:40 [INFO]: Epoch 006 - training loss (default): 8801.7859, validation loss: 27317.8296
2025-05-20 01:05:41 [INFO]: Epoch 007 - training loss (default): 8789.2793, validation loss: 27212.8851
2025-05-20 01:05:42 [INFO]: Epoch 008 - training loss (default): 8782.1162, validation loss: 27121.6610
2025-05-20 01:05:43 [INFO]: Epoch 009 - training loss (default): 8777.8493, validation loss: 27042.6999
2025-05-20 01:05:44 [INFO]: Epoch 010 - training loss (default

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:06:05,488] Trial 15 finished with value: 7785.683721577448 and parameters: {'lr': 0.00013776655442489678, 'epochs': 36, 'batch_size': 64, 'length_scale': 3.381012701607, 'beta': 0.822443359429768}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:06:05 [INFO]: Using the given device: cuda
2025-05-20 01:06:05 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010605
2025-05-20 01:06:05 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010605/tensorboard
2025-05-20 01:06:05 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/eaec2f1376154c00bafbc69cd142f268
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:06:06 [INFO]: Epoch 001 - training loss (default): 4493.9357, validation loss: 4315.7177
2025-05-20 01:06:06 [INFO]: Epoch 002 - training loss (default): 3400.7521, validation loss: 4272.0326
2025-05-20 01:06:07 [INFO]: Epoch 003 - training loss (default): 2309.5217, validation loss: 4226.7852
2025-05-20 01:06:08 [INFO]: Epoch 004 - training loss (default): 1808.7393, validation loss: 4191.9220
2025-05-20 01:06:08 [INFO]: Epoch 005 - training loss (default): 1665.8845, validation loss: 4170.1793
2025-05-20 01:06:09 [INFO]: Epoch 006 - training loss (default): 1624.2300, validation loss: 4155.3594
2025-05-20 01:06:10 [INFO]: Epoch 007 - training loss (default): 1607.6605, validation loss: 4144.3818
2025-05-20 01:06:10 [INFO]: Epoch 008 - training loss (default): 1599.6806, validation loss: 4134.3438
2025-05-20 01:06:11 [INFO]: Epoch 009 - training loss (default): 1594.9072, validation loss: 4125.6857
2025-05-20 01:06:12 [INFO]: Epoch 010 - training loss (default): 1592.030

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:06:33,702] Trial 16 finished with value: 7686.786999274495 and parameters: {'lr': 0.00012482136418713805, 'epochs': 42, 'batch_size': 128, 'length_scale': 2.3752912420933403, 'beta': 0.7978168675538333}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:06:33 [INFO]: Using the given device: cuda
2025-05-20 01:06:33 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010633
2025-05-20 01:06:33 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010633/tensorboard
2025-05-20 01:06:33 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/a0464e71da4e47cba7e96dc20d785590
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:06:34 [INFO]: Epoch 001 - training loss (default): 630.3319, validation loss: 182.6553
2025-05-20 01:06:35 [INFO]: Epoch 002 - training loss (default): 546.0525, validation loss: 180.0120
2025-05-20 01:06:35 [INFO]: Epoch 003 - training loss (default): 526.0583, validation loss: 179.4218
2025-05-20 01:06:36 [INFO]: Epoch 004 - training loss (default): 520.8473, validation loss: 178.3478
2025-05-20 01:06:37 [INFO]: Epoch 005 - training loss (default): 519.1317, validation loss: 177.6463
2025-05-20 01:06:37 [INFO]: Epoch 006 - training loss (default): 518.3940, validation loss: 177.1719
2025-05-20 01:06:38 [INFO]: Epoch 007 - training loss (default): 517.9708, validation loss: 176.7881
2025-05-20 01:06:39 [INFO]: Epoch 008 - training loss (default): 517.6482, validation loss: 176.4862
2025-05-20 01:06:39 [INFO]: Epoch 009 - training loss (default): 517.4393, validation loss: 176.2450
2025-05-20 01:06:40 [INFO]: Epoch 010 - training loss (default): 517.1016, validation loss:

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:06:50,012] Trial 17 finished with value: 7694.538604954915 and parameters: {'lr': 0.00025918970883937993, 'epochs': 32, 'batch_size': 96, 'length_scale': 1.2605550406001043, 'beta': 0.48261393074719566}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:06:50 [INFO]: Using the given device: cuda
2025-05-20 01:06:50 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010650
2025-05-20 01:06:50 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010650/tensorboard
2025-05-20 01:06:50 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/4d8251559bfa4fcabd576dbc04a7cb3b
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:06:50 [INFO]: Epoch 001 - training loss (default): 54201.9327, validation loss: 67773.3948
2025-05-20 01:06:51 [INFO]: Epoch 002 - training loss (default): 26064.0104, validation loss: 65885.0124
2025-05-20 01:06:52 [INFO]: Epoch 003 - training loss (default): 22824.6580, validation loss: 65168.2907
2025-05-20 01:06:53 [INFO]: Epoch 004 - training loss (default): 22569.3374, validation loss: 64755.7475
2025-05-20 01:06:54 [INFO]: Epoch 005 - training loss (default): 22497.4194, validation loss: 64425.7128
2025-05-20 01:06:54 [INFO]: Epoch 006 - training loss (default): 22465.9395, validation loss: 64145.6955
2025-05-20 01:06:55 [INFO]: Epoch 007 - training loss (default): 22449.1510, validation loss: 63905.2061
2025-05-20 01:06:56 [INFO]: Epoch 008 - training loss (default): 22439.0125, validation loss: 63695.7872
2025-05-20 01:06:57 [INFO]: Epoch 009 - training loss (default): 22432.6114, validation loss: 63512.5545
2025-05-20 01:06:58 [INFO]: Epoch 010 - training loss (

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:07:09,943] Trial 18 finished with value: 8044.142798093759 and parameters: {'lr': 0.00015586061225537153, 'epochs': 24, 'batch_size': 64, 'length_scale': 4.7600643770276365, 'beta': 0.7974871928040819}. Best is trial 4 with value: 7664.664779482548.
2025-05-20 01:07:09 [INFO]: Using the given device: cuda
2025-05-20 01:07:09 [INFO]: Model files will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010709
2025-05-20 01:07:09 [INFO]: Tensorboard file will be saved to /home/ec2-user/SageMaker/sensor-imputation-thesis/src/sensor_imputation_thesis/nadire/gp_vae/best_model/20250520_T010709/tensorboard
2025-05-20 01:07:09 [INFO]: GPVAE initialized with the given hyperparameters, the number of trainable parameters: 209,678


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/a87360c5d14a499d9e48c7ec1c8b18e2
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318


2025-05-20 01:07:10 [INFO]: Epoch 001 - training loss (default): 14176.3963, validation loss: 17276.3695
2025-05-20 01:07:11 [INFO]: Epoch 002 - training loss (default): 6433.6481, validation loss: 16789.6317
2025-05-20 01:07:12 [INFO]: Epoch 003 - training loss (default): 5533.9209, validation loss: 16617.4325
2025-05-20 01:07:12 [INFO]: Epoch 004 - training loss (default): 5482.6444, validation loss: 16533.7953
2025-05-20 01:07:13 [INFO]: Epoch 005 - training loss (default): 5467.5532, validation loss: 16467.5840
2025-05-20 01:07:14 [INFO]: Epoch 006 - training loss (default): 5459.5771, validation loss: 16410.7124
2025-05-20 01:07:14 [INFO]: Epoch 007 - training loss (default): 5454.8097, validation loss: 16362.3166
2025-05-20 01:07:15 [INFO]: Epoch 008 - training loss (default): 5451.8163, validation loss: 16318.2273
2025-05-20 01:07:16 [INFO]: Epoch 009 - training loss (default): 5449.7641, validation loss: 16278.8545
2025-05-20 01:07:16 [INFO]: Epoch 010 - training loss (default)

The shape of gp_vae_imputation is (1056, 2, 20, 31)


[I 2025-05-20 01:07:35,670] Trial 19 finished with value: 7742.206275543968 and parameters: {'lr': 0.00024272238891417084, 'epochs': 36, 'batch_size': 96, 'length_scale': 3.5478580789988197, 'beta': 0.40201733936255823}. Best is trial 4 with value: 7664.664779482548.


🏃 View run GP-VAE-Trial at: http://localhost:5000/#/experiments/832352739106302318/runs/be94307aeecf4034a1e4d0c9fd03f9e4
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
Best Parameters: {'lr': 0.00012021352690757769, 'epochs': 43, 'batch_size': 64, 'length_scale': 2.3487754392417886, 'beta': 0.45088192430534046}
Best Objective Value: 7664.664779482548
🏃 View run GPVAE_Optuna_Study(2) at: http://localhost:5000/#/experiments/832352739106302318/runs/fc110587ae72462385114c2ee013d857
🧪 View experiment at: http://localhost:5000/#/experiments/832352739106302318
