In [1]:
%load_ext autoreload
%autoreload 

In [2]:
import os
import multiprocessing
# Select the "spawn" start method for multiprocessing; force=True overrides a
# start method that was already set (e.g. when this cell is re-run).
# NOTE(review): presumably required by the DataLoader worker processes created
# later in this notebook - confirm.
multiprocessing.set_start_method("spawn", force=True)

##> import libraries
import sys
from pathlib import Path
import random
import time
from itertools import product

# The project root is the parent of the current working directory; it is put on
# sys.path so that the `src` package can be imported below.
root_dir = Path.cwd().resolve().parent
if not root_dir.exists():
    raise FileNotFoundError('Root directory not found')
sys.path.append(str(root_dir))

#> import flower
import flwr as fl

#> import custom libraries
from src.load import load_df_to_dataset
from src.EAE import EvidentialTransformerDenoiseAutoEncoder, evidential_regression
from src.client import train_and_evaluate_local, evaluate_saved_model
from src.datasets import TrajectoryDataset, clean_outliers_by_quantile
from src.plot import plot_loss, plot_tsne_with_uncertainty, plot_uncertainty

#> torch libraries
import torch
from torch.utils.data import DataLoader
import torch.optim as optim
import torch.nn as nn
import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.model_selection import train_test_split

#> Plot
import matplotlib.pyplot as plt
import seaborn as sns
# import scienceplots  # https://github.com/garrettj403/SciencePlots?tab=readme-ov-file
#plt.style.use(['science', 'grid', 'notebook'])  # , 'ieee'


# %matplotlib inline
#%matplotlib widget


In [None]:
# Locate the dataset assets directory and the saved-model directory
# Dataset assets live outside this project: four directory levels above
# root_dir (parents[3]), under aistraj/bin/tvt_assets.
assets_dir = (root_dir.parents[3] / 'aistraj' / 'bin' / 'tvt_assets').resolve()
print(f"Assets Directory: {assets_dir}")
if not assets_dir.exists():
    raise FileNotFoundError('Assets directory not found')

# Directory for model checkpoints, located directly under the project root.
saved_model_dir = (root_dir / 'models').resolve()
# Label fixed: this line reports the model directory, not the assets directory.
print(f"Model Directory: {saved_model_dir}")
if not saved_model_dir.exists():
    raise FileNotFoundError('Model directory not found')

In [4]:
def load_datasets(assets_dir, seq_len=960, batch_size=32, num_workers=2):
    """Build the training and validation data loaders for the auto-encoder.

    Reads the cleaned, extended train/validate parquet files from
    ``assets_dir / 'extended'``, passes them through
    ``clean_outliers_by_quantile`` and wraps each in a ``TrajectoryDataset``
    and a ``DataLoader``.

    Args:
        assets_dir: Path-like object supporting the ``/`` operator; must
            contain an ``extended`` sub-directory with the two parquet files.
        seq_len: Passed to ``TrajectoryDataset`` both as ``seq_len`` and as
            ``filter_less_seq_len``.
        batch_size: Batch size used by both data loaders.
        num_workers: Number of worker processes for both data loaders
            (defaults to 2, the value that was previously hard-coded).

    Returns:
        A 5-tuple ``(train_dataloader, val_dataloader, n_features,
        val_dataset, cleaned_val_data)`` where ``n_features`` is read from the
        training dataset.
    """
    extended_dir = assets_dir / 'extended'

    # The inputs are parquet files (the earlier "pickle" naming was misleading).
    train_parquet_path = extended_dir / 'cleaned_extended_train_df.parquet'
    validate_parquet_path = extended_dir / 'cleaned_extended_validate_df.parquet'
    train_df_extend = load_df_to_dataset(train_parquet_path).data
    validate_df_extend = load_df_to_dataset(validate_parquet_path).data

    # Features that are discarded by the dataset
    drop_features_list = ['epoch', 'datetime', 'obj_id', 'traj_id', 'stopped', 'curv', 'abs_ccs']

    # Columns that are cleaned of outliers
    columns_to_clean = ['speed_c', 'lon', 'lat']
    cleaned_train_data = clean_outliers_by_quantile(train_df_extend, columns_to_clean, remove_na=False)
    cleaned_val_data = clean_outliers_by_quantile(validate_df_extend, columns_to_clean, remove_na=False)

    # Both datasets are configured identically.
    # NOTE(review): each dataset receives scaler_method independently, so the
    # validation set presumably fits its own scaler rather than reusing the
    # training one - confirm this is intended.
    dataset_kwargs = dict(
        seq_len=seq_len,
        mode='ae',
        drop_features_list=drop_features_list,
        scaler_method='QuantileTransformer',
        filter_less_seq_len=seq_len,
    )
    train_dataset_traj = TrajectoryDataset(cleaned_train_data, **dataset_kwargs)
    val_dataset_traj = TrajectoryDataset(cleaned_val_data, **dataset_kwargs)

    # Only the training loader shuffles; every other option is shared.
    loader_kwargs = dict(
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=False,
    )
    train_dataloader_traj = DataLoader(train_dataset_traj, shuffle=True, **loader_kwargs)
    val_dataloader_traj = DataLoader(val_dataset_traj, shuffle=False, **loader_kwargs)

    return (
        train_dataloader_traj,
        val_dataloader_traj,
        train_dataset_traj.n_features,
        val_dataset_traj,
        cleaned_val_data,
    )

In [None]:
# Build the data loaders with the default sequence length and batch size;
# input_dim is the feature count reported by the training dataset.
(
    train_dataloader_traj,
    val_dataloader_traj,
    input_dim,
    val_dataset_traj,
    cleaned_val_data,
) = load_datasets(assets_dir)

In [6]:
# Training hyper-parameters
learning_rate = 1e-4  # Ensure this is a float, not a tuple
lambda_reg = 0.5  # forwarded to the training/evaluation helpers as `lambda_reg`
num_epochs = 20

# saved_model_dir is a pathlib.Path, so `saved_model_dir + '/...'` raises
# TypeError. Join with `/` and convert to str, which yields the plain string
# path the concatenation was meant to produce.
save_model_path = str(saved_model_dir / 'eae_model_qt_lambda05_960_20e.pth')

# Define the model, criterion, and optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EvidentialTransformerDenoiseAutoEncoder(
    input_dim=input_dim,
    d_model=8,
    nhead=4,
    num_encoder_layers=2,
    num_decoder_layers=2,
    dim_feedforward=32,
    # Same value as the default seq_len of load_datasets.
    # NOTE(review): presumably these two must stay in sync - confirm.
    max_seq_length=960,
    dropout_rate=0.1
)

criterion = evidential_regression
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train and evaluate the model locally, then unpack the twelve returned values.
# The checkpoint location is handed over via save_model_path.
(
    train_losses,
    val_losses,
    train_aleatoric_uncertainties,
    train_epistemic_uncertainties,
    val_aleatoric_uncertainties,
    val_epistemic_uncertainties,
    train_aleatoric_uncertainties_avg,
    train_epistemic_uncertainties_avg,
    val_aleatoric_uncertainties_avg,
    val_epistemic_uncertainties_avg,
    latent_representations,
    recon_error,
) = train_and_evaluate_local(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    train_dataloader=train_dataloader_traj,
    val_dataloader=val_dataloader_traj,
    num_epochs=num_epochs,
    lambda_reg=lambda_reg,
    offset=2.5,
    device=device,
    return_latent=True,
    save_model_path=save_model_path,
)

In [None]:
# 1-based epoch axis derived from num_epochs, so the plots stay consistent
# when the number of training epochs is changed (previously hard-coded to 20).
epochs = range(1, num_epochs + 1)
plot_loss(epochs, train_losses, val_losses)

In [None]:
# Plot the averaged aleatoric and epistemic uncertainties (train vs. validation)
# against the epoch axis.
plot_uncertainty(
    epochs,
    train_aleatoric_uncertainties_avg,
    val_aleatoric_uncertainties_avg,
    train_epistemic_uncertainties_avg,
    val_epistemic_uncertainties_avg,
)

In [None]:
# t-SNE view of the latent representations, using the validation epistemic
# uncertainties.
plot_tsne_with_uncertainty(
    latent_representations,
    val_epistemic_uncertainties,
    uncertainty_type='epistemic',
)

In [None]:
# t-SNE view of the latent representations, using the validation aleatoric
# uncertainties.
plot_tsne_with_uncertainty(
    latent_representations,
    val_aleatoric_uncertainties,
    uncertainty_type='aleatoric',
)

In [None]:
# Evaluate the checkpoint stored at save_model_path on the validation loader.
# NOTE: this rebinds val_aleatoric_uncertainties, val_epistemic_uncertainties
# and recon_error, replacing the values returned by the training cell.
# NOTE(review): `model_class` receives the model *instance* created above -
# confirm that evaluate_saved_model expects an instance rather than a class.
(
    val_loss,
    val_aleatoric_uncertainties,
    val_epistemic_uncertainties,
    avg_aleatoric_uncertainty,
    avg_epistemic_uncertainty,
    latent_representations_eval,
    recon_error,
) = evaluate_saved_model(
    model_class=model,
    model_path=save_model_path,
    criterion=evidential_regression,
    val_dataloader=val_dataloader_traj,
    lambda_reg=lambda_reg,
    offset=2.5,
    # Use the device selected earlier (CUDA with CPU fallback) instead of a
    # hard-coded 'cuda', which fails on machines without a GPU.
    device=device,
    return_latent=True,
)