# VAE Part

## Imports and initialisation

In [24]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
#from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

import xgboost as xgb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.adam import Adam
from torch.utils.data import Dataset, DataLoader, random_split

import joblib

from suave import SuaveClassifier


In [None]:
# read the parquet files in the data folder
full_data = pd.read_parquet('../data/train-0.parquet')
full_data

In [61]:
# preprocess the data into features and target
def data_preprocess(data, max_lag = 3, symbol=None):
    """Build the feature matrix and the ``responder_6`` target from raw data.

    The input frame is left untouched: every step works on a private copy,
    so the same raw frame can be preprocessed several times.

    Steps, in order: optional filtering on one symbol, creation of lagged
    ``responder_6`` columns (shifted within each ``symbol_id``), removal of
    the rows whose lags are undefined, removal of columns that are more than
    50% missing, removal of the identifier / other responder columns,
    forward- then backward-fill of the remaining gaps, and removal of
    constant (zero standard deviation) columns.

    Args:
        data (pd.DataFrame): Raw frame with at least the ``symbol_id`` and
            ``responder_6`` columns.
        max_lag (int): Number of lagged ``responder_6`` columns to create
            (``responder_6_lag1`` ... ``responder_6_lag{max_lag}``).
        symbol: When not ``None``, keep only the rows whose ``symbol_id``
            equals this value.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the feature DataFrame and ``y`` the
        ``responder_6`` Series, both sharing the same index.
    """
    # Copy up front: adding columns to a filtered slice triggers pandas'
    # chained-assignment warning, and without a symbol filter the original
    # code edited the caller's frame in place.
    if symbol is not None:
        df = data[data['symbol_id'] == symbol].copy()
    else:
        df = data.copy()

    # Create lags, shifted inside each symbol so series never mix
    lag_cols = [f'responder_6_lag{lag}' for lag in range(1, max_lag + 1)]
    responder_by_symbol = df.groupby('symbol_id')['responder_6']
    for lag, col in enumerate(lag_cols, start=1):
        df[col] = responder_by_symbol.shift(lag)

    # Drop the first rows of every symbol, whose lags do not exist
    if lag_cols:
        df = df.dropna(subset=lag_cols)

    # Drop columns with more than 50% missing values
    missing_ratio = df.isnull().mean()
    df = df.drop(columns=missing_ratio[missing_ratio > 0.5].index)

    # Drop columns not needed; errors='ignore' because some of them may be
    # absent from the input or already removed by the missing-value filter
    exclude_cols = ['date_id', 'time_id'] + [f'responder_{i}' for i in range(9) if i != 6]
    df = df.drop(columns=exclude_cols, errors='ignore')

    # Fill missing values with previous and next values.
    # NOTE: the fill runs over the whole frame, so when several symbols are
    # present a gap can be filled with a value from a neighbouring symbol.
    df = df.ffill().bfill()

    # Drop columns with zero standard deviation (they carry no information)
    std = df.std(numeric_only=True)
    constant_cols = [col for col in std[std == 0].index if col != 'responder_6']

    # Separate features and target
    X = df.drop(columns=constant_cols + ['responder_6'])
    y = df['responder_6']

    return X, y


## Overview of feature importances and model sensitivity to the symbol

In [53]:
def data_preprocess_symbol(symbol, data):
    """Build features and target for a single symbol, with three target lags.

    This helper is deliberately NOT called ``data_preprocess``: the file
    already defines ``data_preprocess(data, max_lag, symbol)`` earlier, and
    reusing the name with a different signature would shadow it and break
    every later call that uses the first signature.

    Args:
        symbol: Value of ``symbol_id`` to keep.
        data (pd.DataFrame): Raw frame with ``symbol_id``, ``date_id``,
            ``time_id`` and ``responder_0`` ... ``responder_8`` columns.

    Returns:
        tuple: ``(X, y)`` where ``X`` holds every remaining column (including
        ``symbol_id`` and the lag columns) and ``y`` is ``responder_6``.
    """
    # Define columns to exclude (all responders: the target is returned apart)
    exclude_cols = ['date_id', 'time_id'] + [f'responder_{i}' for i in range(0, 9)]
    lags = [1, 2, 3]
    lag_cols = [f'responder_6_lag{lag}' for lag in lags]

    # Filter data for the selected symbol; copy so the lag columns are added
    # to a private frame rather than to a slice of the caller's data
    df = data[data['symbol_id'] == symbol].copy()

    # Create lags
    for lag, col in zip(lags, lag_cols):
        df[col] = df['responder_6'].shift(lag)

    # Drop the leading rows whose lags are undefined
    df = df.dropna(subset=lag_cols)

    # Separate features and target
    X = df.drop(columns=exclude_cols)
    y = df['responder_6']

    return X, y

def train_model_xgboost(symbol, data):
    """Train an XGBoost regressor on one symbol and report its test MSE.

    The last 20% of the rows (no shuffling) form the test set. That same test
    set is also the early-stopping evaluation set, so the reported MSE is
    measured on data that influenced when training stopped.

    Args:
        symbol: Value of ``symbol_id`` to model.
        data (pd.DataFrame): Raw frame passed on to the preprocessing helper.

    Returns:
        tuple: ``(model, mse)`` with the trained booster and its test MSE.
    """
    # Data preprocessing
    X, y = data_preprocess_symbol(symbol, data)
    X['symbol_id'] = X['symbol_id'].astype('category')

    # Chronological split: the most recent 20% of rows are held out
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    # Convert to DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
    dtest = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

    # Training of a XGBoost model with verbose evaluation
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'seed': 42
    }
    evals = [(dtrain, 'train'), (dtest, 'eval')]
    model = xgb.train(params, dtrain, num_boost_round=1000, evals=evals, early_stopping_rounds=10, verbose_eval=10)

    # Predictions
    y_pred = model.predict(dtest)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Final MSE: {mse:.2f}")

    return model, mse

In [None]:
# Fit one XGBoost model per symbol and gather, for later comparison, each
# model's test MSE and the split count ('weight') of every feature it used

mse = {}
features_importances = {}
symbols = full_data['symbol_id'].unique().tolist()

for symbol in symbols:
    print(f"Predictions for symbol {symbol}")

    # Train the model for this symbol and record its test MSE
    model, symbol_mse = train_model_xgboost(symbol, full_data)
    mse[f'symbol_{symbol}'] = symbol_mse

    # Append this model's importance of each feature to that feature's list,
    # creating the list the first time the feature is seen
    importances = model.get_score(importance_type='weight')
    for feature_name, weight in importances.items():
        features_importances.setdefault(f'{feature_name}', []).append(weight)
 


In [None]:
 # Plot RMSE Bar Chart and Feature Importance Boxplot

# Bar chart: one bar per symbol showing that symbol's test MSE
symbol_labels = list(mse)
plt.figure(figsize=(10, 6))
plt.bar(symbol_labels, [mse[label] for label in symbol_labels])
plt.xticks(rotation=90)
plt.xlabel('Symbol')
plt.ylabel('MSE')
plt.title('MSE for each symbol')
plt.show()

# Box plot: one box per feature, built from the importances that feature
# received across the per-symbol models
feature_labels = list(features_importances)
tick_positions = range(1, len(feature_labels) + 1)
plt.figure(figsize=(10, 6))
plt.boxplot([features_importances[name] for name in feature_labels])
plt.xticks(tick_positions, feature_labels, rotation=90)
plt.xlabel('Feature')
plt.ylabel('Importance')
plt.title('Feature Importance for each symbol')
plt.show()


## Building a VAE

In [101]:
# Create class KaggleDataset which is a child of Dataset class from torch.util.data

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class KaggleDataset(Dataset):
    """Map-style dataset of standardised rows built from the raw data frame.

    Every item is one row holding the preprocessed feature columns followed
    by the ``responder_6`` target column, standardised column-wise. Because
    the target is part of each item, anything learned from these rows has
    seen ``responder_6``.

    Attributes set by ``__init__``: ``dataframe`` (preprocessed, unscaled
    frame), ``features`` (standardised NumPy array), ``mean`` and ``std``
    (per-column statistics used for the standardisation).
    """

    def __init__(self, df):
        """
        Args:
            df (pd.DataFrame): DataFrame containing the data
        """
        # Store the preprocessed DataFrame in self.dataframe (can parametrize single or all symbols in data_preprocess).
        # A copy is passed so that preprocessing can never alter the caller's
        # frame, which other cells still need in its raw form.
        X, y = data_preprocess(df.copy())
        self.dataframe = pd.concat([X, y], axis=1)

        # Extract the values for easier manipulation
        self.features = self.dataframe.values

        # Calculate mean and std for normalization
        self.mean = self.features.mean(axis=0)
        self.std = self.features.std(axis=0)

        # A constant column has std 0; divide it by 1 instead so it becomes
        # all zeros rather than NaN/inf
        self.std[self.std == 0] = 1.0

        # Apply normalization to features
        self.features = (self.features - self.mean) / self.std

    def __len__(self):
        """Return the number of rows."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return row ``idx`` as a float32 tensor placed on ``device``."""
        features = torch.tensor(self.features[idx], dtype=torch.float32).to(device)
        return features

# Create a KaggleDataset object
dataset = KaggleDataset(full_data)

# Next, batch_size and input_dim are set, the dataset is divided into train and test subsets, and each subset is wrapped in a DataLoader. The device was set to 'cuda' above, if available.

In [102]:
# Define batch size and layer dimensions: the latent width is half the input
# width and the hidden width sits halfway between the two
batch_size = 64
input_dim = dataset.features.shape[1]
latent_dim = input_dim // 2
hidden_dim = (latent_dim + input_dim) // 2
# Define learning rate
learning_rate = 1e-3

# Split dataset into train and test without shuffle in the ratio of 80:20.
# random_split would draw the rows at random, which contradicts the
# "without shuffle" intent, so the first 80% of rows go to training and the
# remaining rows to testing.
n_train = int(0.8 * len(dataset))
train_dataset = torch.utils.data.Subset(dataset, range(n_train))
test_dataset = torch.utils.data.Subset(dataset, range(n_train, len(dataset)))

# Use DataLoader for batching; row order is preserved (shuffle=False)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [92]:
class VAE(nn.Module):
    """Fully connected variational autoencoder with a one-dimensional code.

    The encoder maps ``input_dim -> hidden_dim -> latent_dim``; two linear
    heads then produce the mean and the log-variance of a 1-dimensional
    Gaussian, from which the code ``z`` is sampled. The decoder maps
    ``1 -> latent_dim -> hidden_dim -> input_dim``. ``latent_dim`` is thus
    the width of the last encoder layer, not the size of the sampled code.

    Args:
        input_dim (int): Number of input features.
        hidden_dim (int): Width of the first encoder / last decoder hidden layer.
        latent_dim (int): Width of the layer feeding the mean / log-variance heads.
        device: Unused; kept so existing calls that pass it still work. Move
            the module with ``.to(device)`` instead.
    """

    def __init__(self, input_dim, hidden_dim, latent_dim, device=None):
        super().__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim),
            nn.LeakyReLU(0.2)
            )

        # Latent mean and log-variance heads
        self.mean_layer = nn.Linear(latent_dim, 1)
        self.logvar_layer = nn.Linear(latent_dim, 1)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(1, latent_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, input_dim)
            )

    def encode(self, x):
        """Return ``(mean, log_var)`` of the approximate posterior for ``x``."""
        x = self.encoder(x)
        mean, log_var = self.mean_layer(x), self.logvar_layer(x)
        return mean, log_var

    def reparameterization(self, mean, var):
        """Sample ``z = mean + std * eps`` with ``eps ~ N(0, 1)``.

        Args:
            mean: Posterior mean.
            var: Posterior LOG-variance. The parameter keeps its historical
                name, but every caller passes the log-variance, so it is
                converted to a standard deviation here.
        """
        # std = sqrt(exp(log_var)); using the raw log-variance as the scale
        # would allow negative "standard deviations" and would not match the
        # KL term of the loss
        std = torch.exp(0.5 * var)
        # randn_like already creates the noise on the same device as std
        epsilon = torch.randn_like(std)
        z = mean + std * epsilon
        return z

    def decode(self, x):
        """Map a code back to input space."""
        return self.decoder(x)

    def forward(self, x):
        """Return ``(x_hat, mean, log_var)`` for a batch ``x``."""
        mean, log_var = self.encode(x)
        z = self.reparameterization(mean, log_var)
        x_hat = self.decode(z)
        return x_hat, mean, log_var

    def reconstruction(self, mean, log_var):
        """Sample a code from ``(mean, log_var)`` and decode it."""
        z = self.reparameterization(mean, log_var)
        x_hat = self.decode(z)
        return x_hat


In [93]:
def loss_function(x, x_hat, mean, log_var):
    """Return the VAE loss: summed reconstruction error plus KL divergence.

    Args:
        x: Input batch.
        x_hat: Reconstruction of ``x``.
        mean: Posterior mean produced by the encoder.
        log_var: Posterior log-variance produced by the encoder.

    Returns:
        Scalar tensor, summed over the batch.
    """
    # Reproduction loss, summed over every element so that it lives on the
    # same scale as the KL term below (which is a sum, not a mean); with the
    # default 'mean' reduction the KL term would outweigh it by a factor of
    # batch_size * n_features
    reproduction_loss = nn.functional.mse_loss(x_hat, x, reduction='sum')
    # KL divergence between N(mean, exp(log_var)) and N(0, 1)
    KLD = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return reproduction_loss + KLD

# Build the VAE from the dimensions chosen above and move it to the device
model = VAE(input_dim, hidden_dim, latent_dim).to(device)

# Adam optimiser over all model parameters, using the chosen learning rate
optimizer = Adam(params=model.parameters(), lr=learning_rate)

In [None]:
def train(model, optimizer, epochs, device, dataloader=None):
    """Train ``model`` and print the average loss of every epoch.

    Args:
        model: VAE whose forward pass returns ``(x_hat, mean, log_var)``.
        optimizer: Optimiser bound to ``model``'s parameters.
        epochs (int): Number of passes over the data.
        device: Device each batch is moved to.
        dataloader: Iterable of batches. Defaults to the notebook-level
            ``train_dataloader`` so existing calls keep working.

    Returns:
        float: Sum of the batch losses of the last epoch (0 if ``epochs`` is 0).
    """
    if dataloader is None:
        dataloader = train_dataloader

    # Set model to training mode
    model.train()

    # Defined before the loop so the return value exists even with epochs=0
    overall_loss = 0

    # Loop for each epoch
    for epoch in range(epochs):
        overall_loss = 0
        n_samples = 0

        # Iterate over the batches formed by the loader
        for x in dataloader:
            x = x.to(device)

            # Reset Gradient
            optimizer.zero_grad()
            x_hat, mean, log_var = model(x)

            # Calculate batch loss and then overall loss
            loss = loss_function(x, x_hat, mean, log_var)
            overall_loss += loss.item()
            # Count the rows actually seen: the last batch may be smaller and
            # the loader's batch size may differ from the global batch_size
            n_samples += x.size(0)

            # Backpropagate the loss and update the parameters
            loss.backward()
            optimizer.step()

        # max(..., 1) keeps an empty loader from dividing by zero
        print("\tEpoch", epoch + 1, "\tAverage Loss: ", overall_loss / max(n_samples, 1))
    return overall_loss

# Train the model for 5 epochs
train(model, optimizer, epochs=5, device=device)

In [None]:
# Seconds per epoch: a hard-coded total of 37.1 divided by 5 epochs.
# NOTE(review): 37.1 is presumably the wall time in seconds observed for the
# 5-epoch training run above -- confirm, and re-measure after any change.
s_per_epoch = 37.1 / 5
s_per_epoch

In [None]:
# Get the latent representation of the data.
# train_dataset / test_dataset are Subset views: `.dataset` is the FULL
# dataset, so the rows of each subset must be selected through `.indices`
# (otherwise the model is fitted and scored on exactly the same rows).
def _encode_subset(subset):
    """Return (encoder output, raw responder_6 values) for the rows of a Subset."""
    rows = list(subset.indices)
    inputs = torch.tensor(subset.dataset.features[rows], dtype=torch.float32).to(device)
    # No gradients are needed for inference; .cpu() makes the conversion to
    # NumPy work when the model lives on a GPU
    with torch.no_grad():
        latent = model.encoder(inputs).cpu().numpy()
    targets = subset.dataset.dataframe['responder_6'].to_numpy()[rows]
    return latent, targets

X_train_latent, y_train_latent = _encode_subset(train_dataset)
X_test_latent, y_true = _encode_subset(test_dataset)

# NOTE(review): the encoder input contains the standardised responder_6
# column (KaggleDataset concatenates X and y), so the scores below are
# computed from inputs that include the target.

# Fit a linear regression model on the latent representation
linear_model = LinearRegression()
linear_model.fit(X_train_latent, y_train_latent)

# Predict the target using the linear model
y_pred = linear_model.predict(X_test_latent)

# Calculate the root mean squared error and the mean absolute error
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
mae = mean_absolute_error(y_true, y_pred)
print(f"RMSE: {rmse:.2f}")
print(f"MAE: {mae:.2f}")


In [None]:
from itertools import product

# Build a cross-validation grid search for our VAE model

def predict(model, dataloader):
    """Encode every batch of ``dataloader``.

    Returns:
        tuple: ``(mean, log_var)`` tensors on the CPU, stacked over all batches.
    """
    # train() switches the model back to training mode on its next call
    model.eval()
    means, log_vars = [], []
    with torch.no_grad():
        for x in dataloader:
            batch_mean, batch_log_var = model.encode(x.to(device))
            means.append(batch_mean.cpu())
            log_vars.append(batch_log_var.cpu())
    return torch.cat(means), torch.cat(log_vars)

# Define the hyperparameters to search
hyperparameters = {
    'hidden_dim': [input_dim - 1 , input_dim - 3],
    'latent_dim': [input_dim - 10, input_dim - 20],
    'learning_rate': [1e-3, 1e-4],
    'batch_size': [64, 128]
}

# Define the number of epochs
epochs = 5

# Define the number of folds for cross-validation
n_splits = 2

# Create a list of all hyperparameter combinations
keys, values = zip(*hyperparameters.items())
combinations = [dict(zip(keys, v)) for v in product(*values)]

# Create a list of models and their corresponding hyperparameters
models = []
for combination in combinations:
    model = VAE(input_dim=input_dim, hidden_dim=combination['hidden_dim'], latent_dim=combination['latent_dim']).to(device)
    optimizer = Adam(model.parameters(), lr=combination['learning_rate'])
    models.append((model, optimizer, combination))

# Cross-validation for each model.
# NOTE: a model is not re-initialised between folds, so the second fold
# continues training the weights obtained on the first one.
for model, optimizer, combination in models:
    print(f"Training model with hyperparameters: {combination}")
    for fold, (train_idx, test_idx) in enumerate(TimeSeriesSplit(n_splits=n_splits).split(dataset)):
        train_dataset = torch.utils.data.Subset(dataset, train_idx)
        test_dataset = torch.utils.data.Subset(dataset, test_idx)
        # train() reads the notebook-level train_dataloader, which is why it
        # is rebound here before each call
        train_dataloader = DataLoader(train_dataset, batch_size=combination['batch_size'], shuffle=False)
        test_dataloader = DataLoader(test_dataset, batch_size=combination['batch_size'], shuffle=False)
        train(model, optimizer, epochs, device=device)
        mean, log_var = predict(model, test_dataloader)
        print(f"Model with hyperparameters {combination} and fold {fold} has been trained and evaluated.")
        print("-------------------------------------------------------------")

## XGBoost based model on all symbols

In [None]:
# create a single xgboost model for all symbols
max_lag = 3
lag_cols = [f'responder_6_lag{lag}' for lag in range(1, max_lag + 1)]

# create the lagged responder_6 columns (shifted inside each symbol)
for lag, col in zip(range(1, max_lag + 1), lag_cols):
    full_data[col] = full_data.groupby('symbol_id')['responder_6'].shift(lag)

# drop rows with NaN lags; dropna returns a NEW frame, so the result must be
# kept (calling it without assignment leaves the data unchanged)
xgb_data = full_data.dropna(subset=lag_cols)

# separate features and target
exclude_cols = ['date_id', 'time_id'] + [f'responder_{i}' for i in range(0, 9)]
X = xgb_data.drop(columns=exclude_cols)
y = xgb_data['responder_6']

# split the data into training and testing sets (last 20% of rows held out)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# convert to DMatrix
dtrain = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
dtest = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

# train the model
params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'seed': 42
}
evals = [(dtrain, 'train'), (dtest, 'eval')]
model = xgb.train(params, dtrain, num_boost_round=1000, evals=evals, early_stopping_rounds=10, verbose_eval=10)

# predict the test set
y_pred = model.predict(dtest)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Final RMSE: {rmse:.2f}")

# plot feature importance in ascending order, taken from THIS model (the
# `importances` left over from the per-symbol loop belong to another model)
importances = model.get_score(importance_type='weight')
sorted_importances = dict(sorted(importances.items(), key=lambda item: item[1]))

plt.figure(figsize=(10, 6))
plt.xticks(rotation=90)
plt.bar(sorted_importances.keys(), sorted_importances.values())
plt.xlabel('Feature')
plt.ylabel('Importance')
plt.title('Feature Importance')
plt.show()

# plot predictions vs actual values; .values puts both curves on the same
# positional x-axis (y_test alone would be drawn against its row labels)
plt.figure(figsize=(10, 6))
plt.plot(y_test.values, label='Actual')
plt.plot(y_pred, label='Predicted')
plt.xlabel('Index')
plt.ylabel('Responder 6')
plt.title('Predictions vs Actual Values')
plt.legend()
plt.show()

## XGBoost: benchmarking different dimensionality reduction methods

In [38]:
# disable the warnings
import warnings
warnings.filterwarnings('ignore')

# Data X and y preprocessing (symbol 0 only, on a copy of the raw frame)
X,y = data_preprocess(full_data.copy(), symbol=0)

# Split the data into training and test sets first (last 20% held out) ...
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# ... then normalise: the scaler is fitted on the training rows only, so no
# statistic of the test rows leaks into the training features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# dimensionality reduction with PCA, VAE and Autoencoder: target dimension
# is half the number of features
dim = X.shape[1] // 2


In [None]:
# Define an Autoencoder
class Autoencoder(nn.Module):
    """Two-layer fully connected autoencoder that owns its loss and optimiser.

    Args:
        input_dim (int): Number of input features.
        latent_dim (int): Size of the bottleneck code.
        hidden_dim (int): Width of the hidden layer on each side.
    """

    def __init__(self, input_dim, latent_dim, hidden_dim=64):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.LeakyReLU(0.2),
            nn.Linear(hidden_dim, input_dim)
        )
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)

    def forward(self, x):
        """Return ``(reconstruction, code)`` for a batch ``x``."""
        z = self.encoder(x)
        x_reconstructed = self.decoder(z)
        return x_reconstructed, z

    def train_model(self, training_set, num_epochs = 10, batch_size=64):
        """Fit the autoencoder with mini-batch gradient steps.

        Args:
            training_set (torch.Tensor): 2-D tensor, one sample per row.
            num_epochs (int): Number of passes over ``training_set``.
            batch_size (int): Rows per optimisation step. Rows are visited in
                order; the last batch of an epoch may be smaller.

        Returns:
            list[float]: Mean reconstruction loss per sample for every epoch.
        """
        n_samples = training_set.shape[0]
        epoch_losses = []
        for epoch in range(num_epochs):
            print(f"Epoch {epoch + 1}")
            self.train()
            running_loss = 0.0
            # One optimiser step per batch: batch_size used to be ignored,
            # which meant a single full-batch step per epoch
            for batch in training_set.split(batch_size):
                self.optimizer.zero_grad()
                X_reconstructed, _ = self(batch)
                loss = self.criterion(X_reconstructed, batch)
                loss.backward()
                self.optimizer.step()
                # criterion averages over the batch; re-weight by its size so
                # the epoch figure is a true per-sample mean
                running_loss += loss.item() * batch.shape[0]
            epoch_losses.append(running_loss / max(n_samples, 1))
        return epoch_losses

# Build an autoencoder sized for the training features, with `dim` latent
# units, and fit it on the training matrix converted to a float32 tensor
train_tensor = torch.tensor(X_train, dtype=torch.float32)
autoencoder = Autoencoder(X_train.shape[1], dim)
autoencoder.train_model(train_tensor)

# Persist the whole module object (not just its weights) to disk
torch.save(autoencoder, 'autoencoder.pth')

In [None]:
import time as time

# Reduce the dimensionality and return the new training and test data
def reduce_dimensionality(X_train, X_test, dim, method):
    """Project the train and test features with the requested method.

    Args:
        X_train: Training feature matrix.
        X_test: Test feature matrix.
        dim (int): Number of components; only used by ``'pca'``.
        method: ``None`` (no reduction), ``'pca'``, ``'vae'`` (model loaded
            from ``suave-model.pkl``) or ``'autoencoder'`` (model loaded from
            ``autoencoder.pth``).

    Returns:
        tuple: ``(X_train_reduced, X_test_reduced)``.

    Raises:
        ValueError: If ``method`` is not one of the values above. A typo used
            to fall back silently to "no reduction" and was then reported
            under the misspelt method's name.
    """
    if method is None:
        return X_train, X_test

    if method == 'pca':
        # Fitted on the training data only, then applied to the test data
        pca = PCA(n_components=dim)
        X_train_reduced = pca.fit_transform(X_train)
        X_test_reduced = pca.transform(X_test)
    elif method == 'vae':
        # NOTE: joblib.load unpickles arbitrary objects -- only load files
        # produced by this notebook.
        # NOTE(review): the SUAVE model appears to be fitted on unscaled
        # features elsewhere in the file; confirm the inputs given here are
        # on the same scale.
        vae = joblib.load("suave-model.pkl")
        X_train_reduced = vae.transform(X_train)
        X_test_reduced = vae.transform(X_test)
    elif method == 'autoencoder':
        # The file holds a whole pickled module, which recent PyTorch
        # versions refuse to load unless weights_only=False is passed.
        # Same trust caveat as above: only load files from this notebook.
        autoencoder = torch.load('autoencoder.pth', weights_only=False)
        autoencoder.eval()
        with torch.no_grad():
            _, X_train_latent = autoencoder(torch.tensor(X_train, dtype=torch.float32))
            _, X_test_latent = autoencoder(torch.tensor(X_test, dtype=torch.float32))
        X_train_reduced = X_train_latent.numpy()
        X_test_reduced = X_test_latent.numpy()
    else:
        raise ValueError(
            f"Unknown dimensionality reduction method: {method!r} "
            "(expected None, 'pca', 'vae' or 'autoencoder')"
        )

    return X_train_reduced, X_test_reduced

# Evaluate the performance of a model
def evaluate_model(X_train, X_test, y_train, y_test, model, method=None):
    """Fit ``model`` on (optionally reduced) features and score it on the test set.

    The features are reduced with ``reduce_dimensionality`` using the
    notebook-level ``dim``. The reported time covers both the reduction and
    the fit, but not the prediction.

    Returns:
        tuple: ``(mse, mae, training_time)``, the time being in seconds.
    """
    t0 = time.time()
    train_feats, test_feats = reduce_dimensionality(X_train, X_test, dim, method)
    model.fit(train_feats, y_train)
    t1 = time.time()

    predictions = model.predict(test_feats)
    return (
        mean_squared_error(y_test, predictions),
        mean_absolute_error(y_test, predictions),
        t1 - t0,
    )

# Define the models to evaluate
models = {
    'xgb': xgb.XGBRegressor(),
    'linear': LinearRegression()
}

# One entry per dimensionality reduction method:
# (value passed to evaluate_model, label stored in the table, end of the log line)
reduction_methods = [
    (None, 'None', 'without dimensionality reduction'),
    ('pca', 'PCA', 'with PCA'),
    ('autoencoder', 'Autoencoder', 'with Autoencoder'),
    ('vae', 'VAE', 'with VAE'),
]

# Benchmark every model with every method. The rows are collected in a list
# and turned into a DataFrame once: concatenating onto an empty DataFrame is
# deprecated in pandas and leaves the numeric columns with object dtype.
rows = []
for name, model in models.items():
    for method_key, method_label, suffix in reduction_methods:
        mse, mae, elapsed = evaluate_model(X_train, X_test, y_train, y_test, model, method=method_key)
        rows.append({'Model': name, 'Method': method_label, 'MSE': mse, 'MAE': mae, 'Time Complexity': elapsed})
        print(f"Model {name} has MSE {mse:.2f}, MAE {mae:.2f} and Time Complexity {elapsed:.2f} {suffix}")
    print("------------------------------------------------------------------------")

# Results table ('Time Complexity' is the measured reduction + fit time, in seconds)
results = pd.DataFrame(rows, columns=['Model', 'Method', 'MSE', 'MAE', 'Time Complexity'])

# Compare the models with one bar chart per metric, side by side
# A color for each model
colors = ['green','blue']

# Create a figure with one axis per metric
fig, ax = plt.subplots(1, 3, figsize=(12, 6))

for axis, column in zip(ax, ['MSE', 'MAE', 'Time Complexity']):
    # The bars of the different models are drawn on the same x positions;
    # the decreasing alpha keeps the model drawn first visible underneath
    for i, model_name in enumerate(models):
        model_rows = results[results['Model'] == model_name]
        axis.bar(model_rows['Method'], model_rows[column], color=colors[i], label=model_name, alpha=1-0.5*i)

    # Set the title and labels
    axis.set_title(f'{column} for each model and method')
    axis.set_xlabel('Method')
    axis.set_ylabel(column)
    axis.legend()

# Show the plot
plt.show()


## SUAVE-ML VAE

#### SUAVE: Supervised and Unified Analysis of Variational Embeddings

SUAVE is a Python package built upon a hybrid Variational Autoencoder (VAE). It unifies unsupervised latent representation learning with supervised prediction tasks:

- Supervised Learning: Utilizes a VAE to map high-dimensional input features to a low-dimensional, independent latent space. This approach not only retains feature interpretability but also effectively addresses multicollinearity issues, enhancing the model's robustness and generalization capabilities when handling highly correlated features.
- Representation Learning: Guides the latent space with label information, enabling dimensionality reduction and producing discriminative and interpretable embeddings beneficial for downstream classification or regression tasks. Additionally, SUAVE integrates multi-task learning, allowing the incorporation of information from various downstream prediction tasks into the latent space learning process by adjusting task weights.

In [None]:
# Rebuild the features X and the target y for symbol 0; a copy of the raw
# frame is passed so that the preprocessing cannot modify full_data
X,y = data_preprocess(full_data.copy(), symbol=0)

In [6]:
# Prepare the inputs of SuaveClassifier: the continuous target is cut into
# three equal-frequency bins, numbered 0 to 2, to obtain class labels
tercile = pd.qcut(y, 3, labels=False)
Y = pd.concat([y, tercile], axis=1, keys=['responder_6', 'target'])
target = Y['target'].to_frame()

# Hold out the last 20% of the rows, without shuffling
X_train, X_test, Y_train, Y_test = train_test_split(X, target, shuffle=False, test_size=0.2)

# Make sure both label sets are single-column 'target' DataFrames
Y_train, Y_test = (pd.DataFrame(part, columns=['target']) for part in (Y_train, Y_test))

In [None]:

# Configure the SUAVE model: a single classification task with as many
# classes as there are distinct training labels, and a latent space half as
# wide as the input
n_features = X_train.shape[1]
n_classes = len(Y_train['target'].unique())
suave_model = SuaveClassifier(
    input_dim=n_features,
    task_classes=[n_classes],
    latent_dim=n_features // 2,
)

# Train on the training split
suave_model.fit(X_train, Y_train, epochs=100, animate_monitor=True, verbose=1)

# Persist the fitted model to disk
joblib.dump(suave_model, "suave-model.pkl")

In [None]:

# Get the latent representation of the data from the fitted SUAVE model
# (the model object is named suave_model; `vae_model` was never defined)
X_train_latent_SUAVE = suave_model.transform(X_train.values)
X_test_latent_SUAVE = suave_model.transform(X_test.values)

# Recover the continuous target for the same rows: the split is not shuffled
# and uses the same test_size, so it matches the split of X_train / X_test
_, _, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Fit a linear regression model on the latent representation, against the
# continuous responder_6 (Y_train holds the binned class labels, which are
# not on the scale of the values the predictions are scored against)
linear_model = LinearRegression()
linear_model.fit(X_train_latent_SUAVE, y_train)

# Predict the target variable on the test set
y_pred = linear_model.predict(X_test_latent_SUAVE)

# Calculate the mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f"Mean Squared Error: {mse:.2f}")

# Plot the predictions vs actual values
plt.figure(figsize=(10, 6))
plt.plot(y_test.values, label='Actual')
plt.plot(y_pred, label='Predicted')
plt.xlabel('Index')
plt.ylabel('Responder 6')
plt.title('Predictions vs Actual Values for symbol 0 with reduced dimensionality using SUAVE VAE and Linear Regression')
plt.legend()
plt.show()