In [None]:
# Enable IPython's autoreload extension in mode 2 so that edits to the imported
# project modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from drcomp.reducers import PCA, AutoEncoder
from drcomp.autoencoder import FullyConnectedAE
from drcomp.utils.notebooks import get_dataset
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
import torch
import torch.nn as nn
import numpy as np
from skorch.callbacks import EarlyStopping, LRScheduler
from drcomp.plotting import (
    compare_metrics,
    plot_reconstructions,
    visualize_2D_latent_space,
)
import matplotlib.pyplot as plt
import scienceplots

In [None]:
# Load the ICMR samples (X) and their class labels (y) from the repository root.
X, y = get_dataset("ICMR", root_dir="..")

# Fit the scaler on the full dataset and apply it in one step; `preprocessor`
# stays available afterwards as the fitted scaler.
preprocessor = StandardScaler()
X_train = preprocessor.fit_transform(X)

In [None]:
# Size of the learned embedding. Kept at 2 so the latent space can be drawn
# directly as a 2-D scatter plot further down.
intrinsic_dim = 2  # for visualization purposes
# NOTE(review): the image shape below (1 * 62 * 47 = 2914 values) does not match
# the 20531 input features the networks in this notebook are built for --
# presumably a leftover from an image-dataset notebook; confirm and remove.
# channels, height, width = (1, 62, 47)

In [None]:
def get_autoencoder(baseClass):
    """Wrap a network in the project's ``AutoEncoder`` reducer with fixed training settings.

    Every reducer built here shares the same configuration: MSE loss, Adam with
    a learning rate of 0.1, at most 100 epochs, batches of 8 samples, no
    contractive penalty, early stopping on ``valid_loss`` (patience 10) and an
    ``ExponentialLR`` schedule with ``gamma=0.9``. Training runs on CUDA when
    available and on the CPU otherwise.

    Args:
        baseClass: The network handed to ``AutoEncoder`` as ``AutoEncoderClass``.
            In this notebook it is the object returned by ``get_base_encoder``.

    Returns:
        A new, untrained ``AutoEncoder`` reducer with its own callback instances.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Fresh callback objects per reducer so no callback state is shared between models.
    stop_early = EarlyStopping(patience=10, monitor="valid_loss")
    decay_lr = LRScheduler(policy="ExponentialLR", gamma=0.9, monitor="valid_loss")

    training_options = {
        "criterion": nn.MSELoss,
        "optimizer": torch.optim.Adam,
        "lr": 0.1,
        "contractive": False,
        "callbacks": [stop_early, decay_lr],
        "max_epochs": 100,
        "batch_size": 8,
        "device": device,
    }
    return AutoEncoder(AutoEncoderClass=baseClass, **training_options)


def get_base_encoder(
    activations,
    hidden_layer_dims,
    tied_weights: bool = False,
    input_size: int = 20531,
):
    """Build a ``FullyConnectedAE`` whose bottleneck has ``intrinsic_dim`` units.

    The bottleneck width is read from the notebook-level ``intrinsic_dim``
    variable; batch normalisation is always disabled.

    Args:
        activations: Forwarded to ``FullyConnectedAE`` as ``act_fn``. This
            notebook passes activation classes such as ``nn.Identity`` or
            ``nn.Sigmoid``.
        hidden_layer_dims: Forwarded unchanged as ``hidden_layer_dims``. The
            "shallow" models in this notebook pass an empty list.
        tied_weights: Forwarded unchanged as ``tied_weights``.
        input_size: Number of input features per sample. Defaults to 20531,
            the value that used to be hard-coded here; pass e.g.
            ``X_train.shape[1]`` to reuse the helper with differently sized data.

    Returns:
        The constructed ``FullyConnectedAE`` instance.
    """
    return FullyConnectedAE(
        input_size=input_size,
        intrinsic_dim=intrinsic_dim,
        hidden_layer_dims=hidden_layer_dims,
        act_fn=activations,
        include_batch_norm=False,
        tied_weights=tied_weights,
    )

In [None]:
# One row per autoencoder variant: (display name, activation, hidden dims, tied weights).
# Row order is the construction order and the order the models appear in later plots.
_AE_VARIANTS = [
    ("Linear shallow AE", nn.Identity, [], False),
    ("Linear shallow AE (tied)", nn.Identity, [], True),
    ("Linear deep AE", nn.Identity, [256], False),
    ("Nonlinear shallow AE", nn.Sigmoid, [], False),
    ("Nonlinear shallow AE (tied)", nn.Sigmoid, [], True),
    ("Nonlinear deep AE", nn.Sigmoid, [256], False),
]

# PCA is the baseline and comes first; the autoencoders follow in table order.
models = {
    "PCA": PCA(n_components=intrinsic_dim),
    **{
        title: get_autoencoder(
            get_base_encoder(act_fn, hidden_dims, tied_weights=tied)
        )
        for title, act_fn, hidden_dims, tied in _AE_VARIANTS
    },
}

In [None]:
def _fit_and_evaluate(reducer, data, max_K=100):
    """Fit ``reducer`` on ``data``, embed the same data and evaluate the embedding.

    Returns the embedding together with whatever ``reducer.evaluate`` returns.
    """
    reducer.fit(data)
    embedding = reducer.transform(data)
    return embedding, reducer.evaluate(data, embedding, max_K=max_K)


# Train every model on the scaled data and keep its metrics under its display name.
all_metrics = {}
for name, model in models.items():
    Y, metrics = _fit_and_evaluate(model, X_train)
    all_metrics[name] = metrics

In [None]:
# Draw the metrics gathered for every model with the project's compare_metrics
# helper, which hands back the figure and its axes, then render the figure.
fig, axs = compare_metrics(all_metrics, figsize=(8, 8))
plt.show()

In [None]:
# Apply the style BEFORE creating the figure: style settings only affect artists
# created afterwards, so the previous order rendered differently on the first
# run than on a re-run of this cell.
plt.style.use(["science", "scatter"])
fig, axs = plt.subplots(2, 4, figsize=(20, 5))

# Map every class label to an integer colour index (sorted label order).
labels = np.unique(y)
color_map = {label: i for i, label in enumerate(labels)}
colors = [color_map[label] for label in y.reshape(-1)]

# One panel per model, titled with the model name so the panels can be told apart.
panels = list(axs.flat)
for ax, (name, model) in zip(panels, models.items()):
    Y = model.transform(X_train)
    scatter = ax.scatter(Y[:, 0], Y[:, 1], c=colors, alpha=0.8)
    ax.set_title(name)

# Hide the frame and ticks of panels that did not receive a model.
for ax in panels[len(models):]:
    ax.axis("off")

# num=None yields one handle per distinct colour index, in ascending order,
# which lines up with the sorted `labels`; show the real class labels instead
# of the internal indices. The legend stays on the last panel, as before.
handles, _ = scatter.legend_elements(num=None)
panels[-1].legend(
    handles,
    [str(label) for label in labels],
    loc="upper right",
    title="Classes",
)
plt.tight_layout()