In [1]:
import torch
import torch.nn as nn
import numpy as np

import matplotlib.pyplot as plt
from collections import Counter

from base import *

Importing all assests from base.py...
Imported modules   : numpy, pandas, matplotlib.pyplot
Imported functions : npy, axes_off, get_var_name, shapes, tqdm, plot_history, minmax, values


# Testing different AE types (Linear and Convolutional)

In [2]:
from code.models import GroupedModel, BaseAutoencoder
from functools import partial # for creating different kinds of autoencoders

## Linear AE

In [3]:
# Sanity check: 1-D inputs with 5 features; the linear AE follows 5->2->1->2->5 (1 = embed_dim).
# Only output shapes are verified here -- nothing is trained.

testconfig_linear = {
    'enc_channels': (5,2,1),
    'dec_channels': (1,2,5), 
    'bias': False, 
    'activations': nn.ReLU(),
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # check is not tied to CUDA hardware.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu'
}

test_n_clusters = 2

# BaseAutoencoder specialised to nn.Linear layers.
LinearAutoencoder = partial(BaseAutoencoder, layer_type=nn.Linear)

# A single autoencoder, and a group holding one autoencoder per cluster.
ae = LinearAutoencoder(**testconfig_linear)
gae = GroupedModel(test_n_clusters, LinearAutoencoder, **testconfig_linear)

dummy_input = torch.randn(64, 5)        # 64 is the dummy batch size
dummy_true = torch.randn(64, 5) 
dummy_centers = torch.randn(test_n_clusters, 5)   # one dummy center per cluster

# 1) plain autoencoder
print(f"Expected: (batch, {ae.enc_channels[-1]}), (batch, 5)")
embed, out = ae(x=dummy_input, return_embed=True)
shapes(embed, out)

# 2) grouped model evaluated against every center (extra cluster axis in the output)
print(f"\nExpected: (batch, {test_n_clusters}, {ae.enc_channels[-1]}), (batch, {test_n_clusters}, 5)")
embed, out = gae.forward_with_centers(x=dummy_input, centers=dummy_centers, return_embed=True)
shapes(embed, out)

# 3) grouped model restricted to a single cluster (no cluster axis)
print(f"\nExpected: (batch, {ae.enc_channels[-1]}), (batch, 5)")
embed, out = gae.forward_with_clust(x=dummy_input, centers=dummy_centers, clust=0, return_embed=True)
shapes(embed, out)

Expected: (batch, 1), (batch, 5)
embed  : torch.Size([64, 1])
out    : torch.Size([64, 5])

Expected: (batch, 2, 1), (batch, 2, 5)
embed  : torch.Size([64, 2, 1])
out    : torch.Size([64, 2, 5])

Expected: (batch, 1), (batch, 5)
embed  : torch.Size([64, 1])
out    : torch.Size([64, 5])


## Convolutional AE

In [4]:
# Sanity check: image inputs of shape (3,32,32); the CNN-AE follows 3->10->20->10->3 (channels).
# Only output shapes are verified here -- nothing is trained.

testconfig_conv = {
    'enc_channels': (3,10,20),
    'dec_channels': (20,10,3), 
    'kernel_size': 3, 
    'activations': nn.ReLU(),
    # Use the GPU when one is present, otherwise fall back to the CPU so the
    # check is not tied to CUDA hardware.
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    # kernel_size=3 with padding=1: the recorded outputs keep the 32x32 spatial size.
    'padding': 1
}

test_n_clusters = 2

# BaseAutoencoder specialised to nn.Conv2d layers.
CNNAutoencoder = partial(BaseAutoencoder, layer_type=nn.Conv2d)

# A single autoencoder, and a group holding one autoencoder per cluster.
ae = CNNAutoencoder(**testconfig_conv)
gae = GroupedModel(test_n_clusters, CNNAutoencoder, **testconfig_conv)

dummy_input_conv = torch.randn(64,3,32,32)      # 64 is the dummy batch size
dummy_true_conv = torch.randn(64,3,32,32) 
dummy_centers_conv = torch.randn(test_n_clusters,3,32,32)   # one dummy center per cluster

# 1) plain autoencoder
print(f"Expected: (batch, {ae.enc_channels[-1]}, 32, 32), (batch, 3, 32, 32)")
embed, out = ae(x=dummy_input_conv, return_embed=True)
shapes(embed, out)

# 2) grouped model evaluated against every center (extra cluster axis in the output)
print(f"\nExpected: (batch, {test_n_clusters}, {ae.enc_channels[-1]}, 32, 32), (batch, {test_n_clusters}, 3, 32, 32)")
embed, out = gae.forward_with_centers(x=dummy_input_conv, centers=dummy_centers_conv, return_embed=True)
shapes(embed, out)

# 3) grouped model restricted to a single cluster (no cluster axis)
print(f"\nExpected: (batch, {ae.enc_channels[-1]}, 32, 32), (batch, 3, 32, 32)")
embed, out = gae.forward_with_clust(x=dummy_input_conv, centers=dummy_centers_conv, clust=0, return_embed=True)
shapes(embed, out)

Expected: (batch, 20, 32, 32), (batch, 3, 32, 32)
embed  : torch.Size([64, 20, 32, 32])
out    : torch.Size([64, 3, 32, 32])

Expected: (batch, 2, 20, 32, 32), (batch, 2, 3, 32, 32)
embed  : torch.Size([64, 2, 20, 32, 32])
out    : torch.Size([64, 2, 3, 32, 32])

Expected: (batch, 20, 32, 32), (batch, 3, 32, 32)
embed  : torch.Size([64, 20, 32, 32])
out    : torch.Size([64, 3, 32, 32])


# TAE working test (smoke test of `train_tae`)

In [5]:
from code.tae import TensorizedAutoencoder, to_npy
from code.train_utils import train_tae

In [7]:
# Smoke test: run train_tae end-to-end on the 1-D dummy data of shape (N, 5).

ae = LinearAutoencoder(**testconfig_linear)
gae = GroupedModel(test_n_clusters, LinearAutoencoder, **testconfig_linear)

tae = TensorizedAutoencoder(gae, dummy_true)

# The positional 50, 5e-3, 8 are presumably epochs, lr and batch_size
# -- TODO confirm against train_tae's signature.
# warmup=10 is an absolute epoch count: the run log reports "Warmup — 10/50".
warmup, batched, clusts = train_tae(
    tae, dummy_input, dummy_true,
    50, 5e-3, 8,
    warmup=10,
    warmup_optim=torch.optim.SGD,
    grad_clip=1,
    verbose=1,
)

PHASE 1: Warmup — 10/50


Epoch: 10/10 |████████████████████████████████| [00:04<00:00, loss: 0.9105]


PHASE 2: Batched — 40/50


Epoch: 40/40 |█████████████████████████| [00:00<00:00, loss: 0.8813, es: 1]
Epoch: 40/40 |█████████████████████████| [00:00<00:00, loss: 0.9561, es: 0]


In [8]:
# Smoke test: run train_tae end-to-end on the dummy image data of shape (N,3,32,32).

ae = CNNAutoencoder(**testconfig_conv)
gae = GroupedModel(test_n_clusters, CNNAutoencoder, **testconfig_conv)

tae = TensorizedAutoencoder(gae, dummy_true_conv)

# The positional 50, 5e-3, 8 are presumably epochs, lr and batch_size
# -- TODO confirm against train_tae's signature.
# warmup=0.2 is a fraction of the epochs: the run log reports "Warmup — 10/50".
warmup, batched, clusts = train_tae(
    tae, dummy_input_conv, dummy_true_conv,
    50, 5e-3, 8,
    warmup=0.2,
    warmup_optim=torch.optim.SGD,
    warmup_lr=5e-2,
    grad_clip=0.5,
    verbose=1,
)

PHASE 1: Warmup — 10/50


Epoch: 10/10 |████████████████████████████████| [00:06<00:00, loss: 0.9958]


PHASE 2: Batched — 40/50


Epoch: 40/40 |█████████████████████████| [00:01<00:00, loss: 0.9933, es: 0]
Epoch: 40/40 |█████████████████████████| [00:01<00:00, loss: 0.9592, es: 0]


# Testing TAE clustering performance on synthetic datasets

In [10]:
from data.data import parallel_line, orthogonal, triangle, lines_3D
from sklearn.metrics.cluster import adjusted_rand_score as ari
from sklearn.metrics import mean_squared_error as mse
from sklearn.cluster import KMeans
from code.train_utils import GenericDataset, train_ae


# Generate the synthetic "parallel line" dataset.
X, Y, X_noise, n_clusters = parallel_line(noise=0.1)

# Shuffle all three tensors with one shared permutation so rows stay aligned,
# casting each to float32 along the way.
randperm = torch.randperm(len(X))
X = X[randperm].float()
Y = Y[randperm].float()
X_noise = X_noise[randperm].float()

In [11]:
# Autoencoder hyper-parameters for the synthetic point data: 5 -> 2 -> 5.
config = dict(
    enc_channels=(5, 2),
    dec_channels=(2, 5),
    bias=False,
    activations=None,   # data is a set of points and is relatively simple, no activations needed
)

In [12]:
# Baseline: a single linear autoencoder trained to reconstruct X from X.
ae = LinearAutoencoder(**config)
optimizer = torch.optim.Adam(ae.parameters(), lr=5e-3)

# Inputs and targets are both X; batches are served in fixed order.
dataloader = GenericDataset(X, X).get_dataloader(
    batch_size=8,
    shuffle=False,
)

# The positional 50 is presumably the number of epochs (the progress bar shows 50/50).
ae, losses = train_ae(ae, dataloader, optimizer, 50, verbose=1)

Epoch: 50/50 |█████████████████████████| [00:00<00:00, loss: 0.0060, es: 0]


In [13]:
# ARI after AE: cluster the encoder embeddings with k-means and score against Y.
ae_embeddings = to_npy(ae.enc(X.float()))
kmeans_labels = KMeans(n_clusters, n_init='auto').fit(ae_embeddings).labels_
ari(kmeans_labels, Y)

0.06502463764112751

In [16]:
# One linear autoencoder per cluster, wrapped into a tensorized autoencoder.
gae = GroupedModel(n_clusters, LinearAutoencoder, **config)
tae = TensorizedAutoencoder(gae, X)

# Inputs and targets are both X (pure reconstruction).
# warmup=0.4 is a fraction of the epochs: the run log reports "Warmup — 20/50".
warmup_losses, losses, clusts = train_tae(
    tae,
    X.float(),
    X.float(),
    epochs=50,
    lr=5e-3,
    batch_size=8,
    warmup=0.4,
    warmup_optim=torch.optim.SGD,
    warmup_lr=5e-2,
    grad_clip=0.5,
    verbose=1,
)

PHASE 1: Warmup — 20/50


Epoch: 20/20 |████████████████████████████████| [00:04<00:00, loss: 0.8308]


PHASE 2: Batched — 30/50


Epoch: 30/30 |█████████████████████████| [00:00<00:00, loss: 0.0132, es: 0]
Epoch: 30/30 |█████████████████████████| [00:00<00:00, loss: 0.0160, es: 0]


In [17]:
# ARI after TAE: score the cluster assignments returned by train_tae against
# the ground-truth labels Y.
ari(clusts, Y)

1.0

# Comparing reconstruction MSE (AE vs. TAE)

In [18]:
# Compare the reconstruction MSE of a single AE with that of the TAE on the same
# data, averaged over `num_runs` independent trainings.
num_runs = 1
verbose = 1

config = {
    'enc_channels': (5,2),
    'dec_channels': (2,5), 
    'bias': False, 
    'activations': None
}

lae, ltae, lkm = [], [], []
# The outer progress bar is disabled whenever the per-model training is verbose.
for run in tqdm(range(num_runs), disable=verbose):
    # kmeans_ari = ari(KMeans(n_clusters).fit(to_npy(X)).labels_,Y)

    # --- Baseline: single autoencoder ---------------------------------------
    ae = LinearAutoencoder(**config)
    optimizer = torch.optim.Adam(ae.parameters(), 5e-3)
    dataloader = GenericDataset(X, X).get_dataloader(batch_size=8, shuffle=False)

    ae, ae_losses = train_ae(ae, dataloader, optimizer, 50, verbose=verbose)
    # ae_ari = ari(KMeans(n_clusters).fit(to_npy(ae.enc(X.float()))).labels_,Y)

    # Evaluation only: no autograd graph is needed.
    with torch.no_grad():
        ae_recon = to_npy(ae(X))
    ae_mse = mse(X, ae_recon)   # sklearn convention: (y_true, y_pred)

    # --- Tensorized autoencoder ---------------------------------------------
    gae = GroupedModel(n_clusters, LinearAutoencoder, **config)
    tae = TensorizedAutoencoder(gae, X, regularizer_coef=0.1)

    warmup_losses, tae_losses, clusts = train_tae(tae, X.float(), X.float(), 150, lr=5e-3, batch_size=16, warmup=15, warmup_optim=torch.optim.SGD, warmup_lr=5e-2, verbose=verbose, grad_clip=0.5)
    # tae_ari = ari(clusts, Y)

    # Reconstruct every sample with the autoencoder of its assigned cluster and
    # add that cluster's center to the model output.
    # Presumably the model works on center-shifted data -- confirm in TensorizedAutoencoder.
    tae_recon = np.zeros(X.shape)
    with torch.no_grad():
        for i in range(len(X)):
            tae_recon[i] = to_npy(tae.forward_with_clust(X[i:i+1], clusts[i:i+1]) + tae.centers[clusts[i]])

    tae_mse = mse(X, tae_recon)

    lae.append(ae_mse)
    ltae.append(tae_mse)

print(f"TAE: {np.mean(ltae):.4f}")
print(f"AE: {np.mean(lae):.4f}")

Epoch: 50/50 |█████████████████████████| [00:00<00:00, loss: 0.0059, es: 0]


PHASE 1: Warmup — 15/150


Epoch: 15/15 |████████████████████████████████| [00:03<00:00, loss: 1.0196]


PHASE 2: Batched — 135/150


Epoch: 135/135 |███████████████████████| [00:00<00:00, loss: 0.0055, es: 1]
Epoch: 135/135 |███████████████████████| [00:01<00:00, loss: 0.0060, es: 1]


TAE: 0.0061
AE: 0.0058


## Testing on MNIST