In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
import numpy as np

import matplotlib.pyplot as plt
from collections import Counter

from base import *

Importing all assests from base.py...
Imported modules   : numpy, pandas, matplotlib.pyplot
Imported functions : npy, axes_off, get_var_name, shapes, tqdm, plot_history, minmax, values


In [2]:
from code.models import GroupedModel, Autoencoder

In [3]:
# Sanity check on 1-D inputs with 5 features: the autoencoder follows
# 5 -> 2 -> 1 -> 2 -> 5, so the embedding dimension is 1.

# Use the GPU when one is available and fall back to the CPU otherwise, so
# this check also runs on machines without CUDA.
kwargs = {'enc_channels': (5, 2, 1),
          'dec_channels': (1, 2, 5),
          'bias': False,
          'activations': nn.ReLU(),
          'device': 'cuda' if torch.cuda.is_available() else 'cpu'
          }

test_n_clusters = 2

# A plain autoencoder and a grouped model, both constructed from the same settings.
ae = Autoencoder(**kwargs)
gae = GroupedModel(test_n_clusters, Autoencoder, **kwargs)

# Random stand-ins: a batch of 64 inputs, 64 targets, and one center per cluster.
dummy_input = torch.randn(64, 5)
dummy_true = torch.randn(64, 5)
dummy_centers = torch.randn(test_n_clusters, 5)

In [4]:
# Plain autoencoder: request the embedding together with the reconstruction.
embed, out = ae(x=dummy_input, return_embed=True)
# NOTE: the recorded output labels each shape with the name of the variable
# passed in ("embed", "out"), so these variable names are part of the output.
shapes(embed, out)

# Grouped model evaluated with all centers; the recorded output shows an extra
# axis of size test_n_clusters in both the embedding and the reconstruction.
embed, out = gae.forward_with_centers(x=dummy_input, centers=dummy_centers, return_embed=True)
shapes(embed, out)

# Grouped model restricted to cluster 0; the recorded shapes match those of
# the plain autoencoder above.
embed, out = gae.forward_with_clust(x=dummy_input, centers=dummy_centers, clust=0, return_embed=True)
shapes(embed, out)

embed  : torch.Size([64, 1])
out    : torch.Size([64, 5])
embed  : torch.Size([64, 2, 1])
out    : torch.Size([64, 2, 5])
embed  : torch.Size([64, 1])
out    : torch.Size([64, 5])


In [5]:
from code.tae import TensorizedAutoencoder, to_npy
from code.train_utils import train_tae

In [6]:
# Smoke test: train_tae should run end to end on the random dummy tensors.

ae = Autoencoder(**kwargs)
gae = GroupedModel(test_n_clusters, Autoencoder, **kwargs)
tae = TensorizedAutoencoder(gae, dummy_true)

warmup, batched, clusts = train_tae(
    tae,
    dummy_input,
    dummy_true,
    50,     # total epochs (the log reports "10/50" and "40/50")
    5e-3,   # presumably the learning rate -- confirm against train_tae's signature
    8,      # presumably the batch size -- confirm against train_tae's signature
    warmup_optim=torch.optim.SGD,
    warmup=10,
    verbose=1,
    grad_clip=1,
)

PHASE 1: Warmup — 10/50


Epoch: 10/10 |████████████████████████████████| [00:05<00:00, loss: 1.0347]


PHASE 2: Batched — 40/50


Epoch: 40/40 |█████████████████████████| [00:01<00:00, loss: 1.0592, es: 1]
Epoch: 40/40 |█████████████████████████| [00:00<00:00, loss: 0.9440, es: 0]


# Testing on datasets

In [7]:
from data.data import parallel_line, orthogonal, triangle, lines_3D
from sklearn.metrics.cluster import adjusted_rand_score as ari
from sklearn.metrics import mean_squared_error as mse
from sklearn.cluster import KMeans
from code.train_utils import GenericDataset, train_ae


# Build the parallel-line dataset, then reorder X, Y and X_noise with one
# shared random permutation so their rows stay aligned; each is cast to float32.
X, Y, X_noise, n_clusters = parallel_line(noise=0.1)
randperm = torch.randperm(len(X))
X = X[randperm].float()
Y = Y[randperm].float()
X_noise = X_noise[randperm].float()

  warn(f"Failed to load image Python extension: {e}")


In [8]:
# Autoencoder settings for the dataset experiments:
# channels 5 -> 2 -> 5, bias disabled, no activation.
config = dict(
    enc_channels=(5, 2),
    dec_channels=(2, 5),
    bias=False,
    activations=None,
)

In [9]:
# Baseline: a single autoencoder trained with Adam for 50 epochs.
ae = Autoencoder(**config)
optimizer = torch.optim.Adam(ae.parameters(), lr=5e-3)

# GenericDataset receives X twice; batches of 8 are drawn without reshuffling.
reconstruction_data = GenericDataset(X, X)
dataloader = reconstruction_data.get_dataloader(batch_size=8, shuffle=False)

ae, losses = train_ae(ae, dataloader, optimizer, 50, verbose=1)

Epoch: 50/50 |█████████████████████████| [00:00<00:00, loss: 0.0059, es: 0]


In [10]:
# ARI after AE: run k-means on the encoder output for X and score the
# resulting labels against Y.
ae_embeddings = to_npy(ae.enc(X.float()))
kmeans_on_embeddings = KMeans(n_clusters, n_init='auto').fit(ae_embeddings)
ari(kmeans_on_embeddings.labels_, Y)

0.17663738964066483

In [11]:
# Tensorized autoencoder on the same data, built on a grouped model.
gae = GroupedModel(n_clusters, Autoencoder, **config)
tae = TensorizedAutoencoder(gae, X)

# NOTE(review): the output recorded for this cell shows `loss: nan` in both
# training phases, and the ARI computed from `clusts` right afterwards is 0.0,
# so this run diverged. The reconstruction-MSE experiment further down trains
# on the same X with the same warmup_lr but with regularizer_coef=0.1 and a
# 15-epoch warmup, and its losses stay finite -- confirm whether the missing
# regularizer or the longer SGD warmup is the cause before trusting this cell.
train_settings = dict(
    epochs=50,
    lr=5e-3,
    batch_size=8,
    warmup=0.4,
    warmup_optim=torch.optim.SGD,
    warmup_lr=5e-2,
    verbose=1,
    grad_clip=0.5,
)
warmup_losses, losses, clusts = train_tae(tae, X.float(), X.float(), **train_settings)

PHASE 1: Warmup — 20/50


Epoch: 20/20 |███████████████████████████████████| [00:04<00:00, loss: nan]


PHASE 2: Batched — 30/50


Epoch: 30/30 |████████████████████████████| [00:00<00:00, loss: nan, es: 0]


In [12]:
# ARI after TAE: compare the cluster assignments returned by train_tae with
# the labels Y. The score is symmetric, so naming the arguments in the
# documented order gives the same value as the positional call.
ari(labels_true=Y, labels_pred=clusts)

0.0

# Working with reconstruction MSE

In [13]:
# Compare the reconstruction MSE of a single autoencoder with that of the
# tensorized autoencoder on the same X, averaged over `num_runs` runs.
num_runs = 1
verbose = 1

config = {'enc_channels': (5, 2),
          'dec_channels': (2, 5),
          'bias': False,
          'activations': None}

# Per-run scores. `lkm` is not filled in this cell (the k-means line below is
# commented out); it is kept so the name stays defined.
lae, ltae, lkm = [], [], []

# `disable=verbose`: the run-level bar is hidden whenever verbose is truthy.
for run in tqdm(range(num_runs), disable=verbose):
    # kmeans_ari = ari(KMeans(n_clusters).fit(to_npy(X)).labels_,Y)

    # --- baseline autoencoder ---
    ae = Autoencoder(**config)
    optimizer = torch.optim.Adam(ae.parameters(), 5e-3)
    dataloader = GenericDataset(X, X).get_dataloader(batch_size=8, shuffle=False)

    ae, ae_losses = train_ae(ae, dataloader, optimizer, 50, verbose=verbose)
    # ae_ari = ari(KMeans(n_clusters).fit(to_npy(ae.enc(X.float()))).labels_,Y)

    # Evaluation only: skip building an autograd graph for the forward pass.
    with torch.no_grad():
        ae_mse = mse(to_npy(ae(X)), X)

    # --- tensorized autoencoder ---
    gae = GroupedModel(n_clusters, Autoencoder, **config)
    tae = TensorizedAutoencoder(gae, X, regularizer_coef=0.1)

    warmup_losses, tae_losses, clusts = train_tae(
        tae, X.float(), X.float(), 150,
        lr=5e-3, batch_size=16,
        warmup=15, warmup_optim=torch.optim.SGD, warmup_lr=5e-2,
        verbose=verbose, grad_clip=0.5,
    )
    # tae_ari = ari(clusts, Y)

    # Reconstruct one sample at a time: the output of forward_with_clust for
    # the sample's assigned cluster plus the center stored for that cluster.
    # A separate name is used for the buffer so `tae_mse` only ever holds the score.
    tae_recon = np.zeros(X.shape)
    with torch.no_grad():
        for i in range(len(X)):
            tae_recon[i] = to_npy(tae.forward_with_clust(X[i:i+1], clusts[i:i+1]) + tae.centers[clusts[i]])

    tae_mse = mse(tae_recon, X)

    lae.append(ae_mse)
    ltae.append(tae_mse)

print(f"TAE: {np.mean(ltae):.4f}")
print(f"AE: {np.mean(lae):.4f}")

Epoch: 50/50 |█████████████████████████| [00:00<00:00, loss: 0.0059, es: 0]


PHASE 1: Warmup — 15/150


Epoch: 15/15 |████████████████████████████████| [00:05<00:00, loss: 1.5785]


PHASE 2: Batched — 135/150


Epoch: 135/135 |███████████████████████| [00:00<00:00, loss: 0.0066, es: 0]
Epoch: 135/135 |███████████████████████| [00:01<00:00, loss: 0.0056, es: 1]

TAE: 0.0057
AE: 0.0058





# Testing CNN autoencoders

In [14]:
class CNNDenseBlock(nn.Module):
    """A single 2-D convolution optionally followed by an activation.

    Args:
        in_features: number of input channels of the convolution.
        out_features: number of output channels of the convolution.
        kernel_size: kernel size forwarded to ``nn.Conv2d`` (an int or a
            sequence of two ints).
        activation: callable applied to the convolution output, or ``None``
            to return the convolution output unchanged.
        padding: zero-padding forwarded to ``nn.Conv2d``. ``None`` (default)
            uses ``kernel_size // 2`` per dimension, which keeps the spatial
            size unchanged for odd kernel sizes at stride 1. For
            ``kernel_size=3`` this is the value 1 that used to be hard-coded.
    """

    def __init__(self, in_features, out_features, kernel_size, activation=None, padding=None):
        super().__init__()
        if padding is None:
            padding = self._same_padding(kernel_size)
        self.layer = nn.Conv2d(in_features, out_features, kernel_size=kernel_size, padding=padding)
        self.act = activation

    @staticmethod
    def _same_padding(kernel_size):
        """Return the per-dimension padding ``k // 2`` for an int or sequence kernel size."""
        if isinstance(kernel_size, int):
            return kernel_size // 2
        return tuple(k // 2 for k in kernel_size)

    def forward(self, x):
        """Apply the convolution, then the activation when one was given."""
        out = self.layer(x)
        if self.act is None:
            return out
        return self.act(out)


class CNNAutoencoder(nn.Module):
    """Convolutional autoencoder whose encoder and decoder are stacks of ``CNNDenseBlock``.

    Args:
        enc_channels: sequence of channel counts for the encoder. Each pair of
            consecutive entries defines one block, so ``(3, 10, 20)`` builds
            the blocks 3->10 and 10->20.
        dec_channels: sequence of channel counts for the decoder, built the
            same way. If its first entry differs from the last entry of
            ``enc_channels``, that entry is prepended (a warning is printed),
            which adds one extra decoder block.
        kernel_size: kernel size passed to every block.
        activations: activation passed to every block, including the last
            decoder block (with the default ReLU the reconstruction is
            therefore non-negative). The default instance is created once,
            when the class is defined, and is shared by all blocks.
        device: device the blocks and the inputs are moved to; ``None`` means ``'cpu'``.
    """

    def __init__(self, enc_channels, dec_channels, kernel_size, activations=nn.ReLU(), device=None):
        super().__init__()

        # Normalise to tuples so lists are accepted as well: prepending a
        # tuple to a list below would otherwise raise a TypeError.
        self.enc_channels, self.dec_channels = tuple(enc_channels), tuple(dec_channels)
        self.activations = activations
        self.kernel_size = kernel_size
        self.device = device or 'cpu'

        if self.enc_channels[-1] != self.dec_channels[0]:
            print("[WARN] First shape of dec_channels does not match the terminal channel in enc_channels, proceeding with additional layer...")
            self.dec_channels = (self.enc_channels[-1],) + self.dec_channels

        # Encoder first, then decoder, so parameters are created in the same order as before.
        self.enc = self._build_stack('enc', self.enc_channels)
        self.dec = self._build_stack('dec', self.dec_channels)

    def _build_stack(self, prefix, channels):
        """Chain one CNNDenseBlock per consecutive channel pair, named ``{prefix}_dense{i}``."""
        stack = nn.Sequential()
        for i in range(len(channels) - 1):
            block = CNNDenseBlock(channels[i], channels[i + 1], kernel_size=self.kernel_size, activation=self.activations)
            stack.add_module(f'{prefix}_dense{i}', block.to(self.device))
        return stack

    def forward(self, x, return_embed=False):
        """Encode then decode ``x``.

        The input is moved to ``self.device`` and cast to float first.

        Returns:
            The decoder output, or the tuple ``(embed, out)`` when
            ``return_embed`` is true, where ``embed`` is the encoder output.
        """
        x = x.to(self.device).float()
        embed = self.enc(x)
        out = self.dec(embed)

        if return_embed:
            return embed, out
        return out

In [15]:
# Sanity check on image-shaped inputs (3 channels, 32x32): the CNN autoencoder
# follows 3 -> 10 -> 20 -> 10 -> 3 channels, so the embedding has 20 channels.

# Use the GPU when one is available and fall back to the CPU otherwise, so
# this check also runs on machines without CUDA.
kwargs = {'enc_channels': (3, 10, 20),
          'dec_channels': (20, 10, 3),
          'kernel_size': 3,
          'activations': nn.ReLU(),
          'device': 'cuda' if torch.cuda.is_available() else 'cpu'}

test_n_clusters = 2

# A plain CNN autoencoder and a grouped model, both constructed from the same settings.
ae = CNNAutoencoder(**kwargs)
gae = GroupedModel(test_n_clusters, CNNAutoencoder, **kwargs)

# Random stand-ins: a batch of 64 inputs, 64 targets, and one center per cluster.
dummy_input = torch.randn(64, 3, 32, 32)
dummy_true = torch.randn(64, 3, 32, 32)
dummy_centers = torch.randn(test_n_clusters, 3, 32, 32)

In [16]:
# Each check prints the expected shapes first, then the shapes actually
# produced, so the two can be compared by eye in the cell output.

# Plain CNN autoencoder: embedding plus reconstruction.
print(f"Expected: (batch, {ae.enc_channels[-1]}, 32, 32), (batch, 3, 32, 32)")
embed, out = ae(x=dummy_input, return_embed=True)
# NOTE: the recorded output labels each shape with the name of the variable
# passed in ("embed", "out"), so these variable names are part of the output.
shapes(embed, out)

# Grouped model evaluated with all centers: an extra axis of size
# test_n_clusters is expected after the batch axis.
print(f"\nExpected: (batch, {test_n_clusters}, {ae.enc_channels[-1]}, 32, 32), (batch, {test_n_clusters}, 3, 32, 32)")
embed, out = gae.forward_with_centers(x=dummy_input, centers=dummy_centers, return_embed=True)
shapes(embed, out)

# Grouped model restricted to cluster 0: same shapes as the plain autoencoder.
print(f"\nExpected: (batch, {ae.enc_channels[-1]}, 32, 32), (batch, 3, 32, 32)")
embed, out = gae.forward_with_clust(x=dummy_input, centers=dummy_centers, clust=0, return_embed=True)
shapes(embed, out)

Expected: (batch, 20, 32, 32), (batch, 3, 32, 32)
embed  : torch.Size([64, 20, 32, 32])
out    : torch.Size([64, 3, 32, 32])

Expected: (batch, 2, 20, 32, 32), (batch, 2, 3, 32, 32)
embed  : torch.Size([64, 2, 20, 32, 32])
out    : torch.Size([64, 2, 3, 32, 32])

Expected: (batch, 20, 32, 32), (batch, 3, 32, 32)
embed  : torch.Size([64, 20, 32, 32])
out    : torch.Size([64, 3, 32, 32])


In [17]:
# Smoke test: train_tae should run end to end with the CNN autoencoder on the
# random image-shaped dummy tensors.

ae = CNNAutoencoder(**kwargs)
gae = GroupedModel(test_n_clusters, CNNAutoencoder, **kwargs)
tae = TensorizedAutoencoder(gae, dummy_true)

warmup, batched, clusts = train_tae(
    tae,
    dummy_input,
    dummy_true,
    50,     # total epochs (the log reports "10/50" and "40/50")
    5e-3,   # presumably the learning rate -- confirm against train_tae's signature
    8,      # presumably the batch size -- confirm against train_tae's signature
    warmup=0.2,
    warmup_optim=torch.optim.SGD,
    warmup_lr=5e-2,
    verbose=1,
    grad_clip=0.5,
)

PHASE 1: Warmup — 10/50


Epoch: 10/10 |████████████████████████████████| [00:07<00:00, loss: 0.9990]


PHASE 2: Batched — 40/50


Epoch: 40/40 |█████████████████████████| [00:01<00:00, loss: 0.9976, es: 0]
Epoch: 40/40 |█████████████████████████| [00:00<00:00, loss: 0.9448, es: 1]


## Testing on MNIST