In [15]:
from autoencoders import get_ae, get_net
from torch.utils.data import DataLoader
import numpy as np
import yaml

Let's read the default config and build a model from it.

In [3]:
# Load the default DCEC configuration. The encoding is given explicitly so the
# file is decoded the same way regardless of the platform's locale default.
with open("../ae_configs/dcec_default.yml", encoding="utf-8") as f:
    model_config = yaml.safe_load(f)

In [4]:
# Inspect the parsed configuration (the nested dict loaded from the YAML file).
model_config

{'model_cfg': {'arch': 'dcec',
  'encoder': {'arch': 'fc_vec',
   'linear_layer_type': 'euclidian',
   'l_hidden': [80],
   'activation': ['relu'],
   'out_activation': 'linear'},
  'decoder': {'arch': 'fc_vec',
   'linear_layer_type': 'euclidian',
   'l_hidden': [80],
   'activation': ['relu'],
   'out_activation': 'relu'},
  'x_dim': 512,
  'z_dim': 64,
  'n_clusters': 10}}

In [5]:
# Instantiate the autoencoder, forwarding every entry of the "model_cfg"
# section of the config as a keyword argument.
model_kwargs = model_config["model_cfg"]
model = get_ae(**model_kwargs)

You can also configure the encoder and decoder separately if you design your own autoencoder logic.

In [17]:
# Build a standalone encoder/decoder pair from the per-network settings in the
# config, supplying the input and output sizes explicitly.
net_settings = model_config["model_cfg"]
input_size, latent_size = 128, 64
encoder = get_net(**net_settings["encoder"], in_dim=input_size, out_dim=latent_size)
decoder = get_net(**net_settings["decoder"], in_dim=latent_size, out_dim=input_size)

Let's check the structure of the model and of the standalone encoder and decoder.

In [6]:
# Show the layer-by-layer structure of the autoencoder built from the config.
model

DCEC(
  (encoder): FC_vec(
    (net): Sequential(
      (0): Linear(in_features=512, out_features=80, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=80, out_features=64, bias=True)
    )
  )
  (decoder): FC_vec(
    (net): Sequential(
      (0): Linear(in_features=64, out_features=80, bias=True)
      (1): ReLU(inplace=True)
      (2): Linear(in_features=80, out_features=512, bias=True)
      (3): ReLU(inplace=True)
    )
  )
  (loss): MSELoss()
  (clustering_layer): ClusteringLayer()
  (loss_kld): KLDivLoss()
  (loss_mse): MSELoss()
)

In [18]:
# Show the layers of the standalone encoder created with get_net.
encoder

FC_vec(
  (net): Sequential(
    (0): Linear(in_features=128, out_features=80, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=80, out_features=64, bias=True)
  )
)

In [19]:
# Show the layers of the standalone decoder created with get_net.
decoder

FC_vec(
  (net): Sequential(
    (0): Linear(in_features=64, out_features=80, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=80, out_features=128, bias=True)
    (3): ReLU(inplace=True)
  )
)

Now, let's generate some random data and fit the model

In [7]:
# Synthetic inputs: 1000 samples with 512 features each (matching x_dim in the
# config), drawn uniformly from [0, 1) as float32. A fixed seed makes the
# array identical on every Restart & Run All.
SEED = 42
rng = np.random.default_rng(SEED)
data = rng.random((1000, 512), dtype=np.float32)

In [8]:
from autoencoders.dataset import EmbeddingDataset

In [9]:
# Wrap the raw array in the project's EmbeddingDataset so it can be handed to a DataLoader.
dataset = EmbeddingDataset(data)

In [10]:
# Training loader: mini-batches of 8 samples, reshuffled on every pass.
loader_options = {"batch_size": 8, "shuffle": True}
data_loader = DataLoader(dataset, **loader_options)

In [12]:
# Fit the model on the batches produced by data_loader.
# NOTE(review): the second argument (5) is presumably the number of epochs —
# confirm against the signature of the model's train method.
model.train(data_loader, 5)

Applying: 100%|██████████| 125/125 [00:00<00:00, 769.25batch/s]
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f1d0460d1f0>
Traceback (most recent call last):
  File "/home/ibuyanov/anaconda3/lib/python3.9/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/home/ibuyanov/anaconda3/lib/python3.9/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/home/ibuyanov/anaconda3/lib/python3.9/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/home/ibuyanov/anaconda3/lib/python3.9/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <

Once training has finished, we can apply the encoder to the data. Make sure you pass a dataloader.

In [13]:
# Encode through a non-shuffling loader: `data_loader` was created with
# shuffle=True, so iterating it yields samples in a random order and the
# encoded rows would presumably not line up with the rows of `data`.
# NOTE(review): confirm that apply_encoder preserves the loader's batch order.
# The batch size matches the training loader, so the number of batches is unchanged.
inference_loader = DataLoader(dataset, batch_size=8, shuffle=False)
encoded_data = model.apply_encoder(inference_loader)

Applying: 100%|██████████| 125/125 [00:00<00:00, 1074.45batch/s]


In [14]:
# Sanity check: expect (1000, 64), i.e. the 1000 generated samples by z_dim=64 from the config.
encoded_data.shape

(1000, 64)