In [11]:
# Process the data
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from process_data import TextPreprocess, Vocabulary


# Load the dataset and strip spaces from every column name in a single,
# non-mutating chain, so re-running the cell always starts from the file.
radonpy_data = (
    pd.read_csv('./radonpy.csv')
    .rename(columns=lambda column_name: column_name.replace(' ', ''))
)

# Entries of the 'smiles' column as a plain Python list.
smiles_list = radonpy_data['smiles'].tolist()

# Build the vocabulary from the list. create_vocabulary returns three objects;
# their exact structure is defined in process_data (not visible here).
smiles_process = TextPreprocess()
word2index, index2word, word_count = smiles_process.create_vocabulary(
    smiles_list
)



In [12]:
from generative_model import VAE_Encoder, VAE_Decoder

# --- Model hyperparameters -------------------------------------------------
# NOTE(review): this rebinds `word_count`, which the data-processing cell also
# assigns from create_vocabulary(); after this cell it is the constant 200.
# Confirm the override is intentional.
word_count = 200
input_dim = 64      # passed to both models as d_model
latent_dim = 32
nhead = 1
hidden_dim = 128
output_dim = word_count

# --- Model construction (encoder first, then decoder) ----------------------
encoder = VAE_Encoder(
    word_count=word_count,
    d_model=input_dim,
    latent_dim=latent_dim,
    nhead=nhead,
)

decoder = VAE_Decoder(
    latent_dim=latent_dim,
    d_model=input_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
)

In [13]:
# Encode the list with padding enabled and wrap the result directly in a
# torch.long (int64) tensor, so `x` is only ever bound to the tensor.
x = torch.tensor(
    smiles_process.text_to_index(smiles_list, padding=True),
    dtype=torch.long,
)
x.size()

torch.Size([1077, 164])

In [14]:
# Run only the encoder's embedding sub-module on the index tensor and show
# the resulting shape.
embedding_x = encoder.embedding(x)
embedding_x.size()

torch.Size([1077, 164, 64])

In [15]:
# Full encoder forward pass; it returns three tensors, unpacked here as
# z, mu and logvar. Display the shape of each.
z, mu, logvar = encoder(x)
tuple(encoded.shape for encoded in (z, mu, logvar))

(torch.Size([1077, 164, 32]),
 torch.Size([1077, 164, 32]),
 torch.Size([1077, 164, 32]))

In [16]:
# Random integer ids with the same shape as the encoded batch `x`, embedded
# with the encoder's embedding layer. The size is taken from `x` instead of a
# hard-coded (1077, 164) so it stays aligned if the dataset or padding length
# changes.
# NOTE(review): 37 is the exclusive upper bound of the sampled ids —
# presumably tied to the vocabulary size; confirm and derive it if so.
start_signal = torch.randint(0, 37, x.shape)
embedding_start_signal = encoder.embedding(start_signal)
embedding_start_signal.shape

torch.Size([1077, 164, 64])

In [17]:
# Build a linear block sized from the hyperparameters: latent_dim in,
# input_dim (the d_model given to both models) out. With the current
# configuration this is [32, 64], the same as the previous literals, but it
# now follows any change made in the hyperparameter cell.
new_linear = decoder.get_linear_layer(layer_dims=[latent_dim, input_dim])
new_linear

Sequential(
  (0): Linear(in_features=32, out_features=64, bias=True)
  (1): ReLU()
)

In [18]:
# Apply the freshly built linear block to z and show the resulting shape.
new_linear_z = new_linear(z)
new_linear_z.size()

torch.Size([1077, 164, 64])

In [19]:
# Print the two shapes together before both tensors are passed to the decoder.
print('embedding_start_signal:', embedding_start_signal.size())
print('new_linear_z:', new_linear_z.size())

embedding_start_signal: torch.Size([1077, 164, 64])
new_linear_z: torch.Size([1077, 164, 64])


In [None]:
# Call the decoder's inner `decoder` sub-module with the embedded start signal
# and the projected latent tensor.
decoder_x = decoder.decoder(embedding_start_signal, new_linear_z)
# NOTE(review): `decoder_decoder_x` was bound to the same object by a chained
# assignment; it looks like a paste accident — confirm and drop if unused.
decoder_decoder_x = decoder_x

In [30]:
# Five stacked RNN layers, batch-first, mapping 32 input features to a
# 64-wide hidden state.
rnn_layer = nn.RNN(
    input_size=32,
    hidden_size=64,
    num_layers=5,
    batch_first=True,
)

# Random input of shape (1077, 1, 32); with batch_first=True that is
# batch=1077, seq_len=1, features=32.
rnn_test_x = torch.randn((1077, 1, 32))
rnn_test_x_out, rnn_test_x_hidden = rnn_layer(rnn_test_x)
tuple(result.shape for result in (rnn_test_x_out, rnn_test_x_hidden))


(torch.Size([1077, 1, 64]), torch.Size([5, 1077, 64]))

In [29]:
# Second pass over the same input, this time seeding the RNN with the hidden
# state returned by the previous call.
# NOTE(review): with the 5-layer `rnn_layer` the returned hidden state should
# have a leading dimension of 5; the recorded output for this cell shows 1, so
# it was probably produced with an earlier single-layer definition — re-run
# the cells in order to refresh it.
rnn_test_x_out2, rnn_test_x_hidden2 = rnn_layer(rnn_test_x, rnn_test_x_hidden)
tuple(result.shape for result in (rnn_test_x_out2, rnn_test_x_hidden2))

(torch.Size([1077, 1, 64]), torch.Size([1, 1077, 64]))