In [1]:
import tensorflow as tf
import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, LSTM, Lambda, Reshape
from keras.models import Model
from keras import objectives
from keras.layers.core import RepeatVector
from keras.losses import MSE

lstm_dim = 64
max_smiles_len = 100
latent_dim = 64
batch_size = 10

SMILES_CHARS = [' ',
                  '#', '%', '(', ')', '+', '-', '.', '/',
                  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
                  '=', '@',
                  'A', 'B', 'C', 'F', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P',
                  'R', 'S', 'T', 'V', 'X', 'Z',
                  '[', '\\', ']',
                  'a', 'b', 'c', 'e', 'g', 'i', 'l', 'n', 'o', 'p', 'r', 's',
                  't', 'u','\n']

input_dim = (max_smiles_len, len(SMILES_CHARS))
output_dim = (max_smiles_len, len(SMILES_CHARS))


smi2index = dict((c, i) for i, c in enumerate(SMILES_CHARS))
index2smi = dict((i, c) for i, c in enumerate(SMILES_CHARS))

Using TensorFlow backend.


In [7]:
with open('smallsmiles.txt') as f:
    smiles_as_list = f.readlines()


def smiles_to_onehot(smiles, max_len = 100):
    onehot = np.zeros((max_len, len(SMILES_CHARS)))
    for i, c in enumerate(smiles):
        onehot[i, smi2index[c]] = 1
    return onehot


def smiles_decoder(onehot):
    smi = ''
    onehot = onehot.argmax( axis=-1 )
    for i in onehot:
        smi += index2smi[i]
    return smi

decoded_rnn_size = 64
encoded_rnn_size = 64
batch_size = 1


In [15]:
input = Input(shape=input_dim)
lstm = LSTM(latent_dim, activation='relu')(input)
zmean = Dense(latent_dim, name='Z_mean_t')(lstm)
zvar = Dense(latent_dim, name='Z_log_var_t', activation=tf.nn.softplus)(lstm)
z = Lambda(lambda m: m[0] + m[1] * tf.random.normal(tf.shape(m[0])))([zmean, zvar])
# z_reshaped = Reshape((-1, latent_dim))(z)
encoder = Model(input, z)

In [89]:
latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
repeated = RepeatVector(100)(latent_inputs)
x_2 = LSTM(57, activation='relu', return_sequences=True)(repeated)
decoder = Model(latent_inputs, x_2)

In [99]:
Y = np.zeros((56, 64))
Y.shape

(56, 64)

In [100]:
decoder.compile(loss=dec_test_loss, optimizer='adam')

In [101]:
decoder.fit(Y, X, epochs=6, batch_size=10)

ValueError: An operation has `None` for gradient. Please make sure that all of your ops have a gradient defined (i.e. are differentiable). Common ops without gradient: K.argmax, K.round, K.eval.

In [34]:
def calculate_loss(x, x_decoded_mean):
    xent_loss = objectives.mse(x, x_decoded_mean)
    kl_loss = - 0.5 * K.mean(1 + zvar - K.square(zmean) - K.exp(zvar))
    loss = xent_loss + kl_loss
    return loss

In [58]:
def z_loss(x, x_new):
    xent_loss = objectives.mse(x, x_new)
    kl_loss = - 0.5 * K.mean(1 + zvar - K.square(zmean) - K.exp(zvar))
    #loss = kl_loss + xent_loss
    return kl_loss

In [93]:
def dec_test_loss(z_start, x_new):
    return objectives.mse(z_start, np.ones(shape=(batch_size,64)))

In [18]:
X = [smiles_to_onehot(x) for x in smiles_as_list]
X = np.array(numpy_X)
#X = tf.convert_to_tensor(numpy_X, dtype=tf.float32)
X.shape

(56, 100, 57)

In [20]:
Y = np.zeros((56, 64))
Y.shape

(56, 64)

In [59]:
encoder.compile(loss=z_loss, optimizer='adam')

Tensor("loss_7/lambda_1_loss/z_loss/mul:0", shape=(), dtype=float32)


In [60]:
encoder.fit(X, X, epochs=6, batch_size=10)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.callbacks.History at 0x260d1752fd0>

In [40]:
X1 = X[0:2]
X1 = np.reshape(X1, (2,100,57))

In [42]:
vec = encoder.predict(X1)
vec

array([[ 0.69962615, -0.19044453, -0.473943  ,  0.15709229,  0.38676664,
         0.09673379, -0.00552937,  0.27217484, -0.33069965,  0.10350725,
         0.04005065,  0.01897205,  0.4978163 ,  0.43282837,  0.07750034,
        -0.12980549, -0.0157441 ,  0.23288672, -0.16825461, -0.15380038,
        -0.506997  , -0.56558514, -0.318742  ,  0.0657466 ,  0.17788282,
        -0.01267723,  0.46297395, -0.05190086,  0.04366036, -0.49731696,
         0.49449486, -0.18058655,  0.16823865,  0.08973356,  0.26726362,
         0.07683934,  0.21143092,  0.06255142, -0.0999649 , -0.1656941 ,
        -0.24250764, -0.12202965,  0.19023865,  0.03327133,  0.23918997,
        -0.27183548,  0.05319056, -0.1087061 ,  0.01953014,  0.39397752,
         0.06285544, -0.03837576, -0.0708136 , -0.07815469, -0.3875594 ,
        -0.07003388, -0.06721469, -0.04812105,  0.01561065, -0.26681468,
         0.19939585, -0.02313525,  0.29807955, -0.29135686],
       [-0.09970035,  0.20681874,  0.23917086,  0.28901142, -0.

In [9]:
# vae_.compile(loss=calculate_loss, optimizer='adam')
# \

# outputs = decoder(encoder(X))
outputs = encoder(X)

vae = Model(input, outputs)
#    vae.compile(loss=zero_loss, optimizer='adam')
#    vae.predict(X, steps=1)




# vae.compile(loss=calculate_loss, optimizer='adam')
# vae.fit(X, X, steps_per_epoch=100, epochs=5)

# mu = vae.predict(X, steps=1)
# print(mu)

AttributeError: 'NoneType' object has no attribute '_inbound_nodes'