## Import necessary libraries and functions

In [1]:
import warnings
# Install a blanket "ignore" filter so that no Python warning is shown by the
# cells below (keeps the captured output limited to what the notebook prints).
# NOTE(review): this also hides deprecation/runtime warnings from the imported
# libraries -- consider narrowing the filter by category or module.
warnings.filterwarnings('ignore')

In [6]:
# tensorflow backend
from os import environ
environ['KERAS_BACKEND'] = 'tensorflow'
# import scientific py
import numpy as np
import pandas as pd
# rdkit stuff
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import PandasTools
# plotting stuff
import matplotlib.pyplot as plt
import matplotlib as mpl
from IPython.display import SVG, display
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
# adding path with code to PATH variable
import sys
sys.path.insert(1, './chemical_vae')
# vae stuff
from chemvae.vae_utils import VAEUtils
from chemvae import mol_utils as mu

## Using the VAE

smiles → x (one-hot encoding) → z (latent vector) → x_r (decoded one-hot encoding) → smiles_r (reconstructed SMILES)

In [7]:
# creating the variational autoencoder and loading the pretrained model
# The model directory is given relative to the current working directory and
# lives inside the same ./chemical_vae folder that the import cell puts on
# sys.path, so the notebook must be started next to that folder.
# NOTE(review): what exactly VAEUtils loads from this directory is not visible
# here -- confirm against chemvae.vae_utils.
vae = VAEUtils(directory='./chemical_vae/models/zinc_properties')

Using standarized functions? True
Standarization: estimating mu and std values ...done!


In [4]:
def encoding_decoding_prediction(vae, smiles: str, verbose: bool = True):
    """
    Round-trip one SMILES string through the VAE and predict its properties.

    The string is converted to a one-hot encoding (canonicalisation is
    requested from the VAE helper), encoded to a latent representation,
    decoded again and turned back into a SMILES string. The property
    predictor is then applied to the same latent representation.

    Parameters:
    vae (VAEUtils): object containing VAE model
    and property predictor
    smiles (str): SMILES representation of the molecule to process
    verbose (bool): when True (the default) print the input, the
    reconstruction, the latent shape and norm, and the predicted
    properties; when False nothing is printed

    Returns:
    str: SMILES of decoded molecule
    sequence: first row returned by the property predictor; its first
    three entries are reported as qed, SAS and logP

    """
    one_hot = vae.smiles_to_hot(smiles, canonize_smiles=True)
    latent = vae.encode(one_hot)
    one_hot_decoded = vae.decode(latent)

    # Only one molecule was encoded, so the first entry of the returned
    # sequence is the reconstruction of interest.
    smiles_decoded = vae.hot_to_smiles(one_hot_decoded, strip=True)[0]

    if verbose:
        print('{:20s} : {}'.format('Input', smiles))
        print('{:20s} : {}'.format('Reconstruction', smiles_decoded))
        print('{:20s} : {} with norm {:.3f}'.format(
            'Z representation', latent.shape, np.linalg.norm(latent)))

    # Predict after the first three lines are printed so that anything the
    # predictor itself writes keeps its original position in the output.
    properties = vae.predict_prop_Z(latent)[0]

    if verbose:
        print('{:20s} : qed: {:.3f}, SAS: {:.3f}, logP: {:.3f}'.format(
            'Predicted properties', properties[0], properties[1], properties[2]))

    return smiles_decoded, properties

## Encoding, decoding and predicting the properties of the given molecules

* Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1
* CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C
* COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1

In [5]:
# The three example molecules listed above, as SMILES strings.
smiles = [
    'Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1',
    'CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C',
    'COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1',
]

# Report each molecule in turn, followed by a 20-star separator line.
for molecule in smiles:
    encoding_decoding_prediction(vae, molecule)
    print('*' * 20)

Input                : Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1
Reconstruction       : C(cccc(C[n+]2=NC(=O)Nc3ccccc32)cc1
Z representation     : (1, 196) with norm 10.274
Predicted properties : qed: 0.723, SAS: 2.410, logP: 3.147
********************
Input                : CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C
Reconstruction       : CN(C)cccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C
Z representation     : (1, 196) with norm 13.454
Predicted properties : qed: 0.812, SAS: 2.220, logP: 2.438
********************
Input                : COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1
Reconstruction       : COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1
Z representation     : (1, 196) with norm 13.123
Predicted properties : qed: 0.753, SAS: 2.478, logP: 0.050
********************
