## Load libraries

In [None]:
# tensorflow backend
from os import environ
environ['KERAS_BACKEND'] = 'tensorflow'
# vae stuff
from chemical_vae.chemvae.vae_utils import VAEUtils
from chemical_vae.chemvae import mol_utils as mu
# import scientific py
import numpy as np
import pandas as pd
# rdkit stuff
from rdkit.Chem import AllChem as Chem
from rdkit.Chem import PandasTools
# plotting stuff
import matplotlib.pyplot as plt
import matplotlib as mpl
from IPython.display import SVG, display
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

## Load a model

In [None]:
# Build the VAE helper object from the model directory below.
# The path is relative, so the notebook must be run from the folder that
# contains the `chemical_vae/` checkout.
# NOTE(review): presumably this loads the trained encoder/decoder/property
# predictor weights stored in that directory — confirm against VAEUtils.
vae = VAEUtils(directory='chemical_vae/models/zinc_properties')

## Decode/Encode 

The reconstruction might not be perfect (it's probabilistic). Re-run the cell several times if needed.

smiles → x → z → x_r → smiles_r

In [11]:
# Round-trip three example molecules through the model:
#   smiles -> x -> z -> x_r -> smiles_r
# Each stage is run for all three molecules before the next stage starts,
# and the per-molecule names (smiles_N, xN, zN, xN_r) stay available to
# later cells.

# Canonical SMILES for the three example inputs.
smiles_1, smiles_2, smiles_3 = [
    mu.canon_smiles(raw)
    for raw in (
        'Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1',
        'CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C',
        'COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1',
    )
]

# smiles -> x (model input array; presumably a one-hot encoding — see smiles_to_hot)
x1, x2, x3 = [
    vae.smiles_to_hot(smi, canonize_smiles=True)
    for smi in (smiles_1, smiles_2, smiles_3)
]

# x -> z (latent representation)
z1, z2, z3 = [vae.encode(hot) for hot in (x1, x2, x3)]

# z -> x_r (reconstructed array)
x1_r, x2_r, x3_r = [vae.decode(latent) for latent in (z1, z2, z3)]

# Report inputs, then reconstructions, then the latent vectors' shape and norm.
for label, smi in (
    ('Input 1', smiles_1),
    ('Input 2', smiles_2),
    ('Input 3', smiles_3),
):
    print('{:20s} : {}'.format(label, smi))

for label, hot_r in (
    ('Reconstruction 1', x1_r),
    ('Reconstruction 2', x2_r),
    ('Reconstruction 3', x3_r),
):
    # hot_to_smiles returns a sequence; only its first entry is shown.
    print('{:20s} : {}'.format(label, vae.hot_to_smiles(hot_r, strip=True)[0]))

for label, latent in (
    ('Z representation 1', z1),
    ('Z representation 2', z2),
    ('Z representation 3', z3),
):
    print('{:20s} : {} with norm {:.3f}'.format(label, latent.shape, np.linalg.norm(latent)))

Input 1              : Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1
Input 2              : CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C
Input 3              : COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1
Reconstruction 1     : Cc1ccc(-](N)C(NC(=O)Nc3ccccc32)cc1
Reconstruction 2     : CN(C)cccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C
Reconstruction 3     : COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1
Z representation 1   : (1, 196) with norm 10.296
Z representation 2   : (1, 196) with norm 13.437
Z representation 3   : (1, 196) with norm 13.102


## Property prediction

Now we predict the quantitative estimate of drug-likeness (QED), the synthetic accessibility score (SAS), and the water–octanol partition coefficient (logP).

In [15]:
# Predict properties from each latent vector computed in the previous cell.
# Only the first entry of each prediction is kept (assumes one molecule per
# latent array — TODO confirm against predict_prop_Z).
y1, y2, y3 = [vae.predict_prop_Z(latent)[0] for latent in (z1, z2, z3)]

# Header row, then one row per molecule: SMILES padded to 50 columns, followed
# by its predicted property values.
print('{:50}   {}'.format('Chemical structure', 'Properties [qed,SAS,logP]'))
for smi, props in ((smiles_1, y1), (smiles_2, y2), (smiles_3, y3)):
    print('{:50} : {}'.format(smi, props))

Chemical structure                                   Properties [qed,SAS,logP]
Cc1ccc(S2(=O)=NC(=O)Nc3ccccc32)cc1                 : [0.72313255 2.4103725  3.1467233 ]
CN(Cc1ccc2c(c1)C(=O)CC2)C(=O)OC(C)(C)C             : [0.81158835 2.2198553  2.4382763 ]
COC(=O)C1CCC(Oc2ccc(NC(=O)C(=O)NN)cn2)CC1          : [0.7531525  2.4784982  0.05034626]
