In [1]:
import numpy as np
import psycopg2
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
# Global seaborn styling for every figure in this notebook: 'talk' context,
# 'ticks' style, short colour codes enabled, and legends drawn without a frame.
sns.set(
    context='talk',
    style='ticks',
    color_codes=True,
    rc={'legend.frameon': False},
)

%matplotlib inline

In [2]:
import os
import sys
# Put the parent directory on the import path; presumably this is where the
# project-local modules imported just below live — confirm against repo layout.
sys.path.append('..')

from alphazero import config
import stable_rad_config

In [None]:
import tensorflow as tf
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth must be configured before the GPUs are initialised;
        # re-running this cell in a live kernel would otherwise abort here.
        print(err)
import nfp

In [None]:
import rdkit
import networkx as nx
from run_mcts import StabilityNode

from reward import calc_reward_inner

In [None]:
# Build a search node from a small example molecule.
node = StabilityNode(
    rdkit.Chem.MolFromSmiles('C(N)O'),
)

In [3]:
from alphazero.molecule import build_molecules, EnumerateStereoisomers, StereoEnumerationOptions

INFO:rdkit:Enabling RDKit 2020.09.4 jupyter extensions


In [6]:
import rdkit
from rdkit import Chem

In [None]:
# Parse an example molecule, sanitize it, and (re)assign stereochemistry tags
# in place before enumerating its stereoisomers.
mol = Chem.MolFromSmiles('CC(N)O')
Chem.SanitizeMol(mol)
Chem.AssignStereochemistry(mol)

In [None]:
# Enumerate the unique stereoisomers of `mol` and show them as SMILES.
opts = StereoEnumerationOptions(unique=True)
list(map(Chem.MolToSmiles, EnumerateStereoisomers(mol, options=opts)))

In [None]:
# Round-trip a stereo-annotated SMILES through RDKit to see how it is written back.
Chem.MolToSmiles(
    Chem.MolFromSmiles('C[C@H](N)O')
)

In [9]:
# SMILES of every molecule that build_molecules yields for the example molecule.
list(map(rdkit.Chem.MolToSmiles,
         build_molecules(rdkit.Chem.MolFromSmiles('C(N)O'))))

['C[C@H](N)O',
 'C[C@@H](N)O',
 'C=C(N)O',
 'NC(N)O',
 'N=C(N)O',
 'NC(O)O',
 'NC(=O)O',
 'CNCO',
 'COCN']

In [None]:
# Evaluate the reward function on a node built from the example molecule.
calc_reward_inner(
    StabilityNode(rdkit.Chem.MolFromSmiles('C(N)O'))
)

In [None]:
# Load the reward buffer to plot the optimal molecules.
# psycopg2's `with connection:` block only ends the transaction; it does NOT
# close the connection, so close it explicitly to avoid leaking a DB session
# every time this cell is re-run.
conn = psycopg2.connect(**config.dbparams)
try:
    # The table name comes from the project config (not user input); SQL
    # placeholders cannot be used for identifiers.
    rew_df = pd.read_sql_query("select * from {}_reward".format(config.sql_basename), conn)
finally:
    conn.close()

rew_df.shape

In [None]:
# Ten highest-reward rows of the reward table.
(
    rew_df
    .sort_values('real_reward', ascending=False)
    .head(10)
)

In [None]:
import rdkit

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import SimilarityMaps
from rdkit.Chem import rdDepictor
from IPython.display import SVG

In [None]:
from reward import predict, stability_model
from alphazero.node import Node

def pred_spins(mol):
    """Return the first output of the stability model for `mol`, flattened to 1-D.

    The molecule is wrapped in a `Node`, each of its policy inputs gets a
    leading batch axis of size one, and the batch is passed to
    `predict(stability_model, ...)`. Presumably the values are per-atom spin
    densities (they are used as per-atom weights elsewhere) — confirm.
    """
    batched_inputs = {}
    for name, array in Node(mol).policy_inputs.items():
        batched_inputs[name] = tf.constant(np.expand_dims(array, 0))

    spin_output, _ = predict(stability_model, batched_inputs)
    return spin_output.numpy().flatten()

def pred_bv(mol):
    """Return the second output of the stability model for `mol`, flattened to 1-D.

    Inputs are built exactly as for the spin prediction: the policy inputs of
    `Node(mol)` with a leading batch axis of size one. Presumably "bv" stands
    for buried volume — confirm against the model definition.
    """
    batched_inputs = {}
    for name, array in Node(mol).policy_inputs.items():
        batched_inputs[name] = tf.constant(np.expand_dims(array, 0))

    _, bv_output = predict(stability_model, batched_inputs)
    return bv_output.numpy().flatten()

# Load the saved redox model for inference only (no optimizer/loss restored).
# NOTE(review): absolute cluster path — the notebook only runs where this exists.
redox_model = tf.keras.models.load_model(
    '/projects/rlmolecule/pstjohn/models/20201030_redox_model/', compile=False
)

@tf.function(experimental_relax_shapes=True)
def predict_redox_tf(inputs):
    """Run one traced prediction step of `redox_model` on a batch of inputs."""
    outputs = redox_model.predict_step(inputs)
    return outputs

def pred_redox(mol):
    """Return the redox model's prediction for a single molecule as a flat array.

    The policy inputs of `Node(mol)` are given a leading batch axis of size
    one and passed through `predict_redox_tf`.
    """
    batched_inputs = {}
    for name, array in Node(mol).policy_inputs.items():
        batched_inputs[name] = tf.constant(np.expand_dims(array, 0))

    prediction = predict_redox_tf(batched_inputs)
    return prediction.numpy().flatten()

In [None]:
# Draw the predicted per-atom values from pred_spins as a similarity map
# on an example radical and render the result as an SVG.
mol = rdkit.Chem.MolFromSmiles('C=C(C(C)C)C(C([O])=O)(N(C)C)N(C)C')

d = rdMolDraw2D.MolDraw2DSVG(400, 400)
SimilarityMaps.GetSimilarityMapFromWeights(
    mol,
    list(map(float, pred_spins(mol))),  # RDKit expects plain Python floats
    draw2d=d,
)
d.FinishDrawing()
SVG(d.GetDrawingText())

In [None]:
# Keep only molecules whose real reward exceeds 100; copy so that columns can
# be added later without touching rew_df.
high_reward = rew_df.loc[rew_df['real_reward'] > 100].copy()

In [None]:
# Peek at the first five high-reward rows.
high_reward.head(5)

In [None]:
from alphazero.preprocessor import preprocessor

def _iter_redox_features():
    """Yield preprocessor feature matrices for every high-reward SMILES, in order."""
    for smiles in high_reward.smiles:
        yield preprocessor.construct_feature_matrices(
            Chem.MolFromSmiles(smiles), train=False)


# Padded batches of 128 molecules, ready for redox_model.predict.
redox_dataset = (
    tf.data.Dataset.from_generator(
        _iter_redox_features,
        output_types=preprocessor.output_types,
        output_shapes=preprocessor.output_shapes)
    .padded_batch(
        batch_size=128,
        padded_shapes=preprocessor.padded_shapes(),
        padding_values=preprocessor.padding_values)
)

In [None]:
# Predict redox values for all high-reward molecules; output column 0 is
# stored as 'IE' and column 1 as 'EA'.
predicted_redox = redox_model.predict(redox_dataset, verbose=1)
high_reward['IE'], high_reward['EA'] = predicted_redox[:, 0], predicted_redox[:, 1]

In [None]:
from matplotlib.patches import Polygon

# Scatter of predicted EA vs. IE for all high-reward molecules. The dashed red
# triangle and the grey bands mark the EA in [-0.5, 0.2] / IE in [0.5, 1.2] window.
fig, ax = plt.subplots(figsize=(4, 4), subplot_kw={'aspect': 'equal'})

ax.plot(high_reward['EA'], high_reward['IE'], '.', ms=1)
ax.add_patch(Polygon(
    np.array([(-.5, 1.2), (-.5, 0.5), (.2, 1.2)]),
    facecolor='none', edgecolor='r', lw=3, ls='--', zorder=5))
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1, 2)

ax.axvspan(-.5, .2, facecolor='.8', edgecolor='none')
ax.axhspan(.5, 1.2, facecolor='.8', edgecolor='none')

ax.set_xlabel(r'$E_{1/2}^o$, EA (V)')
ax.set_ylabel(r'$E_{1/2}^o$, IE (V)')

In [None]:
from tqdm import tqdm
# Register tqdm with pandas so the `progress_apply` calls below are available.
tqdm.pandas()

In [None]:
def prepare_for_bde(smiles):
    """Cap a radical with hydrogen and locate the resulting X-H bond.

    The first atom carrying radical electrons gets one extra explicit H and
    its radical count reset to zero. The capped molecule is written back to
    SMILES and re-parsed; the capped atom is then found again in the re-parsed
    molecule by matching canonical atom ranks (chirality included).

    Parameters
    ----------
    smiles : str
        SMILES of a molecule containing at least one radical center. Only the
        first radical atom (in atom order) is capped.

    Returns
    -------
    pandas.Series
        'mol_smiles'        -- SMILES of the H-capped molecule
        'radical_index_mol' -- index of the capped atom in the re-parsed molecule
        'bond_index'        -- index (after Chem.AddHs) of a bond between the
                               capped atom and a hydrogen, or None if none found

    Raises
    ------
    ValueError
        If `smiles` cannot be parsed or contains no radical center.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        raise ValueError('could not parse SMILES: {!r}'.format(smiles))

    radical_index = next(
        (i for i, atom in enumerate(mol.GetAtoms())
         if atom.GetNumRadicalElectrons() != 0),
        None)
    if radical_index is None:
        # Fail loudly here instead of indexing the rank list with None below.
        raise ValueError('no radical center found in SMILES: {!r}'.format(smiles))

    radical_atom = mol.GetAtomWithIdx(radical_index)
    radical_atom.SetNumExplicitHs(radical_atom.GetNumExplicitHs() + 1)
    radical_atom.SetNumRadicalElectrons(0)

    # Atom order can change on the SMILES round trip, so remember the capped
    # atom by its canonical rank rather than by its index.
    radical_rank = Chem.CanonicalRankAtoms(mol, includeChirality=True)[radical_index]

    mol_smiles = Chem.MolToSmiles(mol)
    mol = Chem.MolFromSmiles(mol_smiles)

    radical_index_reordered = list(Chem.CanonicalRankAtoms(
        mol, includeChirality=True)).index(radical_rank)

    # Hydrogens only become real atoms (and bonds) after AddHs.
    molH = Chem.AddHs(mol)
    for bond in molH.GetAtomWithIdx(radical_index_reordered).GetBonds():
        if 'H' in {bond.GetBeginAtom().GetSymbol(), bond.GetEndAtom().GetSymbol()}:
            bond_index = bond.GetIdx()
            break
    else:
        bond_index = None

    return pd.Series({
        'mol_smiles': mol_smiles,
        'radical_index_mol': radical_index_reordered,
        'bond_index': bond_index
    })


# One row per high-reward radical: H-capped SMILES, capped-atom index, X-H bond index.
radical_bonds = high_reward['smiles'].progress_apply(prepare_for_bde)

In [None]:
# Make the saved BDE model directory importable so the `preprocess_inputs`
# module stored alongside the model can be loaded.
# NOTE(review): absolute cluster path — only works where this directory exists.
sys.path.append('/projects/rlmolecule/pstjohn/models/20201031_bde/')
from preprocess_inputs import preprocessor as bde_preprocessor
# Restore the preprocessor's saved state from the JSON file next to the model
# (presumably the token vocabularies used at training time — confirm).
bde_preprocessor.from_json('/projects/rlmolecule/pstjohn/models/20201031_bde/preprocessor.json')

# Load the saved BDE model for inference only.
bde_model = tf.keras.models.load_model(
    '/projects/rlmolecule/pstjohn/models/20201031_bde/', compile=False
)


def _iter_bde_features():
    """Yield BDE-preprocessor feature matrices for every H-capped SMILES, in order."""
    for smiles in radical_bonds.mol_smiles:
        yield bde_preprocessor.construct_feature_matrices(smiles, train=False)


# Padded batches of 128 molecules; the bond axis is padded to 100 entries.
bde_dataset = (
    tf.data.Dataset.from_generator(
        _iter_bde_features,
        output_types=bde_preprocessor.output_types,
        output_shapes=bde_preprocessor.output_shapes)
    .padded_batch(
        batch_size=128,
        padded_shapes=bde_preprocessor.padded_shapes(max_bonds=100),
        padding_values=bde_preprocessor.padding_values)
)

def bde_check_valid(smiles):
    """Return True when no atom or bond of `smiles` is encoded as class 1.

    The SMILES is run through `bde_preprocessor`; the result is False as soon
    as any entry of the 'atom' or 'bond' feature arrays equals 1. Presumably 1
    is the preprocessor's "unknown" token — confirm against the preprocessor.
    """
    features = bde_preprocessor.construct_feature_matrices(smiles, train=False)
    has_flagged_atom = (features['atom'] == 1).any()
    has_flagged_bond = (features['bond'] == 1).any()
    return not (has_flagged_atom or has_flagged_bond)
    
# Flag molecules whose features contain no class-1 atom/bond entries.
# NOTE(review): the check runs on the radical SMILES (high_reward.smiles),
# while bde_dataset is built from the H-capped radical_bonds.mol_smiles —
# confirm this is the intended input for the validity check.
is_valid = high_reward['smiles'].progress_apply(bde_check_valid)
radical_bonds['is_valid'] = is_valid

In [None]:
# Run the BDE model over all H-capped molecules; it returns two outputs.
pred_bdes, pred_bdfes = bde_model.predict(
    bde_dataset,
    verbose=True,
)

In [None]:
# Pick, for each molecule, the predicted BDE of the X-H bond formed by capping
# the radical. Rows where prepare_for_bde found no such bond (bond_index is
# None/NaN) get NaN instead of silently mis-indexing pred_bdes; NaN then fails
# the `BDE < 80` filter below. int() guards against the column having been
# upcast to float by missing values.
high_reward['BDE'] = [
    np.nan if pd.isna(n) else pred_bdes[i, int(n)][0]
    for i, n in enumerate(radical_bonds.bond_index)
]
high_reward['V_diff'] = high_reward['IE'] - high_reward['EA']
# Keep molecules that passed the preprocessor validity check and have BDE < 80.
valid_subset = high_reward[radical_bonds.is_valid & (high_reward.BDE < 80)]

In [None]:
from matplotlib.patches import Polygon

# Same EA vs. IE scatter, restricted to valid_subset. The dashed red triangle
# and the grey bands mark the EA in [-0.5, 0.2] / IE in [0.5, 1.2] window.
fig, ax = plt.subplots(figsize=(4, 4), subplot_kw={'aspect': 'equal'})

ax.plot(valid_subset['EA'], valid_subset['IE'], '.', ms=1)
ax.add_patch(Polygon(
    np.array([(-.5, 1.2), (-.5, 0.5), (.2, 1.2)]),
    facecolor='none', edgecolor='r', lw=3, ls='--', zorder=5))
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1, 2)

ax.axvspan(-.5, .2, facecolor='.8', edgecolor='none')
ax.axhspan(.5, 1.2, facecolor='.8', edgecolor='none')

ax.set_xlabel(r'$E_{1/2}^o$, EA (V)')
ax.set_ylabel(r'$E_{1/2}^o$, IE (V)')

In [None]:
# Go/no-go filter: EA above -0.5, IE below 1.2, and IE - EA greater than 1
# (the interior of the dashed triangle drawn in the scatter plots).
pass_gng = valid_subset.loc[
    (valid_subset['V_diff'] > 1)
    & (valid_subset['IE'] < 1.2)
    & (valid_subset['EA'] > -.5)
]

In [None]:
# Lowest predicted IE among the molecules that pass the go/no-go filter.
pass_gng['IE'].min()

In [None]:
from matplotlib.patches import Polygon

# EA vs. IE scatter for the molecules that pass the go/no-go filter; only the
# dashed red triangle is drawn here (the grey bands are omitted in this view).
fig, ax = plt.subplots(figsize=(4, 4), subplot_kw={'aspect': 'equal'})

ax.plot(pass_gng['EA'], pass_gng['IE'], '.', ms=1)
ax.add_patch(Polygon(
    np.array([(-.5, 1.2), (-.5, 0.5), (.2, 1.2)]),
    facecolor='none', edgecolor='r', lw=3, ls='--', zorder=5))
ax.set_xlim(-1.5, 1.5)
ax.set_ylim(-1, 2)

ax.set_xlabel(r'$E_{1/2}^o$, EA (V)')
ax.set_ylabel(r'$E_{1/2}^o$, IE (V)')

In [None]:
# Passing molecules ordered from highest to lowest real reward.
pass_gng.sort_values(
    by='real_reward',
    ascending=False,
)

In [None]:
# pass_gng.to_csv('20201031_gng_radicals.csv')

In [None]:
# Draw up to 64 passing molecules on an 8x8 grid of 150px panels, highlighting
# one atom per molecule.
# - min(...) keeps sample() from raising when fewer than 64 molecules pass.
# - random_state makes the displayed selection reproducible across re-runs.
# NOTE(review): assumes the reward table provides an `atom_index` column (it is
# not created anywhere in this notebook) — TODO confirm.
to_plot = pass_gng.sample(min(64, len(pass_gng)), random_state=0)
d2d = rdMolDraw2D.MolDraw2DSVG(150*8, 150*8, 150, 150)
d2d.DrawMolecules(
    [Chem.MolFromSmiles(smiles) for smiles in to_plot.smiles],
    highlightAtoms=[(int(atom_index),) for atom_index in to_plot.atom_index])
d2d.FinishDrawing()
SVG(d2d.GetDrawingText())