### Dependencies for the interactive plots (in addition to rdkit, oechem and the other qc* packages)
 !conda install -c conda-forge plotly -y
 
 !conda install -c plotly jupyter-dash -y
 
 !conda install -c plotly plotly-orca -y

In [1]:
#imports
import numpy as np
from scipy import stats
import fragmenter
from openeye import oechem

In [2]:
# Names of the TorsionDrive datasets; the cells below turn each name into
# the path of a '<name>.oeb' file.
TD_datasets = [
    "Fragment Stability Benchmark",
    "Fragmenter paper",
    "OpenFF DANCE 1 eMolecules t142 v1.0",
    "OpenFF Fragmenter Validation 1.0",
    "OpenFF Full TorsionDrive Benchmark 1",
    "OpenFF Gen 2 Torsion Set 1 Roche",
    "OpenFF Gen 2 Torsion Set 1 Roche 2",
    "OpenFF Gen 2 Torsion Set 2 Coverage",
    "OpenFF Gen 2 Torsion Set 2 Coverage 2",
    "OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy",
    "OpenFF Gen 2 Torsion Set 3 Pfizer Discrepancy 2",
    "OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy",
    "OpenFF Gen 2 Torsion Set 4 eMolecules Discrepancy 2",
    "OpenFF Gen 2 Torsion Set 5 Bayer",
    "OpenFF Gen 2 Torsion Set 5 Bayer 2",
    "OpenFF Gen 2 Torsion Set 6 Supplemental",
    "OpenFF Gen 2 Torsion Set 6 Supplemental 2",
    "OpenFF Group1 Torsions",
    "OpenFF Group1 Torsions 2",
    "OpenFF Group1 Torsions 3",
    "OpenFF Primary Benchmark 1 Torsion Set",
    "OpenFF Primary Benchmark 2 Torsion Set",
    "OpenFF Primary TorsionDrive Benchmark 1",
    "OpenFF Rowley Biaryl v1.0",
    "OpenFF Substituted Phenyl Set 1",
    "OpenFF-benchmark-ligand-fragments-v1.0",
    "Pfizer Discrepancy Torsion Dataset 1",
    "SMIRNOFF Coverage Torsion Set 1",
    "SiliconTX Torsion Benchmark Set 1",
    "TorsionDrive Paper",
]

In [12]:
def oeb2oemol(oebfile):
    """
    Read all molecules stored in an oeb file.

        Parameters
        ----------
        oebfile : String
            Path/name of the oeb file to read
        Returns
        -------
        mollist : List of objects
            One oechem.OEGraphMol copy per molecule in the file, in the
            order the stream yields them

    """
    ifs = oechem.oemolistream(oebfile)
    try:
        # Each molecule is copied into a fresh OEGraphMol so the returned
        # list does not depend on the object handed out by the stream
        # (presumably reused between iterations -- confirm against OEChem docs).
        return [oechem.OEGraphMol(mol) for mol in ifs.GetOEGraphMols()]
    finally:
        # Release the underlying file handle even if reading fails.
        ifs.close()


def compute_r_ci(wbos, max_energies):
    """
    Return r**2 of the linear regression of max_energies on wbos.

    Parameters
    ----------
    wbos : sequence of numbers
        x values passed to scipy.stats.linregress
    max_energies : sequence of numbers
        y values passed to scipy.stats.linregress

    Returns
    -------
    float
        Square of the correlation coefficient reported by linregress
    """
    regression = stats.linregress(wbos, max_energies)
    correlation = regression.rvalue
    return correlation ** 2


def plot_interactive(fileList, t_id):
    """
    Takes in a list of oeb files and plots wbo vs torsion barrier for one
    torsion id, combining all the datasets (one scatter trace per file) and
    overlaying a linear fit through the combined points.

    Selecting points in the plot renders the matching molecules below it.

    Note: ***Plot is interactive (or returns chemical structures) only for
    the last usage; every call starts a new inline Dash app on port 8061.

    Parameters
    ----------
    fileList: list of strings
    each string is a oeb file name
    Eg. ['rowley.oeb'] or ['rowley.oeb', 'phenyl.oeb']

    t_id: str
    torsion id, eg., 't43'

    Returns
    -------
    fig: plotly.graph_objects.Figure
    the figure holding the per-file scatter traces and the fit line

    Raises
    ------
    ValueError
    if none of the files contains a molecule tagged with t_id
    """
    import base64
    import ntpath
    from io import BytesIO

    import dash_core_components as dcc
    import dash_html_components as html
    import pandas as pd
    import plotly.graph_objects as go
    from dash.dependencies import Input, Output
    from jupyter_dash import JupyterDash
    from rdkit import Chem
    from rdkit.Chem.Draw import MolsToGridImage

    columns = ['tid', 'tb', 'wbo', 'cmiles', 'TDindices', 'filename']

    fig = go.Figure({'layout' : go.Layout(height=900, width=1000,
            xaxis={'title': 'Wiberg Bond Order'},
            yaxis={'title': 'Torsion barrier (kJ/mol)'},
            margin={'l': 40, 'b': 40, 't': 10, 'r': 10},
            legend={'orientation': 'h', 'y': -0.2},
            legend_font=dict(family='Rockwell', color='black', size=14),
            hovermode=False,
            dragmode='select')})
    # Both axes share exactly the same styling.
    axis_style = dict(
        title_font=dict(size=18, family='Rockwell', color='black'),
        ticks="outside", tickwidth=2, tickcolor='black', ticklen=10,
        tickfont=dict(family='Rockwell', color='black', size=14),
        showgrid=False, gridwidth=1, gridcolor='black',
        mirror=True, linewidth=2, linecolor='black', showline=True)
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)

    palette = fragmenter.chemi._KELLYS_COLORS

    # Rows are gathered in a plain list and turned into a DataFrame once;
    # growing a DataFrame row by row is quadratic and DataFrame.append is
    # no longer available in pandas >= 2.0.
    rows = []
    for count, fileName in enumerate(fileList):
        # Resolved per file (not per molecule) so that a file without any
        # molecules still gets its own, correctly named, empty trace.
        fname = ntpath.basename(fileName)
        file_rows = [{'tid': m.GetData("IDMatch"),
                      'tb': m.GetData("TB"),
                      'wbo': m.GetData("WBO"),
                      'cmiles': m.GetData("cmiles"),
                      'TDindices': m.GetData("TDindices"),
                      'filename': fname}
                     for m in oeb2oemol(fileName)]
        rows.extend(file_rows)

        matching = [row for row in file_rows if row['tid'] == t_id]
        fig.add_scatter(x=[row['wbo'] for row in matching],
                        y=[row['tb'] for row in matching],
                        mode="markers",
                        name=fname,
                        # wrap around so long file lists cannot run past
                        # the end of the palette
                        marker_color=palette[count % len(palette)],
                        marker_symbol=count,
                        marker_size=8)

    df = pd.DataFrame(rows, columns=columns)

    selected = df[df.tid == t_id]
    if selected.empty:
        raise ValueError(
            "No torsions with id %r found in the given files" % (t_id,))
    x = selected.wbo
    y = selected.tb
    fit = stats.linregress(x, y)
    slope, intercept, r_value = fit.slope, fit.intercept, fit.rvalue
    print("tid: ", t_id, "r_value: ", r_value,
          "slope: ", slope, "intercept: ", intercept)

    fit_x = np.unique(x)
    fig.add_traces([go.Scatter(
        x=fit_x,
        y=np.poly1d([slope, intercept])(fit_x),
        showlegend=False, mode ='lines')])
    fig_text = 'slope: %.2f, r_val: %.2f' % (slope, r_value)
    fig.add_annotation(text=fig_text,
                       font = {'family': "Times", 'size': 18, 'color': 'black'},
                       xref="paper", yref="paper", x=1, y=0.2,
                       showarrow=False)

    graph_component = dcc.Graph(id="graph_id", figure=fig)
    image_component = html.Img(id="structure-image")

    app = JupyterDash(__name__)

    app.layout = html.Div([
        html.Div([graph_component]),
        html.Div([image_component])])

    @app.callback(
        Output('structure-image', 'src'),
        [Input('graph_id', 'selectedData')])
    def display_selected_data(selectedData):
        """Return a base64 PNG data URI with the structures of the selection."""
        max_structs = 40
        structs_per_row = 1
        # 1x1 transparent GIF shown when there is nothing to draw
        empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="
        if not selectedData or len(selectedData['points']) == 0:
            return empty_plot

        points = selectedData['points']
        print("# of points selected = ", len(points))
        # Selected points are mapped back to molecules through their barrier
        # height (y value) within the plotted torsion id.
        yval = [point['y'] for point in points]
        match_df = df[df['tb'].isin(yval) & (df['tid'] == t_id)]
        print(len(match_df))
        if match_df.empty:
            return empty_plot

        # Only the first max_structs molecules are drawn; legends are built
        # from the same truncated rows so both lists have equal length.
        shown = match_df.head(max_structs)
        legends = []
        for row in shown.itertuples(index=False):
            # TDindices are shifted by one for display
            atom_numbers = [str(idx + 1) for idx in list(row.TDindices)[:4]]
            legends.append('%s : TDindices [ %s ] wbo: %.2f TB: %.2f (kJ/mol)'
                           % (row.tid, ' '.join(atom_numbers),
                              row.wbo, row.tb))
        mols = [Chem.MolFromSmiles(smiles) for smiles in shown.cmiles]

        img = MolsToGridImage(mols,
                              subImgSize=(400, 400),
                              molsPerRow=structs_per_row,
                              legends=legends)
        buffered = BytesIO()
        img.save(buffered, format="PNG")
        encoded_image = base64.b64encode(buffered.getvalue())
        return 'data:image/png;base64,{}'.format(encoded_image.decode())

    # The server is only started when the function lives in the main
    # namespace (e.g. a notebook cell), not when it is imported.
    if __name__ == '__main__':
        app.run_server(mode='inline', port=8061, debug=True)

    return fig


In [4]:
# Single-dataset plot: torsion id t43 in the Rowley biaryl set only.
rowley_t43 = plot_interactive(
    fileList=['./FF_1.2.1/OpenFF Rowley Biaryl v1.0.oeb'],
    t_id='t43',
)

tid:  t43 r_value:  -0.045708731993487406 slope:  -36.37504650365637 intercept:  55.698882138701066


In [5]:
# Combined plot: torsion id t43 across every dataset listed in TD_datasets.
TD_working_oeb = [f'./FF_1.2.1/{dataset}.oeb' for dataset in TD_datasets]
all_t43 = plot_interactive(fileList=TD_working_oeb, t_id='t43')

tid:  t43 r_value:  0.82272786065585 slope:  516.1768094538851 intercept:  -493.1300638906191


In [21]:
# One combined plot per TIG torsion id, each exported as a PDF next to the
# input files.
folder_name = './FF_1.3.0-tig/'
tig_ids = ['TIG8']  # full sweep: 'TIG1', 'TIG2', 'TIG3', 'TIG4', 'TIG5', 'TIG6', 'TIG7', 'TIG8'
TD_working_oeb = [folder_name + dataset + '.oeb' for dataset in TD_datasets]
for iid in tig_ids:
    tmp = plot_interactive(TD_working_oeb, t_id=iid)
    pdf_path = folder_name + "fig_" + str(iid) + ".pdf"
    tmp.write_image(pdf_path)

tid:  TIG8 r_value:  0.4168597107629522 slope:  136.121985107741 intercept:  -95.46544507009921


# of points selected =  1
[H:15][c:2]1[c:3]([c:6]([c:5]([c:7]([c:4]1[H:17])[N:14]([H:24])[C:8]2=[N:12][C:9]([C:10]([N:13]2[H:23])([H:21])[H:22])([H:19])[H:20])[H:18])[C:1]#[N:11])[H:16]
1
# of points selected =  4
[H:14][c:1]1[c:2]([c:5]([n:10][c:7]([c:3]1[H:16])[N:13]([H:23])[c:8]2[c:4]([c:6]([n:11][n:12]2[H:22])[C:9]([H:19])([H:20])[H:21])[H:17])[H:18])[H:15]
[H:14][c:1]1[c:2]([c:5]([n:10][c:7]([c:3]1[H:16])[N:13]([H:23])[c:8]2[c:4]([c:6]([n:11][n:12]2[H:22])[C:9]([H:19])([H:20])[H:21])[H:17])[H:18])[H:15]
2
# of points selected =  3
[H:16][C:1]1=[C:2]([C:5]([C:6]([C:7]([C:8]1([C:10]([H:26])([H:27])[H:28])[C:11]([H:29])([H:30])[H:31])([H:21])[H:22])([H:19])[H:20])([H:17])[H:18])/[C:3](=[N:12]/[N:14]([H:34])[C:4](=[O:15])[N:13]([H:32])[H:33])/[C:9]([H:23])([H:24])[H:25]
[H:12][c:1]1[c:2]([c:4]([n:8][c:5]([c:3]1[H:14])[N:9]([H:19])[C:6](=[O:11])[N:10]([H:20])[C:7]([H:16])([H:17])[H:18])[H:15])[H:13]
[H:12][c:1]1[c:2]([n:7][c:4]([n:8][c:3]1[H:14])[N:9]([H:18])[C:5](=[O:11])[N:10]([H:19]