In [71]:
import matplotlib

%matplotlib inline
# matplotlib.use("Agg")

import mdtraj as md
import numpy as np
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy
from scipy.spatial.distance import squareform
import pandas as pd
import os

# import nglview as nv
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import IPythonConsole
from IPython.display import SVG
# Configure RDKit's inline (notebook) molecule rendering:
# draw the atom index next to every atom, so that atom indices printed by the
# notebook can be located in the rendered structures.
IPythonConsole.drawOptions.addAtomIndices = True
# Size of the inline molecule renderings — presumably (width, height) in pixels.
IPythonConsole.molSize = 600,600

In [72]:
# Compound index taken from the Snakemake `compound_dir` wildcard; int() also
# rejects a non-numeric wildcard value right away.
compound_index = int(snakemake.wildcards.compound_dir)

# Load the omega conformers from the same PDB file twice: as an mdtraj
# trajectory (t_omega) and as an RDKit molecule (conf_gens).
t_omega = md.load(snakemake.input.pdb)
print(t_omega)
conf_gens = Chem.MolFromPDBFile(
    snakemake.input.pdb, removeHs=False, sanitize=True
)
# MolFromPDBFile reports a parsing/sanitization failure by returning None
# rather than raising. Fail here with a clear message instead of an
# AttributeError on the RemoveAllConformers() call below.
if conf_gens is None:
    raise ValueError(
        "RDKit could not parse or sanitize the conformer PDB: {}".format(
            snakemake.input.pdb
        )
    )
# NOTE: an alternative loader reading snakemake.input.mol2 via
# Chem.MolFromMol2File (guarded by a non-empty-file check) used to be sketched
# here as commented-out code; the PDB input is the one actually used.

# Drop the 3D conformers from the RDKit molecule; the conformer coordinates
# remain available in t_omega.
conf_gens.RemoveAllConformers()
conf_gens

In [73]:
# Load the MD reference topology as an mdtraj trajectory.
t_ref = md.load(snakemake.input.ref_top)
print(t_ref)

# Load the reference molecule as an RDKit mol from the `ref_equil_mol2` input.
# A preceding, unsanitized Chem.MolFromPDBFile(snakemake.input.ref_pdb_equil)
# call was removed: its result was overwritten by this Mol2 parse before it was
# ever used, so it only cost parsing time.
ref_mol = Chem.MolFromMol2File(
    snakemake.input.ref_equil_mol2, removeHs=False, sanitize=True
)
# MolFromMol2File returns None when the file cannot be parsed/sanitized; stop
# with an explicit error instead of an AttributeError on the next call.
if ref_mol is None:
    raise ValueError(
        "RDKit could not parse or sanitize the reference Mol2 file: {}".format(
            snakemake.input.ref_equil_mol2
        )
    )

# Drop the 3D conformers; only the molecular graph of ref_mol is kept.
ref_mol.RemoveAllConformers()
ref_mol

In [75]:
# Try to assign bond orders from template, this does a substruct match. If this fails -> cheminfo struct. is missing atoms
# Only the success/failure of the call matters here; the returned molecule is
# deliberately discarded.
addH = False
try:
    AllChem.AssignBondOrdersFromTemplate(ref_mol, conf_gens)
except ValueError:
    print("Missing atoms in cheminfo!")
    addH = True
    # Add Hs to conf_gens
    conf_gens_add = Chem.AddHs(conf_gens)  # Gets stuck with this: addResidueInfo=True

    # Show the H-completed structure after matching the original structure
    # onto it (this call raises ValueError itself if that match fails).
    display(AllChem.AssignBondOrdersFromTemplate(conf_gens, conf_gens_add))

    # hits_H[i] is the index in conf_gens_add of the atom matched to atom i of
    # conf_gens.
    hits_H = list(conf_gens_add.GetSubstructMatch(conf_gens))
    # GetSubstructMatch returns an empty tuple when there is no match; without
    # this check every atom would silently be classified as an added hydrogen.
    if len(hits_H) != conf_gens.GetNumAtoms():
        raise ValueError(
            "Could not map the original cheminfo structure onto its "
            "H-completed copy ({} of {} atoms matched)".format(
                len(hits_H), conf_gens.GetNumAtoms()
            )
        )
    # store where the added H's are: every atom of conf_gens_add that is not
    # the image of an original conf_gens atom.
    additional_Hs = set(range(conf_gens_add.GetNumAtoms())) - set(hits_H)
    print(additional_Hs)

In [76]:
# Display the cheminformatics molecule again for visual inspection
# (a bare last expression is rendered as the cell output).
conf_gens

In [81]:
# Now match to ref_mol
# Work on the H-completed structure when hydrogens had to be added, otherwise
# on the structure as loaded. Only the selected name is evaluated, so
# conf_gens_add does not need to exist when addH is False.
mol_cheminfo = conf_gens_add if addH else conf_gens
mol_cheminfo = AllChem.AssignBondOrdersFromTemplate(ref_mol, mol_cheminfo)
# Keep the notebook-level name pointing at the bond-order-corrected molecule.
if addH:
    conf_gens_add = mol_cheminfo
else:
    conf_gens = mol_cheminfo

# missing_list[i] is the index in the cheminfo molecule of the atom matched to
# atom i of ref_mol. Despite its name it holds the *matched* atoms, listed in
# ref_mol atom order.
missing_list = list(mol_cheminfo.GetSubstructMatch(ref_mol))
# An empty tuple is returned when there is no match; stop here instead of
# silently producing an empty/incomplete atom mapping.
if len(missing_list) != ref_mol.GetNumAtoms():
    raise ValueError(
        "ref_mol could not be matched onto the cheminfo structure "
        "({} of {} ref_mol atoms matched)".format(
            len(missing_list), ref_mol.GetNumAtoms()
        )
    )

# Find missing atom indices: atoms of the cheminfo molecule that have no
# partner in ref_mol (indices refer to the cheminfo molecule).
missing = set(range(mol_cheminfo.GetNumAtoms())) - set(missing_list)
print("The following atoms of the cheminfo structure have no counterpart in ref_mol, or couldn't be matched")
print(missing)

# Draw the cheminfo molecule with the unmatched atoms highlighted.
d = rdMolDraw2D.MolDraw2DSVG(500, 500)  # or MolDraw2DCairo to get PNGs
rdMolDraw2D.PrepareAndDrawMolecule(d, mol_cheminfo, highlightAtoms=sorted(missing))
d.FinishDrawing()
SVG(d.GetDrawingText())

In [83]:
# Conformer coordinates of the omega structure, indexed below as
# [frame, atom, xyz] in the atom order of t_omega.
xyz_omega = t_omega.xyz

# Omega -> omega with additional Hs order
if addH:
    # hits_H[i] is the index, in the H-completed molecule, of the atom matched
    # to omega atom i. The coordinates of omega atom i therefore have to be
    # *written to* position hits_H[i] (a scatter). Indexing xyz_omega with
    # hits_H instead (a gather) is only equivalent when hits_H is the identity
    # mapping, and fails or mixes up atoms otherwise.
    n_atoms_with_h = len(hits_H) + len(additional_Hs)
    xyz_with_h = np.zeros(
        (xyz_omega.shape[0], n_atoms_with_h, 3), dtype=xyz_omega.dtype
    )
    # The added Hs have no omega coordinates; they keep the zeros set above.
    # This assignment raises if the atom count of t_omega does not match the
    # number of matched atoms, instead of silently misaligning coordinates.
    xyz_with_h[:, hits_H, :] = xyz_omega
    xyz_omega = xyz_with_h

# Reorder (& cut out excess H's) according to MD ref:
# missing_list[i] is the index of the atom matched to reference atom i.
xyz_omega = xyz_omega[:, missing_list, :]

In [84]:
# Overwrite ref xyz with omega coordinates
# (xyz_omega has been reordered via missing_list in the preceding cell).
# NOTE(review): only the coordinates are replaced; other per-frame data of
# t_ref (e.g. time, unit cell) is left untouched — confirm this is fine when
# the number of conformers differs from the number of frames in ref_top.
t_ref.xyz = xyz_omega
# Display the trajectory summary as the cell output.
t_ref

In [85]:
# Write t_ref (reference topology, now carrying the omega coordinates) as a
# PDB file to the Snakemake output `mol_aligned`.
t_ref.save_pdb(snakemake.output.mol_aligned)