In [None]:
import math
import itertools
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import rdkit as rd
from rdkit import Chem

from rdkit.Chem import Draw, PandasTools

from rdkit.Chem import Descriptors,AllChem, rdChemReactions, rdDepictor
from rdkit.Chem import rdRGroupDecomposition as r_decomp
# %matplotlib qt

In [None]:
from openmm.app import *
from openmm import *
from openmm.unit import *
from sys import stdout

In [2]:
import nglview as nv

# Open the sample structure file that nglview exposes as nv.datafiles.PDB in a
# viewer widget; the bare `view` on the last line renders it as the cell output.
view = nv.show_structure_file(nv.datafiles.PDB)
view

NGLWidget()

In [3]:
# Remove every representation currently attached to the viewer so that the
# licorice style added below is the only one left.
view.clear_representations()

# add a licorice representation restricted to the 'not hydrogen' selection
view.add_licorice('not hydrogen')

In [None]:
# The three bare string literals below are evaluated and discarded; only the
# final expression (the SMARTS query molecule) becomes the cell output.
# NOTE(review): the strings appear to be alternative SMILES spellings kept as
# scratch notes — confirm whether they are still needed.
'C1CCC(Br)C=C1C'
'CC1CCCC(Br)C=1'
'CC1=CC(Br)CCC1'
Chem.MolFromSmarts('C=CC=C')

# Viewer

In [None]:
import subprocess

# Start PyMOL as a child process without waiting for it; the Popen handle is
# kept in `cmd`.
# NOTE(review): '-Rq' presumably means "start the RPC server" plus "quiet" —
# confirm against the installed PyMOL. No visible cell waits on or terminates
# this process.
cmd = subprocess.Popen(['pymol', '-Rq'])

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import PyMol
from rdkit.Chem.Subshape import SubshapeAligner, SubshapeBuilder, SubshapeObjects
# Show a molecule through RDKit's PyMol.MolViewer and fetch a PNG of the scene.
# The commented-out lines are a disabled subshape-alignment experiment and are
# kept as they were.
# NOTE(review): `mol1` is not defined by any cell above this one, so this cell
# raises NameError on a fresh kernel (Restart & Run All) — confirm which
# molecule is supposed to be displayed here.
# mols = [m for m in Chem.SDMolSupplier('cdk2.sdf')]
# for m in mols:
#     molid = m.GetProp('id')
#     m.SetProp('_Name', molid) #_Name prop is required for align with shape-it

# ref = Chem.Mol(mols[0].ToBinary())
# probe = Chem.Mol(mols[1].ToBinary())
# ref,probe = mol1,mol1
# AllChem.CanonicalizeConformer(ref.GetConformer())
# `builder` is created but, with the lines below commented out, never used in
# this cell.
builder = SubshapeBuilder.SubshapeBuilder()
# builder.gridDims = (20.,20.,10)
# builder.gridSpacing=0.5
# builder.winRad = 4.
# refShape = builder.GenerateSubshapeShape(mol1)
# probeShape = builder.GenerateSubshapeShape(probe)

# aligner = SubshapeAligner.SubshapeAligner()
# algs = aligner.GetSubshapeAlignments(ref, refShape, probe, probeShape, builder)

# alg = algs[0]
# AllChem.TransformMol(probe, alg.transform)
# newprobeShape = builder(probe)

# Create the viewer object, send the molecule under the object name 'ref',
# then pass a raw command string through the viewer's `server` attribute.
v = PyMol.MolViewer()
v.ShowMol(mol1, name='ref')
# SubshapeObjects.DisplaySubshape(v, refShape, 'ref_Shape')
v.server.do('set transparency=0.5')
# v.ShowMol(probe, name='probe', showOnly=False)
# SubshapeObjects.DisplaySubshape(v, newprobeShape, 'prob_Shape')
# v.server.do('set transparency=0.5')

# Last expression of the cell: the PNG returned by the viewer is the output.
v.GetPNG()

# Start

In [None]:
# Four amino acids written as SMILES (glycine, phenylalanine, histidine,
# cysteine).
smiles_list = [
    'C(C(=O)O)N',
    'N[C@@H](CC1=CC=CC=C1)C(O)=O',
    'O=C([C@H](CC1=CNC=N1)N)O',
    'C([C@@H](C(=O)O)N)S',
]

# Parse each SMILES into an RDKit molecule, keeping the input order.
mol_list = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]

# Draw all molecules together in one grid, up to four per row.
img = Draw.MolsToGridImage(mol_list, molsPerRow=4)
img

In [None]:
# Substructure query: a single nitrogen atom, parsed from the SMILES 'N'.
pattern = Chem.MolFromSmiles('N')

# Print, one line per molecule in mol_list, whether it contains a match for
# the query.
for mol in mol_list:
    print(mol.HasSubstructMatch(pattern))


In [None]:
# Morgan fingerprint of glycine as a 1024-bit vector (second argument: radius 2).
glycine = Chem.MolFromSmiles('C(C(=O)O)N')
fp = AllChem.GetMorganFingerprintAsBitVect(glycine, 2, nBits=1024)
#currently, it's a long list of 0s and 1s.
# NOTE(review): fp_arr is created with length 1; this relies on
# ConvertToNumpyArray resizing the destination array to the fingerprint
# length — confirm for the installed RDKit version.
fp_arr = np.zeros((1, ))
Chem.DataStructs.ConvertToNumpyArray(fp, fp_arr)
# Cell output: indices of the non-zero entries of the converted array.
np.nonzero(fp_arr)

In [None]:
# Recompute the glycine fingerprint, this time handing RDKit the dictionary
# `bi` through the bitInfo argument so it gets filled in during the call.
bi = {}
fp = AllChem.GetMorganFingerprintAsBitVect(glycine, 2, nBits=1024, bitInfo=bi)

# Same dense-array conversion as in the previous cell; the nonzero() result is
# a mid-cell expression and is therefore not displayed.
fp_arr = np.zeros((1, ))
Chem.DataStructs.ConvertToNumpyArray(fp, fp_arr)
np.nonzero(fp_arr)

# One (molecule, bit id, bit info) triple per set bit, drawn four per row and
# labelled with the bit id.
on_bits = list(fp.GetOnBits())
prints = [(glycine, bit, bi) for bit in on_bits]
bit_legends = [str(bit) for bit in on_bits]
Draw.DrawMorganBits(prints, molsPerRow=4, legends=bit_legends)


In [None]:
# Load the amino-acid table from a '|'-separated text file whose first line is
# the header. The code below reads its 'name' and 'smiles' columns.
df = pd.read_csv('amino_acids.smiles', sep='|', header=0)
# Parse every SMILES string into an RDKit molecule.
df['mol'] = df['smiles'].apply(Chem.MolFromSmiles)
# Row-wise apply used only for its side effect: each row's name is stored on
# its molecule as the '_Name' property. The object returned by apply is
# discarded.
df[['name', 'mol']].apply(
    lambda row: (row['name'], row['mol'].SetProp('_Name', row['name'])),
    axis='columns',
)
# Exchange rows 0 and 7 through tuple assignment.
# NOTE(review): this swaps correctly only if the df.loc[...] values on the
# right-hand side are copies rather than views into the frame — confirm with
# the pandas version in use.
df.loc[0], df.loc[7] = df.loc[7], df.loc[0]
df.head()

In [None]:
# Print the mol-block text RDKit generates for the molecule stored at label 0.
print(Chem.MolToMolBlock(df['mol'].loc[0]))

In [None]:
# Grid image of every molecule in the table, with sub-image size (400, 90).
Draw.MolsToGridImage(mols=df['mol'], subImgSize=(400, 90))

# Decomposition

In [None]:
# Unlabelled core parsed from the SMILES 'NCC(=O)O'.
core = Chem.MolFromSmiles('NCC(=O)O')
# Variant of the core carrying two mapped dummy atoms ([*:1] and [*:2]).
# NOTE(review): core2 is not referenced by any other visible cell.
core2 = Chem.MolFromSmiles('[*:1][C@H](N[*:2])C(O)=O')

In [None]:
# Render the core to an image and display it with matplotlib.
plt.imshow(Draw.MolToImage(core))

In [None]:
# NOTE(review): the next line builds a temporary RGroupDecompositionParameters
# object, sets onlyMatchAtRGroups on it and immediately discards it, so it has
# no effect on the RGroupDecompose call below (which is not given any
# parameters object). Confirm whether the flag was meant to be passed in.
r_decomp.RGroupDecompositionParameters().onlyMatchAtRGroups = True
# Decompose every molecule of the table against `core`. `fails` is used below
# as the collection of positions to leave out.
res, fails = r_decomp.RGroupDecompose([core], df['mol'], asRows=False)

# Tabulate the decomposition alongside the molecules whose position is not
# listed in `fails`.
PandasTools.RGroupDecompositionToFrame(
    res,
    [df['mol'][i] for i in range(len(df['mol'])) if i not in fails],
)

# Find bonds

In [None]:
# Example molecule with two C(=O)N links, used by the matching and reaction
# cells below; the bare `test` displays it as the cell output.
test = Chem.MolFromSmiles(
    '[H]N[C@@H]([C@@H](C)O)C(=O)N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](CCSC)C(O)=O')
test

In [None]:
# SMARTS query for two consecutive N-C-C(=O) units; shown as the cell output.
core_bonds = Chem.MolFromSmarts('NCC(=O)NCC(=O)')
core_bonds

In [None]:
# Compute a 2D layout for the molecule, with CoordGen set as the preferred
# coordinate generator.
rdDepictor.SetPreferCoordGen(True)
rdDepictor.Compute2DCoords(test)

# Atom indices of every match of the backbone query, flattened into one list
# (an atom shared by overlapping matches appears more than once).
matches = test.GetSubstructMatches(core_bonds)
hit_atoms = list(itertools.chain.from_iterable(matches))

# A bond is highlighted when both of its end atoms are among the matched atoms.
hit_bonds = [
    bond.GetIdx()
    for bond in test.GetBonds()
    if bond.GetBeginAtomIdx() in hit_atoms and bond.GetEndAtomIdx() in hit_atoms
]

Draw.MolToImage(test, highlightAtoms=hit_atoms, highlightBonds=hit_bonds)


In [None]:
# Reaction template written as reaction SMARTS. The reactant side is an
# N-C-C(=O)-N-C-C(=O) chain with atom maps 1-8; on the product side the chain
# is split between C:3 and N:5, the first product carrying an extra O on C:3
# and the second product being the N:5-C:6-C:7(=O:8) fragment.
reaction = rdChemReactions.ReactionFromSmarts(
    '[N:1][C:2][C:3](=[O:4])[N:5][C:6][C:7](=[O:8])>>[N:1][C:2][C:3](=[O:4])O.[N:5][C:6][C:7](=[O:8])'
)
reaction

In [None]:
# The reaction template has a single reactant, so the reactant list holds only
# the example molecule.
reactants = [test]

In [None]:
# Run the reaction (at most 100 product sets) and keep the first product set.
# NOTE(review): the [0] index raises IndexError if the reaction yields nothing.
products = reaction.RunReactants(reactants,maxProducts=100)[0]

In [None]:
# Display the second molecule of the selected product set.
# NOTE(review): the product is drawn without a SanitizeMol call, unlike in
# is_peptide — confirm that drawing works for unsanitized reaction products.
plt.imshow(Draw.MolToImage(products[1]))

In [None]:
# Check peptides
def check_identity(mol1, mol2):
    """Tell whether two molecules match each other as substructures.

    Each molecule is queried against the other with ``useChirality=True``;
    both queries are always performed.

    Args:
        mol1: object exposing ``HasSubstructMatch(other, useChirality=...)``.
        mol2: object exposing ``HasSubstructMatch(other, useChirality=...)``.

    Returns:
        bool: True only when ``mol1`` matches ``mol2`` and ``mol2`` matches
        ``mol1``.
    """
    forward = mol1.HasSubstructMatch(mol2, useChirality=True)
    backward = mol2.HasSubstructMatch(mol1, useChirality=True)
    return bool(forward) and bool(backward)


def check_if_in_list(mol, mols):
    """Tell whether ``mol`` is identical to at least one entry of ``mols``.

    Identity is decided by ``check_identity``; candidates are tried in order
    and the search stops at the first hit.

    Args:
        mol: the molecule to look for.
        mols: iterable of candidate molecules.

    Returns:
        bool: True on the first identical candidate, False when there is none
        (including when ``mols`` is empty).
    """
    return any(check_identity(mol, candidate) for candidate in mols)


def is_peptide(mol, known_amino_acids, reaction):
    """Recursively decide whether ``mol`` is built only from known amino acids.

    A molecule found in ``known_amino_acids`` counts as a peptide. Otherwise
    the reaction is applied once; if it yields nothing the answer is False,
    and if it yields a product set both of its first two products are
    sanitized and must themselves pass this check.

    Args:
        mol: molecule to classify.
        known_amino_acids: list of reference molecules handed to
            ``check_if_in_list``.
        reaction: object exposing ``RunReactants(reactants, maxProducts=...)``.

    Returns:
        bool: True when ``mol`` is a known amino acid or splits into two
        fragments that both pass the check.
    """
    # Base case: the molecule itself is one of the reference amino acids.
    if check_if_in_list(mol, known_amino_acids):
        return True

    # Ask for a single product set only; the first applicable site is enough.
    outcomes = reaction.RunReactants((mol, ), maxProducts=1)
    if not outcomes:
        # The reaction does not apply to this molecule.
        return False

    first_fragment = outcomes[0][0]
    Chem.SanitizeMol(first_fragment)
    second_fragment = outcomes[0][1]
    Chem.SanitizeMol(second_fragment)

    # Both fragments are always examined, even when the first one fails.
    first_ok = is_peptide(first_fragment, known_amino_acids, reaction)
    second_ok = is_peptide(second_fragment, known_amino_acids, reaction)
    return first_ok and second_ok


In [None]:
# Build a small benchmark table and classify every row with is_peptide.
# Columns: 'name', 'mol' (RDKit molecule), 'type' (the expected class, written
# by hand) and 'result' (the boolean returned by is_peptide).

# 5 amino acids
amino_acid_rows = df[['name', 'mol']].assign(type='amino acid').iloc[:5]

# Hand-written examples as (name, SMILES, expected type). The three real
# peptides are labelled 'peptide'; they were previously tagged 'non-peptide'
# by a copy-paste slip that contradicted their own comments.
extra_structures = [
    # di-peptide: ArgAla
    ('ArgAla',
     '[H]N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](C)C(O)=O',
     'peptide'),
    # oligo-peptide: ArgAlaThreMeth
    ('ArgAlaThreMeth',
     '[H]N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](C)C(=O)N[C@@H]([C@@H](C)O)C(=O)N[C@@H](CCSC)C(O)=O',
     'peptide'),
    # longer-peptide: ATTAMSSTA
    ('ATTAMSSTA',
     'CSCC[C@H](NC(=O)[C@H](C)NC(=O)[C@@H](NC(=O)[C@@H](NC(=O)[C@H](C)N)[C@@H](C)O)'
     '[C@@H](C)O)C(=O)N[C@@H](CO)C(=O)N[C@@H](CO)C(=O)N[C@@H]([C@@H](C)O)C(=O)N[C@@H](C)C(O)=O',
     'peptide'),
    # non-peptide 1
    ('non-peptide 1',
     '[H]N[C@@H](CCCNC(N)=N)C(=O)N[C@@H](C)C(O)',
     'non-peptide'),
    # non-peptide 2
    ('non-peptide 2',
     '[H]N[C@@H](CCCNC(N)=N)CC(=O)N[C@@H](C)C(O)=O',
     'non-peptide'),
]

# Build all extra rows in one go instead of one single-row frame per molecule.
extra_rows = pd.DataFrame(
    [
        {'name': name, 'mol': Chem.MolFromSmiles(smi), 'type': expected_type}
        for name, smi, expected_type in extra_structures
    ],
    columns=['name', 'mol', 'type'],
)
# MolFromSmiles returns None for an unparsable SMILES; fail here with a clear
# message rather than deep inside is_peptide.
assert extra_rows['mol'].notna().all(), 'a hand-written SMILES failed to parse'

structures = pd.concat(
    [amino_acid_rows, extra_rows],
    axis='index',
    ignore_index=True,
    sort=False,
)

# Build the reference list once rather than once per row.
known_amino_acids = df['mol'].to_list()
structures['result'] = structures['mol'].apply(lambda mol: is_peptide(
    mol,
    known_amino_acids=known_amino_acids,
    reaction=reaction,
))
structures

In [None]:
import openmm