## hb-search PyMOL integration

### Init

In [1]:
import pandas as pd
import sys
import subprocess
import os
from pymol import cmd, stored
import platform
import pathlib
import xmlrpc.client as xmlrpclib
# Rebind `cmd` to an XML-RPC proxy for the server at localhost:9123. This
# shadows the `cmd` imported from pymol above, so every later cmd.* call is a
# remote call (presumably to a PyMOL instance started with its RPC server
# enabled -- TODO confirm).
cmd = xmlrpclib.ServerProxy('http://localhost:9123')

In [2]:
def form4cmd(pdbID, x):
    '''
    Convert one hb-search atom identifier into a PyMOL selection path.

    Example: "A:183:LEU:O" with pdbID "2akr" becomes "/2akr//A/183/O"
    (the residue name is dropped).
    :param pdbID: pdb code of the handled structure
    :param x: colon-separated entry "chain:resID:resName:atom"
    :return: selection string "/<pdbID>//<chain>/<resID>/<atom>"
    '''
    # The residue name is parsed only to enforce the four-field layout.
    chain, resID, _resname, atom = x.split(':')
    return f'/{pdbID}//' + '/'.join((chain, resID, atom))

In [3]:
def formID(pdbID, x):
    '''
    Build the short atom label used for naming PyMOL distance objects.

    Example: "/2akr//A/183/O" becomes "A183O".
    :param pdbID: pdb code of the handled structure (not used here)
    :param x: selection path in the form "/<pdb>//<chain>/<resID>/<atom>"
    :return: chain, residue number and atom name joined without separator
    '''
    # Splitting on '/' yields empty strings for the leading and the doubled
    # slash; only the four non-empty fields are of interest.
    fields = [field for field in x.split('/') if field]
    _pdbstr, chain, resID, atom = fields
    return f"{chain}{resID}{atom}"

In [4]:
def pymolDisplay(df, pdbID):
    '''
    Draw every bond listed in df as a distance object and show the residues
    involved as sticks.

    :param df: dataframe with the columns ACC, DONO, ACC_ID and DONO_ID
    :param pdbID: pdb code of the handled structure (not used here)
    :return: names of the created distance objects ("<ACC_ID>-<DONO_ID>")
    '''
    bondList = []
    acc_sel = []
    dono_sel = []

    pairs = zip(df['ACC'].tolist(), df['DONO'].tolist(),
                df['ACC_ID'].tolist(), df['DONO_ID'].tolist())
    for acc_atom, dono_atom, acc_label, dono_label in pairs:
        bond_name = f"{acc_label}-{dono_label}"

        # One distance object per acceptor/donor pair
        cmd.distance(bond_name, acc_atom, dono_atom)
        bondList.append(bond_name)

        # Keep the first five '/'-separated fields, i.e. drop the atom name
        dono_sel.append('/'.join(dono_atom.split('/')[:5]))
        acc_sel.append('/'.join(acc_atom.split('/')[:5]))

    cmd.group("HBonds", " ".join(bondList))
    cmd.hide("labels", "HBonds")

    # Acceptors first, then donors: a temporary selection is created, used to
    # switch from cartoon to sticks, and deleted again.
    for sel_name, residues in (('all_acceptors', acc_sel),
                               ('all_donors', dono_sel)):
        cmd.select(sel_name, ' + '.join(residues))
        cmd.hide('cartoon', sel_name)
        cmd.show('sticks', sel_name)
        cmd.delete(sel_name)

    return bondList

In [7]:
cmd.reinitialize()
# main() is defined in a later cell and must have been executed before this
# one. It returns four values (df, bondlist, hbs, pdb_path); unpack all of
# them so the call does not fail with "too many values to unpack".
df, bondlist, hbs, pdb_path = main('4awn')

NameError: name 'main' is not defined

In [None]:
#cmd.reinitialize()
#df = hbsearch('4awn')

In [5]:
def startHBsearch(pdb_path, hb_file, pse_file, solvent_key, connections ):
    '''
    Run the hb-search executable on a PDB file and return its output.

    The executable is looked up relative to the current working directory,
    in a sub-directory named after platform.system().
    :param pdb_path: path of the PDB file handed to hb-search
    :param hb_file: value passed to the -hb option
    :param pse_file: path exported to hb-search through the PSE_FILE
        environment variable
    :param solvent_key: value passed to the -solv option
    :param connections: value passed to the -con option
    :return: text written to stdout by hb-search
    :raises subprocess.CalledProcessError: hb-search exited with a non-zero
        status
    :raises FileNotFoundError: the hb-search executable does not exist
    '''
    # Export the file chosen by the caller (it used to be hard-coded, which
    # silently ignored the pse_file argument).
    os.environ['PSE_FILE'] = os.fspath(pse_file)

    # Determine operation system
    osys = platform.system()

    # An argument list without a shell keeps paths containing spaces or shell
    # metacharacters intact; os.path.join picks the separator for the platform.
    command = [
        os.path.join('.', osys, 'hb-search'),
        '-hb', str(hb_file),
        '-solv', str(solvent_key),
        '-con', str(connections),
        str(pdb_path),
    ]
    completed = subprocess.run(command, capture_output=True, check=True,
                               text=True)
    return completed.stdout

In [6]:
def readInHBS(pdbID, hbsfile):
    '''
    Extract the HBOND entries from hb-search output.

    Only lines whose first whitespace-separated token is "HBOND" are used;
    their second and fourth token are taken as acceptor and donor. All other
    lines are skipped, so their token count does not matter.
    :param pdbID: pdb code of the handled structure
    :param hbsfile: complete hb-search output as one string
    :return: dataframe with the columns ACC, DONO (selection paths built by
        form4cmd) and ACC_ID, DONO_ID (labels built by formID); the index
        holds the line number of each entry within hbsfile
    :raises ValueError: an HBOND line has fewer than four tokens
    '''
    line_numbers = []
    columns = {'ACC': [], 'DONO': [], 'ACC_ID': [], 'DONO_ID': []}

    for line_no, line in enumerate(hbsfile.split('\n')):
        tokens = line.split()
        if not tokens or tokens[0] != 'HBOND':
            continue
        if len(tokens) < 4:
            raise ValueError(f'Malformed HBOND entry in line {line_no}: {line!r}')

        # Layout of an entry: HBOND <acceptor> <separator> <donor> ...
        acc_path = form4cmd(pdbID, tokens[1])
        dono_path = form4cmd(pdbID, tokens[3])

        line_numbers.append(line_no)
        columns['ACC'].append(acc_path)
        columns['DONO'].append(dono_path)
        columns['ACC_ID'].append(formID(pdbID, acc_path))
        columns['DONO_ID'].append(formID(pdbID, dono_path))

    return pd.DataFrame(columns, index=pd.Index(line_numbers, dtype='int64'))

In [9]:
# NOTE: `__file__` is not defined in this interactive session, so this
# expression raises the NameError recorded below.
os.path.dirname(os.path.abspath(__file__))

NameError: name '__file__' is not defined

In [7]:
SCRPT_PATH = '/home/juk/Python_WS21-22/hbond/HBonds_Project/hbonds_search_pymol'


def _save_structure(pdb_id: str) -> str:
    '''
    Create SCRPT_PATH/<pdb_id> if needed, call cmd.save on
    SCRPT_PATH/<pdb_id>/<pdb_id>.pdb and return that path.
    '''
    target_dir = os.path.join(SCRPT_PATH, pdb_id)
    # exist_ok: re-running the notebook for the same structure must not fail
    os.makedirs(target_dir, exist_ok=True)
    pdb_path = os.path.join(target_dir, f'{pdb_id}.pdb')
    cmd.save(pdb_path)
    return pdb_path


def check_PDB(PDBstr:str):
    '''
    Resolve PDBstr to a structure, make it available through `cmd` and save
    it below SCRPT_PATH.

    PDBstr is interpreted, in this order, as
      1. a file path (contains "/" or "\\" and ".pdb"): loaded with cmd.load
      2. the name of an object listed by cmd.get_object_list('all')
      3. a 4-5 character ID starting with a digit: fetched with cmd.fetch
    :param PDBstr: file path, object name or PDB-ID
    :return: tuple (PDB_ID, path of the saved pdb file)
    :raises SystemExit: the file does not exist or cmd.fetch reported -1
    :raises ValueError: PDBstr matches none of the three forms
    '''
    looks_like_path = ('/' in PDBstr or '\\' in PDBstr) and '.pdb' in PDBstr
    if looks_like_path:
        if not os.path.exists(PDBstr):
            print(f'ERROR: {PDBstr} does not exist!', file=sys.stderr)
            sys.exit(1)
        cmd.load(PDBstr)
        # File name without directory and extension, e.g. "./2akr.pdb" -> "2akr"
        PDB_ID = os.path.basename(os.path.normpath(PDBstr)).split('.')[0]
        return PDB_ID, _save_structure(PDB_ID)

    if PDBstr in cmd.get_object_list('all'):
        return PDBstr, _save_structure(PDBstr)

    if 4 <= len(PDBstr) <= 5 and PDBstr[0].isnumeric():
        if cmd.fetch(PDBstr) == -1:
            print(f'ERROR-fetch: unable to fetch "{PDBstr}" ', file=sys.stderr )
            sys.exit(1)
        return PDBstr, _save_structure(PDBstr)

    # Raising a plain string is a TypeError; use a proper exception instead.
    raise ValueError(f"ERROR: {PDBstr} is not a valid PDB-ID!")

In [8]:
#SCRPT_PATH = os.path.dirname(os.path.abspath(__file__))
SCRPT_PATH = '/home/juk/Python_WS21-22/hbond/HBonds_Project/hbonds_search_pymol'

def main(PDBstr: str,
         hb_file: str = os.path.join(SCRPT_PATH, "hb-define.txt"),
         pse_file: str = os.path.join(SCRPT_PATH, "period-table-info.txt"),
         solvent_key: str = "NONE",
         connections: str = "0"):
    '''
    Run the whole pipeline for one structure: resolve the structure with
    check_PDB, run hb-search, parse its HBOND entries and display them.

    :param PDBstr: structure identifier handed to check_PDB
    :param hb_file: forwarded to startHBsearch
    :param pse_file: forwarded to startHBsearch
    :param solvent_key: forwarded to startHBsearch
    :param connections: forwarded to startHBsearch
    :return: tuple (df, bondlist, hbs, pdb_path) -- dataframe of HBOND
        entries, list returned by pymolDisplay, raw hb-search output and the
        path returned by check_PDB
    '''
    PDB_ID, pdb_path = check_PDB(PDBstr)

    hbs = startHBsearch(pdb_path=pdb_path,
                        hb_file=hb_file,
                        pse_file=pse_file,
                        solvent_key=solvent_key,
                        connections=connections)
    df = readInHBS(PDB_ID, hbs)

    return df, pymolDisplay(df, PDB_ID), hbs, pdb_path

In [9]:
# Work from the script directory: startHBsearch starts hb-search through a
# path relative to the current working directory.
os.chdir(SCRPT_PATH)

In [11]:
#cmd.fetch('4awn')
# Run the pipeline on a local PDB file; main() returns the bond dataframe,
# the list from pymolDisplay, the raw hb-search output and the saved pdb path.
cmd.reinitialize()
df, bondlist, hbs, pdb_path = main('./2akr.pdb')

True


In [22]:
# Print every dataframe row as a plain list [ACC, DONO, ACC_ID, DONO_ID].
for i in df.values.tolist():
    print(i)

['/2akr//A/7/OD1', '/2akr//A/1165/O', 'A7OD1', 'A1165O']
['/2akr//A/8/N', '/2akr//A/106/O', 'A8N', 'A106O']
['/2akr//A/8/OH', '/2akr//A/183/O', 'A8OH', 'A183O']
['/2akr//A/9/N', '/2akr//A/1038/O', 'A9N', 'A1038O']
['/2akr//A/11/NE', '/2akr//A/1036/O', 'A11NE', 'A1036O']
['/2akr//A/11/NH1', '/2akr//A/1008/O', 'A11NH1', 'A1008O']
['/2akr//A/11/NH1', '/2akr//A/1025/O', 'A11NH1', 'A1025O']
['/2akr//A/11/NH2', '/2akr//A/1025/O', 'A11NH2', 'A1025O']
['/2akr//A/11/NH2', '/2akr//A/1036/O', 'A11NH2', 'A1036O']
['/2akr//A/14/NE2', '/2akr//A/16/OG', 'A14NE2', 'A16OG']
['/2akr//A/14/OE1', '/2akr//A/73/OH', 'A14OE1', 'A73OH']
['/2akr//A/16/N', '/2akr//A/1093/O', 'A16N', 'A1093O']
['/2akr//A/16/OG', '/2akr//A/14/NE2', 'A16OG', 'A14NE2']
['/2akr//A/16/OG', '/2akr//A/26/OG1', 'A16OG', 'A26OG1']
['/2akr//A/17/OG', '/2akr//A/97/OE1', 'A17OG', 'A97OE1']
['/2akr//A/17/OG', '/2akr//A/1107/O', 'A17OG', 'A1107O']
['/2akr//A/19/N', '/2akr//A/23/O', 'A19N', 'A23O']
['/2akr//A/20/N', '/2akr//A/23/O', 'A20N', 'A

In [14]:
# Project directory of structure 2akr (SCRPT_PATH plus the PDB-ID), hard-coded.
PROJ_DIR = '/home/juk/Python_WS21-22/hbond/HBonds_Project/hbonds_search_pymol/2akr'

In [15]:
# Switch into the per-structure project directory.
os.chdir(PROJ_DIR)

In [16]:
# Switch back to the script directory.
os.chdir(SCRPT_PATH)

In [17]:
# NOTE: the result of os.getcwd() is discarded; the call has no effect here.
os.getcwd()
# Store the raw hb-search output as hbs_out.hb inside the project directory.
with open(os.path.join(PROJ_DIR, 'hbs_out.hb'), 'w') as f:
    f.write(hbs)


In [18]:
# Enter the project directory so that ./hbs_out.hb resolves to the file
# written above.
os.chdir(PROJ_DIR)

In [19]:
# Preview of the hb-network command line. NOTE: `osys` is only assigned in the
# following cell, hence the NameError recorded below.
os.path.normpath(f'../{osys}/hb-network ./hbs_out.hb ')

NameError: name 'osys' is not defined

In [20]:
## FUNCTIONIZE

# Determine operation system
osys = platform.system()

# Run hb-network on the stored hb-search output. Both paths are relative to
# the current working directory. An argument list without a shell avoids
# quoting problems, and os.path.join picks the separator for the platform.
# check=True raises CalledProcessError on a non-zero exit status.
output_string = subprocess.run(
    [os.path.join('..', osys, 'hb-network'), './hbs_out.hb'],
    capture_output=True, check=True, text=True)


In [21]:
# Enter the CLUSTER sub-directory (presumably written by the hb-network run
# above -- TODO confirm).
os.chdir('CLUSTER')

In [22]:
def readInCLUS(pdbID, cl_file):
    '''
    Extract the HBOND entries from the content of one cluster file.

    Only lines whose first whitespace-separated token is "HBOND" are used;
    their second and fourth token are taken as acceptor and donor. All other
    lines are skipped, so their token count does not matter.
    :param pdbID: pdb code of the handled structure
    :param cl_file: complete content of a cluster file as one string
    :return: dataframe with the columns ACC, DONO (selection paths built by
        form4cmd) and ACC_ID, DONO_ID (labels built by formID); the index
        holds the line number of each entry within cl_file
    :raises ValueError: an HBOND line has fewer than four tokens
    '''
    line_numbers = []
    columns = {'ACC': [], 'DONO': [], 'ACC_ID': [], 'DONO_ID': []}

    for line_no, line in enumerate(cl_file.split('\n')):
        tokens = line.split()
        if not tokens or tokens[0] != 'HBOND':
            continue
        if len(tokens) < 4:
            raise ValueError(f'Malformed HBOND entry in line {line_no}: {line!r}')

        # Layout of an entry: HBOND <acceptor> <separator> <donor>
        acc_path = form4cmd(pdbID, tokens[1])
        dono_path = form4cmd(pdbID, tokens[3])

        line_numbers.append(line_no)
        columns['ACC'].append(acc_path)
        columns['DONO'].append(dono_path)
        columns['ACC_ID'].append(formID(pdbID, acc_path))
        columns['DONO_ID'].append(formID(pdbID, dono_path))

    df = pd.DataFrame(columns, index=pd.Index(line_numbers, dtype='int64'))
    print(df)
    return df

In [31]:
# Display df_clus. NOTE: it is only assigned in the following cell, hence the
# NameError recorded below.
df_clus

NameError: name 'df_clus' is not defined

In [23]:
# Parse every non-empty file of the current working directory with
# readInCLUS. df_clus maps a running counter (0, 1, 2, ...) to the dataframe
# of one file; empty files do not consume a counter value.
df_clus = {}
k = 0
for c, cl_file in enumerate(os.listdir()):
    if os.path.getsize(cl_file) > 0:
        with open(cl_file, 'r') as cl_str:
            cluster_text = cl_str.read()
        df_clus[k] = readInCLUS('2akr', cluster_text)
        k += 1

               ACC             DONO  ACC_ID DONO_ID
0  /2akr//A/188/NZ  /2akr//A/1172/O  A188NZ  A1172O
1  /2akr//A/1172/O  /2akr//A/188/NZ  A1172O  A188NZ
                ACC              DONO   ACC_ID  DONO_ID
0  /2akr//A/127/NH1  /2akr//A/134/OE1  A127NH1  A134OE1
1  /2akr//A/134/OE1  /2akr//A/127/NH1  A134OE1  A127NH1
                ACC              DONO   ACC_ID  DONO_ID
0  /2akr//A/121/NE2   /2akr//A/1187/O  A121NE2   A1187O
1   /2akr//A/1094/O   /2akr//A/1187/O   A1094O   A1187O
2   /2akr//A/1187/O   /2akr//A/1094/O   A1187O   A1094O
3   /2akr//A/1187/O  /2akr//A/121/NE2   A1187O  A121NE2
               ACC             DONO  ACC_ID DONO_ID
0  /2akr//C/25/NH1  /2akr//C/1084/O  C25NH1  C1084O
1  /2akr//C/1084/O   /2akr//D/170/O  C1084O   D170O
2  /2akr//C/1084/O  /2akr//C/25/NH1  C1084O  C25NH1
3   /2akr//D/170/O  /2akr//C/1084/O   D170O  C1084O
                ACC              DONO   ACC_ID  DONO_ID
0   /2akr//A/264/NE   /2akr//A/1075/O   A264NE   A1075O
1  /2akr//A/264/NH2   /2

               ACC             DONO  ACC_ID DONO_ID
0  /2akr//A/20/OD1   /2akr//A/22/OG  A20OD1   A22OG
1   /2akr//A/22/OG  /2akr//A/20/OD1   A22OG  A20OD1
               ACC             DONO  ACC_ID DONO_ID
0   /2akr//C/59/OG  /2akr//C/1171/O   C59OG  C1171O
1   /2akr//C/59/OG  /2akr//C/1039/O   C59OG  C1039O
2  /2akr//C/1039/O  /2akr//C/1215/O  C1039O  C1215O
3  /2akr//C/1039/O   /2akr//C/59/OG  C1039O   C59OG
4  /2akr//C/1171/O  /2akr//C/1215/O  C1171O  C1215O
5  /2akr//C/1171/O   /2akr//C/59/OG  C1171O   C59OG
6  /2akr//C/1215/O  /2akr//C/1171/O  C1215O  C1171O
7  /2akr//C/1215/O  /2akr//C/1039/O  C1215O  C1039O
               ACC             DONO  ACC_ID DONO_ID
0  /2akr//A/104/SG  /2akr//A/168/SG  A104SG  A168SG
1  /2akr//A/168/SG  /2akr//A/104/SG  A168SG  A104SG
                ACC              DONO   ACC_ID  DONO_ID
0  /2akr//C/105/OE1   /2akr//C/107/OH  C105OE1   C107OH
1   /2akr//C/107/OH  /2akr//C/105/OE1   C107OH  C105OE1
                ACC              DONO   ACC_ID  DONO

              ACC            DONO ACC_ID DONO_ID
0  /2akr//D/142/O  /2akr//D/200/O  D142O   D200O
1  /2akr//D/200/O  /2akr//D/142/O  D200O   D142O
               ACC             DONO  ACC_ID DONO_ID
0  /2akr//A/37/OG1  /2akr//A/38/ND1  A37OG1  A38ND1
1  /2akr//A/38/ND1  /2akr//A/37/OG1  A38ND1  A37OG1
                ACC             DONO  ACC_ID DONO_ID
0   /2akr//C/14/NE2   /2akr//C/16/OG  C14NE2   C16OG
1    /2akr//C/16/OG  /2akr//C/26/OG1   C16OG  C26OG1
2    /2akr//C/16/OG  /2akr//C/14/NE2   C16OG  C14NE2
3    /2akr//C/21/NE  /2akr//C/92/OE1   C21NE  C92OE1
4   /2akr//C/21/NH2  /2akr//C/92/OE2  C21NH2  C92OE2
..              ...              ...     ...     ...
85  /2akr//C/1230/O  /2akr//C/1089/O  C1230O  C1089O
86  /2akr//C/1243/O  /2akr//C/1023/O  C1243O  C1023O
87  /2akr//C/1246/O  /2akr//C/1230/O  C1246O  C1230O
88  /2akr//C/1246/O  /2akr//C/1167/O  C1246O  C1167O
89  /2akr//C/1252/O  /2akr//C/92/OE2  C1252O  C92OE2

[90 rows x 4 columns]
              ACC            DONO ACC_

               ACC             DONO  ACC_ID DONO_ID
0   /2akr//A/44/OG  /2akr//A/1180/O   A44OG  A1180O
1  /2akr//A/1180/O   /2akr//A/44/OG  A1180O   A44OG
                ACC             DONO  ACC_ID DONO_ID
0   /2akr//A/1004/O   /2akr//B/151/O  A1004O   B151O
1   /2akr//A/1004/O  /2akr//A/1142/O  A1004O  A1142O
2   /2akr//A/1142/O   /2akr//B/163/O  A1142O   B163O
3   /2akr//A/1142/O   /2akr//B/151/O  A1142O   B151O
4   /2akr//A/1142/O  /2akr//A/1004/O  A1142O  A1004O
5    /2akr//B/8/NE2   /2akr//B/163/O   B8NE2   B163O
6    /2akr//B/151/O   /2akr//B/163/O   B151O   B163O
7    /2akr//B/151/O  /2akr//A/1142/O   B151O  A1142O
8    /2akr//B/151/O  /2akr//A/1004/O   B151O  A1004O
9    /2akr//B/163/O   /2akr//B/151/O   B163O   B151O
10   /2akr//B/163/O   /2akr//B/8/NE2   B163O   B8NE2
11   /2akr//B/163/O  /2akr//A/1142/O   B163O  A1142O
               ACC             DONO  ACC_ID DONO_ID
0  /2akr//D/16/OE2   /2akr//D/179/O  D16OE2   D179O
1   /2akr//D/179/O  /2akr//D/16/OE2   D179O  D16OE2

In [266]:
# Display the raw hb-search output returned by main().
hbs

'# HB parameter \n#\t hb-distance <= R_VdW(1) + R_VdW(2) +    1.000\n#\t   80.0 <= alpha <=  150.0 \n#\t   80.0 <= beta  <=  150.0 \n# Number of Donors:     2098\n# Number of Acceptors:  2212\nDONOR       A:7:ASN:N          A:7:ASN:CA         1 2 \nDONOR       A:7:ASN:ND2        A:7:ASN:CG         7 6 \nDONOR       A:7:ASN:OD1        A:7:ASN:CG         8 6 \nDONOR       A:8:TYR:N          A:8:TYR:CA         9 10 \nDONOR       A:8:TYR:OH         A:8:TYR:CZ         20 19 \nDONOR       A:9:THR:N          A:9:THR:CA         21 22 \nDONOR       A:9:THR:OG1        A:9:THR:CB         27 25 \nDONOR       A:10:PHE:N         A:10:PHE:CA        28 29 \nDONOR       A:11:ARG:N         A:11:ARG:CA        39 40 \nDONOR       A:11:ARG:NE        A:11:ARG:CD        46 45 \nDONOR       A:11:ARG:NH1       A:11:ARG:CZ        48 47 \nDONOR       A:11:ARG:NH2       A:11:ARG:CZ        49 47 \nDONOR       A:12:CYS:N         A:12:CYS:CA        50 51 \nDONOR       A:12:CYS:SG        A:12:CYS:CB        55 54 \nDO

In [33]:
# Display the dataframe stored under key 0.
df_clus[0]

Unnamed: 0,ACC,DONO,ACC_ID,DONO_ID
0,/2akr//A/188/NZ,/2akr//A/1172/O,A188NZ,A1172O
1,/2akr//A/1172/O,/2akr//A/188/NZ,A1172O,A188NZ


In [24]:
# PDB code used below to name the top-level cluster group.
pdb_id= '2akr'

In [25]:
# Build one group per cluster: CLUSTER_<c> bundles the atoms of the cluster
# (ATM_<c>) and copies of its bond distance objects (HBB_<c>). All cluster
# groups are finally collected in <pdb_id>_CLUST.
ALL_CLUS = []
HBB_LST = []
# df_clus is keyed by consecutive integers starting at 0, so the key doubles
# as the cluster number.
for c, clus_df in df_clus.items():
    acc = clus_df['ACC'].tolist()
    dono = clus_df['DONO'].tolist()
    acc_id = clus_df['ACC_ID'].tolist()
    don_id = clus_df['DONO_ID'].tolist()

    # Residue-level selections: drop the trailing atom name of every path
    acc_aa = ['/'.join(atom.split('/')[:-1]) for atom in acc]
    don_aa = ['/'.join(atom.split('/')[:-1]) for atom in dono]

    cmd.select('atms', ' + '.join(acc_aa + don_aa))
    cmd.create(f'ATM_{c}', 'atms', 0, 0)

    bondList = []
    for a_id, d_id in zip(acc_id, don_id):
        bond = f"{a_id}-{d_id}"
        print(bond)
        # The source object "<ACC_ID>-<DONO_ID>" is the distance object that
        # pymolDisplay creates under the same naming scheme.
        cmd.copy(f"{bond}_COP", bond)
        bondList.append(f"{bond}_COP")

    cmd.group(f"HBB_{c}", " ".join(bondList))
    cmd.hide("labels", f"HBB_{c}")

    # The per-cluster cartoon/sticks display is disabled for now.

    cmd.group(f'CLUSTER_{c}', f'ATM_{c} HBB_{c}')
    ALL_CLUS.append(f'CLUSTER_{c}')

cmd.group(f'{pdb_id}_CLUST', " ".join(ALL_CLUS))







A188NZ-A1172O
A1172O-A188NZ
A127NH1-A134OE1
A134OE1-A127NH1
A121NE2-A1187O
A1094O-A1187O
A1187O-A1094O
A1187O-A121NE2
C25NH1-C1084O
C1084O-D170O
C1084O-C25NH1
D170O-C1084O
A264NE-A1075O
A264NH2-A1075O
A278OH-A1113O
A1075O-A1113O
A1075O-A264NH2
A1075O-A264NE
A1113O-A1075O
A1113O-A278OH
C226OD1-C1264O
C264NE-C1169O
C264NH2-C1264O
C264NH2-C1169O
C278OH-C1202O
C278OH-C1191O
C278OH-C1061O
C1061O-C1191O
C1061O-C278OH
C1169O-C1202O
C1169O-C1191O
C1169O-C264NH2
C1169O-C264NE
C1191O-C1202O
C1191O-C1169O
C1191O-C1061O
C1191O-C278OH
C1202O-C1191O
C1202O-C1169O
C1202O-C278OH
C1264O-C264NH2
C1264O-C226OD1
C17OG-D34NE2
C17OG-C97OE1
C97OE1-C1020O
C97OE1-C17OG
C1020O-C97OE1
D34NE2-C17OG
A153OD1-A1238O
A1238O-A153OD1
C267ND1-C269OG
C269OG-C267ND1
A121OE1-B103O
B31NE2-B103O
B103O-B31NE2
B103O-A121OE1
A236OD1-A1096O
A1096O-A1194O
A1096O-A236OD1
A1194O-A1096O
C28OG-C1008O
C40NE1-C74NH1
C74NH1-C1003O
C74NH1-C40NE1
C1003O-C1008O
C1003O-C74NH1
C1008O-C1003O
C1008O-C28OG
C76OG-C1272O
C76OG-C1196O
C76OG-C79NH1

B98OD2-B110O
B110O-B208O
B110O-B139O
B110O-B98OD2
B110O-B98OD1
B119O-B123O
B119O-B98OD2
B119O-A194OG
B123O-B119O
B123O-B96OD2
B123O-A1027O
B139O-B110O
B139O-B98OD2
B139O-B98OD1
B139O-A1031O
B192O-B98OD2
B208O-B110O
B208O-B98OD1
C195OG-C204NH1
C204NH1-C1194O
C204NH1-C258OE2
C204NH1-C195OG
C204NH2-C1194O
C258OE2-C204NH1
C1194O-C204NH2
C1194O-C204NH1
A59OG-A62NE2
A62NE2-A1205O
A62NE2-A59OG
A1205O-A62NE2
A131OG1-A1183O
A1086O-A1229O
A1086O-A1195O
A1086O-A1185O
A1086O-A1154O
A1154O-A1229O
A1154O-A1183O
A1154O-A1086O
A1183O-A1154O
A1183O-A131OG1
A1185O-A1213O
A1185O-A1195O
A1185O-A1086O
A1195O-A1229O
A1195O-A1224O
A1195O-A1213O
A1195O-A1185O
A1195O-A1086O
A1213O-A1195O
A1213O-A1185O
A1224O-A1229O
A1224O-A1195O
A1229O-A1224O
A1229O-A1195O
A1229O-A1154O
A1229O-A1086O
C1109O-C1150O
C1150O-C1109O
C82OE1-C1217O
C83OE2-C1217O
C1217O-C83OE2
C1217O-C82OE1
B50OE2-B226O
B50OE2-B209O
B50OE2-B174O
B174O-B209O
B174O-B50OE2
B209O-B226O
B209O-B174O
B209O-B50OE2
B226O-B209O
B226O-B50OE2
C180NZ-C1174O
C1174O

A1116O-A1176O
A1116O-A1063O
A1129O-A1215O
A1129O-A1178O
A1139O-B188O
A1139O-B136O
A1139O-A1088O
A1139O-A1065O
A1139O-A1029O
A1141O-A1159O
A1141O-A1011O
A1141O-A214OH
A1144O-C1149O
A1144O-C60ND2
A1144O-A1215O
A1144O-A234NH1
A1156O-A1091O
A1156O-A1020O
A1159O-A1141O
A1159O-A1011O
A1159O-A34OD1
A1171O-A1036O
A1171O-A105OE2
A1176O-A1116O
A1178O-A1232O
A1178O-A1129O
A1178O-A1017O
A1190O-A1232O
A1190O-A1017O
A1190O-A216NZ
A1199O-C1134O
A1199O-A1063O
A1209O-B172O
A1209O-B116O
A1209O-A1074O
A1209O-A1036O
A1215O-A1144O
A1215O-A1129O
A1220O-C1134O
A1220O-A1082O
A1220O-A1063O
A1220O-A1009O
A1232O-A1190O
A1232O-A1178O
B8OE1-B131O
B8OE1-B117O
B26OH-B178O
B26OH-B134O
B28OG1-B144O
B28OG1-B117O
B29NE2-B144O
B29NE2-B109O
B29NE2-B61OG
B52OG-B155O
B52OG-B130O
B53OD2-A1080O
B53OD2-A39NH1
B53OD2-A36OE1
B53OD2-A36NE2
B55OG-B149O
B55OG-A31NE1
B57OG-B204O
B57OG-B109O
B59OD1-B61OG
B59OD2-B61OG
B60NE1-B175O
B60NE1-B172O
B61OG-B109O
B61OG-B59OD2
B61OG-B59OD1
B61OG-B29NE2
B63OH-B149O
B63OH-B136O
B109O-B61OG
B109O

C245NE1-C34OD2
C1004O-C1241O
C1004O-C1161O
C1004O-C34OD2
C1012O-D115O
C1012O-C1146O
C1025O-C1256O
C1026O-D188O
C1026O-D155O
C1026O-C1188O
C1026O-C1175O
C1026O-C1078O
C1030O-D149O
C1030O-C1216O
C1030O-C1112O
C1035O-C1164O
C1035O-C1099O
C1035O-C243OE1
C1035O-C36OE1
C1067O-C1163O
C1067O-C216NZ
C1069O-C1188O
C1069O-C1143O
C1073O-D175O
C1073O-D130O
C1073O-C1256O
C1078O-D188O
C1078O-D104O
C1078O-C1188O
C1078O-C1026O
C1078O-C101OG
C1099O-D136O
C1099O-D53OD2
C1099O-C1035O
C1099O-C36OE1
C1100O-C1216O
C1110O-D154O
C1110O-C1247O
C1110O-C1163O
C1110O-C1160O
C1112O-D197O
C1112O-C1030O
C1113O-C1247O
C1113O-C1234O
C1113O-C1160O
C1143O-C1175O
C1143O-C1069O
C1143O-C117NE2
C1146O-D164O
C1146O-C1012O
C1146O-C27OD2
C1160O-C1247O
C1160O-C1113O
C1160O-C1110O
C1160O-C245NE1
C1161O-C1241O
C1161O-C1004O
C1163O-C1110O
C1163O-C1067O
C1164O-D151O
C1164O-C1234O
C1164O-C1168O
C1164O-C1035O
C1168O-D154O
C1168O-D136O
C1168O-D52OG
C1168O-C1164O
C1175O-C1188O
C1175O-C1143O
C1175O-C1026O
C1175O-C101OG
C1188O-C1175O
C118

In [252]:
# Scratch test of cmd.copy.
i = 0
print(f"{acc_id[i]}-{don_id[i]}")
# NOTE(review): the target name is derived from acc_id/don_id, whereas the
# source object is the hard-coded 'A7OD1-A1165O' -- confirm this is intended.
cmd.copy(f"{acc_id[i]}-{don_id[i]}_COPP", 'A7OD1-A1165O')


A188NZ-A1172O


In [247]:
# Scratch test: copy the object 'A7OD1-A1165O' to 'A7OD1-A1165O_copp'.
cmd.copy('A7OD1-A1165O_copp',  'A7OD1-A1165O')

In [None]:
def pymolDisplay(df, pdbID):
    '''
    Draw every bond listed in df as a distance object and show the residues
    involved as sticks.

    :param df: dataframe with the columns ACC, DONO, ACC_ID and DONO_ID
    :param pdbID: pdb code of the handled structure (not used here)
    :return: names of the created distance objects ("<ACC_ID>-<DONO_ID>")
    '''
    bondList = []
    acc_sel = []
    dono_sel = []

    pairs = zip(df['ACC'].tolist(), df['DONO'].tolist(),
                df['ACC_ID'].tolist(), df['DONO_ID'].tolist())
    for acc_atom, dono_atom, acc_label, dono_label in pairs:
        bond_name = f"{acc_label}-{dono_label}"

        # One distance object per acceptor/donor pair
        cmd.distance(bond_name, acc_atom, dono_atom)
        bondList.append(bond_name)

        # Keep the first five '/'-separated fields, i.e. drop the atom name
        dono_sel.append('/'.join(dono_atom.split('/')[:5]))
        acc_sel.append('/'.join(acc_atom.split('/')[:5]))

    cmd.group("HBonds", " ".join(bondList))
    cmd.hide("labels", "HBonds")

    # Acceptors first, then donors: a temporary selection is created, used to
    # switch from cartoon to sticks, and deleted again.
    for sel_name, residues in (('all_acceptors', acc_sel),
                               ('all_donors', dono_sel)):
        cmd.select(sel_name, ' + '.join(residues))
        cmd.hide('cartoon', sel_name)
        cmd.show('sticks', sel_name)
        cmd.delete(sel_name)

    return bondList

In [172]:
# Reset the scratch selection lists.
acc_sel = []
don_sel = []

In [173]:
# Acceptor and donor atom paths of the cluster stored under the hard-coded
# key 78.
acc_sel = df_clus[78]['ACC'].tolist()
don_sel = df_clus[78]['DONO'].tolist()

In [174]:
# Select all acceptor and donor atoms of the chosen cluster as 'clus1'.
cmd.select('clus1', ' + '.join(acc_sel+don_sel) )
#cmd.select('clus2', ' + '.join(don_sel) )
#cmd.create('CLUS', 'clus')
#cmd.group("HBonds", " ".join(bondList))
#cmd.show('spheres','clus')
#cmd.select('all_don', ' + '.join(don_sel))
#cmd.show('spheres','all_don')


17

In [137]:
# Select only the donor atoms as 'clus2'.
cmd.select('clus2', ' + '.join(don_sel) )


17

In [165]:
# NOTE(review): the selection 'clus' is not created in any cell shown here
# (only 'clus1' and 'clus2' are) -- confirm which selection is meant.
cmd.create('CLUS', 'clus', 0, 0)


In [123]:
# Hide the sphere representation of 'clus' (NOTE(review): this selection is
# not created in any cell shown here -- confirm).
cmd.hide('spheres','clus')


1

In [67]:
# Print the dataframe of every cluster.
for i in df_clus:
    print(df_clus[i])

               ACC             DONO
0  /2akr//A/188/NZ  /2akr//A/1172/O
1  /2akr//A/1172/O  /2akr//A/188/NZ
                ACC              DONO
0  /2akr//A/127/NH1  /2akr//A/134/OE1
1  /2akr//A/134/OE1  /2akr//A/127/NH1
                ACC              DONO
0  /2akr//A/121/NE2   /2akr//A/1187/O
1   /2akr//A/1094/O   /2akr//A/1187/O
2   /2akr//A/1187/O   /2akr//A/1094/O
3   /2akr//A/1187/O  /2akr//A/121/NE2
               ACC             DONO
0  /2akr//C/25/NH1  /2akr//C/1084/O
1  /2akr//C/1084/O   /2akr//D/170/O
2  /2akr//C/1084/O  /2akr//C/25/NH1
3   /2akr//D/170/O  /2akr//C/1084/O
                ACC              DONO
0   /2akr//A/264/NE   /2akr//A/1075/O
1  /2akr//A/264/NH2   /2akr//A/1075/O
2   /2akr//A/278/OH   /2akr//A/1113/O
3   /2akr//A/1075/O   /2akr//A/1113/O
4   /2akr//A/1075/O  /2akr//A/264/NH2
5   /2akr//A/1075/O   /2akr//A/264/NE
6   /2akr//A/1113/O   /2akr//A/1075/O
7   /2akr//A/1113/O   /2akr//A/278/OH
                 ACC              DONO
0   /2akr//C/226/OD1   /2ak

In [None]:
# Clean up: delete every entry inside ./CLUSTER, then the directory itself.
# os.rmdir only removes empty directories, so the entries have to go first.
for i in os.listdir('./CLUSTER'):
    os.remove(f'./CLUSTER/{i}' )
os.rmdir('CLUSTER')

In [None]:
def test(teststr:str='asd', secstr='zxc'):
    '''
    Bundle both arguments in a dict and print the value stored for secstr.

    :param teststr: stored under the key 'teststr'
    :param secstr: stored under the key 'secstr' and printed
    '''
    params = {
        'teststr' : teststr,
        'secstr' : secstr
    }
    # dict entries are read by key; attribute access (params.secstr) raises
    # AttributeError.
    print(params['secstr'])

In [None]:
# Call test() with its default arguments.
test()

# ToDo

1. Use Biopython to check whether the PDB entry exists

wir erstellen einen befehl hb network initialize CLUSTER hb out etc abspeichern. Am ende hb_in(ATOM)