In [44]:
#Use FFTDock and refinement to dock ligands into AlphaFold models with FAD cofactor
#Inputs:
    #cofactor/pdb_with_fad/tropb.pdb
    #dock/ligands/2.pdb
    #dock/ligands/2.str #CGenFF stream file
    #toppar
    #dock/inp
#Outputs:
    #dock/grid/tropb_fftdock_3.bin
    #dock/poses/tropb_2_fftdock_3
    #dock/poses/tropb_2_prot_0.75
    #dock/scores/
    
import os 
import re
import pandas as pd
import numpy as np
# The relative paths used later in this notebook ('./ligands', './grid', './inp',
# 'poses/...', 'scores/...') are resolved against this working directory.
# NOTE(review): absolute, user-specific path -- adjust when running elsewhere.
os.chdir('/home/azamh/demo/seq_struct_func/dock')

In [5]:
#Define protein and ligand to dock
charmm_exec = '/home/wyujin/CHARMM7/stable-release/install/test/bin/charmm'
protein = 'tropb'
ligand = '2'

#Input locations (relative to the current working directory)
liganddir = './ligands'
faddir = '../cofactor/pdb_with_fad'
griddir = './grid'
scriptdir = './inp'

#FFTDock settings
fftdock_eps = 3
fftdock_poses = 500

#Output names are derived from fftdock_eps so they cannot drift out of sync
#with the epsilon value that is passed on to fftdock()
fftdock_dockdir = os.path.join('poses', f'{protein}_{ligand}_fftdock_{fftdock_eps}')
fftdock_scorefile = f'scores/{protein}_{ligand}_fftdock_{fftdock_eps}.dat'

In [6]:
#Make sure the FFTDock pose directory is present (no error if it already exists)
os.makedirs(name=fftdock_dockdir, exist_ok=True)

In [7]:
#Run FFTDock
#Generate FFTDOCK conformers
def fftdock(protein: str,
            ligand: str,
            dockdir: str,
            scorefile: str,
            epsilon: int,
            griddir:str,
            scriptdir:str,
            faddir:str,
            liganddir:str,
            num_poses:int):
    """Create the FFTDock grid if it is missing, then run FFTDock for one ligand.

    Both steps run ``{scriptdir}/fftdock.inp`` with the executable named by the
    module-level ``charmm_exec`` and print the captured standard output.

    Parameters
    ----------
    protein, ligand : str
        Passed to the script as ``protein=`` and ``ligand=``; ``protein`` is
        also part of the grid file name.
    dockdir, scorefile : str
        Passed to the docking run as ``dockdir=`` and ``scorefile=``.
    epsilon : int
        Passed as ``epsilon=``; also part of the grid file name.
    griddir : str
        Directory in which the grid file ``{protein}_fftdock_{epsilon}.bin``
        is looked up.
    scriptdir, faddir, liganddir : str
        Passed as ``scriptdir=``, ``faddir=`` and ``liganddir=``.
    num_poses : int
        Passed to the docking run as ``poses=``.

    Raises
    ------
    ValueError
        If the grid file still does not exist after the grid-generation run.
    """
    gridfile = f'{griddir}/{protein}_fftdock_{epsilon}.bin'

    #grid generation is skipped when the grid file already exists
    grid_missing = not os.path.isfile(gridfile)
    if grid_missing:
        makegrid_cmd = f'{charmm_exec} -i {scriptdir}/fftdock.inp makegrid=1 protein={protein} epsilon={epsilon} scriptdir={scriptdir} faddir={faddir} gridfile={gridfile}'
        print(os.popen(makegrid_cmd).read())

        #the output is only printed, so the file's presence is the success check
        if not os.path.isfile(gridfile):
            raise ValueError('Grid Generation Failed! See CHARMM Out')

    #dock the ligand against the grid
    griddock_cmd = (f'{charmm_exec} -i {scriptdir}/fftdock.inp griddock=1 protein={protein} faddir={faddir} liganddir={liganddir} ' +
                    f'ligand={ligand} poses={num_poses}  dockdir={dockdir} scorefile={scorefile} epsilon={epsilon} gridfile={gridfile} scriptdir={scriptdir}')
    print(os.popen(griddock_cmd).read())

[gollum154][[12321,1],0][../../../../../openmpi-3.1.2/opal/mca/btl/openib/btl_openib_component.c:1671:init_one_device] error obtaining device attributes for mlx5_0 errno says Protocol not supported
--------------------------------------------------------------------------

  Local host:   gollum154
  Local device: mlx5_0
--------------------------------------------------------------------------


 Parameter: GRIDDOCK <- "1"
 Parameter: PROTEIN <- "TROPB"
 Parameter: FADDIR <- "../COFACTOR/PDB_WITH_FAD"
 Parameter: LIGANDDIR <- "./LIGANDS"
 Parameter: LIGAND <- "2"
 Parameter: POSES <- "500"
 Parameter: DOCKDIR <- "POSES/TROPB_2_FFTDOCK_3"
 Parameter: SCOREFILE <- "SCORES/TROPB_2_FFTDOCK_3.DAT"
 Parameter: EPSILON <- "3"
 Parameter: GRIDFILE <- "./GRID/TROPB_FFTDOCK_3.BIN"
 Parameter: SCRIPTDIR <- "./INP"
1
                 Chemistry at HARvard Macromolecular Mechanics
           (CHARMM) - Developmental Version 47b1     August 15, 2022            
                             Git commit ID 83ff627                              
       Copyright(c) 1984-2022  President and Fellows of Harvard College
                              All Rights Reserved
  Current operating system: Linux-3.10.0-1127.el7.x86_64(x86_64)@gollum154.ls   
                 Created on 12/7/22 at 12:22:00 by user: azamh       

            Maximum number of ATOMS:    360720, and RESidues:      120240
 RDTITL> 

In [None]:
#Keyword arguments guard against mixing up the many same-typed string parameters
fftdock(protein=protein,
        ligand=ligand,
        dockdir=fftdock_dockdir,
        scorefile=fftdock_scorefile,
        epsilon=fftdock_eps,
        griddir=griddir,
        scriptdir=scriptdir,
        faddir=faddir,
        liganddir=liganddir,
        num_poses=fftdock_poses)

In [13]:
#Minimize fftdock poses in explicit protein
prot_eps = 0.75
#Output names are derived from prot_eps so they cannot drift out of sync with
#the epsilon value that is passed on to prot()
prot_dockdir = os.path.join('poses', f'{protein}_{ligand}_prot_{prot_eps}')
prot_scorefile = f'scores/{protein}_{ligand}_prot_{prot_eps}.dat'
os.makedirs(prot_dockdir, exist_ok = True)

#Minimize fftdock poses in explicit protein
def prot(protein: str,
         ligand: str,
         dockdir: str,
         scorefile: str,
         eps_study: float,
         fftdockdir: str,
         scriptdir:str,
         faddir:str,
         num_poses:int,
         liganddir:str,):
    """Run ``{scriptdir}/protmin.inp`` and print the captured standard output.

    The script is launched with the executable named by the module-level
    ``charmm_exec``.

    Parameters
    ----------
    protein, ligand : str
        Passed to the script as ``protein=`` and ``ligand=``.
    dockdir, scorefile : str
        Passed as ``dockdir=`` and ``scorefile=``.
    eps_study : float
        Passed as ``epsilon=``. Annotated as float because this notebook
        calls the function with 0.75.
    fftdockdir : str
        Passed as ``fftdockdir=``.
    scriptdir, faddir, liganddir : str
        Passed as ``scriptdir=``, ``faddir=`` and ``liganddir=``.
    num_poses : int
        Passed as ``poses=``.

    Notes
    -----
    The complete program output is printed. In the recorded run of this
    notebook that was enough to trigger Jupyter's "IOPub data rate exceeded"
    message.
    """
    #Minimize on explicit protein
    prot_min_out = os.popen(f'{charmm_exec} -i {scriptdir}/protmin.inp protein={protein} ligand={ligand} poses={num_poses} ' +
        f'dockdir={dockdir} scorefile={scorefile} faddir={faddir} liganddir={liganddir} ' +
        f'fftdockdir={fftdockdir} epsilon={eps_study} scriptdir={scriptdir}').read()
    print(prot_min_out)

In [14]:
#Keyword arguments guard against mixing up the many same-typed string parameters
prot(protein=protein,
     ligand=ligand,
     dockdir=prot_dockdir,
     scorefile=prot_scorefile,
     eps_study=prot_eps,
     fftdockdir=fftdock_dockdir,
     scriptdir=scriptdir,
     faddir=faddir,
     num_poses=fftdock_poses,
     liganddir=liganddir)

[gollum154][[12078,1],0][../../../../../openmpi-3.1.2/opal/mca/btl/openib/btl_openib_component.c:1671:init_one_device] error obtaining device attributes for mlx5_0 errno says Protocol not supported
--------------------------------------------------------------------------

  Local host:   gollum154
  Local device: mlx5_0
--------------------------------------------------------------------------
IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [37]:
#Cluster generated poses
#Cluster poses with cluster_pl and return output
def run_clusterpl(protein, ligand, dockdir, radius = 1):
    """Run ``cluster.pl`` over the pose PDB files and return its standard output.

    The command is invoked with ``-kclust -nolsqfit -selmode heavy`` and the
    given ``-radius`` on every file matching
    ``{dockdir}/{protein}_{ligand}_*.pdb``. The wildcard is left for the shell
    to expand.
    """
    return os.popen(
        f'cluster.pl -kclust -nolsqfit -radius {radius} -selmode heavy {dockdir}/{protein}_{ligand}_*.pdb'
    ).read()

#Read in cluster output and return list of clusters
def read_clusters(protein, ligand, dockdir, cluster_out):
    """Parse ``cluster.pl`` output into lists of pose indices.

    Parameters
    ----------
    protein, ligand, dockdir : str
        Used to recognise pose paths of the form
        ``{dockdir}/{protein}_{ligand}_<index>.pdb``.
    cluster_out : str
        Text returned by ``run_clusterpl``.

    Returns
    -------
    list[list[str]]
        One list of pose indices (as strings) per sub-cluster, sorted from the
        largest cluster to the smallest. Sub-clusters without any recognised
        pose are omitted.
    """
    #Escape the path pieces so regex metacharacters in them (e.g. the '.' in
    #'tropb_2_prot_0.75') match literally, and require a literal '.pdb' suffix
    pose_pattern = re.compile(
        r'{}/{}_{}_(\S+)\.pdb'.format(re.escape(dockdir), re.escape(protein), re.escape(ligand))
    )

    clusters = []
    cluster = []
    start = False

    for line in cluster_out.splitlines():
        #a 'cluster t.<n>' header opens a new sub-cluster; everything before the
        #first such header is ignored
        if 'cluster t.' in line:
            if cluster:
                clusters.append(cluster)
            cluster = []
            start = True
        elif start:
            match = pose_pattern.search(line)
            #lines that do not name a pose (blank or comment lines) are skipped
            #instead of raising IndexError
            if match:
                cluster.append(match.group(1))
    if cluster:
        clusters.append(cluster)

    #sort clusters by size: largest -> smallest
    clusters.sort(key = len, reverse=True)

    return clusters

#returns dictionary to map each pose from fftdock or minimization to its energy
def read_scores(scorefile):
    """Read a score file into a ``{pose index: energy}`` dictionary.

    Each non-blank line must contain at least two whitespace-separated fields:
    the pose index (kept as a string) and its energy (converted to float).
    Any further fields are ignored. Blank lines are skipped. If an index
    occurs more than once, the last occurrence wins.
    """
    index_energies = dict()
    #get index and associated energy for score.dat
    with open(scorefile, 'r') as min_score_reader:
        for line in min_score_reader.read().splitlines():
            fields = line.split()
            #tolerate empty / whitespace-only lines
            if not fields:
                continue
            index_energies[fields[0]] = float(fields[1])

    return index_energies

#based on dictionary from read_scores, calculate stats for given cluster
def cluster_stats(cluster, index_energies):
    """Summarise the energies of one cluster.

    Parameters
    ----------
    cluster : list
        Pose indices belonging to the cluster; each must be a key of
        ``index_energies``.
    index_energies : dict
        Mapping from pose index to energy, as built by ``read_scores``.

    Returns
    -------
    dict
        Keys, in this order: ``'cluster'`` (the input list), ``'size'``,
        ``'min_ener'`` (lowest energy, rounded to 2 decimals), ``'min_index'``
        (index of the pose with that energy; the first one on ties),
        ``'average energy'`` and ``'std energy'`` (both rounded to 2 decimals).
        For an empty cluster ``'min_ener'`` is ``''`` and ``'min_index'`` is -1.
    """
    energies = [float(index_energies[index]) for index in cluster]

    #sentinels kept from the original implementation for an empty cluster
    min_index = -1
    min_ener = ''
    if energies:
        #Find the minimum on the unrounded values. The result is rounded only
        #for reporting, so a minimum of 0.0 or near-equal energies cannot
        #select the wrong pose. min() returns the first position on ties.
        best = min(range(len(energies)), key=energies.__getitem__)
        min_index = cluster[best]
        min_ener = round(energies[best], 2)

    cluster_dict = dict()
    cluster_dict['cluster'] = cluster
    cluster_dict['size'] = len(cluster)
    cluster_dict['min_ener'] = min_ener
    cluster_dict['min_index'] = min_index
    cluster_dict['average energy'] = round(np.average(energies),2)
    cluster_dict['std energy'] = round(np.std(energies),2)
    return cluster_dict
    
    
    

In [38]:
#Cluster the poses in prot_dockdir and load the per-pose energies
cluster_out = run_clusterpl(protein=protein, ligand=ligand, dockdir=prot_dockdir)
index_energies = read_scores(scorefile=prot_scorefile)
#Show the raw cluster.pl report
print(cluster_out)

/export/apps/CentOS7/mmtsb/src/feig/toolset//bin/kclust -pdb -centroid -cdist -radius 1 -iterate -maxerr 0.01 -mode rmsd -heavy


# cluster file
# automatically generated on: Wed Dec  7 12:58:49 2022
# mode: rmsd, filetype: pdb, lsqfit: 0, selmode: heavy
@cluster t has 500 elements, 136 subclusters
1 poses/tropb_2_prot_0.75/tropb_2_100.pdb
2 poses/tropb_2_prot_0.75/tropb_2_101.pdb
3 poses/tropb_2_prot_0.75/tropb_2_102.pdb
4 poses/tropb_2_prot_0.75/tropb_2_103.pdb
5 poses/tropb_2_prot_0.75/tropb_2_104.pdb
6 poses/tropb_2_prot_0.75/tropb_2_105.pdb
7 poses/tropb_2_prot_0.75/tropb_2_106.pdb
8 poses/tropb_2_prot_0.75/tropb_2_107.pdb
9 poses/tropb_2_prot_0.75/tropb_2_108.pdb
10 poses/tropb_2_prot_0.75/tropb_2_109.pdb
11 poses/tropb_2_prot_0.75/tropb_2_10.pdb
12 poses/tropb_2_prot_0.75/tropb_2_110.pdb
13 poses/tropb_2_prot_0.75/tropb_2_111.pdb
14 poses/tropb_2_prot_0.75/tropb_2_112.pdb
15 poses/tropb_2_prot_0.75/tropb_2_113.pdb
16 poses/tropb_2_prot_0.75/tropb_2_114.pdb
17 poses/tropb_2_prot_0.75/tropb_2_115.pdb
18 poses/tropb_2_prot_0.75/tropb_2_116.pdb
19 poses/tropb_2_prot_0.75/tropb_2_117.pdb
20 poses/tropb_2_prot_0

In [39]:
#Split the cluster.pl report into clusters, then summarise the energies of each one
clusters = read_clusters(protein=protein,
                         ligand=ligand,
                         dockdir=prot_dockdir,
                         cluster_out=cluster_out)
cluster_dicts = [cluster_stats(members, index_energies) for members in clusters]

In [46]:
#One row per cluster, ordered by the lowest energy found in each cluster
cluster_df = (
    pd.DataFrame.from_records(cluster_dicts)
    .sort_values(by=['min_ener'])
    .reset_index(drop=True)
)
#Create the output directory first, as is done for the pose directories, so
#the export also works when 'cluster/' does not exist yet
os.makedirs('cluster', exist_ok=True)
#File name follows prot_eps instead of repeating the literal 0.75
cluster_df.to_excel(f'cluster/{protein}_{ligand}_prot_{prot_eps}.xlsx')
cluster_df

Unnamed: 0,cluster,size,min_ener,min_index,average energy,std energy
0,[422],1,-54.22,422,-54.22,0.00
1,"[107, 122, 143, 166, 183, 212, 223, 345, 347, ...",13,-48.71,381,-35.73,9.14
2,"[1, 367, 392, 39, 452, 63]",6,-47.17,367,-29.69,12.23
3,"[179, 275, 297, 396, 460, 475, 477]",7,-45.61,396,-25.63,9.39
4,"[121, 158, 161, 21, 360, 55, 8]",7,-42.97,8,-26.39,7.85
...,...,...,...,...,...,...
131,"[244, 28, 382, 426, 441, 53]",6,5005.82,441,5035.67,23.38
132,"[197, 302, 311]",3,5018.36,197,5053.08,30.07
133,[32],1,5022.10,32,5022.10,0.00
134,"[116, 96]",2,5112.42,116,5114.75,2.33
