-
Notifications
You must be signed in to change notification settings - Fork 144
/
ligand_refinement_from_command_line.py
169 lines (141 loc) · 7.83 KB
/
ligand_refinement_from_command_line.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import optparse
import os
import pyrosetta
def sample_ligand_interface(pdb_filename,
partners,
ligand_params=[""],
jobs=1,
job_output="ligand_output"):
"""
Performs ligand-protein docking using Rosetta fullatom docking
(DockingHighRes) on the ligand-protein complex in <pdb_filename>
using the relative chain <partners>. If the ligand parameters
(a .params file) are not defaultly loaded into PyRosetta,
<ligand_params> must supply the list of files including the ligand
parameters. <jobs> trajectories are performed with output
structures named <job_output>_(job#).pdb.
Note: Global docking, a problem solved by the Rosetta DockingProtocol,
requires interface detection and refinement as with other protocols,
these tasks are split into centroid (interface detection) and
high-resolution (interface refinement) methods without a centroid
representation, low-resolution ligand-protein prediction is not
possible and as such, only the high-resolution ligand-protein
interface refinement is available. If you add a perturbation or
randomization step, the high-resolution stages may fail. A perturbation
step CAN make this a global docking algorithm however the rigid-body
sampling preceding refinement requires extensive sampling to produce
accurate results and this algorithm spends most of its effort in
refinement (which may be useless for the predicted interface).
This script performs ligand-protein interface structure prediction but does NOT
perform global ligand-protein docking. Since there is no generic interface
detection, the input PDB file must have the ligand placed near the interface
that will be refined. If the DockMCMProtocol is applied to a pose
without placement near the interface, then the refinement may:
-waste steps sampling the wrong interface
-fail by predicting an incorrect interface very far from the true interface
-fail by separating the ligand from the protein (usually due to a clash)
DockMCMProtocol does not require an independent randomization or perturbation
step to "seed" its prediction.
Additional refinement steps may increase the accuracy of the predicted
conformation (see refinement.py). Drastic moves (large conformational changes)
should be avoided; if they precede the protocol, the problems above may occur,
if they succeed the protocol, the protocol results may be lost.
"""
# Declare working directory and output directory
working_dir = os.getcwd()
output_dir = "outputs"
if not os.path.exists(output_dir):
os.mkdir(output_dir)
# Initialize PyRosetta
pyrosetta.init()
# Create an empty pose from the desired PDB file
pose = pyrosetta.rosetta.core.pose.Pose()
# If the params list has contents, load .params files
# Note: this method of adding ligands to the ResidueTypeSet is unnecessary
# if you call pyrosetta.init("-extra_res_fa {}".format(ligand_params))
if len(ligand_params) != 0 and ligand_params[0] != "":
ligand_params = pyrosetta.Vector1(ligand_params)
res_set = pose.conformation().modifiable_residue_type_set_for_conf()
res_set.read_files_for_base_residue_types(ligand_params)
pose.conformation().reset_residue_type_set_for_conf(res_set)
# Load pdb_filename into pose
pyrosetta.io.pose_from_file(pose, pdb_filename)
# Setup the docking FoldTree
# the method setup_foldtree takes an input pose and sets its
# FoldTree to have jump 1 represent the relation between the two docking
# partners, the jump points are the residues closest to the centers of
# geometry for each partner with a cutpoint at the end of the chain,
# the second argument is a string specifying the relative chain orientation
# such as "A_B" of "LH_A", ONLY TWO BODY DOCKING is supported and the
# partners MUST have different chain IDs and be in the same pose (the
# same PDB), additional chains can be grouped with one of the partners,
# the "_" character specifies which bodies are separated
# the third argument...is currently unsupported but must be set (it is
# supposed to specify which jumps are movable, to support multibody
# docking...but Rosetta doesn't currently)
# the FoldTrees setup by this method are for TWO BODY docking ONLY!
dock_jump = 1 # jump number 1 is the inter-body jump
pyrosetta.rosetta.protocols.docking.setup_foldtree(pose,
partners,
pyrosetta.Vector1([dock_jump]))
# Create a copy of the pose for testing
test_pose = pose.clone()
# Create ScoreFunctions for centroid and fullatom docking
scorefxn = pyrosetta.create_score_function("ligand")
# Setup the high resolution (fullatom) docking protocol using DockMCMProtocol.
docking = pyrosetta.rosetta.protocols.docking.DockMCMProtocol()
# Many of its options and settings can be set using the setter methods.
docking.set_scorefxn(scorefxn)
# Change directory temporarily for output
os.chdir(output_dir)
# Setup the PyJobDistributor
jd = pyrosetta.toolbox.py_jobdistributor.PyJobDistributor(job_output,
jobs, scorefxn,
compress=False)
# Set the native pose so that the output scorefile contains the pose rmsd metric
jd.native_pose = pose
# Optional: setup a PyMOLObserver
# pyrosetta.rosetta.protocols.moves.AddPyMOLObserver(test_pose, True)
# Perform protein-ligand docking
# counter = 0 # for pretty output to PyMOLObserver
while not jd.job_complete:
test_pose = pose.clone() # Reset test pose to original structure
# counter += 1 # Change the pose name, for pretty output to PyMOLObserver
# test_pose.pdb_info().name(job_output + '_' + str(counter))
docking.apply(test_pose) # Perform docking and output to PyMOL
# Write the decoy structure to disc
jd.output_decoy(test_pose)
os.chdir(working_dir)
if __name__ == "__main__":
# Declare parser object for managing input options
parser = optparse.OptionParser()
parser.add_option("--pdb_filename",
dest="pdb_filename",
help="The PDB file containing the ligand and protein to dock.")
parser.add_option("--partners",
dest="partners",
default = "A_X",
help="The relative chain partners for docking.")
parser.add_option("--ligand_params",
dest="ligand_params",
help="The ligand residue parameter file.")
parser.add_option("--jobs",
dest="jobs",
default="1",
help="The number of jobs (trajectories) to perform.")
parser.add_option("--job_output",
dest="job_output",
default = "ligand_output",
help="The name preceding all output, output PDB files and scorefile.")
(options, args) = parser.parse_args()
# Catch input erros
if not options.pdb_filename:
parser.error("pdb_filename not given!")
if not options.ligand_params:
parser.error("ligand_params not given!")
# Run ligand refinement protocol
sample_ligand_interface(pdb_filename=options.pdb_filename,
partners=options.partners,
ligand_params=options.ligand_params.split(","),
jobs=int(options.jobs),
job_output=options.job_output)