## PyRosettaCluster: A Framework For Reproducible Computational Protein Design

This Jupyter Lab example generates a decoy using PyRosettaCluster, then reproduces the simulation to generate an identical copy of the decoy.

### 1. Import packages

In [1]:
import bz2
import glob
import json
import logging
logging.basicConfig(level=logging.INFO)
import os
import pyrosetta
import pyrosetta.distributed.io as io
import pyrosetta.distributed.packed_pose as packed_pose
import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts
import pyrosetta.distributed.tasks.score as score
import pyrosetta.distributed.viewer as viewer
import random
import tempfile

from pyrosettacluster import PyRosettaCluster, get_instance_kwargs

Inject client code here, then run the cell:

In [15]:
from dask.distributed import Client

# Address of the Dask scheduler this notebook connects to; replace it with the
# address injected for your own session before running the cell.
SCHEDULER_ADDRESS = "tcp://127.0.0.1:46491"

client = Client(SCHEDULER_ADDRESS)
client

OSError: Timed out trying to connect to 'tcp://127.0.0.1:46491' after 10 s: Timed out trying to connect to 'tcp://127.0.0.1:46491' after 10 s: in <distributed.comm.tcp.TCPConnector object at 0x14db968d0>: ConnectionRefusedError: [Errno 61] Connection refused

### 1. Define the user-provided protocol:

### 2. Define the user-provided input pdb:

### 3. Execute protocol

In [11]:


def protocol1(packed_pose, **kwargs):
    """
    Repack the input `PackedPose` object with a RosettaScripts protocol.

    The protocol restricts the packer to repacking (`RestrictToRepacking`) and
    applies a single `PackRotamersMover`.

    Args:
        packed_pose: an input `PackedPose` object.
        **kwargs: PyRosettaCluster keyword arguments (unused by this protocol).

    Returns:
        The result of applying the `SingleoutputRosettaScriptsTask` to
        `packed_pose`.
    """
    # Imported inside the function so the protocol does not rely on names
    # defined in the notebook's global namespace.
    import pyrosetta.distributed.tasks.rosetta_scripts as rosetta_scripts

    input_protocol = """
        <ROSETTASCRIPTS>
          <TASKOPERATIONS>
            <RestrictToRepacking name="only_pack"/>
          </TASKOPERATIONS>

          <MOVERS>
            <PackRotamersMover name="pack" task_operations="only_pack" />
          </MOVERS>

          <PROTOCOLS>
            <Add mover="pack"/>
          </PROTOCOLS>
        </ROSETTASCRIPTS>
        """
    repack_task = rosetta_scripts.SingleoutputRosettaScriptsTask(input_protocol)
    # Syntax check via setup
    repack_task.setup()

    # Apply the task to the *input* structure. The original code passed an
    # unassigned local (`ppose`) here, which raised before any work was done.
    return repack_task(packed_pose)

In [12]:
# def my_first_protocol(packed_pose, **kwargs):
#     """
#     Repack the input `PackedPose` object.
    
#     Args:
#         packed_pose: an input `PackedPose` object.
#         **kwargs: PyRosettaCluster keyword arguments.

#     Returns:
#         Three `Pose` objects.
#     """
#     import pyrosetta
#     import pyrosetta.distributed.io as io
#     from pyrosetta.rosetta.protocols.minimization_packing import (
#         PackRotamersMover,
#     )

#     pose = io.to_pose(packed_pose)
#     pack_rotamers = PackRotamersMover(
#         scorefxn=pyrosetta.create_score_function("ref2015.wts"),
#         task=pyrosetta.standard_packer_task(pose),
#         nloop=10,
#     )
#     pack_rotamers.apply(pose)
#     dummy_pose_1 = io.to_pose(io.pose_from_sequence("W" * 6))
#     dummy_pose_2 = io.to_pose(io.pose_from_sequence("F" * 6))

#     return pose, dummy_pose_1, dummy_pose_2

# def my_second_protocol(packed_pose, **kwargs):
#     """
#     Given the input `PackedPose` object, apply a virtual root, termini distance
#     constraints, and minimization. Afterward, remove the constraints and virtual root.
    
#     Args:
#         packed_pose: an input `PackedPose` object.
#         **kwargs: PyRosettaCluster keyword arguments.

#     Returns:
#         Three `Pose` objects.
#     """
#     import pyrosetta
#     import pyrosetta.distributed.io as io
#     from pyrosetta.rosetta.protocols.simple_moves import VirtualRootMover
#     from pyrosetta.rosetta.protocols.rosetta_scripts import XmlObjects

#     pose = io.to_pose(packed_pose)
#     scorefxn = pyrosetta.create_score_function("ref2015_cst.wts")
#     scorefxn(pose)

#     virtual_root = VirtualRootMover()
#     virtual_root.set_removable(True)
#     virtual_root.set_remove(False)
#     virtual_root.apply(pose)

#     xml = XmlObjects.create_from_string(
#         """
#         <ROSETTASCRIPTS>
#             <SCOREFXNS>
#                 <ScoreFunction name="default_cst" weights="ref2015.wts">
#                     <Reweight scoretype="atom_pair_constraint" weight="20.0" />
#                 </ScoreFunction>
#             </SCOREFXNS>
#             <RESIDUE_SELECTORS>
#                 <Index name="n" resnums="1"/>
#                 <Index name="c" resnums="{size}"/>
#             </RESIDUE_SELECTORS>
#             <MOVERS>
#                 <AddConstraints name="add_csts" >
#                     <DistanceConstraintGenerator name="dist_cst"
#                         residue_selector1="n"
#                         residue_selector2="c"
#                         function="HARMONIC 2.5 0.1" />
#                 </AddConstraints>
#                 <RemoveConstraints name="rm_csts" constraint_generators="dist_cst" />
#                 <MinMover name="min"
#                     scorefxn="default_cst"
#                     chi="1"
#                     bb="1"
#                     type="dfpmin_armijo_nonmonotone"
#                     tolerance="0.0001"
#                     max_iter="500" >
#                     <MoveMap name="mm" bb="1" chi="1" jump="1" />
#                 </MinMover>
#             </MOVERS>
#             <PROTOCOLS>
#                 <Add mover="add_csts"/>
#                 <Add mover="min"/>
#                 <Add mover="rm_csts"/>
#             </PROTOCOLS>
#         </ROSETTASCRIPTS>
#         """.format(size=pose.size())
#     ).get_mover("ParsedProtocol")
#     xml.apply(pose)

#     virtual_root.set_remove(True)
#     virtual_root.apply(pose)

#     scorefxn(pose)
#     dummy_pose_1 = io.to_pose(io.pose_from_sequence("R" * 6))
#     dummy_pose_2 = io.to_pose(io.pose_from_sequence("P" * 6))

#     return dummy_pose_1, pose, dummy_pose_2

# def my_third_protocol(packed_pose, **kwargs):
#     """
#     Repack the input `PackedPose` object and set the sequence as a scoreterm.
    
#     Args:
#         packed_pose: an input `PackedPose` object.
#         **kwargs: PyRosettaCluster keyword arguments.

#     Returns:
#         Three `Pose` objects.
#     """
#     import pyrosetta
#     import pyrosetta.distributed.io as io
#     from pyrosetta.rosetta.protocols.minimization_packing import (
#         PackRotamersMover,
#     )

#     pose = io.to_pose(packed_pose)
#     pack_rotamers = PackRotamersMover(
#         scorefxn=pyrosetta.create_score_function("ref2015.wts"),
#         task=pyrosetta.standard_packer_task(pose),
#         nloop=10,
#     )
#     pack_rotamers.apply(pose)

#     dummy_pose = io.to_pose(io.pose_from_sequence("I" * 12))
#     for p in [dummy_pose.clone(), dummy_pose.clone(), pose]:
#         pyrosetta.rosetta.core.pose.setPoseExtraScore(
#             p, "SEQUENCE", p.sequence()
#         )
#         yield p

In [13]:
def create_tasks():
    """
    Yield the task dictionaries for the simulation.

    Yields:
        A single `dict` with the `options` and `extra_options` strings.
    """
    task = dict(
        options="-ex1",
        extra_options="-out:level 300 -multithreading:total_threads 1",
    )
    yield task

### 4. Launch the first simulation to generate a decoy to later reproduce

In [14]:
pdb_string = !curl https://files.rcsb.org/download/1qys.pdb
pdb_string = "\n".join(pdb_string)
input_packed_pose = score.ScorePoseTask()(io.pose_from_pdbstring(pdb_string,'1qys'))
# input_packed_pose = io.pose_from_sequence(
#     "".join([random.choice("ACDEFGHIKLMNPQRSTVWY") for i in range(100)])
# )

# wpose = pyrosetta.distributed.packed_pose.to_pose( ppose )

# pdb_info = pyrosetta.rosetta.core.pose.PDBInfo( wpose )
# name = pdb_info.name()
# print(pdb_info)
# print(name)
# name='x'
# name = wpose.pdb_info().name()
# wpose.dump_pdb( "{0}.pdb".format( os.path.join(outdir,name) ) )


protocols = [ protocol1 ] #my_first_protocol, my_second_protocol, my_third_protocol]

# outdir = '/Users/aloshbaugh/Documents/0_git/PyRosettaCluster/tutorials/output'
workdir = tempfile.TemporaryDirectory().name
output_path = os.path.abspath(os.path.join(workdir, "output"))

PyRosettaCluster(
    tasks=create_tasks,
    input_packed_pose=input_packed_pose,
    protocols=protocols,
    client=client,
    scratch_dir=workdir,
    output_path=output_path, #k
).distribute(protocols=protocols)



NameError: name 'client' is not defined

While jobs are running, you may monitor their progress using the dask dashboard diagnostics within Jupyter Lab!

In the "Dask" tab, click the various diagnostic tools _(arrows)_ to open new tabs:

![title](images/dask_labextension_4.png)

Arrange the diagnostic tool tabs within Jupyter Lab how you best see fit by clicking and dragging them:

![title](images/dask_labextension_3.png)

### 5. Visualize the results

Gather poses from disk into memory:

In [25]:
# Collect every compressed decoy written under `output_path`, then load each
# one into memory in the same order as `results`.
decoy_pattern = os.path.join(output_path, "decoys/*/*.pdb.bz2")
results = glob.glob(decoy_pattern)

packed_poses = []
for bz2file in results:
    with open(bz2file, "rb") as f:
        pdbstring = bz2.decompress(f.read()).decode()
    packed_poses.append(io.pose_from_pdbstring(pdbstring))

View poses in memory:

In [26]:
# Build the viewer for all decoys, then attach the visualization modifiers in
# the order they should be applied.
view = viewer.init(packed_poses, window_size=(800, 600))
for modifier in (
    viewer.setStyle(),
    viewer.setStyle(colorscheme="whiteCarbon", radius=0.25),
    viewer.setHydrogenBonds(),
    viewer.setHydrogens(polar_only=True),
    viewer.setDisulfides(radius=0.25),
):
    view.add(modifier)
view()

interactive(children=(IntSlider(value=0, continuous_update=False, description='Decoys', max=26), Output()), _d…

<function pyrosetta.distributed.viewer.core.Viewer.show.<locals>.view(i=0)>

Choose a decoy that you would like to reproduce. For example, here we choose index 14 in `results`:

![title](images/dask_labextension_5.png)

The `PyRosettaCluster` instance keyword arguments to reproduce this decoy are recovered using `get_instance_kwargs()`:

In [30]:
# Recover the `PyRosettaCluster` instance keyword arguments for the decoy
# chosen above (index 14 in `results`).
get_instance_kwargs(input_file=results[14])

{'ami_id': '',
 'compressed': True,
 'cores': 1,
 'dashboard_address': ':8787',
 'decoy_dir_name': 'decoys',
 'decoy_ids': [0, 1, 2],
 'dry_run': False,
 'ignore_errors': False,
 'instance_id': '',
 'logging_level': 'INFO',
 'logs_dir_name': 'logs',
 'max_workers': 1000,
 'memory': '4g',
 'min_workers': 1,
 'nstruct': 1,
 'output_path': '/tmp/tmp47a2rpxj/outputs',
 'processes': 1,
 'project_name': '2020.04.23.18.17.27.945466',
 'protocols': ['my_first_protocol', 'my_second_protocol', 'my_third_protocol'],
 'save_all': False,
 'scheduler': None,
 'scorefile_name': 'scores.json',
 'scratch_dir': '/tmp/tmp47a2rpxj',
 'seeds': ['-602709579', '1678961993', '-754765036'],
 'sha1': '',
 'simulation_name': '2020.04.23.18.17.27.945466',
 'tasks': {'options': '-ex1',
  'extra_options': '-out:level 300 -multithreading:total_threads 1'},
 'timeout': 0.5}

### 6. Launch the second simulation to reproduce a decoy

The `input_packed_pose`, `client`, and `protocols` variables need to be specified along with the `PyRosettaCluster` instance keyword arguments needed to reproduce the desired trajectory:

In [31]:
# Instance keyword arguments recovered from the chosen decoy (index 14 in `results`).
reproduction_kwargs = get_instance_kwargs(input_file=results[14])

reproduction_cluster = PyRosettaCluster(
    input_packed_pose=input_packed_pose,
    client=client,
    **reproduction_kwargs,
)
reproduction_cluster.distribute(protocols=protocols)

### 7. Visualize the reproduced decoy

In [45]:
# Any decoy on disk that is not listed in `results` (gathered after the first
# simulation) was written by the reproduction run.
new_results = [
    bz2file
    for bz2file in glob.glob(os.path.join(output_path, "decoys/*/*.pdb.bz2"))
    if bz2file not in results
]
if not new_results:
    # Fail loudly instead of hitting a NameError below, or silently reusing a
    # stale `reproduced_result` left over from an earlier execution.
    raise FileNotFoundError(
        "No new decoy found in {0}; has the second simulation finished?".format(output_path)
    )
reproduced_result = new_results[0]

with open(reproduced_result, "rb") as f:
    reproduced_packed_pose = io.pose_from_pdbstring(bz2.decompress(f.read()).decode())

In [36]:
# Build the viewer for the reproduced decoy, then attach the visualization
# modifiers in the order they should be applied.
view = viewer.init(reproduced_packed_pose, window_size=(800, 600))
for modifier in (
    viewer.setStyle(),
    viewer.setStyle(colorscheme="whiteCarbon", radius=0.25),
    viewer.setHydrogenBonds(),
    viewer.setHydrogens(polar_only=True),
    viewer.setDisulfides(radius=0.25),
):
    view.add(modifier)
view()

### 8. Optionally, perform sanity checks to confirm that the reproduced pose is identical to the original:

PyRosetta trajectories are _deterministic_ given the input random number generator seed(s)!

In [37]:
# `packed_poses` is the list loaded from `results` after the first simulation,
# so index 14 is the decoy that was chosen for reproduction.
original_pose = packed_poses[14].pose
reproduced_pose = reproduced_packed_pose.pose

#### Assert that the sequences are identical:

In [38]:
# Compare the amino-acid sequences of the two poses.
original_sequence = original_pose.sequence()
reproduced_sequence = reproduced_pose.sequence()
assert original_sequence == reproduced_sequence

#### Assert that the `total_score`s are identical:

In [40]:
# Score both poses with the same score function; the comparison is exact.
scorefxn = pyrosetta.create_score_function("ref2015.wts")
original_score = scorefxn(original_pose)
reproduced_score = scorefxn(reproduced_pose)
assert original_score == reproduced_score

#### Assert that the C$_{\alpha}$–C$_{\alpha}$ root-mean-square deviation (RMSD) is `0.0` Å:

Note: There is no need to first superimpose the `original_pose` and `reproduced_pose` because they were both generated starting from the same `input_packed_pose`

In [44]:
# C-alpha RMSD between the original and reproduced poses must be exactly zero.
ca_rmsd = pyrosetta.rosetta.core.scoring.CA_rmsd(original_pose, reproduced_pose)
assert ca_rmsd == 0.0

#### The reason the `original_pose` and `reproduced_pose` are identical is because the `seeds`, `decoy_ids`, and `protocols` attributes were identical in both `PyRosettaCluster` simulations:

In [52]:
# Read each decoy's instance kwargs once rather than re-reading both files on
# every loop iteration.
reproduced_kwargs = get_instance_kwargs(input_file=reproduced_result)
original_kwargs = get_instance_kwargs(input_file=results[14])

for attribute in ["seeds", "decoy_ids", "protocols"]:
    assert reproduced_kwargs[attribute] == original_kwargs[attribute], (
        "'{0}' differs between the original and reproduced simulations".format(attribute)
    )