In [5]:
from __future__ import print_function, division, absolute_import
%load_ext autoreload
%autoreload 2

In [6]:
import ppmod
import ppmod.segment_assignment as sa
import ppmod.topology as t

In [7]:
# Load the PLY file, then convert the loaded faces to edge notation.
vfaces = t.load_vfaces("ply/01_tetrahedron.ply")
edge_topology = t.convert_vface_to_efaces(vfaces)

# Explore all topologies, then enumerate and name them (with their permutations).
# NOTE(review): the meaning of the positional `True` flag is not visible here -- confirm in ppmod.topology.explore.
tops = t.name_topologies_and_permutations(t.explore(edge_topology, True))

# Calculate the topological contact order (TCO) and display the table sorted
# by the ranking columns, in priority order.
df = t.calculate_TCO(tops)
sort_columns = ["num_AP", "num_cross", "TCO", "stdTCO", "segments"]
df.sort_values(by=sort_columns)

id,num_AP,num_P,num_cross,topo,reflected
0,2,4,0,ABCADECFEbDf,
1,3,3,0,ABCADECFdBef,
2,3,3,0,ABCADEbDFceF,


Unnamed: 0,min,max,TCO,stdTCO,segments,num_AP,num_P,num_cross,ref
1.1,3,7,4.33333,1.59861,ABCADECFEbDf,2,4,0,True
1.1,3,7,4.33333,1.59861,bDfABCADECFE,2,4,0,True
1.5,3,8,4.33333,1.97203,DECFEbDfABCA,2,4,0,True
1.6,3,8,4.33333,1.97203,ECFEbDfABCAD,2,4,0,True
1.8,3,9,4.66667,2.21108,FEbDfABCADEC,2,4,0,True
1.3,3,9,4.66667,2.21108,CADECFEbDfAB,2,4,0,True
1.11,3,7,5.0,1.73205,DfABCADECFEb,2,4,0,True
1.12,3,7,5.0,1.73205,fABCADECFEbD,2,4,0,True
1.9,3,9,5.33333,2.21108,EbDfABCADECF,2,4,0,True
1.2,3,9,5.33333,2.21108,BCADECFEbDfA,2,4,0,True


In [8]:
#We chose the topology with the smallest TCO and stdTCO and only two antiparallel
#(num_AP = 2; TCO = 4.33333, stdTCO = 1.59861 in the table above).
topology = t.standard("bDfABCADECFE") #This is 1.10 or 1.1R

In [9]:
#Assign the segments and linkers.
#The returned object is kept in `gui`; its `result_text.value` is read in a
#later cell to retrieve the assignment text.
gui = sa.segment_assignment_gui(topology)

In [10]:
#the assignment can be retrieved using:
seg_assignment_str = gui.result_text.value
#When we first designed the tetrahedron there were fewer segments available.
#For reproducibility we are using the previous assignment; this deliberately
#overrides the value read from the GUI on the line above.
seg_assignment_str ="""
A->APHshSN:APHshSN
B->P3SN:P4SN
C->BCRSN:BCRSN
D->GCNshSN:GCNshSN
E->P7SN:P8SN
F->P5SN:P6SN
"""
model_name = "TET12SN"
# NOTE(review): `linker` is not referenced again in the visible cells -- confirm where it is consumed.
linker = "SGPGS"

#Both helpers take the same assignment text: the first result is used later
#to build the list of included pairs, the second is applied to the topology.
seg_assignment = sa.segment_assignments_to_dict(seg_assignment_str)
real_segments = sa.do_assignment_replacements(topology, seg_assignment_str)

#one can also manually edit the sequence
# NOTE(review): the config-writing cell passes `annotated_seq`, not `annotated_seq_edit` --
# confirm that manual edits made in the text GUI are actually picked up.
annotated_seq = sa.get_annotated_sequence(real_segments, 'segments.xlsx', N_tag="M",C_tag="LEHHHHHHHH")
annotated_seq_edit = sa.text_edit_gui(annotated_seq, caption="-".join(real_segments))

In [11]:
#Now write everything to a self-contained config file!
#Information about pairs and 3D templates is needed and is read from 'segments.xlsx', sheet 'pairs'.

#Each assignment value is joined with ":" to form the pair names to look up.
included_pairs = [":".join(s) for s in seg_assignment.values()]
pairs_info = sa.get_included_pairs_info('segments.xlsx', sheetname='pairs', included_pairs=included_pairs)
sa.write_make_config(model_name, annotated_seq, pairs_info, out_name='make_config.py')

# Now run the model building!


The rest of the pipeline is handled via Snakemake (a workflow manager). There is a Snakemake file that can be customized.
Snakemake takes care of dependencies and of rerunning tasks and analyses. `make_config.py` generates a JSON file with all details (which is less human-readable). Next, Chimera generates a model of the protein. Modeller then brings the various segments together and refines the twist via homology modelling. All the alignment files are generated automatically.


In [None]:
#this takes a bit less than an hour on a modern Intel processor. -j3 runs up to three jobs in parallel.
# NOTE(review): `bash snakemake` makes bash read the `snakemake` file as a shell script.
# If snakemake is installed as a Python entry point, `!snakemake -j3` is presumably what is intended -- confirm.
!bash snakemake -j3

In [12]:
#view the generated models in Chimera
#(the glob matches every 03-*.pdb file one directory below the working directory)
!bash chimera */03-*.pdb

In [30]:
#or directly in the notebook
import mdtraj 
import mdtraj.html 
from glob import glob
import ipywidgets as widgets
from IPython.display import display

mdtraj.html.enable_notebook()

# Load the models first: the slider range below depends on models.n_frames,
# so `models` must exist before the slider is built (fresh-kernel safe).
# Sorting makes the frame order deterministic across runs and platforms.
files = sorted(glob("*/03-*.pdb"))
if not files:
    raise IOError("No model files match '*/03-*.pdb'; run the model building step first.")
models = mdtraj.load(files)

# Create the view before wiring the callback that updates it.
w = mdtraj.html.TrajectoryView(models, secondaryStructure="ribbon")

def on_value_change(change):
    """Show the model selected by the slider.

    `change` is the change dict passed by `slider.observe`; its 'new' entry
    holds the slider's new value, which is used as the frame index of `w`.
    """
    w.frame = change['new']

# A frame index is an integer, so use an integer slider over 0 .. n_frames-1.
slider = widgets.IntSlider(value=0, min=0, max=models.n_frames-1, step=1, description='Model:')
slider.observe(on_value_change, names='value')

display(slider)
w

<IPython.core.display.Javascript object>

In [29]:
# Alternative display of the same models; relies on `models` loaded in the previous cell.
# NOTE(review): presumably TrajectorySliderView bundles its own frame slider -- confirm against the mdtraj version in use.
mdtraj.html.TrajectorySliderView(models, secondaryStructure="ribbon")