In [17]:
# TO BE REMOVED!!!
%load_ext autoreload
%autoreload 2

# Imports
import os, shutil
import nglview as nv
import ipywidgets
import zipfile
import webbrowser

# Helpers
def def_dict(propierties={}):
    def_props = {'out_log_path': 'log/log.log',
                 'err_log_path': 'log/log.err',
                 'remove_tmp': True,
                 'can_write_console_log': False}
    def_props.update(propierties)
    return def_props

def show_pdbs(pdbs, surface=False):
    # Load the PDB files
    views = [nv.show_file(pdb) for pdb in pdbs]
    for view in views:
        if surface:
            view.clear()
            view.add_cartoon(color='black')
            view.add_surface(color='electrostatic', opacity=0.5)
        view.layout.width = '100%'
    return ipywidgets.HBox(views)

def display_actpass(pdb, actpass, opacity=1):
    with open(actpass, 'r') as file:
        actpass = file.read().splitlines()
        act_res = actpass[0].replace(' ', ', ')
        pas_res = actpass[1].replace(' ', ', ')
        
    # Load the PDB files
    view = nv.NGLWidget()
    view.add_component(pdb)
    view.clear()
    view.add_cartoon(color='black')
    view.add_ball_and_stick(color='grey',opacity=opacity)
    view.add_surface(selection=f'not ( {pas_res}, {act_res} )', color='white', opacity=opacity)
    if act_res != '':
        view.add_surface(selection=f'{act_res}', color='red')
    if pas_res != '':
        view.add_surface(selection=f'{pas_res}', color='green', opacity=opacity)
    view.layout.width = '100%'
    return view

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Haddock3 protein protein docking using BioExcel Building Blocks (biobb)
***
This tutorial aims to illustrate the process of **proptein protein docking** using **Haddock3**, step by step, using the **BioExcel Building Blocks library (biobb)**. 
***
**Biobb modules** used:

 - [biobb_haddock](https://github.com/bioexcel/biobb_haddock): Biobb building blocks for the Haddock3 suite.
 
**Auxiliar libraries** used:

 - [nb_conda_kernels](https://github.com/Anaconda-Platform/nb_conda_kernels): Enables a Jupyter Notebook or JupyterLab application in one conda environment to access kernels for Python, R, and other languages found in other environments.
 - [nglview](http://nglviewer.org/#nglview): Jupyter/IPython widget to interactively view molecular structures and trajectories in notebooks.
 - [ipywidgets](https://github.com/jupyter-widgets/ipywidgets): Interactive HTML widgets for Jupyter notebooks and the IPython kernel.

### Conda Installation and Launch

```console
git clone https://github.com/bioexcel/biobb_wf_ligand_parameterization.git
cd biobb_wf_ligand_parameterization
conda env create -f conda_env/environment.yml
conda activate biobb_ligand_parameterization_tutorial
jupyter-nbextension enable --py --user widgetsnbextension
jupyter-nbextension enable --py --user nglview
jupyter-notebook biobb_wf_ligand_parameterization/notebooks/biobb_ligand_parameterization_tutorial.ipynb
  ``` 

***
### Pipeline steps:
 1. [Input Parameters](#input)
 2. [Create topology](#fetch)
 3. [CAPRI evaluation](#addh)
 4. [Select Top structures](#min)
 5. [Flexible Refinement](#acpype)
 6. [2nd CAPRI evalutation](#output)
 7. [Energy Minimization Refinement](#questions)
 8. [Energy Minimization Refinement](#questions)
 
***
![](https://bioexcel.eu/wp-content/uploads/2019/04/Bioexcell_logo_1080px_transp.png)
***

<a id="input"></a>
***
## Input parameters
**Input parameters** needed:
 - **ligandCode**: 3-letter code of the ligand structure (e.g. IBP)
 - **mol_charge**: Molecule net charge (e.g. -1)
 - **pH**: Acidity or alkalinity for the small molecule. Hydrogen atoms will be added according to this pH. (e.g. 7.4)

# HADDOCK3 PPI tutorial

In [2]:
# https://www.bonvinlab.org/education/HADDOCK3/HADDOCK3-antibody-antigen/#setting-up-and-running-the-docking-with-haddock3
# data from: https://surfdrive.surf.nl/files/index.php/s/R7VHGQM9nx8QuQn

ab_id    = '4G6K'  # antibody
ag_id    = '4I1B' # antigen
ref_id   = '4G6M' 
out_path = './data/antibody/'
data_pth = '/home/rchaves/repo/biobb_haddock/biobb_haddock/test/data/haddock/'

## Preparing PDB files for docking

### Fetching the PDBs

In [68]:
# Downloading desired PDB files
# Import module
from biobb_io.api.pdb import pdb

# Create properties dict and inputs/outputs
ab_pdb  = f'{out_path}/pre/{ab_id}_0.pdb'
ag_pdb  = f'{out_path}/pre/{ag_id}_0.pdb'
ref_pdb = f'{out_path}/pre/{ref_id}_0.pdb'

# Create and launch bb
pdb(output_pdb_path = ab_pdb,  properties = def_dict({'pdb_code': ab_id}))
pdb(output_pdb_path = ag_pdb,  properties = def_dict({'pdb_code': ag_id}))
pdb(output_pdb_path = ref_pdb, properties = def_dict({'pdb_code': ref_id}))

0

### Preparing the antibody structure

In [42]:
from biobb_pdb_tools.pdb_tools.biobb_pdb_tidy import biobb_pdb_tidy
from biobb_pdb_tools.pdb_tools.biobb_pdb_selchain import biobb_pdb_selchain
from biobb_pdb_tools.pdb_tools.biobb_pdb_delhetatm import biobb_pdb_delhetatm
from biobb_pdb_tools.pdb_tools.biobb_pdb_fixinsert import biobb_pdb_fixinsert
from biobb_pdb_tools.pdb_tools.biobb_pdb_selaltloc import biobb_pdb_selaltloc
from biobb_pdb_tools.pdb_tools.biobb_pdb_keepcoord import biobb_pdb_keepcoord
from biobb_pdb_tools.pdb_tools.biobb_pdb_selres import biobb_pdb_selres

steps = [
    biobb_pdb_tidy,
    biobb_pdb_selchain,
    biobb_pdb_delhetatm,
    biobb_pdb_fixinsert,
    biobb_pdb_selaltloc,
    biobb_pdb_keepcoord,
    biobb_pdb_selres,
    biobb_pdb_tidy,
]

for ch in ['H','L']:
    step_props = {
        'biobb_pdb_tidy':     {'strict': True},
        'biobb_pdb_selchain': {'chains': ch},
        'biobb_pdb_selres':   {'selection': f'1:{ 120 if ch == "H" else 107}'},
    }
    for i, step in enumerate(steps):
        pdb_in  = f'{out_path}/pre/{ab_id}_{i}.pdb'
        if i+1 < len(steps):
            pdb_out  = f'{out_path}/pre/{ab_id}_{i+1}.pdb'
        else:
            pdb_out  = f'{out_path}/pre/{ab_id}_{ch}.pdb'
        props = def_dict(step_props.get(step.__name__, {}))
        step(input_file_path = pdb_in,  output_file_path=pdb_out,  properties = props)

In [43]:
import zipfile

# Define the zip file path
zip_file_path = f'{out_path}/pre/{ab_id}_HL.zip'

# Create a zip file and add the pdb_out file to it
with zipfile.ZipFile(zip_file_path, 'w') as zipf:
    zipf.write(f'{out_path}/pre/{ab_id}_H.pdb', arcname=f'{ab_id}_H.pdb')
    zipf.write(f'{out_path}/pre/{ab_id}_L.pdb', arcname=f'{ab_id}_L.pdb')

In [44]:
from biobb_pdb_tools.pdb_tools.biobb_pdb_merge import biobb_pdb_merge
from biobb_pdb_tools.pdb_tools.biobb_pdb_reres import biobb_pdb_reres
from biobb_pdb_tools.pdb_tools.biobb_pdb_chain import biobb_pdb_chain
from biobb_pdb_tools.pdb_tools.biobb_pdb_chainxseg import biobb_pdb_chainxseg

steps = [
    biobb_pdb_merge,
    biobb_pdb_reres,
    biobb_pdb_chain,
    biobb_pdb_chainxseg,
    biobb_pdb_tidy,
]

step_props = {
    'pdb_reres': {'number': 1},
    'biobb_pdb_chain': {'chain': 'A'},
    'biobb_pdb_tidy': {'strict': True},
}

for i, step in enumerate(steps):
    pdb_in  = (zip_file_path if i == 0 
               else f'{out_path}/pre/{ab_id}_HL_{i}.pdb')
    
    pdb_out = (f'{out_path}/pre/{ab_id}_HL_{i+1}.pdb' 
               if i+1 < len(steps) 
               else f'{out_path}/{ab_id}_clean.pdb')
    
    props = def_dict(step_props.get(step.__name__, {}))
    step(input_file_path = pdb_in,  output_file_path=pdb_out,  properties = props)

### Preparing the antigen structure

In [None]:
steps = [
    biobb_pdb_tidy,
    biobb_pdb_delhetatm,
    biobb_pdb_selaltloc,
    biobb_pdb_keepcoord,
    biobb_pdb_chain,
    biobb_pdb_chainxseg,
    biobb_pdb_tidy,
]

step_props = {
    'biobb_pdb_tidy': {'strict': True},
    'biobb_pdb_chain': {'chain': 'B'},
}

for i, step in enumerate(steps):
    pdb_in  = f'{out_path}/pre/{ag_id}_{i}.pdb'
    pdb_out = (f'{out_path}/pre/{ag_id}_{i+1}.pdb' 
               if i+1 < len(steps) 
               else f'{out_path}/{ag_id}_clean.pdb')
    props = def_dict(step_props.get(step.__name__, {}))
    step(input_file_path = pdb_in,  output_file_path=pdb_out,  properties = props)

### Preparing the reference pdb

In [84]:
steps = [
    biobb_pdb_tidy,
    biobb_pdb_selchain
]

step_props = {
    'biobb_pdb_tidy': {'strict': True},
    'biobb_pdb_selchain': {'chains': 'H,L'},
}

for i, step in enumerate(steps):
    pdb_in  = f'{out_path}pre/{ref_id}_{i}.pdb'
    pdb_out = f'{out_path}pre/{ref_id}_{i+1}.pdb' 
    props = def_dict(step_props.get(step.__name__, {}))
    step(input_file_path = pdb_in,  output_file_path=pdb_out,  properties = props)

In [133]:
steps = [
    biobb_pdb_tidy,
    biobb_pdb_selchain,
    biobb_pdb_delhetatm,
    biobb_pdb_fixinsert,
    biobb_pdb_selaltloc,
    biobb_pdb_keepcoord,
    biobb_pdb_selres,
    biobb_pdb_tidy,
]
sels = {"H": 120, "L":107, "A": ''}

for ch in ['H','L']:
    step_props = {
        'biobb_pdb_tidy':     {'strict': True},
        'biobb_pdb_selchain': {'chains': ch},
        'biobb_pdb_selres':   {'selection': f'1:{sels[ch]}'}
    }
    for i, step in enumerate(steps):
        pdb_in  = f'{out_path}/pre/{ref_id}_{i}.pdb'
        if i+1 < len(steps):
            pdb_out  = f'{out_path}/pre/{ref_id}_{i+1}.pdb'
        else:
            pdb_out  = f'{out_path}/pre/{ref_id}_{ch}.pdb'
        props = def_dict(step_props.get(step.__name__, {}))
        step(input_file_path = pdb_in,  output_file_path=pdb_out,  properties = props)

In [178]:
H = f'{out_path}/pre/{ref_id}_H.pdb'
L = f'{out_path}/pre/{ref_id}_L.pdb'
HL = f'{out_path}/pre/{ref_id}_HL.pdb'

!pdb_merge {H} {L} | pdb_reres -1 | pdb_chain -A | pdb_chainxseg | pdb_tidy -strict > {HL}

In [None]:
pdb_in  = f'{out_path}/pre/{ref_id}_0.pdb'
A = f'{out_path}/pre/{ref_id}_A.pdb'
ref_pdb_clean = f'{out_path}/{ref_id}_clean.pdb'

!pdb_selchain -A {pdb_in} | pdb_reatom -0 | pdb_chain -B | pdb_chainxseg > {A}
!pdb_merge {HL} {A} | pdb_segxchain | pdb_tidy -strict > {o}

## Defining restrains

#### Paratope
The residues of the hypervariable loops involved in binding. Identified with:
- [ProABC-2](https://academic.oup.com/bioinformatics/article/36/20/5107/5873593?login=false)

#### Epitope

From [bibliography](https://linkinghub.elsevier.com/retrieve/pii/S0022283612007863)

In [None]:
paratope_sel = '31,32,33,34,35,52,54,55,56,100,101,102,103,104,105,106,151,152,169,170,173,211,212,213,214,216'
epitope_sel  = '72,73,74,75,81,83,84,89,90,92,94,96,97,98,115,116,117'

In [50]:
ab_pdb_clean = f'{out_path}/{ab_id}_clean.pdb'
ag_pdb_clean = f'{out_path}/{ag_id}_clean.pdb'
views = show_pdbs([ab_pdb_clean, ag_pdb_clean])

In [49]:
views.children[0].add_surface(selection=paratope_sel.replace(',', ', '), color='red')
views.children[1].add_surface(selection=epitope_sel.replace(',', ', '), color='red')
views

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

In [64]:
# Obtain passive from active selection
from biobb_haddock.haddock_restraints.haddock3_passive_from_active import haddock3_passive_from_active

ab_actpass = f'{out_path}{ab_id}_actpass.txt'
ag_actpass = f'{out_path}{ag_id}_actpass.txt'

# Create the actpass for the antibody manually
with open(ab_actpass, 'w') as f:
    f.write( paratope_sel.replace(',', ' ')+'\n\n')

# For the antigen, we will use the epitope selection as the active selection
# and some reidues around it as passsive
haddock3_passive_from_active( 
    input_pdb_path      = ag_pdb_clean,
    output_actpass_path = ag_actpass,
    properties          = def_dict({'active_list' : epitope_sel}))

0

In [54]:
display_actpass(ag_pdb_clean, ag_actpass)

NGLWidget(layout=Layout(width='100%'))

#### Defining ambiguous restraints

In [56]:
# Convert active/passive to ambiguous restraints
from biobb_haddock.haddock_restraints.haddock3_actpass_to_ambig import haddock3_actpass_to_ambig

complex_tbl = f'{out_path}ambig-paratope-NMR-epitope.tbl'

haddock3_actpass_to_ambig( 
    input_actpass1_path=ab_actpass,
    input_actpass2_path=ag_actpass,    
    output_tbl_path=complex_tbl,
    properties = def_dict({
        'segid_one': 'A', 
        'segid_two': 'B'}))

0

In [None]:
# Validate tbl
!haddock3-restraints validate_tbl {complex_tbl} --silent

#### Additional restraints for multi-chain proteins

In [57]:
# Tie antibody chains together
from biobb_haddock.haddock_restraints.haddock3_restrain_bodies import haddock3_restrain_bodies

body_tbl = f'{out_path}antibody-unambig.tbl'

haddock3_restrain_bodies( 
    input_structure_path=ab_pdb_clean,
    output_tbl_path=body_tbl,
    properties = def_dict())

0

## Docking

In [15]:
# repeat variables to run the code above
ab_pdb_clean = f'{out_path}{ab_id}_clean.pdb'
ag_pdb_clean = f'{out_path}{ag_id}_clean.pdb'
ref_pdb_clean = f'{out_path}{ref_id}_clean.pdb'
complex_tbl = f'{out_path}ambig-paratope-NMR-epitope.tbl'
body_tbl = f'{out_path}antibody-unambig.tbl'

### Create topology

In [26]:
from biobb_haddock.haddock.topology import topology
step_idx = 0
mol1_output_top_zip_path = f'{out_path}/docking/{step_idx}/top_mol1.zip'
mol2_output_top_zip_path = f'{out_path}/docking/{step_idx}/top_mol2.zip'
wf_topology              = f'{out_path}/docking/{step_idx}/wf_topology.zip'

topology(mol1_input_pdb_path        = ab_pdb_clean,
         mol2_input_pdb_path        = ag_pdb_clean,
         mol1_output_top_zip_path   = mol1_output_top_zip_path,
         mol2_output_top_zip_path   = mol2_output_top_zip_path,
         output_haddock_wf_data_zip = wf_topology,
         properties                 = def_dict())

0

### Rigid Body sampling

In [29]:
from biobb_haddock.haddock.rigid_body import rigid_body

properties={
    'cfg': {
        'tolerance': 2,
        'sampling': 5, # 1000
    }
}

step_idx = 1
docking_output_zip_path = f'{out_path}docking/{step_idx}/docking.zip'
wf_rigidbody            = f'{out_path}docking/{step_idx}/wf_rigidbody.zip'

rigid_body(input_haddock_wf_data_zip     = wf_topology,
           docking_output_zip_path       = docking_output_zip_path,
           ambig_restraints_table_path   = complex_tbl,
           unambig_restraints_table_path = body_tbl,
           output_haddock_wf_data_zip    = wf_rigidbody,
           properties                    = def_dict(properties))

0

### CAPRI evaluation

In [34]:
from biobb_haddock.haddock.capri_eval import capri_eval

step_idx = 2
output_evaluation_zip_path = f'{out_path}docking/{step_idx}/caprieval.zip'
wf_caprieval               = f'{out_path}docking/{step_idx}/wf_caprieval.zip'

capri_eval(input_haddock_wf_data_zip  = wf_rigidbody,
           reference_pdb_path         = ref_pdb_clean,
           output_evaluation_zip_path = output_evaluation_zip_path,
           output_haddock_wf_data_zip = wf_caprieval,
           properties                 = def_dict())

0

### Select Top structures

In [35]:
from biobb_haddock.haddock.sele_top import sele_top

properties={
    'cfg': {
        'select': 3,
    }
}

step_idx = 3
output_selection_zip_path = f'{out_path}docking/{step_idx}/selected.zip'
wf_seletop                = f'{out_path}docking/{step_idx}/wf_seletop.zip'

sele_top(input_haddock_wf_data_zip  = wf_caprieval,
         output_selection_zip_path  = output_selection_zip_path,
         output_haddock_wf_data_zip = wf_seletop,
         properties                 = def_dict(properties))

0

### 2nd CAPRI evalutation

In [37]:
from biobb_haddock.haddock.capri_eval import capri_eval

step_idx = 4
output_evaluation_zip_path2 = f'{out_path}docking/{step_idx}/caprieval2.zip'
wf_caprieval2               = f'{out_path}docking/{step_idx}/wf_caprieval2.zip'

capri_eval(input_haddock_wf_data_zip  = wf_seletop,
           reference_pdb_path         = ref_pdb_clean,
           output_evaluation_zip_path = output_evaluation_zip_path2,
           output_haddock_wf_data_zip = wf_caprieval2,
           properties                 = def_dict())

0

### Flexible Refinement

In [40]:
from biobb_haddock.haddock.flex_ref import flex_ref

step_idx = 5
refinement_output_zip_path = f'{out_path}docking/{step_idx}/flexref.zip'
wf_flexref                 = f'{out_path}docking/{step_idx}/wf_flexref.zip'

flex_ref(input_haddock_wf_data_zip     = wf_caprieval2,
         refinement_output_zip_path    = refinement_output_zip_path,
         ambig_restraints_table_path   = complex_tbl,
         unambig_restraints_table_path = body_tbl,
         output_haddock_wf_data_zip    = wf_flexref,
         properties                    = def_dict())

0

### 3nd CAPRI evalutation

In [41]:
from biobb_haddock.haddock.capri_eval import capri_eval

step_idx = 6
output_evaluation_zip_path3 = f'{out_path}docking/{step_idx}/caprieval3.zip'
wf_caprieval3               = f'{out_path}docking/{step_idx}/wf_caprieval3.zip'

capri_eval(input_haddock_wf_data_zip  = wf_flexref,
           reference_pdb_path         = ref_pdb_clean,
           output_evaluation_zip_path = output_evaluation_zip_path3,
           output_haddock_wf_data_zip = wf_caprieval3,
           properties                 = def_dict())

0

### Energy Minimization Refinement

In [42]:
from biobb_haddock.haddock.em_ref import em_ref

step_idx = 7
refinement_output_zip_path = f'{out_path}docking/{step_idx}/emref.zip'
wf_emref                   = f'{out_path}docking/{step_idx}/wf_emref.zip'

em_ref(input_haddock_wf_data_zip  = wf_caprieval3,
       refinement_output_zip_path = refinement_output_zip_path,
       ambig_restraints_table_path   = complex_tbl,
       unambig_restraints_table_path = body_tbl,
       output_haddock_wf_data_zip = wf_emref,
       properties                 = def_dict())

0

### 4rd CAPRI evaluation

In [43]:
from biobb_haddock.haddock.capri_eval import capri_eval

step_idx = 8
output_evaluation_zip_path4 = f'{out_path}docking/{step_idx}/caprieval4.zip'
wf_caprieval4               = f'{out_path}docking/{step_idx}/wf_caprieval4.zip'

capri_eval(input_haddock_wf_data_zip  = wf_emref,
           reference_pdb_path         = ref_pdb_clean,
           output_evaluation_zip_path = output_evaluation_zip_path4,
           output_haddock_wf_data_zip = wf_caprieval4,
           properties                 = def_dict())

0

### Clustering using FCC

In [44]:
from biobb_haddock.haddock.clust_fcc import clust_fcc

step_idx = 9
output_cluster_zip_path = f'{out_path}docking/{step_idx}/clustfcc.zip'
wf_clustfcc             = f'{out_path}docking/{step_idx}/wf_clustfcc.zip'

clust_fcc(input_haddock_wf_data_zip = wf_caprieval4,
         output_cluster_zip_path    = output_cluster_zip_path,
         output_haddock_wf_data_zip = wf_clustfcc,
         properties                 = def_dict())

0

### Selecting top clusters

In [None]:
from biobb_haddock.haddock.sele_top_clusts import sele_top_clusts

properties={
    'cfg': {
        'top_models': 4,
    },
}

step_idx = 10
output_seletopclusts_zip_path = f'{out_path}docking/{step_idx}/seletopclusts.zip'
wf_seletopclusts              = f'{out_path}docking/{step_idx}/wf_seletopclusts.zip'

sele_top_clusts(input_haddock_wf_data_zip  = wf_clustfcc,
                output_selection_zip_path  = output_seletopclusts_zip_path,
                output_haddock_wf_data_zip = wf_seletopclusts,
                properties                 = def_dict(properties))

0

### 5th CAPRI evaluation

In [53]:
from biobb_haddock.haddock.capri_eval import capri_eval

step_idx = 11
output_evaluation_zip_path5 = f'{out_path}docking/{step_idx}/caprieval5.zip'
wf_caprieval5               = f'{out_path}docking/{step_idx}/wf_caprieval5.zip'

capri_eval(input_haddock_wf_data_zip  = wf_seletopclusts,
           reference_pdb_path         = ref_pdb_clean,
           output_evaluation_zip_path = output_evaluation_zip_path5,
           output_haddock_wf_data_zip = wf_caprieval5,
           properties                 = def_dict())

0

### Contacts analysis

In [54]:
from biobb_haddock.haddock.contact_map import contact_map

step_idx = 12
output_contactmap_zip_path = f'{out_path}docking/{step_idx}/contact_map.zip'
wf_contact_map             = f'{out_path}docking/{step_idx}/wf_contact_map.zip'

contact_map(input_haddock_wf_data_zip  = wf_caprieval5,
            output_contactmap_zip_path = output_contactmap_zip_path,
            output_haddock_wf_data_zip = wf_contact_map,
            properties                 = def_dict())

0

## Results

In [5]:
step_idx = 11

In [6]:
output_evaluation_zip_path5 = f'{out_path}/{step_idx}/caprieval5.zip'
wf_caprieval5 = f'{out_path}/{step_idx}/wf_caprieval5.zip'

In [7]:
import zipfile

# Unzip wf_caprieval5
with zipfile.ZipFile(wf_caprieval5, 'r') as zip_ref:
    zip_ref.extractall(out_path+'/final_results')

In [8]:
import os
import subprocess

# Change directory to the final results folder
os.chdir(out_path + '/final_results')

In [13]:
# Start the HTTP server and store the process
http_server_process = subprocess.Popen(['python3', '-m', 'http.server', '8000'])

# Function to stop the server when needed
def stop_http_server():
    http_server_process.terminate()
    http_server_process.wait()
    print("HTTP server stopped.")

NameError: name 'subprocess' is not defined

In [11]:
stop_http_server()

HTTP server stopped.


In [10]:
from IPython.core.display import HTML

# Embed an external website using an iframe
iframe_html = """
<iframe src="http://0.0.0.0:8000/analysis/11_caprieval_analysis/report.html" width="90%" height="600" style="border:none;">
    Your browser does not support iframes.
</iframe>
"""
display(HTML(iframe_html))

127.0.0.1 - - [04/Apr/2025 16:22:53] "GET /analysis/11_caprieval_analysis/report.html HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2025 16:23:31] "GET /10_seletopclusts/cluster_1_model_4.pdb.gz HTTP/1.1" 200 -
127.0.0.1 - - [04/Apr/2025 16:25:05] "GET /10_seletopclusts/cluster_1_model_2.pdb.gz HTTP/1.1" 200 -


# Barnase-Barstar protein complex

In [2]:
# Barnase-Barstar protein complex
# From Chen, R., Mintseris, J., Janin, J. and Weng, Z. (2003)
# A protein–protein docking benchmark. 
# Proteins, 52: 88-91. https://doi-org.sire.ub.edu/10.1002/prot.10390
barnase_id = "1A2P"
barnase_ch = "B"
barstar_id = "1A19"
barstar_ch = "A"
complex_id = "1BRS" # barnase_barstar_complex
complex_ch = "A,D"
out_path = 'data/barnase_barstar/'

## Prepare pdbs

In [8]:
# Downloading desired PDB files
# Import module
from biobb_io.api.pdb import pdb

# Create properties dict and inputs/outputs
barnase_pdb = f'{out_path}{barnase_id}.pdb'
barstar_pdb = f'{out_path}{barstar_id}.pdb'
complex_pdb = f'{out_path}{complex_id}.pdb'

# Create and launch bb
pdb(output_pdb_path=barnase_pdb, properties=def_dict({'pdb_code': barnase_id}))
pdb(output_pdb_path=barstar_pdb, properties=def_dict({'pdb_code': barstar_id}))
pdb(output_pdb_path=complex_pdb, properties=def_dict({'pdb_code': complex_id}))

0

In [33]:
# These are the pdbs we get from RCSB
show_pdbs([barnase_pdb, barstar_pdb, complex_pdb])

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%')), NGLWidget(layou…

In [9]:
# Filtering specific chains: we need to get rid of repeated chains
from biobb_pdb_tools.pdb_tools.biobb_pdb_selchain import biobb_pdb_selchain

# Create properties dict and inputs/outputs
barnase_pdb_ch = f'{out_path}{barnase_id}_ch.pdb'
barstar_pdb_ch = f'{out_path}{barstar_id}_ch.pdb'
complex_pdb_ch = f'{out_path}{complex_id}_ch.pdb'

# # Create and launch bb
biobb_pdb_selchain(input_file_path  = barnase_pdb,
                   output_file_path = barnase_pdb_ch,
                   properties       = def_dict({'chains': barnase_ch}))

biobb_pdb_selchain(input_file_path  = barstar_pdb,
                   output_file_path = barstar_pdb_ch,
                   properties       = def_dict({'chains': barstar_ch}))

biobb_pdb_selchain(input_file_path  = complex_pdb,
                   output_file_path = complex_pdb_ch,
                   properties       = def_dict({'chains': complex_ch}))


cmd_wrapper commnand print: pdb_selchain -B /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_8e05ce18-713e-47fc-a976-a04052729ed7/1A2P.pdb > ./data/barnase_barstar/1A2P_ch.pdb
Executing: pdb_selchain -B /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_8e05ce18-7...
Exit code: 0


cmd_wrapper commnand print: pdb_selchain -A /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c1250fc6-6c55-4f76-bcbd-4ebca48708b3/1A19.pdb > ./data/barnase_barstar/1A19_ch.pdb
Executing: pdb_selchain -A /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c1250fc6-6...
Exit code: 0


cmd_wrapper commnand print: pdb_selchain -A,D /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_93703370-0e31-4cab-ac71-c980ecc64590/1BRS.pdb > ./data/barnase_barstar/1BRS_ch.pdb
Executing: pdb_selchain -A,D /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_93703370...
Exit code: 0



0

In [35]:
# On a real case we don't have the reference to know how the proteins bind each other
# What information can use to guide the process?
show_pdbs([barnase_pdb_ch, barstar_pdb_ch, complex_pdb_ch])

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%')), NGLWidget(layou…

## Prepare AIRs

In [92]:
# Solvent accessibility: 
from biobb_haddock.haddock_restraints.haddock3_accessibility import haddock3_accessibility

# Create properties dict and inputs/outputs
barnase_sasa_out = f'{out_path}{barnase_id}_sasa_out.txt'
barstar_sasa_out = f'{out_path}{barstar_id}_sasa_out.txt'
barnase_sasa_actpass = f'{out_path}{barnase_id}_sasa_actpass.txt'
barstar_sasa_actpass = f'{out_path}{barstar_id}_sasa_actpass.txt'

cutoff = 0.3
# Barnase Chain
haddock3_accessibility(
        input_pdb_path            = barnase_pdb_ch,
        output_accessibility_path = barnase_sasa_out,
        output_actpass_path       = barnase_sasa_actpass,
        properties                = def_dict({'chain': barnase_ch,
                                              'cutoff': cutoff}))
# Barstar Chain
haddock3_accessibility(
        input_pdb_path            = barstar_pdb_ch,
        output_accessibility_path = barstar_sasa_out,
        output_actpass_path       = barstar_sasa_actpass,
        properties                = def_dict({'chain': barstar_ch,
                                              'cutoff': cutoff}))


cmd_wrapper commnand print: haddock3-restraints calc_accessibility /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_069cd347-4593-402d-ad43-4281ac6db7cf/1A2P_ch.pdb --export_to_actpass -c 0.3 &> /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_069cd347-4593-402d-ad43-4281ac6db7cf/1A2P_sasa_out.txt
Executing: haddock3-restraints calc_accessibility /home/rchaves/repo/ab_design/biobb_wf_had...
Exit code: 0


cmd_wrapper commnand print: haddock3-restraints calc_accessibility /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_31981aa4-f7d5-456f-b6fc-6416df9a9a91/1A19_ch.pdb --export_to_actpass -c 0.3 &> /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_31981aa4-f7d5-456f-b6fc-6416df9a9a91/1A19_sasa_out.txt
Executing: haddock3-restraints calc_accessibility /home/rchaves/repo/ab_design/biobb_wf_had...
Exit code: 0



0

In [None]:


# Careful! Pockets are good places to bind but have low accessibility
view1 = display_actpass(barnase_pdb_ch, barnase_sasa_actpass)
view2 = display_actpass(barstar_pdb_ch, barstar_sasa_actpass)
ipywidgets.HBox([view1, view2])

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

In [39]:
# Electrostatic energies:
# We see a postive charge in the binding site of barnase and a negative charge in the binding site of barstar
show_pdbs([barnase_pdb_ch, barstar_pdb_ch],surface=True)

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

In [None]:
# Obtain passive from active selection
from biobb_haddock.haddock_restraints.haddock3_passive_from_active import haddock3_passive_from_active

barnase_pass2act = f'{out_path}{barnase_id}_manual_actpass.txt'
barstar_pass2act = f'{out_path}{barstar_id}_manual_actpass.txt'

haddock3_passive_from_active( 
    input_pdb_path      = barnase_pdb_ch,
    output_actpass_path = barnase_pass2act,
    properties          = def_dict({'active_list' : '27,73,83,87'})
)

haddock3_passive_from_active( 
    input_pdb_path      = barstar_pdb_ch,
    output_actpass_path = barstar_pass2act,
    properties          = def_dict({'active_list' : '33,35,39,43'})
)

2025-05-05 12:55:23,211 [MainThread  ] [INFO ]  Module: biobb_haddock.haddock_restraints.haddock3_passive_from_active Version: 5.0.0
2025-05-05 12:55:23,212 [MainThread  ] [INFO ]  /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_81ddc4f2-3579-4329-b68d-d5d509769dbe directory successfully created
2025-05-05 12:55:23,213 [MainThread  ] [INFO ]  Copy: ./data/barnase_barstar/1A2P_ch.pdb to /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_81ddc4f2-3579-4329-b68d-d5d509769dbe
2025-05-05 12:55:23,214 [MainThread  ] [INFO ]  haddock3-restraints passive_from_active /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_81ddc4f2-3579-4329-b68d-d5d509769dbe/1A2P_ch.pdb 27,73,83,87 &> /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_81ddc4f2-3579-4329-b68d-d5d509769dbe/1A2P_manual_actpass.txt

2025-05-05 12:55:23,584 [MainThread  ] [INFO ]  Executing: haddock3-restraints passive_from_active /home/rchaves/repo/ab_design/biobb_wf_ha...
2025-05-05 12:55:23,585 [MainThread  ] [INFO ]  E

0

In [41]:
view1 = display_actpass(barnase_pdb_ch, barnase_pass2act, opacity=0.3)
view2 = display_actpass(barstar_pdb_ch, barstar_pass2act, opacity=0.3)
ipywidgets.HBox([view1, view2])

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

In [None]:
# Convert active/passive to ambiguous restraints
from biobb_haddock.haddock_restraints.haddock3_actpass_to_ambig import haddock3_actpass_to_ambig

# With SASA
barnase_barstar_sasa_tbl = f'{out_path}barnase_barstar_sasa.tbl'
haddock3_actpass_to_ambig( 
    input_actpass1_path = barnase_sasa_actpass,
    input_actpass2_path = barstar_sasa_actpass,    
    output_tbl_path     = barnase_barstar_sasa_tbl,
    properties          = def_dict({'pass_to_act' : True,  # tbl need actives, we use the passive as active
                                    'segid_one': barnase_ch, 
                                    'segid_two': barstar_ch}))

# With manual active/passive
barnase_barstar_manual_tbl = f'{out_path}barnase_barstar_manual.tbl'
haddock3_actpass_to_ambig( 
    input_actpass1_path = barnase_pass2act,
    input_actpass2_path = barstar_pass2act,    
    output_tbl_path     = barnase_barstar_manual_tbl,
    properties          = def_dict({'segid_one': barnase_ch,
                                    'segid_two': barstar_ch}))

# The restrain have the next format:
# assign (selection1) (selection2) distance, lower-bound correction, upper-bound correction


cmd_wrapper commnand print: haddock3-restraints active_passive_to_ambig /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_bd0c7188-d83a-4a48-b955-6428566e9585/1A2P_sasa_actpass.txt /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_bd0c7188-d83a-4a48-b955-6428566e9585/1A19_sasa_actpass.txt --segid-one B --segid-two A &> /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_bd0c7188-d83a-4a48-b955-6428566e9585/barnase_barstar_sasa.tbl
Executing: haddock3-restraints active_passive_to_ambig /home/rchaves/repo/ab_design/biobb_w...
Exit code: 0


cmd_wrapper commnand print: haddock3-restraints active_passive_to_ambig /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_3dda8207-10ba-4cc6-bd07-85841b9f6ac5/1A2P_manual_actpass.txt /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_3dda8207-10ba-4cc6-bd07-85841b9f6ac5/1A19_manual_actpass.txt --segid-one B --segid-two A &> /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_3dda8207-10ba-4cc6-bd07-85841b9f6ac5/barnase_barstar_manu

0

In [45]:
# Validate tbl
!haddock3-restraints validate_tbl {barnase_barstar_sasa_tbl} --silent
!haddock3-restraints validate_tbl {barnase_barstar_manual_tbl} --silent

[2025-05-05 12:57:16,112 cli_restraints INFO] [DEPRECATION NOTICE] This command will soon be replaced with `haddock-restraints`
[2025-05-05 12:57:16,647 cli_restraints INFO] [DEPRECATION NOTICE] This command will soon be replaced with `haddock-restraints`


## Docking

### 0. Topology

In [99]:
from biobb_haddock.haddock.topology import topology

properties=def_dict({
    'cfg': {
        'tolerance': 0,
    },
})

step_idx = 0
barnase_top_zip_path = f'{out_path}{step_idx}/barnase_top.zip'
barstar_top_zip_path = f'{out_path}{step_idx}/barstar_top.zip'
wf_topology          = f'{out_path}{step_idx}/wf_topology.zip'

topology(mol1_input_pdb_path        = barnase_pdb_ch,
         mol2_input_pdb_path        = barstar_pdb_ch,
         mol1_output_top_zip_path   = barnase_top_zip_path,
         mol2_output_top_zip_path   = barstar_top_zip_path,
         output_haddock_wf_data_zip = wf_topology,
         properties                 = properties)

CFG: tolerance
CFG: 0

cmd_wrapper commnand print: haddock3 30475695-abd9-4bd3-8eda-76c878e6f144/haddock.cfg
Executing: haddock3 30475695-abd9-4bd3-8eda-76c878e6f144/haddock.cfg...
Exit code: 0



0

### 1. Rigid body docking

In [None]:
from biobb_haddock.haddock.rigid_body import rigid_body

properties=def_dict({
    'cfg': {
        'tolerance': 5,
        'sampling': 100,
        # turn on random definiton of AIRs
        'ranair': False
    },
})

step_idx = 1
docking_output_zip_path = f'{out_path}{step_idx}/docking.zip'
wf_rigidbody            = f'{out_path}{step_idx}/wf_rigidbody.zip'

rigid_body(input_haddock_wf_data_zip   = wf_topology,
           docking_output_zip_path     = docking_output_zip_path,
           ambig_restraints_table_path = barnase_barstar_manual_tbl,
           output_haddock_wf_data_zip  = wf_rigidbody,
           properties                  = properties)

In [4]:
folder = docking_output_zip_path[:-4]
if os.path.exists(folder):
    shutil.rmtree(folder)
if not os.path.exists(folder):
    os.makedirs(folder)

with zipfile.ZipFile(docking_output_zip_path, 'r') as zip_ref:
    zip_ref.extractall(folder)

In [5]:
import pytraj as pt
import glob

pdb_dir = "data/barnase_barstar/1/docking/"
pdb_files = sorted(glob.glob(f"{pdb_dir}/*.pdb.gz"))
def show_aligned(chain):
    # Get all PDB files and sort them
    # Create a trajectory from the PDB files
    traj = pt.iterload(pdb_files, top=pdb_files[0])
    # Save the trajectory
    # pt.write_traj(f"{pdb_dir}/combined_{chain}_aligned.dcd", traj, overwrite=True)
    pt.align(traj, ref=0, mask=f'::{chain}')
    traj.save(f"{pdb_dir}/combined_{chain}_aligned_clust.pdb", options="model", overwrite=True)
    view = nv.show_pytraj(traj)
    view.layout.width = '100%'
    return view

In [6]:
view1 = show_aligned('B') # barnase
view2 = show_aligned('A') # barstar

# Display the viewer
ipywidgets.HBox([view1, view2])

HBox(children=(NGLWidget(layout=Layout(width='100%'), max_frame=99), NGLWidget(layout=Layout(width='100%'), ma…

In [54]:
view1 = nv.show_structure_file(f"{pdb_dir}/combined_A_aligned_clust.pdb", default_representation=False)
view2 = nv.show_structure_file(f"{pdb_dir}/combined_B_aligned_clust.pdb", default_representation=False)
view1.add_ribbon(color='chainIndex')
view2.add_ribbon(color='chainIndex')
view1.layout.width = '100%'
view2.layout.width = '100%'
# Display the viewer
box = ipywidgets.HBox([view1, view2])
display(box)
# Create a dropdown widget
opts = ['All']
opts.extend([pdb_file.split('/')[-1].split('.')[0] for pdb_file in pdb_files])
mdsel = ipywidgets.Dropdown(
    options=opts,
    description='Sel. model:',
    disabled=False,
)
display(mdsel)

def on_dropdown_change(change):
    """Handle dropdown selection changes.
    From https://github.com/nglviewer/nglview/issues/765
    """
    if change['type'] == 'change' and change['name'] == 'value': 
        selected_file = change['new']
        if selected_file=='All':
            view1._remote_call('setSelection', target='compList', args=["*"], 
               kwargs=dict(component_index=0))
            view2._remote_call('setSelection', target='compList', args=["*"], 
               kwargs=dict(component_index=0))
        else:
            # Extract model number from the filename
            model_num = selected_file.split('_')[1]
            print(f"Selected model: {model_num}")
            # Update the view with the selected model
            view1._remote_call('setSelection', target='compList', 
                            args=[f"/{model_num}"], 
                            kwargs=dict(component_index=0))
            # You can also update view2 if needed
            view2._remote_call('setSelection', target='compList', 
                            args=[f"/{model_num}"], 
                            kwargs=dict(component_index=0))

# Register the callback function
mdsel.observe(on_dropdown_change, names='value')

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

Dropdown(description='Sel. model:', options=('All', 'rigidbody_1', 'rigidbody_10', 'rigidbody_100', 'rigidbody…

### 2. CAPRI eval

In [None]:
from biobb_haddock.haddock.capri_eval import capri_eval

output_evaluation_zip_path = f'{out_path}2/caprieval.zip'
wf_caprieval               = f'{out_path}2/wf_caprieval.zip'

capri_eval(input_haddock_wf_data_zip  = wf_rigidbody,
           reference_pdb_path         = complex_pdb_ch,
           output_evaluation_zip_path = output_evaluation_zip_path,
           output_haddock_wf_data_zip = wf_caprieval,
           properties = def_dict())

In [31]:
with zipfile.ZipFile(wf_caprieval, 'r') as zip_ref:
    zip_ref.extractall(wf_caprieval[:-4])
    
webbrowser.open(f"http://0.0.0.0:8000/{wf_caprieval[:-4]}/analysis/2_caprieval_analysis/report.html")
!python3 -m http.server

Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
127.0.0.1 - - [05/May/2025 15:02:04] "GET /data/barnase_barstar/2/wf_caprieval/analysis/2_caprieval_analysis/report.html HTTP/1.1" 200 -

Keyboard interrupt received, exiting.
^C


In [33]:
import pandas as pd

tsv_dir = wf_caprieval[:-4]+'/2_caprieval/'
# Load the cluster and single data into pandas DataFrames
cluster_df = pd.read_csv(tsv_dir + 'capri_clt.tsv', sep='\t',comment='#')
single_df = pd.read_csv(tsv_dir + 'capri_ss.tsv', sep='\t',comment='#')

# DockQ: incorrect (<0.23), acceptable (0.23-0.49), medium (0.49-0.80), and high (>=0.80) 
display(single_df.head())
single_df = single_df.sort_values(by='dockq', ascending=False)
display(single_df.head())
display(cluster_df.head())

Unnamed: 0,model,md5,caprieval_rank,score,irmsd,fnat,lrmsd,ilrmsd,dockq,rmsd,...,dihe,elec,improper,rdcs,rg,sym,total,vdw,vean,xpcs
0,../1_rigidbody/rigidbody_21.pdb,-,1,-35.399,10.5,0.073,17.72,17.46,0.093,10.437,...,0.0,-5.081,0.0,0.0,0.0,0.0,138.927,-27.074,0.0,0.0
1,../1_rigidbody/rigidbody_32.pdb,-,2,-32.402,9.09,0.036,16.234,15.139,0.093,9.466,...,0.0,-6.379,0.0,0.0,0.0,0.0,38.012,-17.737,0.0,0.0
2,../1_rigidbody/rigidbody_23.pdb,-,3,-31.626,12.408,0.036,19.163,18.927,0.072,11.838,...,0.0,-3.388,0.0,0.0,0.0,0.0,-1.722,-25.743,0.0,0.0
3,../1_rigidbody/rigidbody_39.pdb,-,4,-30.608,12.285,0.073,18.308,18.422,0.088,11.766,...,0.0,-4.807,0.0,0.0,0.0,0.0,35.006,-33.078,0.0,0.0
4,../1_rigidbody/rigidbody_92.pdb,-,5,-30.26,1.698,0.382,5.985,4.938,0.496,1.644,...,0.0,-3.119,0.0,0.0,0.0,0.0,-22.287,-41.086,0.0,0.0


Unnamed: 0,model,md5,caprieval_rank,score,irmsd,fnat,lrmsd,ilrmsd,dockq,rmsd,...,dihe,elec,improper,rdcs,rg,sym,total,vdw,vean,xpcs
9,../1_rigidbody/rigidbody_14.pdb,-,10,-29.587,1.667,0.436,3.353,3.362,0.583,1.55,...,0.0,-3.629,0.0,0.0,0.0,0.0,62.121,-20.308,0.0,0.0
23,../1_rigidbody/rigidbody_69.pdb,-,24,-22.066,1.72,0.436,3.285,3.021,0.579,1.724,...,0.0,-1.703,0.0,0.0,0.0,0.0,19.948,-13.323,0.0,0.0
4,../1_rigidbody/rigidbody_92.pdb,-,5,-30.26,1.698,0.382,5.985,4.938,0.496,1.644,...,0.0,-3.119,0.0,0.0,0.0,0.0,-22.287,-41.086,0.0,0.0
7,../1_rigidbody/rigidbody_79.pdb,-,8,-29.777,1.76,0.382,6.202,5.176,0.485,1.69,...,0.0,-3.389,0.0,0.0,0.0,0.0,-31.637,-39.426,0.0,0.0
55,../1_rigidbody/rigidbody_70.pdb,-,56,-16.968,2.086,0.382,6.881,5.319,0.442,2.176,...,0.0,-2.606,0.0,0.0,0.0,0.0,-28.916,-29.317,0.0,0.0


Unnamed: 0,cluster_rank,cluster_id,n,under_eval,score,score_std,irmsd,irmsd_std,fnat,fnat_std,...,bsa_std,desolv,desolv_std,elec,elec_std,total,total_std,vdw,vdw_std,caprieval_rank
0,-,-,100,-,-32.509,1.786,11.071,1.37,0.055,0.018,...,106.215,-16.186,1.825,-4.914,1.062,52.556,52.263,-25.908,5.467,1


In [35]:
import gzip
import shutil
best_pdb = os.path.normpath(os.path.join(tsv_dir, single_df['model'][0]))
# Decompress the .gz file
with gzip.open(best_pdb + '.gz', 'rb') as f_in:
    with open(best_pdb, 'wb') as f_out:
        shutil.copyfileobj(f_in, f_out)

In [36]:
show_pdbs([best_pdb, complex_pdb_ch])

HBox(children=(NGLWidget(layout=Layout(width='100%')), NGLWidget(layout=Layout(width='100%'))))

In [37]:
# the reference and the input proteins have diferent number of residues/atoms, 
# so a fit based on rmsd like pytraj does fails

# # TODO: meter en structure utils
from Bio.PDB import PDBParser, PDBIO
from Bio.PDB.cealign import CEAligner

# Parse the structures
parser = PDBParser(QUIET=True)
structure1 = parser.get_structure("complex_pdb_ch", complex_pdb_ch)
structure2 = parser.get_structure("best_pdb", best_pdb)
    
# Perform CE alignment
aligner = CEAligner()
aligner.set_reference(structure1)
aligner.align(structure2)

# Save structure2 to a PDB file
output_pdb_path = f"{out_path}aligned_structure2.pdb"
io = PDBIO()
io.set_structure(structure2)
io.save(output_pdb_path)

In [38]:
view = nv.show_structure_file(output_pdb_path)
view.add_component(complex_pdb_ch)
view.clear()
view.component_0.add_cartoon(selection=f':{barnase_ch}', color='red')
view.component_0.add_cartoon(selection=f':{barstar_ch}', color='pink')
view.component_1.clear()
view.component_1.add_cartoon(selection=f':{complex_ch[0]}', color='blue')
view.component_1.add_cartoon(selection=f':{complex_ch[-1]}', color='cyan')
view

NGLWidget()

### 3. Extend docking

In [18]:
# Files are relative to the input_haddock_wf_data_zip
cfg ="""
[seletop]
select = 25

[caprieval]
reference_fname = "./data/2_caprieval/1BRS_ch.pdb"

[flexref]
tolerance = 5
ambig_fname = "./data/1_rigidbody/barnase_barstar_manual.tbl"

[caprieval]
reference_fname = "./data/2_caprieval/1BRS_ch.pdb"

[emref]
tolerance = 5
ambig_fname = "./data/1_rigidbody/barnase_barstar_manual.tbl"

[caprieval]
reference_fname = "./data/2_caprieval/1BRS_ch.pdb"
# ====================================================================
"""
haddock_config_path        = f'{out_path}docking-barnase-barstar.cfg'

with open(haddock_config_path, 'w') as config_file:
    config_file.write(cfg)

In [19]:
from biobb_haddock.haddock.haddock3_extend import haddock3_extend

output_haddock_wf_data_zip = f'{out_path}3/extend_wf.zip'  

haddock3_extend(input_haddock_wf_data_zip  = wf_caprieval,
                haddock_config_path        = haddock_config_path,
                output_haddock_wf_data_zip = output_haddock_wf_data_zip,
                properties = def_dict())

2025-05-05 16:00:28,818 [MainThread  ] [INFO ]  Module: biobb_haddock.haddock.haddock3_extend Version: 5.0.0
2025-05-05 16:00:28,819 [MainThread  ] [INFO ]  /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-4835-9154-0217837ba52f directory successfully created
2025-05-05 16:00:28,824 [MainThread  ] [INFO ]  Copy: data/barnase_barstar/2/wf_caprieval.zip to /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-4835-9154-0217837ba52f
2025-05-05 16:00:28,825 [MainThread  ] [INFO ]  Copy: data/barnase_barstar/docking-barnase-barstar.cfg to /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-4835-9154-0217837ba52f


2025-05-05 16:00:28,878 [MainThread  ] [INFO ]  haddock3 /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-4835-9154-0217837ba52f/docking-barnase-barstar.cfg --extend-run /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-4835-9154-0217837ba52f/7d79ee1d-9ae4-43fd-b2b3-37f8187cbfb0

2025-05-05 16:03:28,376 [MainThread  ] [INFO ]  Executing: haddock3 /home/rchaves/repo/ab_design/biobb_wf_haddock/sandbox_c72059a1-3c48-483...
2025-05-05 16:03:28,377 [MainThread  ] [INFO ]  Exit code: 0
2025-05-05 16:03:28,378 [MainThread  ] [INFO ]  [2025-05-05 16:00:29,420 cli INFO] 
##############################################
#                                            #
#                 HADDOCK3                   #
#                                            #
##############################################

!! Some of the HADDOCK3 components use CNS (Crystallographic and NMR System) which is free of use for non-profit applications. !!
!! For commercial use it is 

0

In [21]:
with zipfile.ZipFile(output_haddock_wf_data_zip, 'r') as zip_ref:
    zip_ref.extractall(output_haddock_wf_data_zip[:-4])
    
webbrowser.open(f"http://0.0.0.0:8000/{output_haddock_wf_data_zip[:-4]}/analysis/8_caprieval_analysis/report.html")
!python3 -m http.server

Serving HTTP on 0.0.0.0 port 8000 (http://0.0.0.0:8000/) ...
127.0.0.1 - - [05/May/2025 16:03:53] "GET /data/barnase_barstar/3/extend_wf/analysis/8_caprieval_analysis/report.html HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2025 16:03:57] code 404, message File not found
127.0.0.1 - - [05/May/2025 16:03:57] "GET /data/barnase_barstar/3/extend_wf/7_emref/emref_9.pdb.gz HTTP/1.1" 404 -
127.0.0.1 - - [05/May/2025 16:04:52] "GET /data/barnase_barstar/3/extend_wf/analysis/ HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2025 16:04:54] "GET /data/barnase_barstar/3/extend_wf/analysis/6_caprieval_analysis/ HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2025 16:04:55] "GET /data/barnase_barstar/3/extend_wf/analysis/6_caprieval_analysis/report.html HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2025 16:05:42] "GET /data/barnase_barstar/3/extend_wf/analysis/4_caprieval_analysis/ HTTP/1.1" 200 -
127.0.0.1 - - [05/May/2025 16:05:44] "GET /data/barnase_barstar/3/extend_wf/analysis/4_caprieval_analysis/report.html HTTP/1.1" 200 -
127.

In [23]:
import pandas as pd

tsv_dir = output_haddock_wf_data_zip[:-4]+'/8_caprieval/'
# Load the cluster and single data into pandas DataFrames
cluster_df = pd.read_csv(tsv_dir + 'capri_clt.tsv', sep='\t',comment='#')
single_df = pd.read_csv(tsv_dir + 'capri_ss.tsv', sep='\t',comment='#')

# DockQ: incorrect (<0.23), acceptable (0.23-0.49), medium (0.49-0.80), and high (>=0.80) 
display(single_df.head())
single_df = single_df.sort_values(by='dockq', ascending=False)
display(single_df.head())
display(cluster_df.head())

Unnamed: 0,model,md5,caprieval_rank,score,irmsd,fnat,lrmsd,ilrmsd,dockq,rmsd,...,dihe,elec,improper,rdcs,rg,sym,total,vdw,vean,xpcs
0,../7_emref/emref_18.pdb,-,1,-120.098,10.365,0.127,15.461,16.703,0.127,9.804,...,0.0,-274.875,0.0,0.0,0.0,0.0,-325.708,-51.643,0.0,0.0
1,../7_emref/emref_3.pdb,-,2,-115.348,12.775,0.055,18.909,18.743,0.079,11.925,...,0.0,-345.125,0.0,0.0,0.0,0.0,-382.854,-40.634,0.0,0.0
2,../7_emref/emref_9.pdb,-,3,-111.546,1.469,0.6,2.43,2.393,0.678,1.421,...,0.0,-340.269,0.0,0.0,0.0,0.0,-379.879,-41.365,0.0,0.0
3,../7_emref/emref_15.pdb,-,4,-105.09,11.924,0.036,17.889,18.371,0.079,11.277,...,0.0,-252.707,0.0,0.0,0.0,0.0,-286.816,-36.093,0.0,0.0
4,../7_emref/emref_4.pdb,-,5,-104.415,12.638,0.091,18.653,18.593,0.092,12.072,...,0.0,-276.266,0.0,0.0,0.0,0.0,-310.324,-40.087,0.0,0.0


Unnamed: 0,model,md5,caprieval_rank,score,irmsd,fnat,lrmsd,ilrmsd,dockq,rmsd,...,dihe,elec,improper,rdcs,rg,sym,total,vdw,vean,xpcs
2,../7_emref/emref_9.pdb,-,3,-111.546,1.469,0.6,2.43,2.393,0.678,1.421,...,0.0,-340.269,0.0,0.0,0.0,0.0,-379.879,-41.365,0.0,0.0
10,../7_emref/emref_10.pdb,-,11,-95.662,1.42,0.582,2.56,2.482,0.675,1.416,...,0.0,-304.442,0.0,0.0,0.0,0.0,-327.697,-29.073,0.0,0.0
11,../7_emref/emref_2.pdb,-,12,-95.461,1.483,0.509,5.794,4.232,0.566,1.619,...,0.0,-331.2,0.0,0.0,0.0,0.0,-346.908,-24.219,0.0,0.0
14,../7_emref/emref_7.pdb,-,15,-87.22,2.305,0.4,7.167,5.766,0.427,2.437,...,0.0,-261.264,0.0,0.0,0.0,0.0,-291.301,-30.562,0.0,0.0
16,../7_emref/emref_24.pdb,-,17,-81.04,2.174,0.345,7.846,5.798,0.403,2.448,...,0.0,-222.034,0.0,0.0,0.0,0.0,-216.602,-28.297,0.0,0.0


Unnamed: 0,cluster_rank,cluster_id,n,under_eval,score,score_std,irmsd,irmsd_std,fnat,fnat_std,...,bsa_std,desolv,desolv_std,elec,elec_std,total,total_std,vdw,vdw_std,caprieval_rank
0,-,-,25,-,-113.021,5.49,9.133,4.508,0.205,0.231,...,114.879,-10.124,6.382,-303.244,40.261,-343.814,40.004,-42.434,5.688,1
