# INITIALIZATION

In [1]:
# Loading some modules
%load_ext autoreload
%autoreload 2

# Getting basic libraries:
import os, sys, math
import numpy as np
from Bio.PDB import *

# Libraries for structure checking: --> 
import biobb_structure_checking
import biobb_structure_checking.constants as cts
from biobb_structure_checking.structure_checking import StructureChecking
base_dir_path=biobb_structure_checking.__path__[0]
args = cts.set_defaults(base_dir_path,{'notebook':True})

# PREPARATION

All data should be in a folder named assignment_data

    1 - Obtain the required structure from the PDB

In [2]:
# Loading the protein into a variable
f=open("./assignment_data/6m0j.pdb", "r")
parser = PDBParser()
structure = parser.get_structure("6m0j",f)
st=structure

    2 - Check at PDB which is the composition of a “Biological unit”. Remove all chains but those involved in the biological unit, if necessary

In [3]:
# Preparing the paths
base_path = './assignment_data/'
args['output_format'] = "pdb"
args['keep_canonical'] = False
args['input_structure_path'] = base_path + '6m0j.cif'
args['output_structure_path'] = base_path + '6m0j_fixed.pdb'
args['output_structure_path_charges'] = base_path + '6m0j_fixed.pdbqt'
args['time_limit'] = False
args['nocache'] = False
args['copy_input'] = False
args['build_warnings'] = False
args['debug'] = False
args['verbose'] = False
args['coords_only'] = False
args['overwrite'] = False

In [4]:
# Intializing the checking engine, loading the structure and showing info
st_c = StructureChecking(base_dir_path, args)

Canonical sequence for model 0:
Structure ./assignment_data/6m0j.cif loaded
 PDB id: 6M0J 
 Title: Crystal structure of 2019-nCoV spike receptor-binding domain bound with ACE2
 Experimental method: X-RAY DIFFRACTION
 Keywords: VIRAL PROTEIN/HYDROLASE
 Resolution (A): 2.4500

 Num. models: 1
 Num. chains: 2 (A: Protein, E: Protein)
 Num. residues:  876
 Num. residues with ins. codes:  0
 Num. residues with H atoms: 0
 Num. HETATM residues:  85
 Num. ligands or modified residues:  5
 Num. water mol.:  80
 Num. atoms:  6543
Metal/Ion residues found
 ZN A901
Small mol ligands found
NAG A902
NAG A903
NAG A904
NAG E601


    3 - Remove all heteroatoms


In [5]:
#Remove Hydrogen
st_c.rem_hydrogen()
#Remove water
st_c.water("yes")
#Remove metals
st_c.metals("All")
#Remove ligands
st_c.ligands("All")

Running rem_hydrogen.
No residues with Hydrogen atoms found
Running water. Options: yes
Detected 80 Water molecules
Canonical sequence for model 0:
Removed 80 Water molecules
Running metals. Options: All
Found 1 Metal ions
  ZN A901.ZN 
Canonical sequence for model 0:
Metal Atoms removed All (1)
Running ligands. Options: All
Detected 4 Ligands
 NAG A902
 NAG A903
 NAG A904
 NAG E601
Canonical sequence for model 0:
Ligands removed All (4)


    4 - Perform a quality checking on the structures, and add missing side-chains and hydrogen atoms and atom charges, using the biobb_structure_checking module

In [6]:
#Fix amides
st_c.amide("All")
#fix the chirality of some aa
st_c.chiral("All") 
#fix the backbone
st_c.backbone('--fix_atoms All --fix_chain none --add_caps none')
#detects and rebuilds missing protein side chains
st_c.fixside("All")
#add hydrogens
st_c.add_hydrogen("auto")

#you could also do everything with st_c.checkall() but we did it manually so its clearer what we do
st_c._save_structure(args['output_structure_path'])

f.close()

Running amide. Options: All
Found 6 unusual contact(s) involving amide atoms
 VAL A59.O    ASN A63.OD1     2.784 A
 ALA A80.O    GLN A101.OE1    2.931 A
 GLN A81.OE1  ASN A103.OD1    2.859 A
 ASN A134.ND2 ASN A137.N      2.987 A
 GLU A150.O   ASN A154.OD1    2.871 A
 ARG E357.NH1 ASN E394.ND2    2.964 A
Amide residues fixed All (7)
Rechecking
Found 4 unusual contact(s) involving amide atoms
 GLN A81.NE2  ASN A103.ND2    2.859 A
 ASN A103.OD1 ASN A194.OD1    2.485 A
 ARG E357.NH1 ASN E394.ND2    3.058 A
 ASN E394.OD1 GLU E516.OE2    2.870 A
Running chiral. Options: All
Found no residues with incorrect side-chain chirality
Running backbone. Options: --fix_atoms All --fix_chain none --add_caps none
Found 2 Residues with missing backbone atoms
 ASP A615   OXT
 GLY E526   OXT
No backbone breaks
No unexpected backbone links
Capping terminal ends
True terminal residues: A19,A615,E333,E526
No caps added
Fixing missing backbone atoms
Adding missing backbone atoms
ASP A615
  Adding new atom OXT


## STEP 1

In [7]:
#Finding out atoms in both chains
def chain_atoms(structure, dt):
    model =structure[0]
    t=0
    chain1_atoms=[]
    chain2_atoms=[]
    for chain in model:
        if t==0:
            t=1
            for residual in chain.get_residues():
                for atom1 in residual:
                    chain1_atoms.append(atom1)
        else:
            for residual in chain.get_residues():
                for atom2 in residual:
                    chain2_atoms.append(atom2)
    return chain_comparison(chain1_atoms, chain2_atoms, dt)

#comparing atoms of both chains
def chain_comparison(chain1_atoms, chain2_atoms, dt):
    interface_residues=set()
    for a1 in chain1_atoms:
        for a2 in chain2_atoms:
            if a1.get_parent().id==a1.get_parent().id and a2.get_parent().id == a2.get_parent().id:
                distance=a1-a2
            if distance<=dt:#dt is the treshold of the distance
                interface_residues.add(a1.get_parent().id[1])
                interface_residues.add(a2.get_parent().id[1])
    return interface_residues

dt=5
print(chain_atoms(structure, dt))

{393, 19, 24, 27, 28, 30, 31, 417, 34, 35, 37, 38, 41, 42, 45, 446, 447, 449, 453, 455, 456, 330, 79, 82, 83, 473, 475, 476, 353, 354, 355, 484, 357, 486, 487, 489, 493, 496, 498, 500, 501, 502, 505}


## Step 2

Step 2 preparation

In [None]:
from modules_classes import *

def calc_solvation(st, res):
    '''Solvation energy based on ASA'''
    solv = 0.
    for at in res.get_atoms():
        if 'EXP_NACCESS' not in at.xtra:
            continue
        s = float(at.xtra['EXP_NACCESS'])* at.xtra['vdw'].fsrf
        solv += s
    return solv


def vdw_int(at1, at2, r):
    '''Vdw interaction energy between two atoms'''
    eps12 = math.sqrt(at1.xtra['vdw'].eps * at2.xtra['vdw'].eps)
    sig12_2 = at1.xtra['vdw'].sig * at2.xtra['vdw'].sig
    return 4 * eps12 * (sig12_2**6/r**12 - sig12_2**3/r**6)

def MH_diel(r):
    '''Mehler-Solmajer dielectric'''
    return 86.9525 / (1 - 7.7839 * math.exp(-0.3153 * r)) - 8.5525

def elec_int(at1, at2, r):
    '''Electrostatic interaction energy between two atoms at r distance'''
    return 332.16 * at1.xtra['charge'] * at2.xtra['charge'] / MH_diel(r) / r

def calc_int_energies(st, res):
    elec = 0.
    vdw = 0.
    for at1 in res.get_atoms():
        for at2 in st.get_atoms():
        # skip same chain atom pairs
            if at2.get_parent().get_parent() != res.get_parent():
                r = at1 - at2
                e = elec_int(at1, at2, r)
                elec += e
                e = vdw_int(at1, at2, r)
                vdw += e
    return elec, vdw

def add_atom_parameters(st, res_lib, ff_params):
    ''' Adds parameters from libraries to atom .xtra field
        For not recognized atoms, issues a warning and put default parameters
    '''
    for at in st.get_atoms():
        resname = at.get_parent().get_resname()
        params = res_lib.get_params(resname, at.id)
        if not params:
            #print("WARNING: residue/atom pair not in library ("+atom_id(at) + ')')
            at.xtra['atom_type'] = at.element
            at.xtra['charge'] = 0
        else:
            at.xtra['atom_type'] = params.at_type
            at.xtra['charge'] = params.charge
        at.xtra['vdw'] = ff_params.at_types[at.xtra['atom_type']]

Step 2 execution

In [3]:

wd=str(input("Input working directory: "))
residue_library = ResiduesDataLib(wd+'/assignment_data/parameters_step2.lib')
ff_params = VdwParamset(wd+'/assignment_data/parameters_vanderw.txt')
NACCESS_BINARY =wd+'/soft/NACCESS/naccess'

add_atom_parameters(st, residue_library,ff_params)
def chain_atoms2(structure, dt):
    model =structure[0]
    t=0
    chain1_atoms=set()
    chain2_atoms=set()
    for chain in model:
        if t==0:
            t=1
            chain1=chain
        else: chain2=chain
    NeighborSearch_chain2 = NeighborSearch(list(chain2.get_atoms()))
    for residual1 in chain1.get_residues():
        for atom1 in residual1:
            Neighbor_atom_chain2 = NeighborSearch_chain2.search(atom1.coord, dt)
            for atom2 in Neighbor_atom_chain2:
                residual2=atom2.get_parent()
                chain1_atoms.add(residual1)
                chain2_atoms.add(residual2) 
    return calc(chain1_atoms, chain2_atoms)

def calc(chain1_atoms, chain2_atoms):
    s=sA=sE=e=v=0
    for res in chain1_atoms:
        s += calc_solvation(st[0], res)
        sA += calc_solvation(st[0]['A'], res)
        E, V = calc_int_energies(st, res)
        e+=E
        v+=V
    for res in chain2_atoms:
        s += calc_solvation(st[0], res)
        sE += calc_solvation(st[0]['E'], res)
        E, V = calc_int_energies(st, res)
        e+=E
        v+=V
    G = e + v + s - sA - sE
    print("Electrostatic interactions:", e)
    print("Van der Waals interactions", v)
    print("Solvation A-B complex:", s)
    print("Solvation of A:", sA)
    print("Solvation of B", sE)
    return("Interaction energy between components in A-B complex:",G)
print(chain_atoms2(structure, dt))

Input working directory: /home/jj/Desktop/Bioinformatics/Github/Bioinformatics_p/Biophysics/Biophysics_A1
Electrostatic interactions: 32.39369940518064
Van der Waals interactions -153.6372390102208
Solvation A-B complex: -10.092080999999999
Solvation of A: -21.994508
Solvation of B 11.902427
('Interaction energy between components in A-B complex:', -121.24353960504017)


Electrostatic interactions: 3.7189757150511804
Van der Waals interactions -154.05349663187712
Solvation A-B complex: -10.785248999999999
Solvation of A: -22.42739
Solvation of B 11.642141
('Interaction energy between components in A-B complex:', -150.33452091682594)


KeyboardInterrupt: 