In [1]:
# Basic imports
import numpy as np
import pandas as pd
from pathlib import Path

# OpenMM
import simtk.openmm as omm
import simtk.openmm.app as app
from simtk.unit import *

# ProDy
import prody as pdy

# Bio
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.Polypeptide import PPBuilder

# Project
import project
project.setup()

# Utils
from core.utils import read_benchmark_table, get_structure_paths

In [2]:
# Benchmark paths
pp_benchmark_dir = Path('/home/semyon/mipt/GPCR-TEAM/pp_benchmark_v5')
pp_benchmark_structures = pp_benchmark_dir / 'benchmark5' / 'structures'
pp_benchmark_table_path = pp_benchmark_dir / 'Table_BM5.xlsx'

In [3]:
benchmark_table = read_benchmark_table(pp_benchmark_table_path)
benchmark_table.head()

1,Cat.,PDB ID 1,Protein 1,PDB ID 2,Protein 2,I-RMSD (Å),ΔASA(Å2),BM version introduced,Difficulty,Complex ID,Chains R,Chains L
Complex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1AHW_AB:C,A,1FGN_LH,Fab 5g9,1TFH_A,Tissue factor,0.69,1899,2,Rigid-body,1AHW,AB,C
1BVK_DE:F,A,1BVL_BA,Fv Hulys11,3LZT_,HEW lysozyme,1.24,1321,2,Rigid-body,1BVK,DE,F
1DQJ_AB:C,A,1DQQ_CD,Fab Hyhel63,3LZT_,HEW lysozyme,0.75,1765,2,Rigid-body,1DQJ,AB,C
1E6J_HL:P,A,1E6O_HL,Fab,1A43_,HIV-1 capsid protein p24,1.05,1245,2,Rigid-body,1E6J,HL,P
1JPS_HL:T,A,1JPT_HL,Fab D3H44,1TFH_B,Tissue factor,0.51,1852,2,Rigid-body,1JPS,HL,T


In [4]:
# Test cell
complex_id = benchmark_table.index[4]
paths = get_structure_paths(complex_id, benchmark_table, pp_benchmark_structures)
print('Complex:', complex_id)
print('Structures: \n\t{0}'.format("\n\t".join(paths)))

Complex: 1JPS_HL:T
Structures: 
	/home/semyon/mipt/GPCR-TEAM/pp_benchmark_v5/benchmark5/structures/1JPS_r_u.pdb
	/home/semyon/mipt/GPCR-TEAM/pp_benchmark_v5/benchmark5/structures/1JPS_r_b.pdb
	/home/semyon/mipt/GPCR-TEAM/pp_benchmark_v5/benchmark5/structures/1JPS_l_u.pdb
	/home/semyon/mipt/GPCR-TEAM/pp_benchmark_v5/benchmark5/structures/1JPS_l_b.pdb


In [5]:
from core.refine import *

In [12]:
pp_benchmark_bound_complex = str(
    project.data_path / 'benchmark' / 'modeled_pp5_bound' /
    (benchmark_table.loc[complex_id, 'Complex ID'] + '.pdb')
)
pp_benchmark_unbound_complex = str(
    project.data_path / 'benchmark' / 'modeled_pp5_unbound' /
    (benchmark_table.loc[complex_id, 'Complex ID'] + '.pdb')
)

In [13]:
project.project_path

PosixPath('/home/semyon/mipt/GPCR-TEAM/DiplomaPython')

In [14]:
print('Create protein complex')
omm_structure = app.PDBFile(pp_benchmark_bound_complex)
chains = list(omm_structure.topology.chains())
chains.sort(key=lambda c: len(list(c.residues())))
selections = [f"chain {chain.id}" for chain in chains]
print('Selections:', selections)
complex_bound = ProteinComplex(
    pp_benchmark_bound_complex, 'amber14/protein.ff14SB.xml',
    selections,
    cid='complex_bound'
)

Create protein complex
@> 9579 atoms and 1 coordinate set(s) were parsed in 0.09s.
Selections: ['chain C', 'chain A', 'chain B']


In [15]:
print('Create protein complex')
omm_structure = app.PDBFile(pp_benchmark_unbound_complex)
chains = list(omm_structure.topology.chains())
chains.sort(key=lambda c: len(list(c.residues())))
selections = [f"chain {chain.id}" for chain in chains]
print('Selections:', selections)
complex_unbound = ProteinComplex(
    pp_benchmark_unbound_complex, 'amber14/protein.ff14SB.xml',
    selections,
    cid='complex_unbound'
)

Create protein complex
@> 9289 atoms and 1 coordinate set(s) were parsed in 0.08s.
Selections: ['chain C', 'chain A', 'chain B']


ValueError: No template found for residue 504 (VAL).  The set of atoms matches CVAL, but the bonds are different.

In [48]:
def get_chains(pdy_data):
    pass

In [49]:
pp_benchmark_unbound_complex_original = str(
    project.data_path / 'benchmark' / 'pp5_unbound' /
    (benchmark_table.loc[complex_id, 'Complex ID'] + '.pdb')
)
complex_unbound_pdy = pdy.parsePDB(pp_benchmark_unbound_complex_original)

@> 4708 atoms and 1 coordinate set(s) were parsed in 0.05s.


In [50]:
chids = complex_unbound_pdy.getChids()
print(chids, chids.shape)

['H' 'H' 'H' ... 'B' 'B' 'B'] (4708,)


In [51]:
resnames = complex_unbound_pdy.getResnames()
print(resnames, resnames.shape)

['GLU' 'GLU' 'GLU' ... 'MET' 'MET' 'MET'] (4708,)


In [52]:
resindices = complex_unbound_pdy.getResindices()
print(resindices, resindices.shape)

[  0   0   0 ... 606 606 606] (4708,)


In [53]:
for ch in complex_unbound_pdy.iterChains():
    print(ch)

Chain H
Chain L
Chain B


In [54]:
pp_benchmark_bound_complex_original = str(
    project.data_path / 'benchmark' / 'pp5_bound' /
    (benchmark_table.loc[complex_id, 'Complex ID'] + '.pdb')
)
complex_bound_pdy = pdy.parsePDB(pp_benchmark_bound_complex_original)

@> 4858 atoms and 1 coordinate set(s) were parsed in 0.06s.


In [55]:
chids = complex_bound_pdy.getChids()
print(chids, chids.shape)

['H' 'H' 'H' ... 'T' 'T' 'T'] (4858,)


In [56]:
resnames = complex_bound_pdy.getResnames()
print(resnames, resnames.shape)

['GLU' 'GLU' 'GLU' ... 'GLY' 'GLY' 'GLY'] (4858,)


In [57]:
resindices = complex_bound_pdy.getResindices()
print(resindices, resindices.shape)

[  0   0   0 ... 625 625 625] (4858,)


In [58]:
for ch in complex_bound_pdy.iterChains():
    print(ch)

Chain H
Chain L
Chain T
