In [3]:
# Module-level PDB parser shared by the helper functions defined in this notebook.
# NOTE(review): PERMISSIVE=1 presumably keeps parsing tolerant of malformed records — confirm against the Bio.PDB docs.
from Bio.PDB.PDBParser import PDBParser
parser = PDBParser(PERMISSIVE=1)
# Ignore every PDBConstructionWarning issued from this point on.
import warnings
from Bio.PDB.PDBExceptions import PDBConstructionWarning
warnings.simplefilter('ignore', PDBConstructionWarning)

In [4]:
def get_characteristics_of_structure(name):
    """Parse ``<name>.pdb`` and print / return a short inventory of its contents.

    The file name is built from the lower-cased ``name`` plus ``.pdb`` and is
    read with the module-level ``parser``.

    Every residue entry of every chain in every model is visited, so the
    per-chain totals include waters and other hetero residues as well as
    ordinary residues.

    Args:
        name: Structure identifier (case-insensitive), e.g. ``'7neh'``.

    Returns:
        A tuple ``(residues_per_chain, n_waters, hetero_flags)`` where
        ``residues_per_chain`` maps chain id -> number of residue entries,
        ``n_waters`` counts entries whose hetero flag is ``'W'`` and
        ``hetero_flags`` is the set of all other non-blank hetero flags.
    """
    pdb_id = name.lower()
    structure = parser.get_structure(pdb_id, pdb_id + '.pdb')

    n_chains = 0
    n_waters = 0
    hetero_flags = set()
    residues_per_chain = {}

    for model in structure:
        for chain in model:
            n_chains += 1
            n_residues = 0
            for residue in chain:
                n_residues += 1
                # First field of the residue id is the hetero flag:
                # ' ' for ordinary residues, 'W' for water, anything else is a hetero residue.
                het = residue.get_id()[0]
                if het == 'W':
                    n_waters += 1
                elif het != ' ':
                    hetero_flags.add(het)
            residues_per_chain[chain.get_id()] = n_residues

    print(f'--------- {pdb_id.upper()} ---------')
    print(f'The total number of chains is {n_chains}')
    print('The number of residues per chain is:')
    for chain_id, n_residues in residues_per_chain.items():
        print(f'Chain {chain_id} contains {n_residues} residues.')
    print(f'Number of water molecules is {n_waters}')
    print(f'The ligands that present in the structure are {hetero_flags}')

    return residues_per_chain, n_waters, hetero_flags

In [5]:
# Print and keep the chain / residue / water / hetero-residue summary for 7neh.pdb.
struc_7neh = get_characteristics_of_structure(name='7neh')

--------- 7NEH ---------
The total number of chains is 4
The number of residues per chain is:
Chain H contains 468 residues.
Chain L contains 387 residues.
Chain E contains 296 residues.
Chain A contains 3 residues.
Number of water molecules is 496
The ligands that present in the structure are {'H_FUC', 'H_NAG', 'H_EDO', 'H_ CL', 'H_SO4', 'H_NO3', 'H_PEG'}


In [6]:
# Print and keep the chain / residue / water / hetero-residue summary for 7neg.pdb.
struc_7neg = get_characteristics_of_structure(name='7neg')

--------- 7NEG ---------
The total number of chains is 4
The number of residues per chain is:
Chain H contains 285 residues.
Chain L contains 258 residues.
Chain E contains 213 residues.
Chain A contains 3 residues.
Number of water molecules is 134
The ligands that present in the structure are {'H_FUC', 'H_NAG', 'H_GOL', 'H_SO4'}


### Determine the R.M.S.D. between the receptor binding domain of the SARS-CoV-2 Spike glycoprotein complex and its mutant

In [135]:
def get_RBD_atoms_of_structure(name, start_res_name, start_res_code, stop_res_name, stop_res_code, chain_id='E'):
    """Collect the atoms of a residue range (start..stop, both inclusive) of one chain.

    ``<name>.pdb`` (lower-cased ``name``) is read with the module-level
    ``parser``. Residues of the selected chain are scanned in file order;
    collection starts at the residue matching ``start_res_name`` /
    ``start_res_code`` and ends after the residue matching ``stop_res_name`` /
    ``stop_res_code``.

    Args:
        name: Structure identifier (case-insensitive), e.g. ``'7neh'``.
        start_res_name: Residue name of the first residue of the range.
        start_res_code: Residue sequence number of the first residue.
        stop_res_name: Residue name of the last residue of the range.
        stop_res_code: Residue sequence number of the last residue.
        chain_id: Chain to scan. Defaults to ``'E'``, the chain this notebook
            treats as the Spike glycoprotein.

    Returns:
        A list of ``(residue_name, residue_number, atom_name, coord)`` tuples,
        one per atom, where ``coord`` is whatever ``atom.get_coord()`` returns.
    """
    pdb_id = name.lower()
    structure = parser.get_structure(pdb_id, pdb_id + '.pdb')

    rbd_atoms = []
    inside_range = False

    for model in structure:
        for chain in model:
            if chain.id != chain_id:
                continue
            for residue in chain:
                res_name = residue.get_resname()
                # Second field of the residue id is the sequence number; read it
                # directly instead of parsing it out of the residue's repr.
                res_code = residue.get_id()[1]

                is_start = res_name == start_res_name and res_code == start_res_code
                is_stop = res_name == stop_res_name and res_code == stop_res_code

                if is_start:
                    inside_range = True

                # The stop residue itself belongs to the range, so it is collected
                # before the range is closed.
                if inside_range or is_stop:
                    for atom in residue:
                        rbd_atoms.append((res_name, res_code, atom.get_name(), atom.get_coord()))

                if is_stop:
                    inside_range = False

    return rbd_atoms

In [142]:
# Collect the chain E atoms of 7neh.pdb from THR 333 through GLY 526 and print how many were found.
rbd_7neh = get_RBD_atoms_of_structure(name='7neh', start_res_name='THR', start_res_code=333, stop_res_name='GLY', stop_res_code=526)
print(len(rbd_7neh))

hello
1536


In [141]:
# Collect the chain E atoms of 7neg.pdb from ASN 334 through GLU 516 and print how many were found.
rbd_7neg = get_RBD_atoms_of_structure(name='7neg', start_res_name='ASN', start_res_code=334, stop_res_name='GLU', stop_res_code=516)
print(len(rbd_7neg))

hello
1466


In [148]:
def get_residues(rbd_list):
    """Return the residue names of an atom list, one entry per residue.

    Consecutive atoms belonging to the same residue are merged into a single
    entry. A residue is identified by its (name, number) pair, so two adjacent
    residues that share a name (e.g. LEU 517 followed by LEU 518) are kept as
    two entries instead of being collapsed into one.

    Args:
        rbd_list: Iterable of per-atom tuples whose first two fields are
            ``(residue_name, residue_number, ...)``.

    Returns:
        List of residue names in the order the residues first appear.
    """
    residues = []
    previous_key = None  # no residue seen yet
    for item in rbd_list:
        key = (item[0], item[1])
        if key != previous_key:
            residues.append(item[0])
            previous_key = key
    return residues

In [165]:
def valid_residues(rbd_list1, rbd_list2):
    """Print every residue position at which the two atom lists disagree.

    Residues are matched by residue number rather than by list position, so
    the two selections may start and end at different residues and may
    contain gaps. Only residue numbers present in both lists are compared.

    For each mismatch the residue name from ``rbd_list1`` is printed, then the
    name from ``rbd_list2``, then a separator line. Nothing is returned.

    Args:
        rbd_list1: Iterable of per-atom tuples starting with
            ``(residue_name, residue_number, ...)``.
        rbd_list2: Second atom list in the same format.
    """
    def _names_by_code(rbd_list):
        # Map residue number -> residue name, keeping the first name seen per number.
        names = {}
        for item in rbd_list:
            names.setdefault(item[1], item[0])
        return names

    names1 = _names_by_code(rbd_list1)
    names2 = _names_by_code(rbd_list2)

    # Walk list 1 in its original residue order and compare shared positions only.
    for code, res1 in names1.items():
        if code not in names2:
            continue
        res2 = names2[code]
        if res1 != res2:
            print(res1)
            print(res2)
            print('-------------')

In [166]:
# Print the residue names that differ between the 7neh and 7neg selections.
valid_residues(rbd_7neh, rbd_7neg)

ASN
TYR
-------------


In [106]:
def get_atoms_of_structure(name):
    """Collect every atom of ``<name>.pdb`` as a flat list of tuples.

    The file name is built from the lower-cased ``name`` plus ``.pdb`` and is
    read with the module-level ``parser``. All models, chains and residue
    entries are visited.

    Args:
        name: Structure identifier (case-insensitive), e.g. ``'7neh'``.

    Returns:
        A list of ``(residue_name, residue_number, atom_name, coord)`` tuples,
        one per atom, where ``coord`` is whatever ``atom.get_coord()`` returns.
    """
    pdb_id = name.lower()
    structure = parser.get_structure(pdb_id, pdb_id + '.pdb')

    atoms = []

    for model in structure:
        for chain in model:
            for residue in chain:
                res_name = residue.get_resname()
                # Second field of the residue id is the sequence number; read it
                # directly instead of parsing it out of the residue's repr.
                res_code = residue.get_id()[1]

                for atom in residue:
                    atoms.append((res_name, res_code, atom.get_name(), atom.get_coord()))

    return atoms

In [109]:
# Collect every atom of 7neh.pdb and print the total count.
atoms_7neh = get_atoms_of_structure(name='7neh')
print(len(atoms_7neh))

5455


In [110]:
# Collect every atom of 7neg.pdb and print the total count.
atoms_7neg = get_atoms_of_structure(name='7neg')
print(len(atoms_7neg))

4932
