In [3]:
import pandas as pd 

In [4]:
import re
import requests

# Root of the PDBe website; every endpoint below is built from it.
base_url = "https://www.ebi.ac.uk/pdbe/"

# Root of the PDBe REST API (keeps its trailing slash).
api_base = base_url + "api/"

# Endpoint paths are appended to api_base, which already ends with "/",
# so they must not start with a slash (otherwise the URL contains "api//").
secondary_structure_url = api_base + "pdb/entry/secondary_structure/"

molecules_url = api_base + "pdb/entry/molecules/"

In [5]:
def make_request(url, mode, pdb_id, timeout=30):
    """
    Make a GET or POST request to the PDBe API and decode the JSON reply.

    :param url: String, endpoint URL; for GET requests the id is appended
                directly to it
    :param mode: String, either "get" or "post"
    :param pdb_id: String, the id appended to the URL (GET) or sent as the
                   request body (POST)
    :param timeout: Number, seconds to wait for the server before the
                    underlying requests call gives up
    :return: JSON (decoded response body) on HTTP 200, otherwise None
    :raises ValueError: if mode is neither "get" nor "post"
    """
    if mode == "get":
        response = requests.get(url=url + pdb_id, timeout=timeout)
    elif mode == "post":
        response = requests.post(url, data=pdb_id, timeout=timeout)
    else:
        # Without this branch `response` would be unbound below and the
        # caller would only see an opaque UnboundLocalError.
        raise ValueError(
            "Unsupported mode %r: expected 'get' or 'post'" % (mode,)
        )

    if response.status_code == 200:
        return response.json()

    # Any non-200 answer is reported and treated as "no data".
    print("[No data retrieved - %s] %s" % (response.status_code, response.text))
    return None

In [6]:
def _residue_ranges(elements):
    """Format each element's start/end residue numbers as a "start-end" string."""
    return [
        "%d-%d" % (element["start"]["residue_number"],
                   element["end"]["residue_number"])
        for element in elements
    ]


def get_secondary_structure_ranges(pdb_id=None, pdb_list=None):
    """
    Call the PDBe API and print, for every chain, the residue ranges of
    its helices and strands, for a single PDB entry or for several entries.

    If both arguments are given, pdb_id takes precedence and pdb_list is
    ignored.

    :param pdb_id: String, a single PDB id (requested with GET)
    :param pdb_list: List of Strings, several PDB ids (requested with one
                     POST); a plain String is sent as-is, so it may already
                     be a comma-separated list of ids
    :return: None (the report is printed, one line per chain)
    """
    # If neither a single PDB id, nor a list was provided,
    # exit the function
    if not pdb_id and not pdb_list:
        print("Either provide one PDB id, or a list of ids")
        return None

    if pdb_id:
        # If a single PDB id was provided, call the API with GET
        data = make_request(secondary_structure_url, "get", pdb_id)
    else:
        # If multiple PDB ids were provided, call the API with POST.
        # The POST API call expects PDB ids as a comma-separated list.
        if isinstance(pdb_list, str):
            # Joining a bare string would split it into single characters.
            pdb_list_string = pdb_list
        else:
            pdb_list_string = ", ".join(pdb_list)
        data = make_request(secondary_structure_url, "post", pdb_list_string)

    # When no data is returned by the API, exit the function
    if not data:
        print("No data available")
        return None

    # Walk entries -> molecules -> chains and report one line per chain
    for entry_id, entry in data.items():
        for molecule in entry["molecules"]:
            for chain in molecule["chains"]:
                secondary_structure = chain["secondary_structure"]

                # A missing key simply means the chain has no such elements
                helix_list = _residue_ranges(
                    secondary_structure.get("helices", []))
                strand_list = _residue_ranges(
                    secondary_structure.get("strands", []))

                report = "%s chain %s has " % (entry_id, chain["chain_id"])
                if helix_list:
                    report += "helices at residue ranges %s " % str(helix_list)
                else:
                    report += "no helices "
                report += "and "
                if strand_list:
                    report += "strands at %s" % str(strand_list)
                else:
                    report += "no strands"
                print(report)
    return None

In [9]:
# Single entry: passing pdb_id routes the lookup through a GET request.
get_secondary_structure_ranges(pdb_id='1myn')

1myn chain A has helices at residue ranges ['15-27'] and strands at ['2-3', '30-34', '38-42']


In [10]:
# Restore `rem` and `ids`, which were persisted earlier with %store; neither
# is defined in the cells above, so this notebook depends on that stored state.
# NOTE(review): `rem` is not used in the visible cells — confirm it is needed.
%store -r rem ids

In [11]:
# Report every restored entry in turn; ids for which the API returns no data
# are announced by the function itself and the loop carries on.
for stored_pdb_id in ids:
    get_secondary_structure_ranges(pdb_id=stored_pdb_id)

1lyp chain A has helices at residue ranges ['1-32'] and no strands
1myn chain A has helices at residue ranges ['15-27'] and strands at ['2-3', '30-34', '38-42']
1s6w chain A has no helices and strands at ['3-5', '16-18']
2l3i chain A has helices at residue ranges ['11-27'] and no strands
2lt8 chain A has helices at residue ranges ['7-19'] and strands at ['24-28', '37-41']
2mbd chain A has helices at residue ranges ['3-15', '20-26'] and no strands
2mhw chain A has helices at residue ranges ['7-16', '17-25'] and no strands
2mwt chain A has helices at residue ranges ['2-22'] and no strands
5j6v chain A has helices at residue ranges ['5-19'] and no strands
5z1y chain A has helices at residue ranges ['7-12', '13-19'] and no strands
[No data retrieved - 404] {}
No data available
6ry9 chain A has helices at residue ranges ['11-16'] and no strands
[No data retrieved - 404] {}
No data available
1dum chain A has helices at residue ranges ['8-21'] and no strands
1dum chain B has helices at residu

In [75]:
# Several entries at once: passing pdb_list routes the lookup through one POST request.
get_secondary_structure_ranges(pdb_list=['1ib9','1ha9'])

1ib9 chain A has helices at residue ranges ['17-21'] and strands at ['26-27', '33-34']
1ha9 chain A has helices at residue ranges ['17-21'] and strands at ['26-27', '33-34']
