In [1]:
import pandas as pd 

In [2]:
import re
import requests

# Root of the PDBe web site; every endpoint below is derived from it
base_url = "https://www.ebi.ac.uk/pdbe/"

# Root of the REST API. It ends with a slash, so the endpoint paths
# appended below must NOT start with one (otherwise the URL gets "//")
api_base = base_url + "api/"

# Endpoint returning the secondary structure elements of an entry
secondary_structure_url = api_base + 'pdb/entry/secondary_structure/'

# Endpoint returning the molecules of an entry
molecules_url = api_base + 'pdb/entry/molecules/'

In [3]:
def make_request(url, mode, pdb_id, timeout=30):
    """
    This function can make GET and POST requests to
    the PDBe API

    For GET the PDB id is appended to the URL; for POST it is sent
    as the request body. Failures (transport errors or a non-200
    status) are printed and reported to the caller as None.

    :param url: String, endpoint URL (expected to end with "/")
    :param mode: String, either "get" or "post"
    :param pdb_id: String, one PDB id (GET) or the POST payload
    :param timeout: Number or None, seconds to wait for the server
    :return: JSON or None
    :raises ValueError: if mode is neither "get" nor "post"
    """
    # Reject unknown modes up front; previously this fell through and
    # crashed later with an UnboundLocalError on `response`
    if mode not in ("get", "post"):
        raise ValueError('Unsupported mode %r - expected "get" or "post"' % (mode,))

    try:
        if mode == "get":
            response = requests.get(url=url+pdb_id, timeout=timeout)
        else:
            response = requests.post(url, data=pdb_id, timeout=timeout)
    except requests.RequestException as error:
        # Network problems follow the same "print and return None" path as
        # HTTP errors, so a batch loop over many ids is not aborted
        print("[No data retrieved - request failed] %s" % error)
        return None

    if response.status_code == 200:
        return response.json()

    print("[No data retrieved - %s] %s" % (response.status_code, response.text))
    return None

In [4]:
def _format_residue_ranges(elements):
    """
    Convert secondary structure elements to "start-end" strings

    :param elements: List of dicts, each holding the residue number of
                     its first and last residue under "start" and "end"
    :return: List of Strings such as "3-29"
    """
    return [
        "%d-%d" % (element["start"]["residue_number"],
                   element["end"]["residue_number"])
        for element in elements
    ]


def get_secondary_structure_ranges(pdb_id=None, pdb_list=None):
    """
    This function calls the PDBe API and retrieves the residue
    ranges of secondary structural elements in a single PDB entry
    or in a list of PDB entries, printing one line per chain

    If both arguments are given, pdb_id takes precedence.

    :param pdb_id: String, a single PDB id
    :param pdb_list: List of Strings, several PDB ids
    :return: None
    """
    # If neither a single PDB id, nor a list was provided,
    # exit the function
    if not pdb_id and not pdb_list:
        print("Either provide one PDB id, or a list of ids")
        return None

    if pdb_id:
        # If a single PDB id was provided, call the API with GET
        requested = pdb_id
        data = make_request(secondary_structure_url, "get", pdb_id)
    else:
        # If multiple PDB ids were provided, call the API with POST
        # The POST API call expects PDB ids as a comma-separated list
        requested = ", ".join(pdb_list)
        data = make_request(secondary_structure_url, "post", requested)

    # When no data is returned by the API, exit the function.
    # `requested` names what was actually asked for, so the message no
    # longer says "None" when a list of ids was supplied
    if not data:
        print("No data available for %s" % requested)
        return None

    # Loop through all the PDB entries in the retrieved data
    for entry_id, entry in data.items():
        # Loop through all the molecules of a given PDB entry
        for molecule in entry["molecules"]:
            # Loop through all the chains of a given molecule
            for chain in molecule["chains"]:
                secondary_structure = chain["secondary_structure"]

                # Either key may be absent; treat that as "no elements"
                helix_list = _format_residue_ranges(
                    secondary_structure.get("helices", []))
                strand_list = _format_residue_ranges(
                    secondary_structure.get("strands", []))

                report = "%s chain %s has " % (entry_id, chain["chain_id"])
                if helix_list:
                    report += "helices at residue ranges %s " % str(helix_list)
                else:
                    report += "no helices "
                report += "and "
                if strand_list:
                    report += "strands at %s" % str(strand_list)
                else:
                    report += "no strands"
                print(report)
    return None

In [9]:
# Report the secondary structure of one entry (single id -> GET request)
get_secondary_structure_ranges(pdb_id='7ns1')

7ns1 chain A has helices at residue ranges ['3-29'] and no strands


In [9]:
# Restore the variables `rem` and `ids` from IPython's %store storage;
# they must have been saved with %store beforehand
%store -r rem ids

In [11]:
# Query every id in `ids` separately (one GET request per entry);
# entries the API has no data for are reported and skipped
for i in range(len(ids)):
    get_secondary_structure_ranges(pdb_id=ids[i])

1lyp chain A has helices at residue ranges ['1-32'] and no strands
1myn chain A has helices at residue ranges ['15-27'] and strands at ['2-3', '30-34', '38-42']
1s6w chain A has no helices and strands at ['3-5', '16-18']
2l3i chain A has helices at residue ranges ['11-27'] and no strands
2lt8 chain A has helices at residue ranges ['7-19'] and strands at ['24-28', '37-41']
2mbd chain A has helices at residue ranges ['3-15', '20-26'] and no strands
2mhw chain A has helices at residue ranges ['7-16', '17-25'] and no strands
2mwt chain A has helices at residue ranges ['2-22'] and no strands
5j6v chain A has helices at residue ranges ['5-19'] and no strands
5z1y chain A has helices at residue ranges ['7-12', '13-19'] and no strands
[No data retrieved - 404] {}
No data available
6ry9 chain A has helices at residue ranges ['11-16'] and no strands
[No data retrieved - 404] {}
No data available
1dum chain A has helices at residue ranges ['8-21'] and no strands
1dum chain B has helices at residu

In [75]:
# Query several entries at once: a list of ids is sent in one POST request
get_secondary_structure_ranges(pdb_list=['1ib9','1ha9'])

1ib9 chain A has helices at residue ranges ['17-21'] and strands at ['26-27', '33-34']
1ha9 chain A has helices at residue ranges ['17-21'] and strands at ['26-27', '33-34']


In [16]:
# Read the tab-separated JPred table, naming its four columns explicitly,
# and discard the fourth column (named '0') in the same expression
jpred_oneid = pd.read_csv(
    './pdb/jpred/jpred_oneid.tsv',
    sep='\t',
    names=['pdb_id', 'sequence', 'secondary_structure', '0'],
).drop(columns='0')

In [17]:
# Display the loaded table as the cell output
jpred_oneid

Unnamed: 0,pdb_id,sequence,secondary_structure
0,1avf,AVVKVPLKKFKSIRETMKEKGLLGEF,CCEEEECCCCHHHHHHHHHHHHHHHC
1,1bh4,CGESCVWIPCISAALGCSCKNKVCYRNGIP,CCCCEEEEHHHHHHHCCCCCCCCCCCCCCC
2,1bk8,LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFC...,CCCCCCCCCEEEEECCCCCCCHHHHHHHCCCCCEEEECCCCCEEEE...
3,1bnb,APLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW,CCCCCCCCCCEEEEEECCCCCHHHHHCCCCCCCCCCCC
4,1cix,YSRCQLQGFNCVVRSYGLPTIPCCRGLTCRSYFPGSTYGRCQRY,CCCCEECCCCEEECCCCCCCCCCCCCCCCCCCCCCCCCCEECCC
...,...,...,...
256,7c2p,RTCESQSHKFKGPCLRASNCANVCKTEGFHGGKCRGFRRRCFCTKHC,CCCCCCCCCEEEEEECCCCCHHHHCCCCCCCCEEECCCCCEECCCCC
257,7c31,RVCESQSHKFEGACMGDHNCALVCRNEGFSGGKCKGLRRRCFCTKLC,CCCCCCCCCCEEEEECCCCCHHHHCCCCCCCCEEECCCCEEEECCCC
258,7k7x,GLPVCGETCVGGTCNTPGCVCSWPVCTRN,CCCCCCCCEECCCCCCCCCEEEEEEECCC
259,7kpd,AIPCGESCVYIPCISVVIGCSCRNKVCYR,CCCCCCCEEEEEEHHHHHCCCCCCCECCC


In [18]:
# Plain Python list of the values in the table's 'pdb_id' column
id_list1 = jpred_oneid['pdb_id'].tolist()

In [19]:
# One API call per id taken from the JPred table
for entry_id in id_list1:
    get_secondary_structure_ranges(pdb_id=entry_id)

1avf chain P has helices at residue ranges ['12-21'] and strands at ['3-9']
1avf chain Q has helices at residue ranges ['12-20'] and strands at ['3-9']
1avf chain A has helices at residue ranges ['47-52', '57-61', '109-115', '125-129', '129-133', '135-143', '229-237', '253-257', '272-276', '305-310'] and strands at ['14-20', '25-32', '38-41', '65-73', '80-91', '94-104', '119-122', '150-155', '164-168', '176-187', '192-195', '212-216', '223-226', '288-291', '301-304', '312-317', '322-328', '197-200', '203-204', '260-264', '267-271', '240-241', '247-249', '277-279', '284-286']
1avf chain J has helices at residue ranges ['47-52', '57-61', '109-115', '125-129', '129-133', '135-143', '172-174', '229-238', '250-257', '272-276', '305-310'] and strands at ['14-20', '25-32', '38-41', '65-72', '81-91', '94-104', '119-122', '150-155', '164-168', '176-187', '192-195', '212-216', '223-226', '288-291', '301-304', '312-317', '322-328', '197-200', '203-204', '260-264', '267-271', '240-241', '247-249',

In [8]:
# Load the CSV whose 'pdb_id' column supplies the ids queried further down
multid_met=pd.read_csv('./Data/mult_id_method.csv')

In [16]:
# Extract the 'pdb_id' column as a plain Python list
multid_met_id=multid_met['pdb_id'].tolist()

In [17]:
# One API call per id taken from the 'pdb_id' column list
for entry_id in multid_met_id:
    get_secondary_structure_ranges(pdb_id=entry_id)

1f0d chain A has helices at residue ranges ['8-18'] and no strands
1d9j chain A has helices at residue ranges ['2-8', '15-19'] and no strands
1f0g chain A has helices at residue ranges ['4-9'] and no strands
1d9p chain A has helices at residue ranges ['3-9', '10-19'] and no strands
1f0h chain A has helices at residue ranges ['9-20'] and no strands
1d9o chain A has helices at residue ranges ['11-20'] and no strands
1fqq chain A has helices at residue ranges ['6-11'] and strands at ['36-39', '14-16', '25-28']
1fd4 chain A has helices at residue ranges ['4-11'] and strands at ['25-29', '35-39', '2-3', '14-17']
1fd4 chain B has helices at residue ranges ['4-11'] and strands at ['36-39', '14-16', '25-28']
1fd4 chain C has helices at residue ranges ['4-11'] and strands at ['36-39', '14-16', '25-28']
1fd4 chain D has helices at residue ranges ['4-11'] and strands at ['36-39', '14-16', '25-29']
1fd4 chain E has helices at residue ranges ['4-11'] and strands at ['25-29', '36-39', '2-3', '14-16'