# PDB Data APIを試す
- 参考: https://qiita.com/TSU_mi/items/c105c37e9c58977b2a1e 

## PDBのAPIを使って、chain idを取得したい

### chainの数(実際に数えているのは polymer entity の数)

In [8]:
import requests
def get_chain_count(pdb_id):
    """Fetch the polymer entity IDs of a PDB entry from the RCSB Data API.

    The IDs are read from ``rcsb_entry_container_identifiers.polymer_entity_ids``
    of the ``core/entry`` response, so the returned count is the number of
    polymer entity IDs listed for the entry.

    Args:
        pdb_id: PDB entry ID, e.g. ``'6l62'``.

    Returns:
        A ``(count, entity_ids)`` tuple on success, or ``(None, None)`` when
        the server does not answer with HTTP 200.
    """
    url = f'https://data.rcsb.org/rest/v1/core/entry/{pdb_id}'
    response = requests.get(url)
    if response.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        # Return a pair (not a bare None) so callers that unpack two values
        # do not fail with "cannot unpack non-iterable NoneType".
        return None, None
    data = response.json()
    entities = data.get('rcsb_entry_container_identifiers', {}).get('polymer_entity_ids', [])
    return len(entities), entities


In [9]:
pdb_id = '6l62'  # example PDB ID to query
chain_count, entries = get_chain_count(pdb_id)
# Print the results only when a count was obtained.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'PDB ID {pdb_id} のエントリ: {entries}')


PDB ID 6l62 のチェーン数: 3
PDB ID 6l62 のエントリ: ['1', '2', '3']


### chain instanceの数(こちらを採用)

In [12]:
def get_chain_info(pdb_id, entity_id='1'):
    """Fetch the ``auth_asym_ids`` of one polymer entity from the RCSB Data API.

    The values are read from
    ``rcsb_polymer_entity_container_identifiers.auth_asym_ids`` of the
    ``core/polymer_entity`` response (presumably the author-assigned chain
    IDs -- confirm against the RCSB schema).

    Args:
        pdb_id: PDB entry ID, e.g. ``'6l62'``.
        entity_id: Polymer entity ID within the entry. Defaults to ``'1'``,
            which is the entity the URL was previously hard-coded to.

    Returns:
        A ``(count, chain_ids)`` tuple on success, or ``(None, None)`` when
        the server does not answer with HTTP 200.
    """
    url = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}'
    response = requests.get(url)
    if response.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    identifiers = response.json().get('rcsb_polymer_entity_container_identifiers', {})
    # Look the list up once; the count is simply its length.
    chain_instances = identifiers.get('auth_asym_ids', [])
    return len(chain_instances), chain_instances


In [13]:
pdb_id = '6l62'  # example PDB ID to query
chain_count, chain_instances = get_chain_info(pdb_id)
# get_chain_info returns (None, None) on failure, so skip printing then.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'各チェーンのインスタンス: {chain_instances}')


PDB ID 6l62 のチェーン数: 1
各チェーンのインスタンス: ['L']


### 他のカテゴリでも試してみる

In [14]:
def get_chain_info(pdb_id):
    """Variant of the chain lookup that reads ``asym_ids`` for entity 1.

    Queries the ``core/polymer_entity/{pdb_id}/1`` endpoint and returns
    ``(count, asym_ids)`` taken from
    ``rcsb_polymer_entity_container_identifiers.asym_ids``; returns
    ``(None, None)`` when the response status is not 200.
    """
    endpoint = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/1'
    resp = requests.get(endpoint)
    if resp.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    payload = resp.json()
    container_ids = payload.get('rcsb_polymer_entity_container_identifiers', {})
    # Missing keys fall back to an empty list, giving a count of 0.
    asym_ids = container_ids.get('asym_ids', [])
    return len(asym_ids), asym_ids
    
pdb_id = '6l62'  # example PDB ID to query
chain_count, chain_instances = get_chain_info(pdb_id)
# get_chain_info returns (None, None) on failure, so skip printing then.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'各チェーンのインスタンス: {chain_instances}')


PDB ID 6l62 のチェーン数: 1
各チェーンのインスタンス: ['A']


In [15]:
# Save the raw polymer_entity response (entity 1) as a JSON file for inspection.
import json
pdb_id = '6l62'
# Fetch before opening the file so a failed request does not leave an
# empty/truncated file behind.
entity_json = requests.get(f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/1').json()
# Derive the file name from pdb_id so the two cannot drift apart.
with open(f'{pdb_id}.json', 'w', encoding='utf-8') as f:
    json.dump(entity_json, f, indent=4)


In [16]:
# Quick check of get_chain_info on another entry; the notebook displays the
# returned (count, ids) tuple as the cell output.
get_chain_info('1stm')

(5, ['A', 'B', 'C', 'D', 'E'])

In [17]:
def get_chain_info(pdb_id):
    """Experimental variant that reads ``label_asym_ids`` for entity 1.

    Queries the ``core/polymer_entity/{pdb_id}/1`` endpoint and returns
    ``(count, ids)`` taken from
    ``rcsb_polymer_entity_container_identifiers.label_asym_ids``; returns
    ``(None, None)`` when the response status is not 200.

    NOTE(review): the recorded run for 6l62 printed a count of 0 and an empty
    list, which suggests this key is not present in the response -- confirm
    against the RCSB schema.
    """
    endpoint = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/1'
    resp = requests.get(endpoint)
    if resp.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    payload = resp.json()
    container_ids = payload.get('rcsb_polymer_entity_container_identifiers', {})
    # A missing key falls back to an empty list, giving a count of 0.
    label_ids = container_ids.get('label_asym_ids', [])
    return len(label_ids), label_ids
    
pdb_id = '6l62'  # example PDB ID to query
chain_count, chain_instances = get_chain_info(pdb_id)
# get_chain_info returns (None, None) on failure, so skip printing then.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'各チェーンのインスタンス: {chain_instances}')


PDB ID 6l62 のチェーン数: 0
各チェーンのインスタンス: []


In [18]:
def get_chain_info(pdb_id):
    """Experimental variant using a dotted key and ``entity_src_gen``.

    Queries the ``core/polymer_entity/{pdb_id}/1`` endpoint. Returns a tuple of
    (length of the value stored under the literal top-level key
    ``'rcsb_polymer_entity_container_identifiers.auth_asym_ids'``, value of
    ``entity_src_gen.pdbx_beg_seq_num``); returns ``(None, None)`` when the
    response status is not 200.
    """
    endpoint = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/1'
    resp = requests.get(endpoint)
    if resp.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    payload = resp.json()
    # NOTE: dict.get does not traverse nested dicts; the dotted string is
    # looked up as one literal key, so this falls back to [] unless the
    # response really has a top-level key with that exact name.
    dotted_ids = payload.get('rcsb_polymer_entity_container_identifiers.auth_asym_ids', [])
    chain_count = len(dotted_ids)
    # NOTE(review): this assumes entity_src_gen is a dict; if the API returns
    # a list here, the .get call below would raise -- confirm against the schema.
    src_gen = payload.get('entity_src_gen', {})
    chain_instances = src_gen.get('pdbx_beg_seq_num', [])
    return chain_count, chain_instances
    
pdb_id = '6l62'  # example PDB ID to query
chain_count, chain_instances = get_chain_info(pdb_id)
# get_chain_info returns (None, None) on failure, so skip printing then.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'各チェーンのインスタンス: {chain_instances}')


PDB ID 6l62 のチェーン数: 0
各チェーンのインスタンス: []


In [24]:
def get_chain_info(pdb_id, entity_id='1'):
    """Fetch the ``auth_asym_ids`` of one polymer entity from the RCSB Data API.

    The values are read from
    ``rcsb_polymer_entity_container_identifiers.auth_asym_ids`` of the
    ``core/polymer_entity`` response (presumably the author-assigned chain
    IDs -- confirm against the RCSB schema).

    Args:
        pdb_id: PDB entry ID, e.g. ``'1stm'``.
        entity_id: Polymer entity ID within the entry. Defaults to ``'1'``,
            which is the entity the URL was previously hard-coded to.

    Returns:
        A ``(count, chain_ids)`` tuple on success, or ``(None, None)`` when
        the server does not answer with HTTP 200.
    """
    url = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}'
    response = requests.get(url)
    if response.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    identifiers = response.json().get('rcsb_polymer_entity_container_identifiers', {})
    # Look the list up once; the count is simply its length.
    chain_instances = identifiers.get('auth_asym_ids', [])
    return len(chain_instances), chain_instances

pdb_id = '1stm'  # example PDB ID to query
chain_count, chain_instances = get_chain_info(pdb_id)
# get_chain_info returns (None, None) on failure, so skip printing then.
if chain_count is not None:
    print(f'PDB ID {pdb_id} のチェーン数: {chain_count}')
    print(f'各チェーンのインスタンス: {chain_instances}')

PDB ID 1stm のチェーン数: 5
各チェーンのインスタンス: ['A', 'B', 'C', 'D', 'E']


## query

In [3]:
import requests
import os
import json

In [6]:
# Request body for the search endpoint: a single terminal text query that
# matches entries whose exptl.method is exactly "ELECTRON MICROSCOPY".
query = {
    "query": {
        "type": "terminal",
        "service": "text",
        "parameters": {
            "attribute": "exptl.method",
            "operator": "exact_match",
            "value": "ELECTRON MICROSCOPY",
        },
    },
    "request_options": {
        "results_content_type": ["experimental"],
        # Ask for the first page only: 25 rows starting at offset 0.
        "paginate": {"start": 0, "rows": 25},
    },
    # Return identifiers at the entry level.
    "return_type": "entry",
}

In [None]:
# Search API URL
base_url = "https://search.rcsb.org/rcsbsearch/v2/query"

# Send the query as a JSON body; this endpoint is called with POST, not GET.
response = requests.post(base_url, json=query)
print(response.status_code)  # check the HTTP status code
data = response.json()  # parse the response body as JSON
print(data)  # inspect the raw result

# Keep only the PDB IDs from the returned result set.
pdb_ids = [entry['identifier'] for entry in data['result_set']]

print(data['total_count'])  # total number of hits reported by the server
print(len(pdb_ids))  # number of PDB IDs actually returned in this response
print(pdb_ids)  # the PDB IDs themselves

200
{'query_id': 'a997e504-3c4a-4ef4-aac0-0dcd83bbf3b6', 'result_type': 'entry', 'total_count': 23103, 'result_set': [{'identifier': '1C2W', 'score': 1.0}, {'identifier': '1C2X', 'score': 1.0}, {'identifier': '1D3E', 'score': 1.0}, {'identifier': '1D3I', 'score': 1.0}, {'identifier': '1DGI', 'score': 1.0}, {'identifier': '1DYL', 'score': 1.0}, {'identifier': '1EG0', 'score': 1.0}, {'identifier': '1FCW', 'score': 1.0}, {'identifier': '1FOQ', 'score': 1.0}, {'identifier': '1GR5', 'score': 1.0}, {'identifier': '1GRU', 'score': 1.0}, {'identifier': '1GW7', 'score': 1.0}, {'identifier': '1GW8', 'score': 1.0}, {'identifier': '1HB5', 'score': 1.0}, {'identifier': '1HB7', 'score': 1.0}, {'identifier': '1HB9', 'score': 1.0}, {'identifier': '1IA0', 'score': 1.0}, {'identifier': '1IF0', 'score': 1.0}, {'identifier': '1JEW', 'score': 1.0}, {'identifier': '1JQM', 'score': 1.0}, {'identifier': '1JQS', 'score': 1.0}, {'identifier': '1JQT', 'score': 1.0}, {'identifier': '1K4R', 'score': 1.0}, {'identi