# viperdb_test
viperdbからWebAPI経由で各T-numberのPDBIDを取得し、mvsjファイルを作成する
## 注意
事前準備として、SSL証明書を取得しておく必要があります。
1. [viperdb](https://viperdb.org)にアクセスして、サーバー証明書・中間証明書・ルート証明書をエクスポートする
2. 証明書を連結する
   ```
   cat [サーバの証明書] [中間証明書] [ルート証明書] > full_chain.pem
   ```

上の作業が面倒な場合は、自己責任で以下のコードの`cert`を`False`に設定してください(`cert = False`)。この場合、サーバー証明書の検証は行われません。

In [None]:
import requests

In [None]:
# Value passed as `verify=` to every ViPERdb request below: the path to the
# concatenated certificate chain, or False to skip certificate verification.
cert = './full_chain.pem'
# Use HTTPS directly: certificate verification only applies to TLS
# connections, so a plain-http base URL would send the first request
# unencrypted and rely on a server-side redirect for `verify=` to matter.
BASE_URL = 'https://viperdb.org'

## T-numberの分類を取得

In [10]:
# Ask the T-number index service for the list of available T-numbers.
url = f"{BASE_URL}/services/tnumber_index.php?serviceName=tnumbers&tnumber=1"
response = requests.get(url, verify=cert)
if response.status_code != 200:
    print("Failed to fetch T-numbers:", response.status_code)
else:
    # Keep the decoded JSON in `tnumbers`; it is printed here for inspection.
    tnumbers = response.json()
    print("T-numbers:", tnumbers)

T-numbers: [{'tnumber': '1'}, {'tnumber': '13'}, {'tnumber': '16'}, {'tnumber': '169'}, {'tnumber': '2'}, {'tnumber': '27'}, {'tnumber': '277'}, {'tnumber': '28d'}, {'tnumber': '3'}, {'tnumber': '4'}, {'tnumber': '43'}, {'tnumber': '7d'}, {'tnumber': '7l'}, {'tnumber': '9'}, {'tnumber': 'NA'}, {'tnumber': 'pT169'}, {'tnumber': 'pT21'}, {'tnumber': 'pT25'}, {'tnumber': 'pT27'}, {'tnumber': 'pT3'}, {'tnumber': 'pT31'}]


## T=1のメンバーを取得

In [6]:
# Fetch the member records of the T=1 class.
# The decoded result is stored in `tnumbers` (even though it holds member
# records) because the inspection cells that follow read that name.
url = f"{BASE_URL}/services/tnumber_index.php?serviceName=tnumber_members&tnumber=1"
response = requests.get(url, verify=cert)
if response.status_code != 200:
    print("Failed to fetch T-numbers:", response.status_code)
else:
    tnumbers = response.json()
    print("T-numbers:", tnumbers)

T-numbers: [{'entry_id': '2c9g', 'name': 'Adenovirus Type 2 Penton Base Dodecahedron', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '9.30'}, {'entry_id': '2c9f', 'name': 'Adenovirus Type 3 Penton', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '16.50'}, {'entry_id': '4aqq', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '4.75'}, {'entry_id': '4ar2', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.80'}, {'entry_id': '1x9p', 'name': 'Human Adenovirus 2 Penton Base', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.30'}, {'entry_id': '1x9t', 'name': 'Human Adenovirus 2 Penton Base In Complex With An Ad2 N-Terminal Fibre Peptide', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.50'}, {'entry_id': '2c6s', 'name': 'HUMAN ADE

In [7]:
# Inspect the fetched member list: type of one record, number of records,
# and number of fields in one record.
first_member = tnumbers[0]
print(type(first_member))
print(len(tnumbers))
print(len(first_member))

<class 'dict'>
324
5


In [8]:
# Show the field names carried by the first member record.
tnumbers[0].keys()

dict_keys(['entry_id', 'name', 'family', 'genus', 'resolution'])

## ファミリーでソートされたT=1メンバーを取得

In [26]:
# Fetch the T=1 members through the `familiesAndtnumbers` service; the
# recorded output shows a dict keyed by family name.
url = f"{BASE_URL}/services/tnumber_index.php?serviceName=familiesAndtnumbers&tnumber=1"
response = requests.get(url, verify=cert)
if response.status_code != 200:
    print("Failed to fetch T-numbers:", response.status_code)
else:
    tnumbers = response.json()
    print("T-numbers:", tnumbers)

T-numbers: {'Adenoviridae': [{'entry_id': '2c9g', 'name': 'Adenovirus Type 2 Penton Base Dodecahedron', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '9.30'}, {'entry_id': '2c9f', 'name': 'Adenovirus Type 3 Penton', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '16.50'}, {'entry_id': '4aqq', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '4.75'}, {'entry_id': '4ar2', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.80'}, {'entry_id': '1x9p', 'name': 'Human Adenovirus 2 Penton Base', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.30'}, {'entry_id': '1x9t', 'name': 'Human Adenovirus 2 Penton Base In Complex With An Ad2 N-Terminal Fibre Peptide', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.50'}, {'entry_id': '2c6s', '

In [28]:
# Inspect the family-grouped result: the whole mapping, its type and size,
# then the first record stored under the 'Adenoviridae' key.
print("T-numbers:", tnumbers)
print(type(tnumbers))
print(len(tnumbers))
first_adenovirus = tnumbers['Adenoviridae'][0]
print(len(first_adenovirus))
print(first_adenovirus.keys())
print(first_adenovirus)

T-numbers: {'Adenoviridae': [{'entry_id': '2c9g', 'name': 'Adenovirus Type 2 Penton Base Dodecahedron', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '9.30'}, {'entry_id': '2c9f', 'name': 'Adenovirus Type 3 Penton', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '16.50'}, {'entry_id': '4aqq', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '4.75'}, {'entry_id': '4ar2', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.80'}, {'entry_id': '1x9p', 'name': 'Human Adenovirus 2 Penton Base', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.30'}, {'entry_id': '1x9t', 'name': 'Human Adenovirus 2 Penton Base In Complex With An Ad2 N-Terminal Fibre Peptide', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.50'}, {'entry_id': '2c6s', '

In [13]:
# Show the field names of the first record stored under 'Adenoviridae'.
tnumbers['Adenoviridae'][0].keys()

dict_keys(['entry_id', 'name', 'family', 'genus', 'resolution'])

## 全entryの数を数えてみる

In [29]:
# Re-fetch the list of T-numbers: `tnumbers` was overwritten by the member
# queries above, and the counting loop below iterates over this list.
url = f"{BASE_URL}/services/tnumber_index.php?serviceName=tnumbers&tnumber=1"
response = requests.get(url, verify=cert)
if response.status_code != 200:
    print("Failed to fetch T-numbers:", response.status_code)
else:
    tnumbers = response.json()
    print("T-numbers:", tnumbers)

T-numbers: [{'tnumber': '1'}, {'tnumber': '13'}, {'tnumber': '16'}, {'tnumber': '169'}, {'tnumber': '2'}, {'tnumber': '27'}, {'tnumber': '277'}, {'tnumber': '28d'}, {'tnumber': '3'}, {'tnumber': '4'}, {'tnumber': '43'}, {'tnumber': '7d'}, {'tnumber': '7l'}, {'tnumber': '9'}, {'tnumber': 'NA'}, {'tnumber': 'pT169'}, {'tnumber': 'pT21'}, {'tnumber': 'pT25'}, {'tnumber': 'pT27'}, {'tnumber': 'pT3'}, {'tnumber': 'pT31'}]


In [39]:
# Collect the member list of every T-number, one request per T-number.
# `all_entries` ends up as a list of per-T-number lists (one item appended
# for each successful request).
# NOTE(review): `tqdm` is not imported in any cell visible here — confirm it
# is imported elsewhere before running this cell.
all_entries = []
for tnumber in tqdm(tnumbers, leave=False):
    print("T-number:", tnumber['tnumber'])
    url = f"{BASE_URL}/services/tnumber_index.php?serviceName=tnumber_members&tnumber={tnumber['tnumber']}"
    response = requests.get(url, verify=cert)
    if response.status_code != 200:
        # Report the failure and move on to the next T-number.
        print("Failed to fetch T-numbers:", response.status_code)
        continue
    entries = response.json()
    print("entries:", entries)
    print("num of entries:", len(entries))
    all_entries.append(entries)

  0%|          | 0/21 [00:00<?, ?it/s]

T-number: 1
entries: [{'entry_id': '2c9g', 'name': 'Adenovirus Type 2 Penton Base Dodecahedron', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '9.30'}, {'entry_id': '2c9f', 'name': 'Adenovirus Type 3 Penton', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '16.50'}, {'entry_id': '4aqq', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '4.75'}, {'entry_id': '4ar2', 'name': 'DODECAHEDRON FORMED OF PENTON BASE PROTEIN FROM ADENOVIRUS A', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.80'}, {'entry_id': '1x9p', 'name': 'Human Adenovirus 2 Penton Base', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.30'}, {'entry_id': '1x9t', 'name': 'Human Adenovirus 2 Penton Base In Complex With An Ad2 N-Terminal Fibre Peptide', 'family': 'Adenoviridae', 'genus': 'Mastadenovirus', 'resolution': '3.50'}, {'entry_id': '2c6s', 'name': 

In [42]:
import numpy as np
# Total number of entries across all per-T-number lists. The built-in sum
# keeps the result an int even when `all_entries` is empty, whereas
# np.sum([]) yields the float 0.0.
print("num of all_entries:", sum(len(entries) for entries in all_entries))

num of all_entries: 1449


## PDBのAPIを使って、chain idを取得したい

### chainの数

In [75]:
import requests
def get_chain_count(pdb_id, timeout=10):
    """Return the number of polymer entity IDs listed for a PDB entry.

    Despite the name, the value is the length of the
    ``rcsb_entry_container_identifiers.polymer_entity_ids`` list in the
    RCSB entry record, i.e. a count of polymer entities rather than of
    individual chain instances.

    Args:
        pdb_id: PDB entry ID, e.g. ``'1JS9'``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The number of polymer entity IDs (0 when the field is missing or
        null), or ``None`` after printing an error line when the request
        fails or the response status is not 200.
    """
    url = f'https://data.rcsb.org/rest/v1/core/entry/{pdb_id}'
    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as non-200 responses.
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None
    if response.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None
    # `or` also covers a key that is present but null in the JSON.
    identifiers = response.json().get('rcsb_entry_container_identifiers') or {}
    return len(identifiers.get('polymer_entity_ids') or [])


In [76]:
# Example: look up PDB ID '1JS9' and report the count when the lookup works.
pdb_id = '1JS9'
chain_count = get_chain_count(pdb_id)
if chain_count is not None:
    # get_chain_count has already printed an error when it returns None.
    count_line = f'PDB ID {pdb_id} のチェーン数: {chain_count}'
    print(count_line)


PDB ID 1JS9 のチェーン数: 1


In [77]:
# Probe the entry endpoint with the ID 1JSQ. The recorded output of this cell
# is <Response [404]>, a non-200 status that get_chain_count treats as an error.
requests.get('https://data.rcsb.org/rest/v1/core/entry/1JSQ')

<Response [404]>

### chain instanceの数(こちらを採用)

In [78]:
def get_chain_info(pdb_id, entity_id=1, timeout=10):
    """Return the chain count and chain IDs of one polymer entity.

    Reads the ``rcsb_polymer_entity_container_identifiers.auth_asym_ids``
    list from the RCSB polymer-entity record of the given entry.

    Args:
        pdb_id: PDB entry ID, e.g. ``'3jci'``.
        entity_id: Polymer entity to query within the entry. Defaults to 1,
            the only entity the original implementation looked at.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A ``(chain_count, chain_instances)`` tuple where ``chain_instances``
        is the ``auth_asym_ids`` list (empty when missing or null) and
        ``chain_count`` is its length. Returns ``(None, None)`` after
        printing an error line when the request fails or the response
        status is not 200.
    """
    url = f'https://data.rcsb.org/rest/v1/core/polymer_entity/{pdb_id}/{entity_id}'
    try:
        # Without a timeout a stalled connection would block indefinitely.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as non-200 responses.
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    if response.status_code != 200:
        print(f'Error: Unable to fetch data for PDB ID {pdb_id}')
        return None, None
    # `or` also covers keys that are present but null in the JSON.
    identifiers = response.json().get('rcsb_polymer_entity_container_identifiers') or {}
    # Look the list up once; the count is derived from it.
    chain_instances = identifiers.get('auth_asym_ids') or []
    return len(chain_instances), chain_instances


In [140]:
# Example: look up PDB ID '3jci' and report its chain count and chain IDs.
pdb_id = '3jci'
chain_count, chain_instances = get_chain_info(pdb_id)
if chain_count is not None:
    # get_chain_info has already printed an error when it returns None.
    count_line = f'PDB ID {pdb_id} のチェーン数: {chain_count}'
    instances_line = f'各チェーンのインスタンス: {chain_instances}'
    print(count_line)
    print(instances_line)


PDB ID 3jci のチェーン数: 1
各チェーンのインスタンス: ['A']
