In [1]:
from typing import List
from pandas import concat

In [2]:
from pdb_profiling.fetcher.webfetch import UnsyncFetch
from pdb_profiling.utils import DisplayPDB
from pdb_profiling.processers.pdbe.api import *
from pdb_profiling.processers.pdbe.record import *

# Global configuration for the demo.
# NOTE(review): ProcessPDBe.init_logger() is called before ProcessPDBe.logger is
# handed to the other components -- presumably it creates that logger, so keep
# this statement order.
ProcessPDBe.use_existing = True  # Reuse already-processed PDBe API results (e.g. tsv-format results)
ProcessPDBe.init_logger()  # Initialise the PDBe API logger
UnsyncFetch.use_existing = True  # Reuse already-downloaded API results (e.g. json-format results fetched from the web)
UnsyncFetch.init_setting(ProcessPDBe.logger)  # Initialise the web fetcher's settings, passing it the PDBe API logger
PDBeModelServer.init_logger(logger=ProcessPDBe.logger)  # Initialise the ModelServer API logger from the PDBe API logger
PDBArchive.init_logger(logger=ProcessPDBe.logger)  # Initialise the PDBArchive API logger from the PDBe API logger
PDB.set_web_semaphore(30)  # Set the web fetcher's semaphore value to 30
PDB.set_folder('../test/demo')  # Folder that stores downloaded and processed files

In [3]:
# Identifiers of the demo entries, and one PDB object per identifier (same order).
demo_pdbs = (
    '1a04',
    '2xya',
    '3hl5',
    '4hhr',
    '5tm3',
)
demo_pdb_obs: List[PDB] = list(map(PDB, demo_pdbs))

In [4]:
%time assert all(pdb_ob.status['status_code'] == 'REL' for pdb_ob in demo_pdb_obs), "Unexcepted Cases"

CPU times: user 297 ms, sys: 156 ms, total: 453 ms
Wall time: 14.1 s


In [5]:
# Show the summary record of the first demo entry ('1a04') and time the attribute access.
%time demo_pdb_obs[0].summary

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 20.3 µs


{'related_structures': [],
 'split_entry': [],
 'title': 'THE STRUCTURE OF THE NITRATE/NITRITE RESPONSE REGULATOR PROTEIN NARL IN THE MONOCLINIC C2 CRYSTAL FORM',
 'release_date': '19980318',
 'experimental_method': ['X-ray diffraction'],
 'experimental_method_class': ['x-ray'],
 'revision_date': '20110713',
 'entry_authors': ['Baikalov, I.',
  'Schroder, I.',
  'Kaczor-Grzeskowiak, M.',
  'Cascio, D.',
  'Gunsalus, R.P.',
  'Dickerson, R.E.'],
 'deposition_site': None,
 'number_of_entities': {'polypeptide': 1,
  'dna': 0,
  'ligand': 0,
  'dna/rna': 0,
  'rna': 0,
  'sugar': 0,
  'water': 1,
  'other': 0,
  'carbohydrate_polymer': 0},
 'processing_site': None,
 'deposition_date': '19971208',
 'assemblies': [{'assembly_id': '1',
   'form': 'homo',
   'preferred': True,
   'name': 'monomer'},
  {'assembly_id': '2', 'form': 'homo', 'preferred': False, 'name': 'monomer'}]}

In [6]:
# Display the first PDB object; its repr shows the entry identifier it was created with.
demo_pdb_obs[0]

<PDB 1a04>

In [7]:
# Request the residue_listing endpoint for every demo entry, converting each
# response with PDB.to_dataframe, and collect the results once all tasks are done.
res_dfs = UnsyncFetch.unsync_tasks(
    [
        entry.fetch_from_web_api('api/pdb/entry/residue_listing/', PDB.to_dataframe)
        for entry in demo_pdb_obs
    ]
).result()

100%|██████████| 5/5 [00:07<00:00,  1.55s/it]


In [8]:
# Stack the per-entry results into a single table.
# ignore_index=True gives the combined frame a fresh 0..n-1 index;
# sort=False leaves the columns unsorted when the frames' columns differ.
# NOTE(review): in the output below the first rows belong to '5tm3', so the
# order of res_dfs apparently does not follow demo_pdbs -- do not rely on it.
concat(res_dfs, sort=False, ignore_index=True)

Unnamed: 0,author_insertion_code,author_residue_number,multiple_conformers,observed_ratio,residue_name,residue_number,chain_id,struct_asym_id,entity_id,pdb_id
0,,298,,0.0,ILE,1,A,A,1,5tm3
1,,299,,0.0,LYS,2,A,A,1,5tm3
2,,300,,0.0,ARG,3,A,A,1,5tm3
3,,301,,0.0,SER,4,A,A,1,5tm3
4,,302,,0.0,LYS,5,A,A,1,5tm3
...,...,...,...,...,...,...,...,...,...,...
2014,,714,,1.0,BNG,1,A,O,6,4hhr
2015,,715,,1.0,P6G,1,A,P,7,4hhr
2016,,716,,1.0,PGE,1,A,Q,8,4hhr
2017,,718,,1.0,PGE,1,A,S,8,4hhr


In [9]:
async def pipe_fetch_assg(pdb_ob: PDB):
    """Fetch ModelServer 'atoms' data for one entry and post-process it.

    The data collection is obtained by awaiting
    ``pdb_ob.pipe_assg_data_collection()`` and is then passed to
    ``pdb_ob.fetch_from_modelServer_api`` together with
    ``PDB.to_assg_oper_df`` as ``then_func``.

    Args:
        pdb_ob: The PDB object to query.

    Returns:
        Whatever the awaited ``fetch_from_modelServer_api`` call yields
        (presumably the output of ``PDB.to_assg_oper_df`` -- confirm).
    """
    assg_collection = await pdb_ob.pipe_assg_data_collection()
    fetch_task = pdb_ob.fetch_from_modelServer_api(
        'atoms',
        data_collection=assg_collection,
        then_func=PDB.to_assg_oper_df,
    )
    return await fetch_task

# Create one pipe_fetch_assg coroutine per demo entry, run them together and
# collect the results.
res_dfs = UnsyncFetch.unsync_tasks(
    list(map(pipe_fetch_assg, demo_pdb_obs))
).result()

100%|██████████| 5/5 [00:07<00:00,  1.55s/it]


In [10]:
# Stack the per-entry assembly/operator tables into a single table.
# ignore_index=True gives the combined frame a fresh 0..n-1 index;
# sort=False leaves the columns unsorted when the frames' columns differ.
concat(res_dfs, sort=False, ignore_index=True)

Unnamed: 0,assembly_id,struct_asym_id,oper_expression,model_id,asym_id_rank,symmetry_operation
0,1,A,"[""1""]",1,1,"[""x,y,z""]"
1,1,C,"[""1""]",1,1,"[""x,y,z""]"
2,1,D,"[""1""]",1,1,"[""x,y,z""]"
3,1,G,"[""1""]",1,1,"[""x,y,z""]"
4,2,B,"[""1""]",1,1,"[""x,y,z""]"
5,2,E,"[""1""]",1,1,"[""x,y,z""]"
6,2,F,"[""1""]",1,1,"[""x,y,z""]"
7,2,H,"[""1""]",1,1,"[""x,y,z""]"
8,1,A,"[""1""]",1,1,"[""x,y,z""]"
9,1,C,"[""1""]",1,1,"[""x,y,z""]"


In [11]:
# Build a PDB object for entry '2a01', used to demonstrate access to the
# 'obsolete/mmCIF/' area of the PDB archive.
pdb_2a01 = PDB('2a01')

In [12]:
# Fetch this entry from the PDB archive under 'obsolete/mmCIF/' and convert it
# with PDB.cif2residue_listing; .result() retrieves the finished task's value,
# which is displayed below as a residue-listing table.
pdb_2a01.fetch_from_PDBArchive('obsolete/mmCIF/', PDB.cif2residue_listing).result()

Unnamed: 0,pdb_id,struct_asym_id,entity_id,residue_number,residue_name,authore_residue_number,chain_id,author_insertion_code
0,2a01,A,1,1,ASP,1,A,
1,2a01,A,1,2,GLU,2,A,
2,2a01,A,1,3,PRO,3,A,
3,2a01,A,1,4,PRO,4,A,
4,2a01,A,1,5,GLN,5,A,
...,...,...,...,...,...,...,...,...
724,2a01,C,1,239,LYS,239,C,
725,2a01,C,1,240,LEU,240,C,
726,2a01,C,1,241,ASN,241,C,
727,2a01,C,1,242,THR,242,C,


In [13]:
# Inspect the status record of entry '2a01' (status_code 'OBS' in the output below).
pdb_2a01.status



{'title': 'Crystal Structure of Lipid-free Human Apolipoprotein A-I',
 'status_code': 'OBS',
 'since': '20060221',
 'experimental_method_class': 'x-ray',
 'entry_authors': 'Ajees, A.A., Anantharamaiah, G.M., Mishra, V.K., Hussain, M.M., Murthy, K.H.M.',
 'obsoletes': [],
 'superceded_by': [None],
 'experimental_method': 'X-RAY DIFFRACTION'}