## Install

```bash
pip install pdb-profiling
```

## Import Modules

In [1]:
from pdb_profiling.fetcher.webfetch import UnsyncFetch
from pdb_profiling.utils import DisplayPDB
from pdb_profiling.processers.pdbe.api import *
from pdb_profiling.processers.pdbe.record import *

In [2]:
# Other packages
from pprint import pprint
from unsync import unsync

## Basic Settings

In [3]:
# Reuse already-processed PDBe API results (e.g. tsv-format files) when present
ProcessPDBe.use_existing = True
# Initialise the PDBe API logger; it is shared with the other components below
ProcessPDBe.init_logger()
# Reuse raw API results already downloaded from the web (e.g. json-format files)
UnsyncFetch.use_existing = True
# Initialise the web fetcher's settings, passing it the PDBe API logger.
# NOTE(review): this reads ProcessPDBe.logger, so it presumably has to run after
# ProcessPDBe.init_logger() above — confirm before reordering.
UnsyncFetch.init_setting(ProcessPDBe.logger)
# Set the web fetcher's semaphore to 30
# (presumably the cap on concurrent web requests — confirm)
PDB.set_web_semaphore(30)
# Folder in which downloaded and processed files are stored (path is relative)
PDB.set_folder('../test/')
# Initialise the ModelServer API logger, again sharing the PDBe API logger
PDBeModelServer.init_logger(logger=ProcessPDBe.logger)

In [4]:
# Available API endpoint(s) that can be passed to `PDB.fetch_from_web_api`
API_SET

{'api/mappings/all_isoforms/',
 'api/pdb/entry/assembly/',
 'api/pdb/entry/binding_sites/',
 'api/pdb/entry/carbohydrate_polymer/',
 'api/pdb/entry/cofactor/',
 'api/pdb/entry/drugbank/',
 'api/pdb/entry/electron_density_statistics/',
 'api/pdb/entry/experiment/',
 'api/pdb/entry/files/',
 'api/pdb/entry/ligand_monomers/',
 'api/pdb/entry/modified_AA_or_NA/',
 'api/pdb/entry/molecules/',
 'api/pdb/entry/mutated_AA_or_NA/',
 'api/pdb/entry/observed_residues_ratio/',
 'api/pdb/entry/polymer_coverage/',
 'api/pdb/entry/related_experiment_data/',
 'api/pdb/entry/residue_listing/',
 'api/pdb/entry/secondary_structure/',
 'api/pdb/entry/status/',
 'api/pdb/entry/summary/',
 'api/pisa/interfacedetail/',
 'api/pisa/interfacelist/',
 'swissmodel/repository/uniprot/'}

## Demo Usage

In [5]:
# Two demo entries used throughout the rest of the notebook.
# NOTE(review): judging by the variable names and the timings recorded in the
# outputs, 1m4x presumably already has results stored in the configured folder
# while 1m11 is fetched from the web — confirm.
pdb_demo_existing = PDB('1m4x')
pdb_demo_new = PDB('1m11')

### Fetch Data From PDBe API

#### Properties (Implicit implementation)


In [6]:
# Pretty-print the `status` and `summary` properties of each demo entry;
# `%time` reports the CPU and wall time of each individual statement.
%time pprint(pdb_demo_existing.status)
%time pprint(pdb_demo_existing.summary)

%time pprint(pdb_demo_new.status)
%time pprint(pdb_demo_new.summary)

{'entry_authors': 'Nandhagopal, N., Simpson, A.A., Gurnon, J.R., Yan, X., '
                  'Baker, T.S., Graves, M.V., Van Etten, J.L., Rossmann, M.G.',
 'experimental_method': 'ELECTRON MICROSCOPY',
 'experimental_method_class': 'em',
 'obsoletes': [],
 'since': '20021204',
 'status_code': 'REL',
 'superceded_by': [],
 'title': 'PBCV-1 virus capsid, quasi-atomic model'}
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 30.5 ms
{'assemblies': [{'assembly_id': '1',
                 'form': 'homo',
                 'name': '5040-mer',
                 'preferred': True},
                {'assembly_id': '2',
                 'form': 'homo',
                 'name': '84-mer',
                 'preferred': False},
                {'assembly_id': '3',
                 'form': 'homo',
                 'name': '420-mer',
                 'preferred': False},
                {'assembly_id': '4',
                 'form': 'homo',
                 'name': '504-mer',
                 'pref

#### Explicit implementation

In [7]:
# Explicit form: name the endpoint (one of API_SET) and pass PDB.to_dataframe as
# the second argument; .result() is called on the returned object to obtain the
# table shown in the output (presumably an unsync future — confirm).
%time pdb_demo_existing.fetch_from_web_api('api/pdb/entry/residue_listing/', PDB.to_dataframe).result()

CPU times: user 0 ns, sys: 15.6 ms, total: 15.6 ms
Wall time: 33.2 ms


Unnamed: 0,author_insertion_code,author_residue_number,multiple_conformers,observed_ratio,residue_name,residue_number,chain_id,struct_asym_id,entity_id,pdb_id
0,,225,,1,GLU,201,A,A,1,1m4x
1,,226,,1,TYR,202,A,A,1,1m4x
2,,227,,1,LEU,203,A,A,1,1m4x
3,,228,,1,ILE,204,A,A,1,1m4x
4,,229,,1,GLU,205,A,A,1,1m4x
...,...,...,...,...,...,...,...,...,...,...
1234,,164,,1,PRO,140,B,B,1,1m4x
1235,,165,,1,LEU,141,B,B,1,1m4x
1236,,166,,1,ILE,142,B,B,1,1m4x
1237,,167,,1,ALA,143,B,B,1,1m4x


In [8]:
# Same residue-listing request for the second demo entry (1m11); `%time` lets the
# reader compare its wall time with the equivalent call on pdb_demo_existing.
%time pdb_demo_new.fetch_from_web_api('api/pdb/entry/residue_listing/', PDB.to_dataframe).result()

CPU times: user 78.1 ms, sys: 46.9 ms, total: 125 ms
Wall time: 2.6 s


Unnamed: 0,author_insertion_code,author_residue_number,multiple_conformers,observed_ratio,residue_name,residue_number,chain_id,struct_asym_id,entity_id,pdb_id
0,,192,,0.142,PRO,192,R,A,1,1m11
1,,193,,0.200,ALA,193,R,A,1,1m11
2,,194,,0.142,PRO,194,R,A,1,1m11
3,,195,,0.142,PRO,195,R,A,1,1m11
4,,196,,0.111,GLN,196,R,A,1,1m11
...,...,...,...,...,...,...,...,...,...,...
1008,,234,,0.142,THR,234,3,D,4,1m11
1009,,235,,0.200,ALA,235,3,D,4,1m11
1010,,236,,0.125,LEU,236,3,D,4,1m11
1011,,237,,0.125,LEU,237,3,D,4,1m11


In [9]:
# Call set_assembly() and wait on its .result() before reading `.assembly`,
# then pretty-print the resulting mapping of integer keys to PDBAssemble objects.
# NOTE(review): key 0 presumably denotes the asymmetric unit (DisplayPDB captions
# column 0 as "Asymmetric unit of ...") — confirm.
%time pdb_demo_existing.set_assembly().result()
pprint(pdb_demo_existing.assembly)

# Same steps for the second demo entry
%time pdb_demo_new.set_assembly().result()
pprint(pdb_demo_new.assembly)

CPU times: user 31.2 ms, sys: 15.6 ms, total: 46.9 ms
Wall time: 48.9 ms
{0: <PDBAssemble 1m4x/0>,
 1: <PDBAssemble 1m4x/1>,
 2: <PDBAssemble 1m4x/2>,
 3: <PDBAssemble 1m4x/3>,
 4: <PDBAssemble 1m4x/4>,
 5: <PDBAssemble 1m4x/5>,
 6: <PDBAssemble 1m4x/6>,
 7: <PDBAssemble 1m4x/7>}
CPU times: user 46.9 ms, sys: 15.6 ms, total: 62.5 ms
Wall time: 2.51 s
{0: <PDBAssemble 1m11/0>,
 1: <PDBAssemble 1m11/1>,
 2: <PDBAssemble 1m11/2>,
 3: <PDBAssemble 1m11/3>,
 4: <PDBAssemble 1m11/4>,
 5: <PDBAssemble 1m11/5>}


In [10]:
# Request the assembly endpoint for 1m4x and convert it with PDB.to_dataframe;
# the output lists one row per entity per assembly_id.
%time pdb_demo_existing.fetch_from_web_api('api/pdb/entry/assembly/', PDB.to_dataframe).result()

CPU times: user 15.6 ms, sys: 15.6 ms, total: 31.2 ms
Wall time: 15.8 ms


Unnamed: 0,entity_id,in_chains,molecule_name,molecule_type,number_of_copies,polymeric_count,assembly_composition,molecular_weight,details,assembly_id,pdb_id
0,1,"[""A"",""AA"",""AAA"",""AAAA"",""AAAB"",""AAAC"",""AAAD"",""A...","[""Major capsid protein""]",polypeptide(L),5040,5040,protein structure,230503.86,complete icosahedral assembly,1,1m4x
1,1,"[""A"",""AA"",""AAA"",""AAB"",""AAC"",""AAD"",""AAE"",""AAF"",...","[""Major capsid protein""]",polypeptide(L),420,420,protein structure,19208.654,icosahedral pentamer,3,1m4x
2,1,"[""A"",""AA"",""AAA"",""AB"",""AC"",""AD"",""AE"",""AF"",""AG"",...","[""Major capsid protein""]",polypeptide(L),84,84,protein structure,3841.731,icosahedral asymmetric unit,2,1m4x
3,1,"[""A"",""AA"",""AAA"",""AAB"",""AAC"",""AB"",""AC"",""AD"",""AE...","[""Major capsid protein""]",polypeptide(L),90,90,protein structure,4116.14,pentasymmetron capsid unit,5,1m4x
4,1,"[""A"",""AA"",""AAA"",""AAB"",""AAC"",""AAD"",""AAE"",""AAF"",...","[""Major capsid protein""]",polypeptide(L),504,504,protein structure,23050.387,icosahedral 23 hexamer,4,1m4x
5,1,"[""A"",""AA"",""AAA"",""AB"",""AC"",""AD"",""AE"",""AF"",""AG"",...","[""Major capsid protein""]",polypeptide(L),84,84,protein structure,3841.731,"icosahedral asymmetric unit, std point frame",7,1m4x
6,1,"[""A"",""AA"",""AAA"",""AAB"",""AAC"",""AAD"",""AAE"",""AAF"",...","[""Major capsid protein""]",polypeptide(L),198,198,protein structure,9055.508,trisymmetron capsid unit,6,1m4x


In [11]:
# Same assembly-endpoint request for the second demo entry (1m11)
%time pdb_demo_new.fetch_from_web_api('api/pdb/entry/assembly/', PDB.to_dataframe).result()

CPU times: user 31.2 ms, sys: 0 ns, total: 31.2 ms
Wall time: 24.2 ms


Unnamed: 0,entity_id,in_chains,molecule_name,molecule_type,number_of_copies,polymeric_count,assembly_composition,molecular_weight,details,assembly_id,pdb_id
0,1,"[""A"",""AA"",""AAA"",""AAB"",""AAC"",""AAD"",""AAE"",""AAF"",...","[""Complement decay-accelerating factor""]",polypeptide(L),60,240,protein/protein complex,6804.412,complete icosahedral assembly,1,1m11
1,2,"[""B"",""BA"",""BAA"",""BAB"",""BAC"",""BAD"",""BAE"",""BAF"",...","[""COAT PROTEIN VP1""]",polypeptide(L),60,240,protein/protein complex,6804.412,complete icosahedral assembly,1,1m11
2,3,"[""C"",""CA"",""CAA"",""CAB"",""CAC"",""CAD"",""CAE"",""CAF"",...","[""COAT PROTEIN VP2""]",polypeptide(L),60,240,protein/protein complex,6804.412,complete icosahedral assembly,1,1m11
3,4,"[""D"",""DA"",""DAA"",""DAB"",""DAC"",""DAD"",""DAE"",""DAF"",...","[""COAT PROTEIN VP3""]",polypeptide(L),60,240,protein/protein complex,6804.412,complete icosahedral assembly,1,1m11
4,1,"[""A"",""AA"",""AB"",""AC"",""AD""]","[""Complement decay-accelerating factor""]",polypeptide(L),5,20,protein/protein complex,567.034,icosahedral pentamer,3,1m11
5,2,"[""B"",""BA"",""BB"",""BC"",""BD""]","[""COAT PROTEIN VP1""]",polypeptide(L),5,20,protein/protein complex,567.034,icosahedral pentamer,3,1m11
6,3,"[""C"",""CA"",""CB"",""CC"",""CD""]","[""COAT PROTEIN VP2""]",polypeptide(L),5,20,protein/protein complex,567.034,icosahedral pentamer,3,1m11
7,4,"[""D"",""DA"",""DB"",""DC"",""DD""]","[""COAT PROTEIN VP3""]",polypeptide(L),5,20,protein/protein complex,567.034,icosahedral pentamer,3,1m11
8,1,"[""A""]","[""Complement decay-accelerating factor""]",polypeptide(L),1,4,protein/protein complex,113.407,icosahedral asymmetric unit,2,1m11
9,2,"[""B""]","[""COAT PROTEIN VP1""]",polypeptide(L),1,4,protein/protein complex,113.407,icosahedral asymmetric unit,2,1m11


In [12]:
# Render one column per assembly key for each entry (requires `.assembly` to have
# been populated by set_assembly() in an earlier cell).
# The cell's final expression is a DisplayPDB object, so its repr is also echoed
# as the cell output.
DisplayPDB(pdb_demo_existing.pdb_id, pdb_demo_existing.assembly.keys())
DisplayPDB(pdb_demo_new.pdb_id, pdb_demo_new.assembly.keys())

0,1,2,3,4,5,6,7
Asymmetric unit of 1m4x,Biological assembly 1 of 1m4x,Biological assembly 2 of 1m4x,Biological assembly 3 of 1m4x,Biological assembly 4 of 1m4x,Biological assembly 5 of 1m4x,Biological assembly 6 of 1m4x,Biological assembly 7 of 1m4x
,,,,,,,


0,1,2,3,4,5
Asymmetric unit of 1m11,Biological assembly 1 of 1m11,Biological assembly 2 of 1m11,Biological assembly 3 of 1m11,Biological assembly 4 of 1m11,Biological assembly 5 of 1m11
,,,,,


<pdb_profiling.utils.DisplayPDB at 0x7fb044dcda00>