In [5]:
import requests
import json

In [6]:
def _text_node(attribute, operator, **extra):
    """Return one terminal node of the "text" service for ``attribute``.

    ``extra`` is merged into the node's "parameters" after "attribute" and
    "operator" (used for the optional "value" entry).
    """
    return {
        "type": "terminal",
        "service": "text",
        "parameters": {"attribute": attribute, "operator": operator, **extra},
    }


# Search request: every node below must match (logical_operator "and").
query = {
    "query": {
        "type": "group",
        "logical_operator": "and",
        "nodes": [
            # resolution_combined within [1, 3], both bounds included
            _text_node(
                "rcsb_entry_info.resolution_combined",
                "range",
                value={
                    "from": 1,
                    "to": 3,
                    "include_lower": True,
                    "include_upper": True,
                },
            ),
            # source organism lineage name is exactly "Homo sapiens"
            _text_node(
                "rcsb_entity_source_organism.taxonomy_lineage.name",
                "exact_match",
                value="Homo sapiens",
            ),
            # selected polymer entity types is exactly "Protein (only)"
            _text_node(
                "rcsb_entry_info.selected_polymer_entity_types",
                "exact_match",
                value="Protein (only)",
            ),
            # structure symmetry type is exactly "Asymmetric"
            _text_node(
                "rcsb_struct_symmetry.type",
                "exact_match",
                value="Asymmetric",
            ),
            # sample sequence length attribute is present ("exists" takes no value)
            _text_node("entity_poly.rcsb_sample_sequence_length", "exists"),
        ],
        "label": "text",
    },
    "return_type": "entry",
    "request_options": {
        # first page: 10000 rows starting at offset 0
        "paginate": {"rows": 10000, "start": 0},
        "results_content_type": ["experimental"],
        "sort": [{"sort_by": "score", "direction": "desc"}],
        "scoring_strategy": "combined",
    },
}

In [7]:
# URL that the JSON-encoded search query is sent to by the requests.get calls in this notebook.
request_url = "https://search.rcsb.org/rcsbsearch/v2/query"

In [8]:
# Fetch the first page of results. The query dict is serialized compactly and
# passed in the "json" URL parameter.
first_req = requests.get(
    request_url,
    params={"json": json.dumps(query, separators=(",", ":"))},
    timeout=60,  # seconds; without a timeout requests can wait forever on a stalled connection
)
# Fail here on an HTTP error status instead of later with an opaque
# JSON-decoding or KeyError failure when the response body is read.
first_req.raise_for_status()

In [9]:
# Collect the "identifier" field of every entry in the first response's "result_set".
identifiers = [hit["identifier"] for hit in first_req.json()["result_set"]]

In [10]:
# Fetch the remaining result pages and append their identifiers.
# The page size is taken from the query itself so the loop step cannot drift
# from the "rows" value that was actually requested.
paginate = query["request_options"]["paginate"]
page_size = paginate["rows"]
total_count = first_req.json()["total_count"]

for start in range(page_size, total_count, page_size):
    # Build a per-page copy instead of mutating `query`: the shared dict keeps
    # start=0, so re-running the first-page cell still fetches the first page.
    page_query = {
        **query,
        "request_options": {
            **query["request_options"],
            "paginate": {**paginate, "start": start},
        },
    }
    req = requests.get(
        request_url,
        params={"json": json.dumps(page_query, separators=(",", ":"))},
        timeout=60,  # seconds; avoid hanging forever on a stalled connection
    )
    # Stop on an HTTP error rather than failing later while reading the body.
    req.raise_for_status()
    identifiers.extend(entry["identifier"] for entry in req.json()["result_set"])

In [39]:
from Bio.PDB import PDBList, PDBParser

In [36]:
# PDBList instance configured with https://files.wwpdb.org/ as its server;
# it is the object used for downloading structure files in this notebook.
pdb_list = PDBList(server="https://files.wwpdb.org/")

In [37]:
# Download the structure file for every collected identifier into the
# "pdb_files" directory, requesting the "pdb" file format.
# overwrite=False presumably skips files that are already present - confirm against the Biopython docs.
# NOTE(review): one "Downloading PDB structure ..." line is printed per identifier,
# so the cell output gets very long; consider clearing it before sharing the notebook.
pdb_list.download_pdb_files(identifiers, pdir="pdb_files", file_format="pdb", overwrite=False)

Downloading PDB structure '12CA'...
Downloading PDB structure '133L'...
Downloading PDB structure '134L'...
Downloading PDB structure '1A07'...
Downloading PDB structure '1A08'...
Downloading PDB structure '1A09'...
Downloading PDB structure '1A1A'...
Downloading PDB structure '1A1B'...
Downloading PDB structure '1A1C'...
Downloading PDB structure '1A1E'...
Downloading PDB structure '1A1M'...
Downloading PDB structure '1A1N'...
Downloading PDB structure '1A1O'...
Downloading PDB structure '1A22'...
Downloading PDB structure '1A28'...
Downloading PDB structure '1A2B'...
Downloading PDB structure '1A2C'...
Downloading PDB structure '1A3B'...
Downloading PDB structure '1A3E'...
Downloading PDB structure '1A3S'...
Downloading PDB structure '1A42'...
Downloading PDB structure '1A46'...
Downloading PDB structure '1A4V'...
Downloading PDB structure '1A4W'...
Downloading PDB structure '1A5G'...
Downloading PDB structure '1A5H'...
Downloading PDB structure '1A5Y'...
Downloading PDB structure '1