In [5]:
import requests
import json

In [1]:
def _text_node(attribute, operator, value=None):
    """Return one terminal node addressed to the "text" service.

    The "value" key is omitted from the parameters when no value is given
    (used for the "exists" operator, which takes none).
    """
    parameters = {"attribute": attribute, "operator": operator}
    if value is not None:
        parameters["value"] = value
    return {"type": "terminal", "service": "text", "parameters": parameters}


# Individual filters; the group node below joins them with a logical "and".
_filter_nodes = [
    # resolution_combined within the closed interval [1, 3]
    _text_node(
        "rcsb_entry_info.resolution_combined",
        "range",
        {"from": 1, "to": 3, "include_lower": True, "include_upper": True},
    ),
    _text_node(
        "rcsb_entity_source_organism.taxonomy_lineage.name",
        "exact_match",
        "Homo sapiens",
    ),
    _text_node(
        "rcsb_entry_info.selected_polymer_entity_types",
        "exact_match",
        "Protein (only)",
    ),
    _text_node("rcsb_struct_symmetry.type", "exact_match", "Asymmetric"),
    # "exists" carries no value: the attribute only has to be present
    _text_node("entity_poly.rcsb_sample_sequence_length", "exists"),
]

# Full search payload: the filter group, the kind of result to return, and
# paging / sorting options (10000 rows per page, starting at offset 0).
query = {
    "query": {
        "type": "group",
        "logical_operator": "and",
        "nodes": _filter_nodes,
        "label": "text",
    },
    "return_type": "entry",
    "request_options": {
        "paginate": {"rows": 10000, "start": 0},
        "results_content_type": ["experimental"],
        "sort": [{"sort_by": "score", "direction": "desc"}],
        "scoring_strategy": "combined",
    },
}

In [3]:
# URL that the search requests in the following cells are sent to.
request_url = "https://search.rcsb.org/rcsbsearch/v2/query"

In [17]:
# Fetch the first page of results. The query is sent as compact JSON in the
# "json" URL parameter.
first_req = requests.get(
    request_url,
    params={"json": json.dumps(query, separators=(',', ':'))},
    # requests has no default timeout; without one a stalled connection
    # would block this cell indefinitely.
    timeout=60,
)
# Fail here on an HTTP error status rather than later with a confusing
# JSON-decoding or KeyError failure when the body is read.
first_req.raise_for_status()

In [22]:
# Collect the "identifier" field of every entry on the first result page.
identifiers = [hit["identifier"] for hit in first_req.json()["result_set"]]

In [24]:
# Fetch every page after the first one and append its identifiers.
# The page size is read from the query itself so the loop step always matches
# the "rows" value that is actually sent.
page_size = query["request_options"]["paginate"]["rows"]
total_count = first_req.json()["total_count"]  # parse the first response once

for page_start in range(page_size, total_count, page_size):
    # Build a per-page copy instead of mutating the shared `query`; otherwise
    # `query` would keep the last offset and re-running the first-request cell
    # would no longer fetch page one. Key order is preserved by the unpacking.
    page_query = {
        **query,
        "request_options": {
            **query["request_options"],
            "paginate": {
                **query["request_options"]["paginate"],
                "start": page_start,
            },
        },
    }
    req = requests.get(
        request_url,
        params={"json": json.dumps(page_query, separators=(',', ':'))},
        timeout=60,  # requests never times out by default
    )
    # Stop on an HTTP error status instead of silently skipping or mis-parsing a page.
    req.raise_for_status()
    identifiers.extend(entry["identifier"] for entry in req.json()["result_set"])

In [27]:
# Save the collected identifiers to disk, one per line.
with open("data/rcsb/identifier_list.txt", "w") as out_file:
    out_file.writelines(identifier + "\n" for identifier in identifiers)