In [24]:
import requests
import json


# HTTP search endpoint and the index pattern that is queried through it
ENDPOINT = "https://gracc.opensciencegrid.org:443/q"
SUMMARY_INDEX = "xrd-stash*"

# The search body is sent as JSON
HEADERS = {"Content-Type": "application/json"}


# Sum the `write` field per host over the seven complete days before today.
# "now-7d/d" and "now/d" are date-math expressions rounded down to the start
# of the day, so the (partial) current day is excluded from the range.
query = {
    "size": 0,  # only the aggregation is needed; return no individual hits
    "query": {
        "range": {
            "@timestamp": {
                "gte": "now-7d/d",
                "lt": "now/d"
            }
        }
    },
    "aggs": {
        "by_host": {
            "terms": {
                "field": "host.keyword",
                # Upper bound on the number of host buckets returned; documents
                # outside the returned buckets are counted in the response's
                # `sum_other_doc_count`.
                "size": 10000
            },
            "aggs": {
                "total_write": {
                    "sum": {
                        "field": "write"
                    }
                }
            }
        }
    }
}

# The timeout keeps the cell from hanging forever on an unresponsive server,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# payload be parsed and fail later with an unrelated KeyError.
response = requests.get(
    f"{ENDPOINT}/{SUMMARY_INDEX}/_search",
    data=json.dumps(query),
    headers=HEADERS,
    timeout=120,
)
response.raise_for_status()
response_json = response.json()

# Keep the full response on disk for inspection; the context manager makes
# sure the file is flushed and closed.
with open("response.json", "w") as response_file:
    response_file.write(json.dumps(response_json, indent=4))

# Preview only the first 1000 characters of the response
str(response_json)[:1000]

"{'took': 2096, 'timed_out': False, '_shards': {'total': 559, 'successful': 559, 'skipped': 547, 'failed': 0}, 'hits': {'total': {'value': 10000, 'relation': 'gte'}, 'max_score': None, 'hits': []}, 'aggregations': {'by_host': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 18925, 'buckets': [{'key': 'amst-osdf-xcache01.es.net', 'doc_count': 34407831, 'total_write': {'value': 0.0}}, {'key': 'hcc-nrp-shor-c5832.unl.edu', 'doc_count': 19027351, 'total_write': {'value': 0.0}}, {'key': 'lond-osdf-xcache01.es.net', 'doc_count': 3731430, 'total_write': {'value': 0.0}}, {'key': '[2001:468:1f16:6::2]', 'doc_count': 3451115, 'total_write': {'value': 0.0}}, {'key': '[::ffff:132.249.252.215]', 'doc_count': 3174124, 'total_write': {'value': 0.0}}, {'key': '[::128.118.7.6]', 'doc_count': 2438950, 'total_write': {'value': 0.0}}, {'key': 'osg.newy32aoa.nrp.internet2.edu', 'doc_count': 2346615, 'total_write': {'value': 0.0}}, {'key': '[2001:468:1f16:8::2]', 'doc_count': 1431816, 'total_write'

In [25]:
# Distinct `total_write` sums found across all host buckets
values = {
    bucket['total_write']['value']
    for bucket in response_json['aggregations']['by_host']['buckets']
}

values

{0.0,
 142.0,
 264.0,
 2560.0,
 5394.0,
 11520.0,
 12800.0,
 30464.0,
 255744.0,
 271872.0,
 44286000616.0}

In [26]:
# Hosts whose summed `write` value over the queried range is non-zero
hosts = [
    bucket['key']
    for bucket in response_json['aggregations']['by_host']['buckets']
    if bucket['total_write']['value'] != 0
]

hosts

['etf-07.cern.ch',
 'etf-08.cern.ch',
 'etf-14.cern.ch',
 'datarucio.virgo.infn.it',
 'osg-test',
 'etf-06.cern.ch',
 'etf-05.cern.ch',
 'etf-13.cern.ch',
 'virgo-test-01.to.infn.it',
 'virgo-test-02.to.infn.it']