In [38]:
import requests
import json


# Base URL that every search request below is sent to.
ENDPOINT = "https://gracc.opensciencegrid.org:443/q"
# Index pattern placed in the request path; the trailing * is a wildcard.
SUMMARY_INDEX = "xrd-stash*"
# The query body is sent as a JSON string, so declare it as such.
HEADERS = {"Content-Type": "application/json"}


# Sum the "write" field per server hostname for records whose @timestamp
# falls in the window from 7 days ago up to (not including) today, with
# both bounds rounded down to the start of the day.
query = {
    # No individual hits are requested; only the aggregation is needed.
    "size": 0,
    "query": {"range": {"@timestamp": {"gte": "now-7d/d", "lt": "now/d"}}},
    "aggs": {
        "by_host": {
            # One bucket per hostname, capped at 10000 buckets.
            "terms": {"field": "server_hostname.keyword", "size": 10000},
            "aggs": {"total_write": {"sum": {"field": "write"}}},
        }
    },
}

# Run the aggregation query against every index matching SUMMARY_INDEX.
response = requests.get(
    f"{ENDPOINT}/{SUMMARY_INDEX}/_search",
    data=json.dumps(query),
    headers=HEADERS,
    timeout=60,  # seconds; requests has no default timeout and could hang forever
)
# Fail here on an HTTP error rather than later with a confusing KeyError
# when the error body is indexed as if it were a search result.
response.raise_for_status()
response_json = response.json()

# Keep a pretty-printed copy of the response on disk for inspection.
# The with-block guarantees the file is flushed and closed.
with open("response.json", "w") as response_file:
    json.dump(response_json, response_file, indent=4)

# Print out number of hosts
len(response_json['aggregations']['by_host']['buckets'])

397

In [39]:
# Collect the distinct per-host write totals from the by_host buckets
values = {
    bucket['total_write']['value']
    for bucket in response_json['aggregations']['by_host']['buckets']
}

values

{0.0,
 232.0,
 512.0,
 696.0,
 768.0,
 1536.0,
 10752.0,
 14336.0,
 14592.0,
 15616.0,
 16384.0,
 19200.0,
 598272.0,
 620617.0,
 623300.0,
 656014.0,
 683528.0,
 792421.0,
 828315.0,
 1332517.0,
 1335653.0,
 1385999.0,
 1441030.0,
 2060726.0,
 3989457.0,
 4836947.0,
 4981003.0,
 12464054.0,
 13601743.0,
 13624050.0,
 13719356.0,
 14119394.0,
 14263603.0,
 15759176.0,
 18234377.0,
 18270116.0,
 18865568.0,
 19102838.0,
 20453558.0,
 22287760.0,
 22732970.0,
 23085318.0,
 25510449.0,
 27234253.0,
 28124105.0,
 28145866.0,
 28504356.0,
 30364133.0,
 30461413.0,
 31669414.0,
 32688912.0,
 36671071.0,
 36946347.0,
 37242052.0,
 37253140.0,
 43562285.0,
 48542684.0,
 49587439.0,
 51712950.0,
 52123566.0,
 53234240.0,
 54599099.0,
 56147415.0,
 57252675.0,
 57705028.0,
 58493307.0,
 59783863.0,
 64698415.0,
 70209473.0,
 72424370.0,
 73454341.0,
 75824067.0,
 75944836.0,
 77903638.0,
 84768210.0,
 85109404.0,
 95427760.0,
 103874732.0,
 121668871.0,
 123085597.0,
 141051644.0,
 160416995.0,


In [40]:
# List the hostnames whose summed write total is anything other than 0
hosts = [
    bucket['key']
    for bucket in response_json['aggregations']['by_host']['buckets']
    if bucket['total_write']['value'] != 0
]

hosts

['ingrid-se09.cism.ucl.ac.be',
 'CMSDATA.PHYS.CMU.EDU',
 'xrootd15.cmsaf.mit.edu',
 'xrootd11.cmsaf.mit.edu',
 'xrootd10.cmsaf.mit.edu',
 'xrootd5.cmsaf.mit.edu',
 'xrootd17.cmsaf.mit.edu',
 'xrootd14.cmsaf.mit.edu',
 'xrootd13.cmsaf.mit.edu',
 'xrootd3.cmsaf.mit.edu',
 'xrootd9.cmsaf.mit.edu',
 'xrootd18.cmsaf.mit.edu',
 'xrootd4.cmsaf.mit.edu',
 'xrootd8.cmsaf.mit.edu',
 'xrootd12.cmsaf.mit.edu',
 'xrootd2.cmsaf.mit.edu',
 'xrootd6.cmsaf.mit.edu',
 'xrootd16.cmsaf.mit.edu',
 'xrootd7.cmsaf.mit.edu',
 'umiss005.hep.olemiss.edu',
 'cmsdtn03.hep.wisc.edu',
 'xrd-ds07.sprace.org.br',
 'xrd-ds01.sprace.org.br',
 'cmsdtn04.hep.wisc.edu',
 'xrd-ds08.sprace.org.br',
 'cmsdtn02.hep.wisc.edu',
 'xrd-ds03.sprace.org.br',
 'xrd-ds02.sprace.org.br',
 'xrd-ds06.sprace.org.br',
 'cmsdtn08.hep.wisc.edu',
 'xrd-ds04.sprace.org.br',
 'xrd-ds05.sprace.org.br',
 'cmsdtn06.hep.wisc.edu',
 'red-xfer1.unl.edu',
 'g39n02.hep.wisc.edu',
 'red-xfer5.unl.edu',
 'g41n08.hep.wisc.edu',
 'g36n16.hep.wisc.edu',
 '

In [41]:
# Fetch the 100 newest records (by @timestamp, descending) from the window
# starting 7 days ago and ending at the start of today.
query = {
    "size": 100,
    "query": {"range": {"@timestamp": {"gte": "now-7d/d", "lt": "now/d"}}},
    "sort": [{"@timestamp": {"order": "desc"}}],
}

# NOTE(review): this rebinds response_json, so cells that index
# response_json['aggregations'] must be re-run from their own query first.
response = requests.get(
    f"{ENDPOINT}/{SUMMARY_INDEX}/_search",
    data=json.dumps(query),
    headers=HEADERS,
    timeout=60,  # seconds; requests has no default timeout and could hang forever
)
# Surface HTTP errors immediately instead of displaying an error body as data.
response.raise_for_status()
response_json = response.json()
response_json

{'took': 212,
 'timed_out': False,
 '_shards': {'total': 559, 'successful': 559, 'skipped': 547, 'failed': 0},
 'hits': {'total': {'value': 10000, 'relation': 'gte'},
  'max_score': None,
  'hits': [{'_index': 'xrd-stash-000074',
    '_id': 'jVnTVJIBEJdaYGYbgOlR',
    '_score': None,
    '_source': {'read_vector_count_average': 7.798325102431469e-06,
     '@version': '1',
     'write_max': 0,
     'read_vector_count_min': 1,
     'operation_time': 0,
     'read_vector_average': 2135073.85,
     'server_ip': '128.104.227.213',
     'HasFileCloseMsg': 0,
     'read_operations': 22,
     '@timestamp': '2024-10-03T23:59:59.980274148Z',
     'end_time': 1727999628000,
     'site': 'T2_US_Wisconsin',
     'read_average': 1941102.9545454546,
     'read_single_average': 1394.0,
     'read_min': 446,
     'timestamp': 1727999628000,
     'read_single_max': 2342,
     'serverID': '1716823090#128.104.227.213#17534',
     'read': 2788,
     'read_vector_min': 60481,
     'write_bytes_at_close': 0,