In [2]:
import requests
import datetime
import json

In [8]:
# Connection settings for the search service queried by this notebook.
ENDPOINT = "https://gracc.opensciencegrid.org:443/q"  # base URL of the query endpoint
SUMMARY_INDEX = "gracc.osg.summary"                   # index the searches run against
HEADERS = {"Content-Type": "application/json"}        # request bodies are sent as JSON

# Reporting window: the 365 days ending at the moment this cell runs.
# The clock is read exactly once and the lower bound is derived with integer
# millisecond arithmetic, so the two bounds are always exactly 365 days apart
# (two separate now() calls would be taken at slightly different instants).
query_end_ms = int(datetime.datetime.now().timestamp() * 1000)
query_start_ms = query_end_ms - datetime.timedelta(days=365) // datetime.timedelta(milliseconds=1)

# Search body: no individual hits ("size": 0), only aggregations over
# "Payload" records whose EndTime falls inside the reporting window.
test_query = {
    "size": 0,
    "query": {
        "bool": {
            "filter": [
                {"term": {"ResourceType": "Payload"}},
                {"range": {
                    "EndTime": {
                        # Bounds are integer epoch milliseconds.
                        "lte": query_end_ms,
                        "gte": query_start_ms
                    }
                }}
            ]
        }
    },
    "aggs": {
        # Overall totals across every matching record.
        "fieldsOfScience": {
            "cardinality": {
                "field": "OIM_FieldOfScience",
            },
        },
        "jobsRan": {
            "sum": {
                "field": "Count",
            },
        },
        "projects": {
            "cardinality": {
                "field": "ProjectName"
            }
        },
        # Per-facility breakdown: one bucket per distinct OIM_Facility value.
        "facilities": {
            "terms": {
                "field": "OIM_Facility",
                # Very large bucket limit; presumably meant as "return every
                # facility" -- TODO confirm it stays under the cluster's bucket cap.
                "size": 99999999
            },
            "aggs": {
                "facilityCpuProvided": {
                    "sum": {
                        "field": "CoreHours"
                    }
                },
                "facilityGpuProvided": {
                    "sum": {
                        "field": "GPUHours"
                    }
                },
                "countProjectsImpacted": {
                    "cardinality": {
                        "field": "ProjectName"
                    }
                },
                "countFieldsOfScienceImpacted": {
                    "cardinality": {
                        "field": "OIM_FieldOfScience"
                    }
                },
                "countOrganizationImpacted": {
                    "cardinality": {
                        "field": "OIM_Organization"
                    }
                },
                # Keep only facility buckets whose summed GPU hours or summed
                # CPU hours are greater than zero.
                "gpu_bucket_filter": {
                    "bucket_selector": {
                        "buckets_path": {
                            "totalGPU": "facilityGpuProvided",
                            "totalCPU": "facilityCpuProvided"
                        },
                        "script": "params.totalGPU > 0 || params.totalCPU > 0"
                    }
                },
            }
        }
    }
}

# Run the search against the summary index; the query travels as the JSON body
# of a GET request.
response = requests.get(
    f"{ENDPOINT}/{SUMMARY_INDEX}/_search",
    data=json.dumps(test_query),
    headers=HEADERS,
    timeout=60,  # without an explicit timeout, requests can wait indefinitely
)
# Fail here on an HTTP error status instead of parsing an error payload as if
# it were a search result.
response.raise_for_status()
response_json = response.json()

response_json

{'took': 249,
 'timed_out': False,
 '_shards': {'total': 22, 'successful': 22, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 10000, 'relation': 'gte'},
  'max_score': None,
  'hits': []},
 'aggregations': {'jobsRan': {'value': 344186636.0},
  'projects': {'value': 282},
  'FieldsOfScience': {'value': 66},
  'facilities': {'doc_count_error_upper_bound': 0,
   'sum_other_doc_count': 0,
   'buckets': [{'key': 'Fermi National Accelerator Laboratory',
     'doc_count': 73993,
     'countProjectsImpacted': {'value': 163},
     'facilityCpuProvided': {'value': 256736800.25555554},
     'countOrganizationImpacted': {'value': 89},
     'countFieldsOfScienceImpacted': {'value': 48},
     'facilityGpuProvided': {'value': 10.045555114746094}},
    {'key': 'University of Wisconsin',
     'doc_count': 54668,
     'countProjectsImpacted': {'value': 205},
     'facilityCpuProvided': {'value': 169695972.22555557},
     'countOrganizationImpacted': {'value': 104},
     'countFieldsOfScienceIm

In [11]:
# Show the query serialized to a single JSON string.
serialized_query = json.dumps(test_query)
serialized_query

'{"size": 0, "query": {"bool": {"filter": [{"term": {"ResourceType": "Payload"}}, {"range": {"EndTime": {"lte": 1667862127510, "gte": 1636326127510}}}]}}, "aggs": {"facilities": {"terms": {"field": "OIM_Facility", "size": 99999999}, "aggs": {"facilityCpuProvided": {"sum": {"field": "CoreHours"}}, "facilityGpuProvided": {"sum": {"field": "GPUHours"}}, "countProjectsImpacted": {"cardinality": {"field": "ProjectName"}}, "countFieldsOfScienceImpacted": {"cardinality": {"field": "OIM_FieldOfScience"}}, "countOrganizationImpacted": {"cardinality": {"field": "OIM_Organization"}}, "gpu_bucket_filter": {"bucket_selector": {"buckets_path": {"totalGPU": "facilityGpuProvided", "totalCPU": "facilityCpuProvided"}, "script": "params.totalGPU > 0 || params.totalCPU > 0"}}}}}}'

In [17]:
# Facility names seen in the search response: the key of every bucket in the
# "facilities" aggregation, de-duplicated into a set.
facility_buckets = response_json['aggregations']['facilities']['buckets']
gracc_facility_names = {bucket['key'] for bucket in facility_buckets}

In [18]:
# NOTE(review): absolute, machine-specific path -- the cell only runs where this
# file exists. Consider moving it next to the notebook or into a config cell.
TOPOLOGY_FACILITY_JSON = "/Users/clock/Downloads/facility_json.json"

# Load the facility file and keep its top-level keys. The context manager
# closes the file handle as soon as the JSON is parsed; the encoding is pinned
# so the read does not depend on the platform's locale default.
with open(TOPOLOGY_FACILITY_JSON, encoding="utf-8") as facility_file:
    topology_facility_names = json.load(facility_file).keys()

In [19]:
# Names present in the search response but absent from the topology file;
# an empty set means every reported facility is accounted for.
gracc_facility_names - set(topology_facility_names)

set()