In [1]:
import requests
import datetime
import json

In [2]:
SUMMARY_INDEX = "gracc.osg.summary"
ENDPOINT = "https://gracc.opensciencegrid.org:443/q"
HEADERS = {'Content-Type': 'application/json'}

test_query = {
    "size": 0,
    "query": {
        "bool" : {
            "filter" : [
                { "term" : { "ResourceType": "Payload" }},
                { "range": {
                    "EndTime": {
                        "lte": int(datetime.datetime.now().timestamp()*1000),
                        "gte": int((datetime.datetime.now() - datetime.timedelta(days=365)).timestamp()*1000)
                    }
                }},
                { "regexp" : {
                    "ProbeName" : {
                        "value" : ".*(osgconnect\\.net|grid\\.uchicago\\.edu|ci-connect\\.net|xd-login\\.opensciencegrid.org|SUBMIT.MIT.EDU|csiu.grid.iu.edu|otsgrid.iit.edu|workflow.isi.edu|lsst-glidein.rcac.purdue.edu|scosg16.jlab.org|gluex.phys.uconn.edu|login.duke.ci-connect.net|huxley-osgsub-001.sdmz.amnh.org|pcf-osg.t2.ucsd.edu|login.ci-connect.uchicago.edu|pcf-osg.t2.ucsd.edu|login.ci-connect.uchicago.edu|aragon.cyverse.org|akbul.cyverse.org|glidein-1.sbgrid.org|ce1.opensciencegrid.org|descmp3.cosmology.illinois.edu|osg-learn.chtc.wisc.edu|xd-submit0000.chtc.wisc.edu|login.snowmass21.io|nsgosg.sdsc.edu|osgsub01.sdcc.bnl.gov)"
                    }
                }}
            ]
        }
    },
    "aggs": {
        "projects": {
            "terms": {
                "field": "ProjectName",
                "size": 99999999
            },
            "aggs": {
                "projectCpuUse": {
                    "sum": {
                        "field": "CoreHours"
                    }
                },
                "projectGpuUse": {
                    "sum": {
                        "field": "GPUHours"
                    }
                },
                "projectJobsRan": {
                    "sum": {
                        "field": "Count"
                    }
                },
                "gpu_bucket_filter": {
                  "bucket_selector": {
                    "buckets_path": {
                      "totalCPU": "projectCpuUse",
                      "totalGPU": "projectGpuUse",
                      "totalJobs": "projectJobsRan"
                    },
                    "script": "params.totalJobs > 100"
                  }
                },
            }
        }
    }
}

response_json = requests.get(f"{ENDPOINT}/{SUMMARY_INDEX}/_search", data=json.dumps(test_query), headers=HEADERS).json()

response_json

ConnectionError: HTTPSConnectionPool(host='gracc.opensciencegrid.org', port=443): Max retries exceeded with url: /q/gracc.osg.summary/_search (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fea80675d60>: Failed to establish a new connection: [Errno 8] nodename nor servname provided, or not known'))

In [3]:
json.dumps(test_query)

'{"size": 0, "query": {"bool": {"filter": [{"term": {"ResourceType": "Payload"}}, {"range": {"EndTime": {"lte": 1670256802056, "gte": 1638720802056}}}, {"regexp": {"ProbeName": {"value": ".*(osgconnect\\\\.net|grid\\\\.uchicago\\\\.edu|ci-connect\\\\.net|xd-login\\\\.opensciencegrid.org|SUBMIT.MIT.EDU|csiu.grid.iu.edu|otsgrid.iit.edu|workflow.isi.edu|lsst-glidein.rcac.purdue.edu|scosg16.jlab.org|gluex.phys.uconn.edu|login.duke.ci-connect.net|huxley-osgsub-001.sdmz.amnh.org|pcf-osg.t2.ucsd.edu|login.ci-connect.uchicago.edu|pcf-osg.t2.ucsd.edu|login.ci-connect.uchicago.edu|aragon.cyverse.org|akbul.cyverse.org|glidein-1.sbgrid.org|ce1.opensciencegrid.org|descmp3.cosmology.illinois.edu|osg-learn.chtc.wisc.edu|xd-submit0000.chtc.wisc.edu|login.snowmass21.io|nsgosg.sdsc.edu|osgsub01.sdcc.bnl.gov)"}}}]}}, "aggs": {"projects": {"terms": {"field": "ProjectName", "size": 99999999}, "aggs": {"projectCpuUse": {"sum": {"field": "CoreHours"}}, "projectGpuUse": {"sum": {"field": "GPUHours"}}, "projec

In [41]:
gracc_project_names = set(x['key'] for x in response_json['aggregations']['projects']['buckets'])
json.dump(list(gracc_project_names), open("data/gracc_projects_active_in_last_year.json", "w"))
len(gracc_project_names)

140

In [18]:
topology_project_names = requests.get("https://topology-itb.opensciencegrid.org/miscproject/json").json().keys()

In [19]:
in_gracc_not_topology = gracc_project_names.difference(topology_project_names)
len(in_gracc_not_topology)
in_gracc_not_topology

{'Bucknell_Ryan',
 'CMB_Petravick',
 'Caltech_Vallisneri',
 'Emory_Lin',
 'GLOW',
 'GeoSCIFramework',
 'NRAO_2022_Bhatnagar',
 'annie',
 'argoneut',
 'cms',
 'cms.org',
 'collab.KOTO',
 'des',
 'dune',
 'fermilab',
 'genie',
 'gluex',
 'icecube',
 'lariat',
 'ligo',
 'microboone',
 'minerva',
 'minos',
 'mu2e',
 'nova',
 'osg',
 'osg.REDTOP',
 'sbnd',
 'seaquest'}

In [20]:
set([1,2,3,4]).difference(set([1,2,5]))

{3, 4}

In [21]:
len(gracc_project_names)

115