In [1]:
from globus_sdk import SearchClient
from fair_research_login import NativeClient
from pprint import pprint

# Search indices: one {'name', 'uuid'} record per index, built from
# (name, uuid) pairs so adding an index is a one-line change.
indices = [
    {'name': name, 'uuid': uuid}
    for name, uuid in (
        ('ssx', '5e63bb08-5b39-4a02-86f3-44cec03e8bc0'),
        ('xpcs', '6871e83e-866b-41bc-8430-e3cf83b43bdc'),
    )
]

In [2]:
# Auth: log in with a NativeClient requesting only the search scope, then
# hand the resulting authorizer to the SearchClient used by the queries below.
SEARCH_SCOPE = 'urn:globus:auth:scope:search.api.globus.org:search'
client = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5')
client.login(requested_scopes=SEARCH_SCOPE)
authorizers = client.get_authorizers()
search_authorizer = authorizers['search.api.globus.org']
sc = SearchClient(authorizer=search_authorizer)


In [3]:
def date_query(index):
    """Search ``index`` with ``q='*'`` and a yearly date-histogram facet.

    The facet is named "Dates" and is computed over the ``dc.dates.date``
    field with a ``date_interval`` of "year".

    Args:
        index: Index identifier, passed unchanged to ``sc.post_search``.

    Returns:
        Whatever ``sc.post_search`` returns for this query.
    """
    yearly_histogram = {
        'name': 'Dates',
        'field_name': 'dc.dates.date',
        'type': 'date_histogram',
        'date_interval': 'year',
    }
    request_body = {'facets': [yearly_histogram], 'q': '*'}
    return sc.post_search(index, request_body)

    
    
def size_query_by_dates(index, date_ranges):
    """Return the summed ``files.length`` for each date range.

    One search request is issued per range, in order. Each request uses
    ``q='*'``, a "sum" facet over ``files.length``, and a "range" filter on
    ``dc.dates.date`` built from the pair.

    Args:
        index: Index identifier, passed unchanged to ``sc.post_search``.
        date_ranges: Iterable of ``(low_date, high_date)`` pairs used as the
            filter's "from" and "to" values.

    Returns:
        A list with one ``int`` per input range, in input order.
    """
    def _range_total(start, end):
        # Build a fresh request body for every range.
        body = {
            'facets': [{
                'name': 'Sizes',
                'field_name': 'files.length',
                'type': 'sum',
            }],
            'filters': [{
                'type': 'range',
                'field_name': 'dc.dates.date',
                'values': [{'from': start, 'to': end}],
            }],
            'q': '*',
        }
        facet_results = sc.post_search(index, body).data['facet_results']
        # Only the first facet result is read; its value is truncated to int.
        return int(facet_results[0]['value'])

    return [_range_total(start, end) for start, end in date_ranges]
    

In [7]:
# For every index: find the years that have data, total the file sizes per
# year, then echo each row and save it to '<name>_data.csv'.
for index in indices:
    facets = date_query(index['uuid']).data['facet_results']
    years = [bucket['value'] for bucket in facets[0]['buckets']]

    # One (first day, last day) pair per year bucket.
    ranges = [(f'{yr}-01-01', f'{yr}-12-31') for yr in years]
    sums = size_query_by_dates(index['uuid'], ranges)

    # Header row first, followed by one row per year.
    rows = [
        ['year', 'size', 'index'],
        *([yr, str(total), index['name']] for yr, total in zip(years, sums)),
    ]

    with open(f'{index["name"]}_data.csv', 'w+') as f:
        for row in rows:
            print(row)
            f.write(','.join(row) + '\n')

['year', 'size', 'index']
['2020', '706373071', 'ssx']
['2021', '890594124', 'ssx']
['2022', '1518853139', 'ssx']
['year', 'size', 'index']
['2019', '16985141', 'xpcs']
['2020', '41304558036', 'xpcs']
['2021', '2010164218451', 'xpcs']
['2022', '38245236644285', 'xpcs']
['2023', '695972040387', 'xpcs']
