In [33]:
from rucio.client.client import Client
import os
import re
import json
from rucio_helper import setup_rucio_account
# Set the RUCIO_CONFIG environment variable for this process (presumably the
# configuration file picked up by the rucio client -- confirm).
# NOTE(review): this is an absolute, user-specific path; adjust it when the
# notebook is run from another account or machine.
os.environ['RUCIO_CONFIG'] = '/work/sbrommer/embedding/publish_ul_samples/rucio.cfg'

In [34]:
# Set up the rucio account through the local rucio_helper module before the
# client is created.
setup_rucio_account()
# Shared client instance; the helper functions in this notebook read it as a
# module-level global.
client = Client()

Using rucio account sbrommer


In [35]:
def get_account_rules(account, defaultname, expected_total=6 * 18):
    """Collect the rules of ``account`` that are in state ``'OK'``.

    A rule is kept when its ``state`` equals ``'OK'`` and ``defaultname``
    occurs in its ``name``. Every other rule is printed as ``<name> <state>``
    so it can be inspected. When a kept name has been seen before, a notice is
    printed; the duplicate rule is still part of the returned list.

    Uses the module-level ``client`` to query the rules.

    Args:
        account: account whose rules are listed.
        defaultname: substring a rule name has to contain to be kept.
        expected_total: number of samples expected to be transferred; it is
            only used in the printed summary line. Defaults to ``6 * 18``
            (presumably 6 final states x 18 run periods -- TODO confirm).

    Returns:
        list: the kept items yielded by ``client.list_account_rules``, in the
        order they were received.
    """
    seen_names = set()  # set membership instead of scanning a growing list
    transfered_samples = []
    for request in client.list_account_rules(account=account):
        name = request['name']
        if request['state'] == 'OK' and defaultname in name:
            transfered_samples.append(request)
            if name in seen_names:
                print("Duplicate sample name found: ", name)
            seen_names.add(name)
        else:
            # anything not finished (or not matching the name) is reported
            print(name, request['state'])
    print(f"Successfully transfered samples: {len(transfered_samples)} / {expected_total}")
    return transfered_samples


def get_block_files(block, scope):
    """Return the files of one block as a list.

    Uses the module-level ``client``.

    Args:
        block: name of the block, passed to ``client.list_files`` as ``name``.
        scope: scope passed to ``client.list_files``.

    Returns:
        list: every item produced by ``client.list_files``, materialized.
    """
    return [entry for entry in client.list_files(scope=scope, name=block)]


def get_rucio_blocks(scope, account, samplename):
    """List the blocks matching ``samplename`` together with their files.

    Naming differs in rucio:
        containers -> datasets
        datasets   -> blocks

    Uses the module-level ``client`` and ``get_block_files``.

    Args:
        scope: scope that is queried.
        account: account used as the ``'account'`` filter of the query.
        samplename: pattern handed to ``re.search``; a block is kept when the
            pattern is found anywhere in the block name.

    Returns:
        list: one ``{'blockname': ..., 'files': ...}`` dict per matching block.
    """
    # fetch everything that is available for this scope / account
    available = client.list_dids(
        scope=scope,
        filters={'account': account},
        did_type='dataset',
        long=True,
    )
    # keep only the names of the blocks that belong to the requested sample
    matching_names = [
        entry['name'] for entry in available if re.search(samplename, entry['name'])
    ]
    # attach the file list to every remaining block
    return [
        {'blockname': name, 'files': get_block_files(name, scope)}
        for name in matching_names
    ]

In [36]:
# default query parameters
account = 'pog_tau_group'
scope = 'group.pog_tau_group'
defaultname = 'inputDoubleMu_106X_ULegacy_miniAOD'

# map every transferred sample name to its blocks (each with its file list)
transfered_samples = get_account_rules(account, defaultname)
data = {}
for sample in transfered_samples:
    rucio_data = get_rucio_blocks(scope, account, sample['name'])
    data[sample['name']] = rucio_data
    # short per-sample summary
    print(f"Sample: {sample['name']}")
    print(f"Number of blocks: {len(rucio_data)}")
    print(f"Number of files: {sum(len(block['files']) for block in rucio_data)}")


Successfully transfered samples: 108 / 108
Sample: /EmbeddingRun2016_G/MuonEmbedding-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 16
Number of files: 153
Sample: /EmbeddingRun2016_G/ElectronEmbedding-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 16
Number of files: 153
Sample: /EmbeddingRun2018A/MuonEmbedding-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 26
Number of files: 253
Sample: /EmbeddingRun2016_H/ElTauFinalState-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 17
Number of files: 165
Sample: /EmbeddingRun2016-HIPM_E/ElTauFinalState-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 10
Number of files: 95
Sample: /EmbeddingRun2017F/ElMuFinalState-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 26
Number of files: 258
Sample: /EmbeddingRun2016-HIPM_E/MuTauFinalState-inputDoubleMu_106X_ULegacy_miniAOD-v1/USER
Number of blocks: 10
Number of files: 95
Sample: /EmbeddingRun2018B/ElMuFinalState-inputDoub

In [37]:
# dump data into json file; the context manager guarantees the file is
# flushed and closed once the dump is done (or if it fails midway)
with open('rucio_data.json', 'w') as json_file:
    json.dump(data, json_file, indent=4)
