In [119]:
# %load datasvc.py
import httpx
import pandas
from .util import format_dates


BLOCKARRIVE_BASISCODE = {
    -6: "no_source",
    -5: "no_link",
    -4: "auto_suspend",
    -3: "no_download_link",
    -2: "manual_suspend",
    -1: "block_open",
    0: "routed",
    1: "queue_full",
    2: "rerouting",
}


class DataSvc:
    """PhEDEx datasvc REST API

    Full documentation at https://cmsweb.cern.ch/phedex/datasvc/doc
    """

    defaults = {
        # PhEDEx datasvc base URL with trailing slash
        "datasvc_base": "https://cmsweb.cern.ch/phedex/datasvc/",
        # Options: prod, dev, debug
        "phedex_instance": "prod",
    }

    def __init__(self, client, datasvc_base=None, phedex_instance=None):
        if datasvc_base is None:
            datasvc_base = DataSvc.defaults["datasvc_base"]
        if phedex_instance is None:
            phedex_instance = DataSvc.defaults["phedex_instance"]
        self.client = client
        self.baseurl = httpx.URL(datasvc_base)
        self.jsonurl = self.baseurl.join("json/%s/" % phedex_instance)
        self.xmlurl = self.baseurl.join("xml/%s/" % phedex_instance)

    async def jsonmethod(self, method, **params):
        return await self.client.getjson(url=self.jsonurl.join(method), params=params)

    async def blockreplicas(self, **params):
        """Get block replicas as a pandas dataframe

        Parameters
        ----------
        block          block name, can be multiple (*)
        dataset        dataset name, can be multiple (*)
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas whose record was
                        updated since this time
        create_since   unix timestamp, only return replicas whose record was
                        created since this time. When no "dataset", "block"
                        or "node" are given, create_since is default to 24 hours ago
        complete       y or n, whether or not to require complete or incomplete
                        blocks. Open blocks cannot be complete.  Default is to
                        return either.
        dist_complete  y or n, "distributed complete".  If y, then returns
                        only block replicas for which at least one node has
                        all files in the block.  If n, then returns block
                        replicas for which no node has all the files in the
                        block.  Open blocks cannot be dist_complete.  Default is
                        to return either kind of block replica.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility.  default is
                        to return either.
        group          group name.  default is to return replicas for any group.
        show_dataset   y or n, default n. If y, show dataset information with
                        the blocks; if n, only show blocks
        """
        resjson = await self.jsonmethod("blockreplicas", **params)
        df = pandas.io.json.json_normalize(
            resjson["phedex"]["block"],
            record_path="replica",
            record_prefix="replica.",
            meta=["bytes", "files", "name", "id", "is_open"],
        )
        format_dates(df, ["replica.time_create", "replica.time_update"])
        return df
    
    
   
        

ModuleNotFoundError: No module named 'httpx'

In [120]:
# %load util.py
import pandas
import json

def format_dates(df, columns):
    """Convert UNIX timestamp columns to datetime"""
    if df.size > 0:
        df[columns] = df[columns].apply(lambda v: pandas.to_datetime(v, unit="s"))
    return df

In [151]:
with open('data_method_test.json') as f:
    data = json.load(f)

In [122]:
#Data_Method

async def data(self, **params):
    
        """Shows data which is registered (injected) to phedex
        
        Parameters
        ----------
        
        dataset                  dataset name to output data for (wildcard support)
        block                    block name to output data for (wildcard support)
        file                     file name to output data for (wildcard support)
        level                    display level, 'file' or 'block'. when level=block
                                 no file details would be shown. Default is 'file'.
                                 when level = 'block', return data of which blocks were created since this time;
                                 when level = 'file', return data of which files were created since this time
        create_since             when no parameters are given, default create_since is set to one day ago
        """
        resjson = await self.jsonmethod("data", **params)
        df = pandas.io.json.json_normalize(
            rejson['phedex']['dbs']['dataset'],
            )

In [153]:
data

{'phedex': {'request_timestamp': 1584563680.53556,
  'dbs': [{'dataset': [{'time_update': None,
      'is_transient': 'n',
      'is_open': 'y',
      'name': '/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO',
      'block': [{'time_update': 1574197309.67013,
        'bytes': 2353710973,
        'files': 3,
        'file': [{'checksum': 'adler32:84458e13,cksum:843148796',
          'node': 'T2_FR_GRIF_IRFU',
          'lfn': '/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL2018-v2/270000/EE179485-B1C5-7249-A077-3D133C0198EB.root',
          'time_create': 1574024907.65851,
          'size': 569420538},
         {'checksum': 'adler32:d290b9c8,cksum:4293630207',
          'node': 'T2_FR_GRIF_IRFU',
          'lfn': '/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL2018-v2/270000/81217908-BEC2-E84C-ACDC-8132B48A4A25.root',
          'time_create': 1574197074.00792,
          'size': 1406279668},
         {'checksum': 'adler32:b4fddac3,cksum:4218643856',
          'node': 'T2_FR_GRIF_IRFU',
        

In [155]:
#json = [
    {
        'some': 'nested',
        'data': [
            {'key': 'val1'},
            {'key': 'val2'},
            {'key': 'val3'},
        ]
    },
    {
        'some': 'more',
        'data': [
            {'key': 'val4'},
            {'key': 'val5'},
            {'key': 'val6'},
        ]
    },
]
out = []
for toplevel in json:
    for secondlevel in toplevel['data']:
        out.append({
            'some': toplevel['some'],
            'key': secondlevel['key'],
        })
print(out)

IndentationError: unexpected indent (<ipython-input-155-8ae5c0c0030b>, line 2)

In [185]:
out = []
for _instance in data['phedex']['dbs']:
    for _dataset in _instance['dataset']:
        for _block in _dataset['block']:
            for _file in _block['file']:
                out.append({
                'Dataset': _dataset['name'],
                'Is dataset open': _dataset['is_open'],
                'block Name': _block['name'],
                'Block size (GB)': _block['bytes']/1000000000.0,
                'Time block was created': _block['time_create'],
                'File name': _file['lfn'],
                'File checksum': _file['checksum'],
                'File size':  _file['size'],
                'Time file was created': _file['time_create']
                })

In [193]:
df=json_normalize(out)
format_dates(df, ["Time file was created",'Time block was created'])

Unnamed: 0,Dataset,Is dataset open,block Name,Block size (GB),Time block was created,File name,File checksum,File size,Time file was created
0,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#2f9...,2.353711,2019-11-17 21:08:27.658509970,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:84458e13,cksum:843148796",569420538,2019-11-17 21:08:27.658509970
1,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#2f9...,2.353711,2019-11-17 21:08:27.658509970,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:d290b9c8,cksum:4293630207",1406279668,2019-11-19 20:57:54.007920027
2,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#2f9...,2.353711,2019-11-17 21:08:27.658509970,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:b4fddac3,cksum:4218643856",378010767,2019-11-18 20:59:33.452300072
3,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#eb8...,1.812333,2019-11-18 20:54:18.046839952,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:d273d7ef,cksum:2120672845",1440351579,2019-11-18 20:54:18.046839952
4,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#eb8...,1.812333,2019-11-18 20:54:18.046839952,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:e92fd24e,cksum:1545795842",313277819,2019-11-19 16:44:42.158459902
...,...,...,...,...,...,...,...,...,...
768,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#ad5...,18.634834,2019-11-24 08:28:46.577680111,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:dba8c7b8,cksum:4232881920",1881687126,2019-11-24 21:54:32.379650116
769,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#ad5...,18.634834,2019-11-24 08:28:46.577680111,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:8a1bd427,cksum:4294360641",1800784548,2019-11-25 04:46:15.614670038
770,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#ad5...,18.634834,2019-11-24 08:28:46.577680111,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:f7c98d51,cksum:3807650252",1855002748,2019-11-24 18:25:09.755899906
771,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO,y,/EGamma/Run2018A-12Nov2019_UL2018-v2/DQMIO#ad5...,18.634834,2019-11-24 08:28:46.577680111,/store/data/Run2018A/EGamma/DQMIO/12Nov2019_UL...,"adler32:3b70368d,cksum:2767044387",1957746080,2019-11-24 15:04:56.384190083
