In [19]:
# %load datasvc.py
import httpx
import pandas
from .util import format_dates


BLOCKARRIVE_BASISCODE = {
    -6: "no_source",
    -5: "no_link",
    -4: "auto_suspend",
    -3: "no_download_link",
    -2: "manual_suspend",
    -1: "block_open",
    0: "routed",
    1: "queue_full",
    2: "rerouting",
}


class DataSvc:
    """PhEDEx datasvc REST API

    Full documentation at https://cmsweb.cern.ch/phedex/datasvc/doc
    """

    defaults = {
        # PhEDEx datasvc base URL with trailing slash
        "datasvc_base": "https://cmsweb.cern.ch/phedex/datasvc/",
        # Options: prod, dev, debug
        "phedex_instance": "prod",
    }

    def __init__(self, client, datasvc_base=None, phedex_instance=None):
        if datasvc_base is None:
            datasvc_base = DataSvc.defaults["datasvc_base"]
        if phedex_instance is None:
            phedex_instance = DataSvc.defaults["phedex_instance"]
        self.client = client
        self.baseurl = httpx.URL(datasvc_base)
        self.jsonurl = self.baseurl.join("json/%s/" % phedex_instance)
        self.xmlurl = self.baseurl.join("xml/%s/" % phedex_instance)

    async def jsonmethod(self, method, **params):
        return await self.client.getjson(url=self.jsonurl.join(method), params=params)

    async def blockreplicas(self, **params):
        """Get block replicas as a pandas dataframe

        Parameters
        ----------
        block          block name, can be multiple (*)
        dataset        dataset name, can be multiple (*)
        node           node name, can be multiple (*)
        se             storage element name, can be multiple (*)
        update_since   unix timestamp, only return replicas whose record was
                        updated since this time
        create_since   unix timestamp, only return replicas whose record was
                        created since this time. When no "dataset", "block"
                        or "node" are given, create_since is default to 24 hours ago
        complete       y or n, whether or not to require complete or incomplete
                        blocks. Open blocks cannot be complete.  Default is to
                        return either.
        dist_complete  y or n, "distributed complete".  If y, then returns
                        only block replicas for which at least one node has
                        all files in the block.  If n, then returns block
                        replicas for which no node has all the files in the
                        block.  Open blocks cannot be dist_complete.  Default is
                        to return either kind of block replica.
        subscribed     y or n, filter for subscription. default is to return either.
        custodial      y or n. filter for custodial responsibility.  default is
                        to return either.
        group          group name.  default is to return replicas for any group.
        show_dataset   y or n, default n. If y, show dataset information with
                        the blocks; if n, only show blocks
        """
        resjson = await self.jsonmethod("blockreplicas", **params)
        df = pandas.io.json.json_normalize(
            resjson["phedex"]["block"],
            record_path="replica",
            record_prefix="replica.",
            meta=["bytes", "files", "name", "id", "is_open"],
        )
        format_dates(df, ["replica.time_create", "replica.time_update"])
        return df
    
    
   
        

ModuleNotFoundError: No module named 'httpx'

In [45]:
# %load util.py
import pandas
import json

def format_dates(df, columns):
    """Convert UNIX timestamp columns to datetime"""
    if df.size > 0:
        df[columns] = df[columns].apply(lambda v: pandas.to_datetime(v, unit="s"))
    return df

In [46]:
with open('data_method_test.json') as f:
    data = json.load(f)

In [47]:
#Data_Method

async def data(self, **params):
    
        """Shows data which is registered (injected) to phedex
        
        Parameters
        ----------
        
        dataset                  dataset name to output data for (wildcard support)
        block                    block name to output data for (wildcard support)
        file                     file name to output data for (wildcard support)
        level                    display level, 'file' or 'block'. when level=block
                                 no file details would be shown. Default is 'file'.
                                 when level = 'block', return data of which blocks were created since this time;
                                 when level = 'file', return data of which files were created since this time
        create_since             when no parameters are given, default create_since is set to one day ago
        """
        resjson = await self.jsonmethod("data", **params)
        df = pandas.io.json.json_normalize(
            rejson['phedex']['dbs']['dataset'],
            )

In [48]:
data

<function __main__.data(self, **params)>

In [49]:
#json = [
    {
        'some': 'nested',
        'data': [
            {'key': 'val1'},
            {'key': 'val2'},
            {'key': 'val3'},
        ]
    },
    {
        'some': 'more',
        'data': [
            {'key': 'val4'},
            {'key': 'val5'},
            {'key': 'val6'},
        ]
    },
]
out = []
for toplevel in json:
    for secondlevel in toplevel['data']:
        out.append({
            'some': toplevel['some'],
            'key': secondlevel['key'],
        })
print(out)

IndentationError: unexpected indent (<ipython-input-49-8ae5c0c0030b>, line 2)

In [50]:
out = []
for _instance in data['phedex']['dbs']:
    for _dataset in _instance['dataset']:
        for _block in _dataset['block']:
            for _file in _block['file']:
                out.append({
                'Dataset': _dataset['name'],
                'Is dataset open': _dataset['is_open'],
                'block Name': _block['name'],
                'Block size (GB)': _block['bytes']/1000000000.0,
                'Time block was created': _block['time_create'],
                'File name': _file['lfn'],
                'File checksum': _file['checksum'],
                'File size':  _file['size'],
                'Time file was created': _file['time_create']
                })

TypeError: 'function' object is not subscriptable

In [55]:
df=pandas.json_normalize(out)
format_dates(df, ["Time file was created",'Time block was created'])

AttributeError: module 'pandas' has no attribute 'json_normalize'

In [90]:
import pandas
from pandas.io.json import json_normalize
import json
with open('erroglog.json') as f:
    data = json.load(f)

In [91]:
out = []
for _instance in data['phedex']['link']:
    for _block in _instance['block']:
        for _file in _block['file']:
            for _transfer_error in _file['transfer_error']:
                out.append({
                'Link': _instance['from']+' to '+_instance['to'],
                'LFN': _file['name'],
                'file Checksum': _file['checksum'],
                'file size (GB)': _file['size']/1000000000.0,
                'Block name': _block['name'],
                'Error log': str(_transfer_error['detail_log']['$t']),
                'From PFN': _transfer_error['from_pfn'],
                'To PFN': _transfer_error['to_pfn'],
                'Time': _transfer_error['time_done']
                    })
df=json_normalize(out)
format_dates(df, ["Time"])                

Unnamed: 0,Link,LFN,file Checksum,file size (GB),Block name,Error log,From PFN,To PFN,Time
0,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/TTTo2L2Nu_Tu...,"adler32:e69f054a,cksum:2164955100",4.057674,/TTTo2L2Nu_TuneCUETP8M2_ttHtranche3_13TeV-powh...,TRANSFER [125] TRANSFER Transfer canceled bec...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 18:00:29.930370092
1,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/TTTo2L2Nu_Tu...,"adler32:c58c9b75,cksum:655270540",2.205868,/TTTo2L2Nu_TuneCUETP8M2_ttHtranche3_13TeV-powh...,TRANSFER [125] TRANSFER Transfer canceled bec...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 19:01:47.449539900
2,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:b9b37cfd,cksum:753292588",2.978345,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:53:10.385149956
3,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:a58a41ab,cksum:2987891731",3.192424,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:53:10.383640051
4,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:2cea590,cksum:2302868579",3.642728,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:53:10.476039886
...,...,...,...,...,...,...,...,...,...
95,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:3e4c0bf7,cksum:2148375856",2.775506,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:47:24.131419897
96,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:fc4840d8,cksum:891338753",3.370152,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:47:18.237540007
97,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:2c3d9ca0,cksum:3721690423",3.320303,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [110] SOURCE SRM_GET_TURL srm-ifce er...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 11:44:48.773030043
98,T1_IT_CNAF_Buffer to T2_US_Caltech,/store/mc/RunIISummer16DR80Premix/QCD_HT300to5...,"adler32:2fdfad2a,cksum:523422905",4.321990,/QCD_HT300to500_TuneCUETP8M1_13TeV-madgraphMLM...,TRANSFER [125] TRANSFER Transfer canceled bec...,srm://storm-fe-cms.cr.cnaf.infn.it:8444/srm/ma...,gsiftp://transfer.ultralight.org:2811//mnt/had...,2020-03-23 14:18:01.328530073


In [93]:
import pandas
from pandas.io.json import json_normalize
import json
with open('blockarrive_test_file.json') as f:
    data = json.load(f)

In [100]:
out = []
for _block in data['phedex']['block']:
    for _destination in _block['destination']:
        out.append({
        'Block Name': _block['name'],
        'Destination':_destination['name'],
        'Time Arrive': _destination['time_arrive'],
        'Time update':_destination['time_update'],
        'Number of files':_destination['files'],
        'Block size (GB)':_destination['bytes']/1000000000.0,
        'Basis code':_destination['basis']
                })
df=json_normalize(out)
format_dates(df, ["Time Arrive",'Time update'])        

Unnamed: 0,Block Name,Destination,Time Arrive,Time update,Number of files,Block size (GB),Basis code
0,/Cosmics/Run2017F-UL17-v1/RECO#ce4160b1-9b5c-4...,T1_FR_CCIN2P3_Disk,NaT,2020-03-24 14:59:31.233400106,1,5.22298,-1
1,/Cosmics/Run2017F-UL17-v1/RECO#7b7e6758-8ecb-4...,T1_RU_JINR_Disk,NaT,2020-03-24 14:59:31.233400106,3,14.737185,-1
2,/Cosmics/Run2017F-UL17-v1/RECO#b32f8297-7563-4...,T2_FR_CCIN2P3,NaT,2020-03-24 14:59:31.233400106,1,6.003502,-1
3,/Cosmics/Run2017F-UL17-v1/RECO#dd2b49b9-be51-4...,T2_IN_TIFR,NaT,2020-03-24 14:59:31.233400106,2,3.727002,1
4,/Cosmics/Run2017F-UL17-v1/RECO#00dd728f-9570-4...,T2_IN_TIFR,NaT,2020-03-24 14:59:31.233400106,1,0.000912,1
5,/Cosmics/Run2017F-UL17-v1/RECO#e5a98038-ffea-4...,T2_IN_TIFR,NaT,2020-03-24 14:59:31.233400106,1,0.005306,1
6,/Cosmics/Run2017F-UL17-v1/RECO#789ff325-caee-4...,T2_FR_GRIF_LLR,NaT,2020-03-24 14:59:31.233400106,1,0.00125,-1
7,/Cosmics/Run2017F-UL17-v1/RECO#19658bb3-3513-4...,T2_FR_CCIN2P3,NaT,2020-03-24 14:59:31.233400106,1,0.000824,-1


In [106]:
import pandas
from pandas.io.json import json_normalize
import json
with open('filereplicas_test_file.json') as f:
    data = json.load(f)

In [113]:
out = []
for _block in data['phedex']['block']:
    for _file in _block['file']:
        for _replica in _file['replica']:
            out.append({
            'Block_name':_block['name'],
            'Files in block':_block['files'],
            'Block size (GB)':_block['bytes']/1000000000.0,
            'File name':_file['name'],
            'File checksum':_file['checksum'],
            'File created on':_file['time_create'],
            'File replica at':_replica['node'],
            'File subcribed?':_replica['subscribed'],
            'Custodial?':_replica['custodial'],
            'Group':_replica['group'],
            'File in node since':_replica['time_create']
            })
df=json_normalize(out)
format_dates(df, ["File created on",'File in node since'])        

Unnamed: 0,Block_name,Files in block,Block size (GB),File name,File checksum,File created on,File replica at,File subcribed?,Custodial?,Group,File in node since
0,/Cosmics/Run2017F-UL17-v1/RECO#ce4160b1-9b5c-4...,1,5.22298,/store/data/Run2017F/Cosmics/RECO/UL17-v1/5000...,"adler32:acdf458d,cksum:2513236351",2020-03-22 15:48:18.569590092,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 15:48:18.569590092
1,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:6d9bcc89,cksum:389824848",2020-03-23 13:38:30.112149954,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-23 13:38:30.112149954
2,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:7fe29fff,cksum:665026416",2020-03-22 16:19:14.994199991,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 16:19:14.994199991
3,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:df4cec77,cksum:636884196",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
4,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:c19c1c2c,cksum:2659889322",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
5,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:28a46b6f,cksum:2611765294",2020-03-22 15:08:19.790260077,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 15:08:19.790260077
6,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:df1b5756,cksum:1088425438",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
7,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:bf8c12eb,cksum:945287788",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
8,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:6a8ea9c6,cksum:17034160",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
9,/Cosmics/Run2017F-UL17-v1/RECO#577cfa61-1fc0-4...,14,59.397748,/store/data/Run2017F/Cosmics/RECO/UL17-v1/3000...,"adler32:b197664e,cksum:2716669583",2020-03-22 14:00:25.953000069,T1_FR_CCIN2P3_Disk,y,n,DataOps,2020-03-22 14:00:25.953000069
