In [1]:
import boto3
import requests
import pandas as pd
import numpy as np
from edc import setup_environment_variables

# Inject service credentials into the process environment. Per this cell's
# output, AWS_BUCKET is additionally loaded from ~/custom.env.
setup_environment_variables()
# IPython `%env NAME` magic: capture the value of AWS_BUCKET in a Python variable.
bucket_name = %env AWS_BUCKET

API credentials have automatically been injected for your active subscriptions.  
The following environment variables are now available:
* `GEODB_API_SERVER_PORT`, `GEODB_API_SERVER_URL`, `GEODB_AUTH_AUD`, `GEODB_AUTH_CLIENT_ID`, `GEODB_AUTH_CLIENT_SECRET`, `GEODB_AUTH_DOMAIN`
* `SH_CLIENT_ID`, `SH_CLIENT_NAME`, `SH_CLIENT_SECRET`, `SH_INSTANCE_ID`

The following additional environment variables have been loaded from `~/custom.env`:
* `AWS_BUCKET`


In [2]:
# Read the request log and drop each line's prefix: everything up to the last
# ']' plus the one character after it is discarded, then trailing whitespace is
# stripped. The file is only needed for this step, so it is closed right after.
# NOTE(review): a line without any ']' loses just its first character
# (rfind() returns -1) — confirm that such lines cannot occur in the log.
with open('requests.txt') as log:
    loglines = [line[line.rfind(']')+2:].rstrip() for line in log]

# Walk the log as (line, next line) pairs. A line starting with 'g' that is
# directly followed by a line starting with 'N' yields one record:
#   * the last space-separated token of the first line -> 'id' column
#   * the integer after the last ':' of the second line -> 's3requests' column
# Pairs are formed by position rather than by comparing a line's text with the
# final line, so a line that merely repeats the final line's text is still
# processed, and the look-ahead can never run past the end of the list.
request_rows = [
    [current.split(' ')[-1], int(following.split(':')[-1])]
    for current, following in zip(loglines, loglines[1:])
    if current.startswith('g') and following.startswith('N')
]

s3requests = pd.DataFrame(request_rows, columns=['id', 's3requests'])
s3requests

Unnamed: 0,id,s3requests
0,aade3d2484804fafa9bc075baa755513,69
1,b703127c649e459ebb5fb70ea0000ddd,22411
2,616500a3baed47c7bbcad3222658c5e5,3550
3,98721703b76243ee9bd25509f23c24aa,293
4,657742086071402a8c426e7fff9a3d6b,11938
5,1e19775eab6647418971b750e133529f,5837
6,94dc3d7fcc654ee38b615971ed6fa6ff,51780
7,10a6bbc2d49543f9a623542c74e6e050,41223
8,6c8268fb4d454e269e3db22a9373434b,3558
9,1a59306b151b45ff92202a08280c9c25,4928


In [3]:
# Fetch the job list. The timeout keeps the cell from hanging forever on an
# unresponsive server, and raise_for_status() surfaces HTTP errors directly
# instead of letting an error payload be parsed as job data.
response = requests.get('https://demo-m.hub.eox.at/jobs/', timeout=60)
response.raise_for_status()
jobs = response.json()

# Keep only the jobs whose configured output path contains 'mdc'.
jobs = [j for j in jobs if 'mdc' in j['properties']['config']['mapchete_config']['output']['path']]

# An empty list here means every selected job reports state 'SUCCESS'.
print('Status of unfinished tiles', [j['properties']['progress_data'] for j in jobs if j['properties']['state'] != 'SUCCESS'])

# One row per job: id, runtime and the output path relative to the bucket root.
results = pd.DataFrame([{
    'id': j['id'],
    'runtime': j['properties']['runtime'],
    'path': j['properties']['config']['mapchete_config']['output']['path'].replace('s3://eox-masterdatacube/', '')
} for j in jobs])

# Left join on the job id: jobs without a parsed request count are kept and
# get NaN in 's3requests'. The key is spelled out so the join does not depend
# on whichever column names the two frames happen to share.
results = pd.merge(results, s3requests, how='left', on='id')

print(results)

# Aggregate per output path: list of job ids, total/min/max runtime and total
# S3 request count. groupby().agg() already returns a DataFrame. The NumPy
# callables are kept as aggregators so the resulting column labels are unchanged.
metrics = results.groupby('path').agg({'id': list,
                                       'runtime': [np.sum, np.min, np.max],
                                       's3requests': np.sum})

metrics

Status of unfinished tiles []
                                  id      runtime                       path  \
0   b703127c649e459ebb5fb70ea0000ddd   655.667576  mapchete_cubes/mdc_02_01/   
1   616500a3baed47c7bbcad3222658c5e5   464.066579  mapchete_cubes/mdc_02_01/   
2   98721703b76243ee9bd25509f23c24aa   784.729690  mapchete_cubes/mdc_02_01/   
3   657742086071402a8c426e7fff9a3d6b   629.219358  mapchete_cubes/mdc_02_01/   
4   1e19775eab6647418971b750e133529f   601.767835  mapchete_cubes/mdc_02_01/   
5   94dc3d7fcc654ee38b615971ed6fa6ff   573.001364  mapchete_cubes/mdc_02_01/   
6   10a6bbc2d49543f9a623542c74e6e050  1129.352243  mapchete_cubes/mdc_04_01/   
7   6c8268fb4d454e269e3db22a9373434b   749.995984  mapchete_cubes/mdc_04_01/   
8   1a59306b151b45ff92202a08280c9c25  1397.318961  mapchete_cubes/mdc_04_01/   
9   9b778ab8a8b54976bcc7837a6014c864  1076.571560  mapchete_cubes/mdc_04_01/   
10  58450d2e1f374e0d94519a22419aca20  1041.177847  mapchete_cubes/mdc_04_01/   
11  473cdd

Unnamed: 0_level_0,id,runtime,runtime,runtime,s3requests
Unnamed: 0_level_1,list,sum,amin,amax,sum
path,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
mapchete_cubes/mdc_02_01/,"[b703127c649e459ebb5fb70ea0000ddd, 616500a3bae...",3708.452402,464.066579,784.72969,95809
mapchete_cubes/mdc_04_01/,"[10a6bbc2d49543f9a623542c74e6e050, 6c8268fb4d4...",6499.19424,749.995984,1397.318961,117972
mapchete_cubes/mdc_06_01/,"[c1a70baf47e94d4d9498d5b0902264b2, 631083feac3...",8817.409989,1041.759525,1828.350021,349419
mapchete_cubes/mdc_08_01/,"[dbc4188882d24938a2debe6be29c1dc1, 72646899d54...",11227.24679,1330.166794,2338.528545,536730
mapchete_cubes/mdc_10_01/,"[5c0e0c98596f49d7a3567c07e3f00bb1, 930d8cadf99...",14338.95107,1695.721057,2947.752656,575558
mapchete_cubes/mdc_12_01/,"[4ad0ccbb9ad04a6e8671e24cfefe13ca, fb1e013d627...",17153.962519,2002.482632,3539.18779,648850
mapchete_cubes/mdc_17_01/,"[0a312ca669d94531b27a5d3d6265cba8, b893c79df42...",24133.4662,2876.555089,5041.536348,901076


In [4]:
# Handle to the bucket named by `bucket_name`.
s3 = boto3.resource('s3')
bk = s3.Bucket(bucket_name)

# For every output path in the metrics index, add up the sizes of all objects
# whose key starts with "<path>5" and convert the byte total to GiB (2**30 bytes).
# NOTE(review): the '5' suffix presumably restricts the listing to a single
# sub-directory (e.g. one zoom level) — confirm against the bucket layout.
sizes = [
    sum(obj.size for obj in bk.objects.filter(Prefix=path+'5')) / 2**30
    for path in metrics.index
]

# Attach the per-path totals as a new 'size' column and display the table.
metrics['size'] = sizes
metrics

Unnamed: 0_level_0,id,runtime,runtime,runtime,s3requests,size
Unnamed: 0_level_1,list,sum,amin,amax,sum,Unnamed: 6_level_1
path,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
mapchete_cubes/mdc_02_01/,"[b703127c649e459ebb5fb70ea0000ddd, 616500a3bae...",3708.452402,464.066579,784.72969,95809,7.212899
mapchete_cubes/mdc_04_01/,"[10a6bbc2d49543f9a623542c74e6e050, 6c8268fb4d4...",6499.19424,749.995984,1397.318961,117972,13.393613
mapchete_cubes/mdc_06_01/,"[c1a70baf47e94d4d9498d5b0902264b2, 631083feac3...",8817.409989,1041.759525,1828.350021,349419,18.780106
mapchete_cubes/mdc_08_01/,"[dbc4188882d24938a2debe6be29c1dc1, 72646899d54...",11227.24679,1330.166794,2338.528545,536730,23.463265
mapchete_cubes/mdc_10_01/,"[5c0e0c98596f49d7a3567c07e3f00bb1, 930d8cadf99...",14338.95107,1695.721057,2947.752656,575558,29.429667
mapchete_cubes/mdc_12_01/,"[4ad0ccbb9ad04a6e8671e24cfefe13ca, fb1e013d627...",17153.962519,2002.482632,3539.18779,648850,34.245206
mapchete_cubes/mdc_17_01/,"[0a312ca669d94531b27a5d3d6265cba8, b893c79df42...",24133.4662,2876.555089,5041.536348,901076,48.370471


In [5]:
# Write the metrics table to metrics.csv, relative to the current working
# directory; the 'path' index is written too (to_csv includes the index by default).
metrics.to_csv('metrics.csv')