In [1]:
import boto3
import requests
import pandas as pd
import numpy as np
from edc import setup_environment_variables

# Populate the process environment; the recorded cell output lists the variables
# that were injected (SH_* credentials) and those loaded from ~/custom.env.
setup_environment_variables()
# Read the bucket name from the AWS_BUCKET environment variable using the
# IPython %env magic (keep the magic on its own line, without a trailing comment).
bucket_name = %env AWS_BUCKET

API credentials have automatically been injected for your active subscriptions.  
The following environment variables are now available:
* `SH_CLIENT_ID`, `SH_CLIENT_NAME`, `SH_CLIENT_SECRET`, `SH_INSTANCE_ID`

The following additional environment variables have been loaded from `~/custom.env`:
* `AWS_BUCKET`


In [2]:
# Build one row per job from 'requests.txt': the job id and its total request count.
with open('requests.txt') as log:
    # Keep only the message part of every line: the text that follows the last ']'
    # and one separator character, with trailing whitespace stripped.
    loglines = [line[line.rfind(']') + 2:].rstrip() for line in log]

# A record spans three consecutive lines: a line starting with 'g' whose last
# space-separated token is the job id, followed by two lines that each end in
# ':<integer>'. Only the first counter line is checked for the 'N' prefix.
# NOTE(review): presumably these are the two per-job S3 request counters -- confirm
# against the log format.
#
# Walking the lines as positional three-line windows guarantees that both counter
# lines exist. The previous text comparison against the last line skipped any
# record whose text equalled the last line and could index past the end of the list.
request_rows = []
for header, first_counter, second_counter in zip(loglines, loglines[1:], loglines[2:]):
    if header.startswith('g') and first_counter.startswith('N'):
        job_id = header.split(' ')[-1]
        total = int(first_counter.split(':')[-1]) + int(second_counter.split(':')[-1])
        request_rows.append([job_id, total])

s3requests = pd.DataFrame(request_rows, columns=['id', 's3requests'])
s3requests

Unnamed: 0,id,s3requests
0,aade3d2484804fafa9bc075baa755513,7610
1,b703127c649e459ebb5fb70ea0000ddd,22669
2,616500a3baed47c7bbcad3222658c5e5,3808
3,98721703b76243ee9bd25509f23c24aa,551
4,657742086071402a8c426e7fff9a3d6b,12196
5,1e19775eab6647418971b750e133529f,6095
6,94dc3d7fcc654ee38b615971ed6fa6ff,52038
7,10a6bbc2d49543f9a623542c74e6e050,41481
8,6c8268fb4d454e269e3db22a9373434b,3816
9,1a59306b151b45ff92202a08280c9c25,5186


In [3]:
# Fetch the job list. The timeout keeps a stalled connection from blocking the
# kernel forever, and raise_for_status() reports an HTTP error right here instead
# of letting an error payload fail later with an unrelated JSON/KeyError.
response = requests.get('https://demo-m.hub.eox.at/jobs/', timeout=60)
response.raise_for_status()
jobs = response.json()
# Keep only the jobs whose mapchete output path contains 'mdc'.
jobs = [j for j in jobs if 'mdc' in j['properties']['config']['mapchete_config']['output']['path']]

print('Status of unfinished tiles', [j['properties']['progress_data'] for j in jobs if j['properties']['state'] != 'SUCCESS'])

# One row per job; the bucket prefix is stripped so 'path' is relative to the bucket.
results = pd.DataFrame([{
    'id': j['id'],
    'runtime': j['properties']['runtime'],
    'path': j['properties']['config']['mapchete_config']['output']['path'].replace('s3://eox-masterdatacube/', '')
} for j in jobs])

# Left join on the job id so jobs without a parsed log entry are kept (their
# 's3requests' value is NaN). The key is spelled out rather than inferred from
# whichever columns the two frames happen to share.
results = pd.merge(results, s3requests, how='left', on='id')

print(results)

# Aggregate per output path. Groups with no logged requests sum to 0.0 because
# missing values are skipped by the sum.
# NOTE(review): the NumPy callables are kept so the column labels seen in the
# recorded output ('amin'/'amax') do not change; consider the string aliases
# 'sum'/'min'/'max' if nothing depends on those labels.
metrics = results.groupby('path').agg({'id': list,
                                       'runtime': [np.sum, np.min, np.max],
                                       's3requests': np.sum})
metrics

Status of unfinished tiles [{'current': 172, 'total': 256}, None, None]
                                  id      runtime                       path  \
0   b703127c649e459ebb5fb70ea0000ddd   655.667576  mapchete_cubes/mdc_02_01/   
1   616500a3baed47c7bbcad3222658c5e5   464.066579  mapchete_cubes/mdc_02_01/   
2   98721703b76243ee9bd25509f23c24aa   784.729690  mapchete_cubes/mdc_02_01/   
3   657742086071402a8c426e7fff9a3d6b   629.219358  mapchete_cubes/mdc_02_01/   
4   1e19775eab6647418971b750e133529f   601.767835  mapchete_cubes/mdc_02_01/   
5   94dc3d7fcc654ee38b615971ed6fa6ff   573.001364  mapchete_cubes/mdc_02_01/   
6   10a6bbc2d49543f9a623542c74e6e050  1129.352243  mapchete_cubes/mdc_04_01/   
7   6c8268fb4d454e269e3db22a9373434b   749.995984  mapchete_cubes/mdc_04_01/   
8   1a59306b151b45ff92202a08280c9c25  1397.318961  mapchete_cubes/mdc_04_01/   
9   9b778ab8a8b54976bcc7837a6014c864  1076.571560  mapchete_cubes/mdc_04_01/   
10  58450d2e1f374e0d94519a22419aca20  1041.17784

Unnamed: 0_level_0,id,runtime,runtime,runtime,s3requests
Unnamed: 0_level_1,list,sum,amin,amax,sum
path,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
mapchete_cubes/mdc_02_01/,"[b703127c649e459ebb5fb70ea0000ddd, 616500a3bae...",3708.452402,464.066579,784.72969,97357.0
mapchete_cubes/mdc_04_01/,"[10a6bbc2d49543f9a623542c74e6e050, 6c8268fb4d4...",6499.19424,749.995984,1397.318961,119520.0
mapchete_cubes/mdc_06_01/,"[c1a70baf47e94d4d9498d5b0902264b2, 631083feac3...",4491.123461,1041.759525,1828.350021,0.0


In [4]:
# Add a 'size' column to `metrics`: the total size, in GiB, of the objects stored
# under each output path.
s3 = boto3.resource('s3')
bk = s3.Bucket(bucket_name)

sizes = []
for path in metrics.index:
    # Only keys that begin with "<path>5" are counted.
    # NOTE(review): presumably '5' selects a single zoom-level directory -- confirm.
    objects = bk.objects.filter(Prefix=path + '5')
    total_bytes = 0
    for obj in objects:
        total_bytes += obj.size
    # 2**30 bytes per GiB.
    sizes.append(total_bytes / 2**30)

metrics['size'] = sizes
metrics

Unnamed: 0_level_0,id,runtime,runtime,runtime,s3requests,size
Unnamed: 0_level_1,list,sum,amin,amax,sum,Unnamed: 6_level_1
path,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
mapchete_cubes/mdc_02_01/,"[b703127c649e459ebb5fb70ea0000ddd, 616500a3bae...",3708.452402,464.066579,784.72969,97357.0,7.212899
mapchete_cubes/mdc_04_01/,"[10a6bbc2d49543f9a623542c74e6e050, 6c8268fb4d4...",6499.19424,749.995984,1397.318961,119520.0,13.393613
mapchete_cubes/mdc_06_01/,"[c1a70baf47e94d4d9498d5b0902264b2, 631083feac3...",4491.123461,1041.759525,1828.350021,0.0,10.280537
