# Parameters

In [63]:
# --- Elasticsearch endpoint and the index pattern to read ------------------
db_host = 'atlas-kibana.mwt2.org'
db_port = 9200
db_index = 'jobs_archive_2018-05-*'

# --- Fields requested for every job document -------------------------------
# The order is significant: later cells address these values by position
# (e.g. index 3 is 'computingsite', index 5 is 'jobdispatchererrordiag').
query_source = [
    'taskid', 'pandaid', 'jobstatus', 'computingsite',
    'jobdispatchererrorcode', 'jobdispatchererrordiag',
    'transformation', 'wall_time', 'corecount',
    'maxpss', 'maxrss',
    'exeerrordiag', 'piloterrordiag', 'modificationhost',
]

# --- Boolean query clauses -------------------------------------------------
# "should" clauses: a document has to match at least one of them
# (the query cell sets minimum_should_match to 1).
query_should = [{'term': {'jobstatus': 'failed'}}]

# "must_not" clauses: documents matching any of these are excluded.
query_must_not = [{'term': {'gShare': 'Analysis'}}]

# Query database

In [64]:
import datetime

from elasticsearch import Elasticsearch, helpers 
from elasticsearch.helpers import scan

# Connect to the Elasticsearch instance configured in the parameters cell.
es = Elasticsearch([{'host': db_host, 'port': db_port}], timeout=60)

# Expand the index pattern into an explicit, sorted, comma-separated list of
# concrete index names. With h='index' the cat API answers with plain text,
# one index name per line.
index_listing = es.cat.indices(index=db_index, h='index',
                               request_timeout=600)
index_names = sorted(
    name.strip() for name in index_listing.split('\n') if name.strip()
)

# Guard against an empty match: an empty index string would be dropped from
# the request path, so the scan below would run against every index on the
# cluster instead of none.
if not index_names:
    raise ValueError(
        'no Elasticsearch index matches pattern %r' % (db_index,)
    )

indices = ','.join(index_names)

# Query body: only the fields in query_source are returned; a document must
# match at least one "should" clause and none of the "must_not" clauses.
# NOTE(review): 'size': 0 is kept from the original; scan() passes its own
# page-size argument, so confirm whether this body-level value has any effect.
job_query = {
    'size': 0,
    '_source': query_source,
    'query': {
        'bool': {
            'should': query_should,
            'minimum_should_match': 1,
            'must_not': query_must_not,
        }
    },
}

# Lazy generator over all matching hits (scroll API under the hood).
scroll = scan(client=es, index=indices, query=job_query)

# Collect, for every failed job that lost its heartbeat at a TOKYO site, the
# list of field values in query_source order (later cells index by position).
results = []
count = 0   # number of hits read from the scroll, matching or not

print ('inf> index: ', db_index)

for res in scroll:
    # Count every hit before filtering so the progress message really reports
    # how many events have been processed.
    count += 1
    if (count % 100000) == 0:
        print ('inf> processed', count, 'events')

    r = res['_source']

    # .get() instead of r[...]: a document that lacks one of the requested
    # fields would otherwise abort the whole scan with a KeyError.
    job_info = [r.get(source) for source in query_source]

    taskid = r.get('taskid')
    dispatcher_diag = r.get('jobdispatchererrordiag')
    site = r.get('computingsite')

    # Skip records missing any field needed by the filters below; a None
    # site would make the substring test raise TypeError.
    if taskid is None or dispatcher_diag is None or site is None:
        continue

    if 'lost heartbeat' not in dispatcher_diag:
        continue

    if 'TOKYO' not in site:
        continue

    results.append(job_info)

print ('inf> total', len(results), 'tasks queried')

inf> index:  jobs_archive_2018-05-*
inf> total 40 tasks queried


In [65]:
%matplotlib inline
import matplotlib.pyplot as plt

# Accumulate the wall time lost to the selected jobs, scaled by core count,
# once per transformation and once per computing site.
wall_trans = {}   # transformation -> accumulated loss
wall_site  = {}   # computing site -> accumulated loss

for result in results:

    # Positions follow the order of query_source in the parameters cell.
    site      = result[3]
    process   = result[6]
    wall_time = result[7]
    core      = result[8]
    mpss      = result[9]
    mrss      = result[10]

    # Per-job dump; printed before any defaulting so missing values show up
    # as None in the output.
    print (result[1], wall_time, core, mpss, mrss, result[5],result[11],result[12], result[13] ) 

    # A job without a recorded wall time cannot contribute to the totals;
    # dividing None would raise TypeError.
    if wall_time is None:
        continue

    # Jobs with no core count are treated as single-core.
    if core is None:
        core = 1

    # wall_time / 3600 converts to hours, presumably from seconds -- TODO
    # confirm the unit of 'wall_time' in the job archive.
    loss = (wall_time/3600)*core

    wall_trans[process] = wall_trans.get(process, 0) + loss
    wall_site[site] = wall_site.get(site, 0) + loss

# Largest loss first.
data_trans = sorted(wall_trans.items(), key=lambda x: -x[1])  
data_site  = sorted(wall_site.items(), key=lambda x: -x[1])

print (data_trans)
print (data_site)

3922526694 20296 8 5291549 8804268 lost heartbeat : 2018-05-07 02:01:26 None None slot1_1@lcg-wn12-13.icepp.jp
3922526686 18448 8 4754803 8745340 lost heartbeat : 2018-05-07 01:30:50 None None slot1_3@lcg-wn12-07.icepp.jp
3918219416 313555 8 None None lost heartbeat : 2018-05-05 22:36:35 None None None
3918219415 313555 8 None None lost heartbeat : 2018-05-05 22:36:35 None None None
3923142128 94 1 None None lost heartbeat : 2018-05-07 10:40:55 None None None
3923142125 99 1 None None lost heartbeat : 2018-05-07 10:41:00 None None None
3923142127 99 1 None None lost heartbeat : 2018-05-07 10:41:00 None None None
3923264419 85 8 None None lost heartbeat : 2018-05-07 14:28:00 None None None
3923264420 90 8 None None lost heartbeat : 2018-05-07 14:28:05 None None None
3922526701 267 8 None None lost heartbeat : 2018-05-06 20:22:52 None None None
3915360879 259280 1 1606728 1616364 lost heartbeat : 2018-05-02 09:05:05 None None slot1_13@lcg-wn03-04.icepp.jp
3915490453 259153 1 1588581 1598