In [2]:
import numpy as np
import matplotlib.pyplot as plt
import datetime
import re
from elasticsearch import Elasticsearch, exceptions as es_exceptions
from elasticsearch.helpers import scan
es = Elasticsearch(hosts=[{'host':'atlas-kibana.mwt2.org', 'port':9200}],timeout=60)

In [3]:
#get job archive indices from ES
indices = es.cat.indices(index="jobs_archive_*", h="index", request_timeout=600).split('\n')
indices = sorted(indices)
indices = [x for x in indices if x != '']
#print(indices)

In [4]:
#define function to filter on time
def time_filter(indices, last_days=1, pattern=''):
    if last_days == 0:
        return ["jobs_archive_*"]
    filtered = []
    if pattern:
        for i in indices:
            if pattern in i:
                filtered.append(i.rstrip())
        return filtered
    today = datetime.date.today()
    filtered = []
    datefmt = '%Y-%m-%d'
    for i in indices:
        day = re.sub(r'jobs_archive_', '', i).rstrip()
        day = datetime.datetime.strptime(day, datefmt).date()
        diff = today - day
        if diff.days < last_days:
            filtered.append(i.rstrip())
    return filtered

In [8]:
# use e.g. last_days=7 or pattern='2016-02' (no wildcard !)
ind = time_filter(indices, last_days=1, pattern='')
ind = ','.join(ind)
print(ind)

jobs_archive_2016-07-13


In [20]:
#test query
res = es.search(index=ind, q="jobstatus:finished AND computingsite:* AND (destinationdblock:hc_test*tid601* OR destinationdblock:hc_test*tid812*)", request_timeout=600)
print(res)

{'hits': {'max_score': 1.4747326, 'total': 659, 'hits': [{'_id': '2921297185', '_type': 'jobs_data', '_score': 1.4747326, '_source': {'avgvmem': 1813502, 'avgrss': 1141085, 'piloterrordiag': None, 'destinationse': 'IL-TAU-HEP', 'timeSetup': 0, 'commandtopilot': None, 'superrordiag': None, 'maxcpuunit': None, 'starttime': '2016-07-12T23:22:03', 'modificationhost': 'tau-wn56.hep.tau.ac.il', 'produserid': '/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=gangarbt/CN=601592/CN=Robot: Ganga Robot/CN=proxy', 'jobsetid': -1, 'pilotid': 'http://aipanda063.cern.ch/pilots/2016-07-12/IL-TAU-HEP-tau-cream-atlas-pbs-2355/31367136.0.out|6123538.tau-cream.hep.tau.ac.il|Torque|PR|PICARD 65.4\n', 'jeditaskid': None, 'brokerageerrordiag': None, 'timeExe': 2324, 'prodserieslabel': None, 'batchid': '6123538.tau-cream.hep.tau.ac.il', 'timeStageIn': 4, 'ipconnectivity': None, 'actualcorecount': None, 'superrorcode': 0, 'pilottiming': '0|4|2324|5|0', 'transfertype': None, 'workqueue_id': None, 'jobsubstatus': Non

In [6]:
myquery = {
  "query": {
    "filtered": {
      "query": {
        "query_string": {
          "query": "jobstatus:finished AND (destinationdblock:hc_test*tid601* OR destinationdblock:hc_test*tid812*)",
          "analyze_wildcard": True,
          "lowercase_expanded_terms": False
        }
      }
    }
  },
  "size": 0,
  "aggs": {
    "2": {
      "histogram": {
        "script": "(doc['nevents'].value) ? doc['wall_time'].value*10/doc['nevents'].value : 0",
        "lang": "expression",
        "interval": 5
      },
      "aggs": {
        "3": {
          "significant_terms": {
            "field": "computingsite",
            "size": 10
          }
        }
      }
    }
  }
}

In [7]:
res = es.search(index=ind, body=myquery, request_timeout=600)
print(res)

{'aggregations': {'2': {'buckets': [{'key': 1250, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1255, 'doc_count': 0, '3': {'doc_count': 0, 'buckets': []}}, {'key': 1260, 'doc_count': 0, '3': {'doc_count': 0, 'buckets': []}}, {'key': 1265, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1270, 'doc_count': 2, '3': {'doc_count': 2, 'buckets': []}}, {'key': 1275, 'doc_count': 0, '3': {'doc_count': 0, 'buckets': []}}, {'key': 1280, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1285, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1290, 'doc_count': 0, '3': {'doc_count': 0, 'buckets': []}}, {'key': 1295, 'doc_count': 4, '3': {'doc_count': 4, 'buckets': []}}, {'key': 1300, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1305, 'doc_count': 0, '3': {'doc_count': 0, 'buckets': []}}, {'key': 1310, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}, {'key': 1315, 'doc_count': 1, '3': {'doc_count': 1, 'buckets': []}}

In [None]:
hists = []
xbins = []
rels = []

# First loop: determine AtlasRelease numbers and initialize histograms
for r in res['aggregations']['2']['buckets']:
    for e in r['3']['buckets']:
        if not e['key'] in rels:
            rels.append(e['key'])
            hists.append([])

# Now loop again and fill the histogram bins            
xbin = 0 # counter for x-axis bins
xcut = 80 # This is the x-axis cut !!!
for r in res['aggregations']['2']['buckets']:
    x = r['key']/10
    if x>xcut: # This is the x-axis cut !
        continue
    xbins.append(x)
    for hist in hists:
        hist.append(0)
    for e in r['3']['buckets']:
        #print(e)
        for rel, hist in zip(rels,hists):
            if e['key'] == rel:
                hist[xbin] = e['doc_count']
    xbin = xbin + 1

print(hists)
print(len(xbins),len(hists[0]))

# Initialisation for histogram stacking
y_offset = np.array([0.0] * len(xbins))
#colors = plt.cm.BuPu(np.linspace(0, 0.5, len(hists)))
allcolors = ('#ff3333', '#33ff33', '#3333ff', '#33ffff', '#ffffff', '#000000', '#ff0000', '#00ff00', '#0000ff')
colors = allcolors[:len(hists)]

print(len(hists),len(colors))

for hist, rel, icolor in zip(hists,rels,colors):
    #print(hist,xbins)
    #plt.plot(xbins,hist)
    plt.bar(xbins, hist, bottom=y_offset, log=True, label=rel, color=icolor)
    y_offset = y_offset + hist
    #plt.hist(hist,bins=xbins, log=True, label=rels[i])
    plt.xlabel('Walltime per Event [s]')
    plt.ylabel('Jobs')

plt.legend(loc='upper right')
plt.figure(figsize=(20, 20))
plt.savefig("WallTimePerEvent.png")
plt.show()

[[], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], [], []]
0 0
64 9
