In [1]:

import sys,re 
import datetime
import pprint
from elasticsearch import Elasticsearch, helpers

from __future__ import division

%matplotlib inline

# Client for the ATLAS analytics Elasticsearch cluster (60 s default timeout).
es = Elasticsearch([{'host':'atlas-kibana.mwt2.org', 'port':9200}], timeout=60)

# Ask the cat API for the names of all job-archive indices (one per line),
# drop the empty entries left by the split, and keep the names sorted.
indices = sorted(
    name
    for name in es.cat.indices(
        index="jobs_archive_*", h="index", request_timeout=600
    ).split('\n')
    if name != ''
)

def time_filter(indices, last_days=1):
    """Select the daily ``jobs_archive_YYYY-MM-DD`` indices of the last ``last_days`` days.

    Parameters
    ----------
    indices : iterable of str
        Index names. Surrounding whitespace is ignored.
    last_days : int, optional
        Size of the window in days, counted back from today's local date
        (today included). ``0`` disables the filtering.

    Returns
    -------
    list of str
        ``["jobs_archive_*"]`` when ``last_days`` is 0. Otherwise the
        whitespace-stripped names whose date is less than ``last_days`` days
        before today, in input order. Names whose suffix is not a valid
        ``YYYY-MM-DD`` date (blank entries, differently named indices) are
        skipped instead of raising ``ValueError``.
    """
    if last_days == 0:
        return ["jobs_archive_*"]

    today = datetime.date.today()
    datefmt = '%Y-%m-%d'
    filtered = []
    for raw_name in indices:
        name = raw_name.strip()
        day_text = re.sub(r'jobs_archive_', '', name)
        try:
            day = datetime.datetime.strptime(day_text, datefmt).date()
        except ValueError:
            # Not a daily index name; one odd entry must not abort the selection.
            continue
        if (today - day).days < last_days:
            filtered.append(name)
    return filtered

# Keep only the indices of the last 100 days and report how many there are.
recent_indices = time_filter(indices, 100)
print("indices:", len(recent_indices))

# Elasticsearch takes several indices as a single comma-separated string.
ind = ','.join(recent_indices)
print(ind)

indices: 100
jobs_archive_2016-03-08,jobs_archive_2016-03-09,jobs_archive_2016-03-10,jobs_archive_2016-03-11,jobs_archive_2016-03-12,jobs_archive_2016-03-13,jobs_archive_2016-03-14,jobs_archive_2016-03-15,jobs_archive_2016-03-16,jobs_archive_2016-03-17,jobs_archive_2016-03-18,jobs_archive_2016-03-19,jobs_archive_2016-03-20,jobs_archive_2016-03-21,jobs_archive_2016-03-22,jobs_archive_2016-03-23,jobs_archive_2016-03-24,jobs_archive_2016-03-25,jobs_archive_2016-03-26,jobs_archive_2016-03-27,jobs_archive_2016-03-28,jobs_archive_2016-03-29,jobs_archive_2016-03-30,jobs_archive_2016-03-31,jobs_archive_2016-04-01,jobs_archive_2016-04-02,jobs_archive_2016-04-03,jobs_archive_2016-04-04,jobs_archive_2016-04-05,jobs_archive_2016-04-06,jobs_archive_2016-04-07,jobs_archive_2016-04-08,jobs_archive_2016-04-09,jobs_archive_2016-04-10,jobs_archive_2016-04-11,jobs_archive_2016-04-12,jobs_archive_2016-04-13,jobs_archive_2016-04-14,jobs_archive_2016-04-15,jobs_archive_2016-04-16,jobs_archive_2016-04-17,job

In [2]:
# --- Job selection ------------------------------------------------------------
# Every clause must hold: managed production jobs that finished, with at least
# one event, at least one core and a wall_time above 60.
job_filters = [
    {"match": {"prodsourcelabel": "managed"}},
    {"match": {"jobstatus": "finished"}},
    {"range": {"nevents": {"gte": 1}}},
    {"range": {"corecount": {"gte": 1}}},
    {"range": {"wall_time": {"gt": 60}}},
]

# Jobs whose processingtype matches any of these wildcards are left out.
excluded_ptypes = [
    {"query": {"wildcard": {"processingtype": pattern}}}
    for pattern in ("panda*", "merge*", "pmerge*")
]

# --- Metrics computed inside every innermost bucket ---------------------------
# The keys are the labels under which each metric comes back in the response.
task_metrics = {
    # statistics of cpuconsumptiontime per event
    "1": {
        "extended_stats": {
            "script": "(doc['nevents'].value) ? doc['cpuconsumptiontime'].value/doc['nevents'].value : 0",
            "lang": "expression"
        }
    },
    # statistics of wall_time * corecount per event
    "2": {
        "extended_stats": {
            "script": "(doc['nevents'].value) ? doc['wall_time'].value * doc['corecount'].value / doc['nevents'].value : 0",
            "lang": "expression"
        }
    },
    # total cpuconsumptiontime
    "3": {"sum": {"field": "cpuconsumptiontime"}},
    # total wall_time * corecount
    "4": {
        "sum": {
            "script": "doc['wall_time'].value * doc['corecount'].value",
            "lang": "expression"
        }
    },
    # average corecount
    "6": {"avg": {"field": "corecount"}},
    # total number of events
    "7": {"sum": {"field": "nevents"}},
    # average ratio of cpuconsumptiontime to wall_time * corecount
    "8": {
        "avg": {
            "script": "doc['cpuconsumptiontime'].value/doc['wall_time'].value/doc['corecount'].value",
            "lang": "expression"
        }
    },
}

# --- Bucket hierarchy, outermost level first ----------------------------------
# (aggregation name, document field, maximum number of buckets)
bucket_levels = [
    ("ptypes", "processingtype", 1000),
    ("transf", "transformation", 1000),
    ("atlrel", "atlasrelease", 1000),
    ("site", "computingsite", 1000),
    ("jtask", "jeditaskid", 100000),
]

# Build the nested terms aggregations from the inside out, starting with the
# metrics and wrapping one bucket level around them per iteration.
nested_aggs = task_metrics
for agg_name, field, max_buckets in reversed(bucket_levels):
    nested_aggs = {
        agg_name: {
            "terms": {"field": field, "size": max_buckets},
            "aggs": nested_aggs,
        }
    }

# size 0: only the aggregation results are wanted, not the matching documents.
s = {
    "size": 0,
    "query": {
        "filtered": {
            "filter": {
                "bool": {
                    "filter": job_filters,
                    "must_not": excluded_ptypes,
                }
            }
        }
    },
    "aggs": nested_aggs,
}

# Run the aggregation over the selected indices; the long request timeout
# leaves room for the deeply nested aggregation to complete.
res = es.search(index=ind, body=s, request_timeout=12000)

In [3]:
# One-line summary of the response: elapsed time, shard status and hit totals.
summary_fields = ('took:', res['took'], '\tshards:', res['_shards'], '\thits:', res['hits'])
print(*summary_fields)
# Optional size check of the response object, kept disabled:
# print('size returned:',sys.getsizeof(res))

took: 42854 	shards: {'successful': 500, 'total': 500, 'failed': 0} 	hits: {'hits': [], 'total': 16217555, 'max_score': 0.0}
