calculate the core power for BOINC tasks

In [1]:
%matplotlib inline
import datetime
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import re
import time
from elasticsearch import Elasticsearch, helpers

In [2]:
last_days=120
Power=[{'pat':r'.*aws16.*ihep.ac.cn','pvalue':16},{'pat':r'.*lwn14.*ihep.ac.cn','pvalue':20},{'pat':r'.*aws13.*ihep.ac.cn','pvalue':8}]

In [3]:
es = Elasticsearch([{'host':'atlas-kibana.mwt2.org', 'port':9200}],timeout=60)

indices = es.cat.indices(index="jobs_archive_*", h="index", request_timeout=600).split('\n')

indices = sorted(indices)
indices = [x for x in indices if x != '' and not x.endswith('_reindexed')]
#print(indices)
def time_filter(indices, last_days=1):
    if last_days == 0:
        return ["jobs_archive_*"]
    today = datetime.date.today()
    filtered = []
    datefmt = '%Y-%m-%d'
    for i in indices:
        day = re.sub(r'jobs_archive_', '', i).rstrip()
        day = datetime.datetime.strptime(day, datefmt).date()
        diff = today - day
        if diff.days < last_days:
            filtered.append(i.rstrip())
    return filtered

ind = time_filter(indices, last_days)
print ("number of indices:", len(ind))
#print(ind)
ind = ','.join(ind)
#print(ind)

number of indices: 120


In [4]:
etime=int(time.time()*1000)
stime=etime-3600*24*last_days*1000
my_query={
  "size": 0,
  "aggs": {
    "task": {
      "terms": {
        "field": "jeditaskid",
        "size": 100,
        "order": {
          "_count": "desc"
        }
      }
    }
  },
  "query": {
    "bool": {
      "must": [
        {
          "match_all": {}
        },
        {
          "match_phrase": {
            "computingsite": {
              "query": "BOINC_MCORE"
            }
          }
        },
        {
          "match_phrase": {
            "jobstatus": {
              "query": "finished"
            }
          }
        },
        {
          "range": {
            "modificationtime": {
              "gte": stime,
              "lte": etime,
              "format": "epoch_millis"
            }
          }
        }
      ],
      "must_not": []
    }
   }
}
res = es.search(index=ind, body=my_query, request_timeout=12000)
#print(res)
buckets=res['aggregations']['task']['buckets']
tasks=[]
bs=len(buckets)
for i in range(bs):
    tid=buckets[i]['key']
    cn=buckets[i]['doc_count']
    #print(tid,cn)
    if cn>1000:
        tasks.append(tid)
print(tasks)

[12515739, 12236561, 12678480, 12640557, 12790254, 12577096, 12732603, 12484795, 12484780, 12341011, 12206541, 12206369, 12763289, 12065733, 12065766, 12236680, 12081264, 12189412, 12236631, 12236655, 12189371, 12236607, 12065690, 12858266, 12885762, 12866455, 12065801, 12189391, 12206399, 12236667, 12189348, 12236643, 12081242, 12236619, 12236595, 12206424, 12206453, 12096878, 12096960, 12097005, 12097038, 12097087, 12906983, 12065661, 12096934, 12096853, 12097112, 12096906]


### now we want to find all the jobs of this taskid

In [5]:
def get_avg_boinc_power_by_task(tasks,res):
    (total_cpu,total_events,cn_boinc,ihep_total_cpu,ihep_total_events,cn_ihep)=({},{},{},{},{},{})
    for r in res:
        if r['_source']['cpuconsumptiontime']<1000000 and r['_source']['nevents']!=0 and "modificationhost" in r['_source'] and type(r['_source']['modificationhost']) is str:
            cpuperevent=float(r['_source']['cpuconsumptiontime'])/r['_source']['nevents']
        else:
            continue
        mhost=r['_source']['modificationhost']
        jediid=r['_source']['jeditaskid']
        if jediid in tasks:  
            if jediid not in total_cpu:
                total_cpu[jediid]=0
                total_events[jediid]=0
                cn_boinc[jediid]=0
                ihep_total_cpu[jediid]={}
                ihep_total_events[jediid]={}
                cn_ihep[jediid]={}
            else:
                total_cpu[jediid]+=r['_source']['cpuconsumptiontime']
                total_events[jediid]+=r['_source']['nevents']
                cn_boinc[jediid]+=1
            for P in Power:
                pat=P['pat']
                pvalue=P['pvalue']
                if re.match(pat,mhost):
                    if not pat in ihep_total_cpu[jediid]:
                        ihep_total_cpu[jediid][pat]=0
                        ihep_total_events[jediid][pat]=0
                        cn_ihep[jediid][pat]=0
                    else:
                        ihep_total_cpu[jediid][pat]+=r['_source']['cpuconsumptiontime']
                        ihep_total_events[jediid][pat]+=r['_source']['nevents']
                        cn_ihep[jediid][pat]+=1
    avg_cpuperevent={}
    for jediid in tasks:
        if total_events[jediid]!=0:
            avg_cpuperevent[jediid]=float(total_cpu[jediid])/total_events[jediid]
    ihep_avg_cpuperevent={}
    for jediid in ihep_total_cpu.keys():
        ihep_avg_cpuperevent[jediid]={}
        for pat in ihep_total_cpu[jediid].keys():
            if ihep_total_events[jediid][pat]!=0:
                ihep_avg_cpuperevent[jediid][pat]=float(ihep_total_cpu[jediid][pat])/ihep_total_events[jediid][pat]
            else:
                ihep_avg_cpuperevent[jediid][pat]=0
    
    head=["days","taskid","jobs","core power","avg_cpuperevent","nevents"]
    total_power_task=0
    total_cpu_task=0
    dlist=[]
   
    for jediid in ihep_total_cpu.keys():
        total_cpu_task+=total_cpu[jediid]
        boinc_power={}
        total_power,cn=0,0
        for pat in ihep_total_cpu[jediid]:
            if ihep_avg_cpuperevent[jediid][pat]!=0:
                for P in Power:
                    if pat in P.values():
                        boinc_power[jediid]=P["pvalue"]*ihep_avg_cpuperevent[jediid][pat]/avg_cpuperevent[jediid]
                    #print(pat,boinc_power)
                        total_power+=boinc_power[jediid]
                        cn+=1
        if cn!=0:
            avg_boinc_power=float(total_power)/cn
            total_power_task+=avg_boinc_power*total_cpu[jediid]
            #print("%d days\ttaskid:%s\tjobs:%s\tBOINC power:%.1f\tavg_cpuperevent:%.0f"%(last_days,jediid,cn_boinc[jediid],avg_boinc_power,avg_cpuperevent[jediid]))
            #print("%d\t%s\t%s\t%.1f\t%.0f"%(last_days,jediid,cn_boinc[jediid],avg_boinc_power,avg_cpuperevent[jediid]))
            t=[last_days,jediid,cn_boinc[jediid],avg_boinc_power,avg_cpuperevent[jediid],total_events[jediid]]
            dlist.append(t)
    avg_power=total_power_task/total_cpu_task
    print("Avg. power over all task:%.2f"%avg_power)
    df = pd.DataFrame(dlist,columns=head)
    print(df.sort_values(by="jobs",ascending=False))
    df.to_csv("task.csv")
 
my_query={
    "size": 0,
    "_source": ["modificationhost", "cpuconsumptiontime","nevents","jeditaskid"],
  "query": {
    "bool": {
      "must": [
        {
          "term": {"jobstatus": "finished"},
          "term": {"computingsite":"BOINC_MCORE"}
        }
      ]
    }
  }
}
#for task in tasks:
res = helpers.scan(es, query=my_query, index=ind, scroll='5m', timeout="5m", size=1000)
get_avg_boinc_power_by_task(tasks,res)


Avg. power over all task:12.09
    days    taskid    jobs  core power  avg_cpuperevent   nevents
5    120  12236561  197957   13.235872       201.939923   9897850
11   120  12515739  187477   12.534762       120.570257   9373850
31   120  12678480  124479    9.522250       266.448247   6223950
18   120  12640557   71670    9.706369       272.601105   3583500
42   120  12790254   53190   12.031117       170.687845  10638000
20   120  12732603   36520    9.972693       271.388710   7303900
45   120  12484795   29839   13.953039       224.853699   1491950
17   120  12484780   29836   13.874798       216.367153   1491800
26   120  12577096   24971   12.348318       160.341266   4994200
6    120  12341011   20023   14.457501       159.586599   1001150
28   120  12206541   19612   14.729783       247.086508    980600
8    120  12866455   19353   11.999282       349.905526   3870550
13   120  12206369   19169   14.439697        86.653812    958450
9    120  12763289   18451   11.217264       