In [3]:
import seaborn as sns
import pandas as pd
import befaas.logentry as le
from befaas.fileutil import log_entry_iterator

# Directory holding the exported log dumps; dump file names are appended to
# this string verbatim, so the trailing slash is required.
filepath = "../dumps/"

# Dumps to evaluate (the file names suggest one dump per cloud provider).
logdumps = [
    "dump_webservice_aws_23.json",
    "dump_webservice_google_23.json",
    "dump_webservice_azure_23.json",
]

# Destination of the pickled result DataFrame.
oufile = "../img/computeNetworkDbEcomm.pkl"

print("Everything is set up.")

Everything is set up.


In [4]:
# Build `plotdata`: for every dump in `logdumps`, emit three rows
# (class "compute" / "network" / "db") per request context that touched the
# "addcartitem" function. Each dump is read twice:
#   pass I  - find the relevant contexts and the contexts to filter out,
#   pass II - accumulate the performance measurements of the relevant contexts.
plotdata = []

# Requests whose user agent starts with one of these prefixes are kept;
# contexts with any other user agent are filtered out.
ACCEPTED_USER_AGENTS = ("node-fetch", "Artillery")
# Only perf entries of these functions contribute to the measurements.
TRACKED_FUNCTIONS = ("addcartitem", "cartkvstorage")
# (row class / duration key, operation-counter key) in output order.
ROW_CLASSES = (("compute", "op_cpu"), ("network", "op_nw"), ("db", "op_db"))
# (duration key, wording used in the "negative ... duration" message).
NEGATIVE_LABELS = (("compute", "computing"), ("network", "network"), ("db", "database"))

for dump in logdumps:
    print(f"Include dump {dump} ...")
    dump_path = filepath + dump

    contexts = {}                    # context id -> evaluated "measure" entries
    seenEntries = {}                 # context id -> str(entry) of evaluated entries
    otherUserAgentContexts = set()   # contexts requested by a non-accepted user agent
    relevantContexts = set()         # contexts that contain an "addcartitem" entry
    # Reset per dump: the platform is only learned from "measure" entries, so
    # without the reset a value from a previous dump (or a previous run of this
    # cell) could be written into this dump's rows.
    plattform = None

    # ---- Pass I: classify contexts -------------------------------------
    # `entries` is only used for progress feedback.
    for entries, entry in enumerate(log_entry_iterator(dump_path), start=1):
        if entries % 10000 == 0:
            print(f"(I)  processed {entries} entries")

        logentry = le.cast_log_type(entry)
        if isinstance(logentry, le.RequestLog):
            userAgent = str(logentry.event['request']['headers']['user-agent'])
            if not userAgent.startswith(ACCEPTED_USER_AGENTS):
                print(f"Other user agent, will filter: {userAgent}")
                otherUserAgentContexts.add(logentry.context_id)

        if logentry.function == "addcartitem":
            relevantContexts.add(logentry.context_id)

    # ---- Pass II: accumulate measurements ------------------------------
    measurements = {}

    for entries, entry in enumerate(log_entry_iterator(dump_path), start=1):
        if entries % 10000 == 0:
            print(f"(II) processed {entries} entries")

        logentry = le.cast_log_type(entry)
        context_id = logentry.context_id
        if not isinstance(logentry, le.PerfLog) or context_id not in relevantContexts:
            continue

        fnName = logentry.function
        if fnName not in TRACKED_FUNCTIONS:
            continue

        if context_id in otherUserAgentContexts:
            print(f"context filter applied to context {context_id}")
            continue

        # Create the per-context bookkeeping on first sight. This happens for
        # any perf entry of a tracked function, not only for "measure" entries.
        if context_id not in contexts:
            contexts[context_id] = []
            seenEntries[context_id] = set()
            measurements[context_id] = {
                "compute": 0.0,
                "network": 0.0,
                "db": 0.0,
                "op_cpu": 0,
                "op_nw": 0,
                "op_db": 0,
            }

        # Skip entries that were already evaluated for this context
        # (duplicates are detected by their string representation).
        entryKey = str(entry)
        if entryKey in seenEntries[context_id]:
            continue

        if logentry.type != "measure":
            continue

        # Entry is accepted => remember it and add it to the stats.
        contexts[context_id].append(entry)
        seenEntries[context_id].add(entryKey)

        duration = logentry.perf["duration"]
        if duration < 0:
            # Only reported; the value is still accumulated. Contexts whose
            # totals end up negative are dropped when the rows are written.
            print(f"ERROR: negative duration for entry {entry}")

        plattform = logentry.platform
        stats = measurements[context_id]
        perfKind = logentry.perf_type[1]

        if perfKind == "rpcIn":
            # Complete (inner) function call (e.g., D or E): counts as compute.
            stats["compute"] += duration
            stats["op_cpu"] += 1

            if fnName != "addcartitem":
                # Not the root call: its duration is subtracted from the
                # network time (the "rpcOut" branch adds the matching
                # outgoing-call duration).
                stats["network"] -= duration
                stats["op_nw"] += 1

        elif perfKind == "rpcOut":
            # Part call (e.g., B or C): moved from compute to network.
            stats["compute"] -= duration
            stats["network"] += duration
            stats["op_cpu"] += 1
            stats["op_nw"] += 1

        elif perfKind == "dbOut":
            # DB call (e.g., F): moved from compute to db.
            stats["compute"] -= duration
            stats["db"] += duration
            stats["op_cpu"] += 1
            stats["op_db"] += 1

        else:
            # Unknown measurement type -> print it for manual inspection.
            print("unhandled type, pls check:")
            for perfType in logentry.perf_type:
                print(f"type is {perfType}")

    # ---- Write measurements into plotdata ------------------------------
    for ctx_id, stats in measurements.items():
        # Drop contexts with an implausible (negative) total; only the first
        # offending class is reported, checked in compute/network/db order.
        negativeLabel = next(
            (label for key, label in NEGATIVE_LABELS if stats[key] < 0), None
        )
        if negativeLabel is not None:
            print(f"negative {negativeLabel} duration for context {ctx_id}")
            continue

        for rowClass, opsKey in ROW_CLASSES:
            plotdata.append({
                "id": ctx_id,
                "plattform": plattform,
                "class": rowClass,
                "ops": stats[opsKey],
                "duration": stats[rowClass],
            })

Include dump dump_webservice_aws_23.json ...
Iterate over ../dumps/dump_webservice_aws_23.json ...


FileNotFoundError: [Errno 2] No such file or directory: '../dumps/dump_webservice_aws_23.json'

In [None]:
# Turn the collected row dicts into a DataFrame (one column per dict key)
# and show the first rows as a quick sanity check.
df_calls = pd.DataFrame(data=plotdata)
df_calls.head()

In [None]:
# Persist the result at the location configured in `oufile`.
df_calls.to_pickle(path=oufile)