In [None]:
# Runtime statistics (VSA and FlowDroid) and timeout counts per dataset
import importlib

from typing import List

import util.util as util

#import util.util
#importlib.reload(util.util)
import util.domainUtil as domainUtil

In [None]:
# TODO: set the path to your result folder
# First argument: base path of the result folder. Second argument: the folder name.
# Third argument: a mapping file that tells, for apps contained in multiple sub-datasets, which app to include.

verified = util.get_verified_dataset("//", "/2023_04_06/", "/verified_dataset/result.json")


In [None]:
# TODO: set the path to load your data
# NOTE(review): the meaning of the second argument (True) is defined by util.loadAllData, which is not shown here.
generalDataset = util.loadAllData("/gp_2022/", True)


In [None]:
class CompleteData:
    """Pairs one analysed app with the paths of all of its FlowDroid result files.

    Every path has the form ``<dataset_path>/<analysis folder>/<app>``, where
    ``<app>`` is ``iotScope.app`` with ``.json`` replaced by ``.xml``. When
    ``iotScope.path`` contains the name of a known sub-dataset, that name is
    appended to ``dataset_path`` first.

    Attributes:
        iotScope: the analysis object handed to the constructor, stored as-is.
        bl_*, gen_*, local_*: result files of the ``*_to_icc_or_sink`` and
            ``*_from_icc_to_sink`` runs.
        cr_both_*, cr_sc_*, cr_sic_*: result files below ``both/``,
            ``source_connection/`` and ``sink_connection/``.
    """

    # Sub-dataset names, checked in this order; the first one contained in
    # ``iotScope.path`` selects the sub-folder.
    _SUB_DATASETS = ("neupane", "iotspotter", "iotprofiler")

    # attribute name -> folder (relative to the dataset path) with that run's XML files
    _RESULT_FOLDERS = {
        "bl_to_icc": "bl_to_icc_or_sink",
        "bl_from_icc": "bl_from_icc_to_sink",
        "gen_to_icc": "general_to_icc_or_sink",
        "gen_from_icc": "general_from_icc_to_sink",
        "local_to_icc": "local_to_icc_or_sink",
        "local_from_icc": "local_from_icc_to_sink",
        "cr_both_apache": "both/web_apache",
        "cr_both_okhttp3": "both/web_okhttp3",
        "cr_both_udp": "both/web_udp_data",
        "cr_sc_amqp": "source_connection/amqp_rabbitmq",
        "cr_sc_coap": "source_connection/coap_californium",
        "cr_sc_aws": "source_connection/mqtt_aws",
        "cr_sc_fusesource": "source_connection/mqtt_fusesource",
        "cr_sc_paho": "source_connection/mqtt_paho",
        "cr_sc_tuya": "source_connection/mqtt_tuya",
        "cr_sc_java": "source_connection/web_java",
        "cr_sic_amqp": "sink_connection/amqp_rabbitmq",
        "cr_sic_coap": "sink_connection/coap_californium",
        "cr_sic_aws": "sink_connection/mqtt_aws",
        # The original f-string had a stray double slash here ("sink_connection//mqtt_fusesource").
        "cr_sic_fusesource": "sink_connection/mqtt_fusesource",
        "cr_sic_paho": "sink_connection/mqtt_paho",
        "cr_sic_tuya": "sink_connection/mqtt_tuya",
        "cr_sic_java": "sink_connection/web_java",
    }

    # The annotation is quoted so that defining the class does not evaluate ``util``.
    def __init__(self, iotScope: "util.AppAnalysis", dataset_path: str):
        """Compute all result-file paths for ``iotScope`` below ``dataset_path``.

        Args:
            iotScope: object with an ``app`` attribute (file name, ``.json``
                is replaced by ``.xml``) and a ``path`` attribute (searched
                for a sub-dataset name).
            dataset_path: base folder of the FlowDroid results; used verbatim
                (a trailing slash is not stripped).
        """
        self.iotScope = iotScope
        app = iotScope.app.replace(".json", ".xml")

        for sub_dataset in self._SUB_DATASETS:
            if sub_dataset in iotScope.path:
                dataset_path = f"{dataset_path}/{sub_dataset}"
                break

        for attribute, folder in self._RESULT_FOLDERS.items():
            setattr(self, attribute, f"{dataset_path}/{folder}/{app}")

In [None]:
def get_complete_runs(dataset, path):
    """Wrap every app of ``dataset`` in a ``CompleteData`` rooted at ``path``.

    Returns a list with one ``CompleteData`` per app, in dataset order.
    """
    return [CompleteData(entry, path) for entry in dataset]

In [None]:
import statistics

def getRunTime(jsonData):
    """Read the timings of one analysis result.

    Args:
        jsonData: mapping that may contain 'initTime' and 'solveTime'
            (anything ``int()`` accepts). ``None``, ``""`` and empty
            mappings mean "no data".

    Returns:
        ``(initTime, solveTime, totalTime)``; the two stored values are
        divided by 1000 (presumably milliseconds -> seconds), missing
        values count as 0.
    """
    initTime = 0
    solveTime = 0
    totalTime = 0

    # timeoutBackwardUsed / timeoutForwardUsed treat None and "" as "no data"
    # for the same fields; do likewise instead of failing on `in None`.
    if not jsonData:
        return initTime, solveTime, totalTime

    if 'initTime' in jsonData:
        initTime = int(jsonData['initTime'])/1000

    if 'solveTime' in jsonData:
        solveTime = int(jsonData['solveTime'])/1000

    # NOTE(review): this compares the already divided values, so a raw -1
    # (-0.001 after the division) still passes -- confirm whether -1 is used
    # as a "not measured" marker in the data.
    if initTime > -1 and solveTime > -1:
        totalTime = initTime + solveTime

    return initTime, solveTime, totalTime

def runtimeStats(jsonFiles):
    """Collect the run times of several analysis results.

    Each element of ``jsonFiles`` is passed to ``getRunTime``; values that
    are not strictly positive are recorded as 0.

    Returns:
        Three parallel lists: ``(total run times, init times, solve times)``.
    """
    totals, inits, solves = [], [], []
    for result in jsonFiles:
        init_part, solve_part, total_part = getRunTime(result)
        totals.append(total_part if total_part > 0 else 0)
        solves.append(solve_part if solve_part > 0 else 0)
        inits.append(init_part if init_part > 0 else 0)
    return totals, inits, solves

def get_vsa_time_app(app):
    """Add up the VSA timings of all ten result sets of one app.

    ``getRunTime`` is applied to ``app.amqp``, ``mqtt``, ``coap``,
    ``endpoints``, ``xmpp``, ``sources``, ``sinks``, ``udp``, ``webview`` and
    ``crypto`` (in this order).

    Returns:
        ``(initTime, solveTime, runTime)`` summed over the ten result sets.
    """
    initTime = 0
    solveTime = 0
    runTime = 0

    # Accumulate in the fixed order above so the floating-point sums match a
    # plain left-to-right addition.
    for field in ("amqp", "mqtt", "coap", "endpoints", "xmpp",
                  "sources", "sinks", "udp", "webview", "crypto"):
        init_part, solve_part, run_part = getRunTime(getattr(app, field))
        initTime = initTime + init_part
        solveTime = solveTime + solve_part
        runTime = runTime + run_part

    return initTime, solveTime, runTime



In [None]:
from xml.etree.ElementTree import Element, tostring, parse, ParseError
import os
import statistics


def get_flowdroid_stats_app(app: str):
    """Read the performance figures from one FlowDroid result XML file.

    Args:
        app: path of the XML file.

    Returns:
        ``(call_graph_construction, taint_propagation, path_reconstruction,
        total_runtime, max_memory, termination_state)``. The five numbers are
        the integer ``Value`` attributes of the matching ``PerformanceEntry``
        elements (0 when absent); ``termination_state`` is the root element's
        ``TerminationState`` attribute or ``None``. A missing or unparsable
        file yields all zeros and ``None``.
    """
    # PerformanceEntry name -> value, in the order of the returned tuple
    metrics = {
        "CallgraphConstructionSeconds": 0,
        "TaintPropagationSeconds": 0,
        "PathReconstructionSeconds": 0,
        "TotalRuntimeSeconds": 0,
        "MaxMemoryConsumption": 0,
    }
    taint_phase_entries = ("TaintPropagationSeconds", "PathReconstructionSeconds")
    termination_state = None
    saw_taint_phase = False

    try:
        root = parse(f"{app}").getroot()
    except (ParseError, FileNotFoundError):
        root = None

    if root is not None:
        termination_state = root.get("TerminationState")
        performance_data = root.find("PerformanceData")
        entries = [] if performance_data is None else performance_data.findall("PerformanceEntry")
        for entry in entries:
            entry_name = entry.get("Name")
            if entry_name not in metrics:
                continue
            metrics[entry_name] = int(entry.get("Value"))
            if entry_name in taint_phase_entries:
                saw_taint_phase = True

    if not saw_taint_phase:
        # Without taint propagation there was no source -> such analyses were
        # skipped at some point to avoid the call graph construction. For a
        # fair comparison all figures are reported as 0.
        return 0, 0, 0, 0, 0, termination_state

    return (
        metrics["CallgraphConstructionSeconds"],
        metrics["TaintPropagationSeconds"],
        metrics["PathReconstructionSeconds"],
        metrics["TotalRuntimeSeconds"],
        metrics["MaxMemoryConsumption"],
        termination_state,
    )


# Attributes of a CompleteData object that each hold the path of one FlowDroid
# result file. All helpers below inspect every one of them, in this order.
_FLOWDROID_RESULT_ATTRS = (
    "bl_to_icc", "bl_from_icc",
    "gen_to_icc", "gen_from_icc",
    "local_to_icc", "local_from_icc",
    "cr_both_udp", "cr_both_okhttp3", "cr_both_apache",
    "cr_sc_java", "cr_sc_amqp", "cr_sc_coap", "cr_sc_paho",
    "cr_sc_aws", "cr_sc_tuya", "cr_sc_fusesource",
    "cr_sic_java", "cr_sic_amqp", "cr_sic_coap", "cr_sic_paho",
    "cr_sic_aws", "cr_sic_tuya", "cr_sic_fusesource",
)


def _flowdroid_stats_per_result(app):
    """Lazily yield the ``get_flowdroid_stats_app`` tuple of every result file of ``app``."""
    for attribute in _FLOWDROID_RESULT_ATTRS:
        yield get_flowdroid_stats_app(getattr(app, attribute))


def used_timeout(app):
    """Return True if any result file of ``app`` has the termination state "DataFlowTimeout".

    Files are read one after another and reading stops at the first hit.
    """
    return any(stats[5] == "DataFlowTimeout" for stats in _flowdroid_stats_per_result(app))


def used_call_graph_timeout(app, timeout_trashhold):
    """Return True if any result file of ``app`` reports a call graph
    construction time greater than ``timeout_trashhold``.

    Files are read one after another and reading stops at the first hit.
    """
    return any(stats[0] > timeout_trashhold for stats in _flowdroid_stats_per_result(app))


def get_app_flowdroid_stats(app):
    """Return the sum of the total runtimes reported by all result files of ``app``."""
    return sum(stats[3] for stats in _flowdroid_stats_per_result(app))


def get_flowdroid_stats_folder_(dataset: List[str], base_path: str, ending: str):
    """Gather the FlowDroid figures of all files in ``dataset`` that end with ``ending``.

    Each matching name is resolved as ``<base_path>/<name>`` and read with
    ``get_flowdroid_stats_app``.

    Returns:
        Five parallel lists: ``(call graph construction, taint propagation,
        path reconstruction, total runtime, max memory)``. The termination
        state of each file is read but not returned.
    """
    columns = ([], [], [], [], [])

    for file_name in dataset:
        if not file_name.endswith(ending):
            continue
        cg, taint, paths, runtime, memory, _state = get_flowdroid_stats_app(f"{base_path}/{file_name}")
        for column, value in zip(columns, (cg, taint, paths, runtime, memory)):
            column.append(value)

    return columns


def get_stats(info_list):
    """Summarise a list of numbers.

    Args:
        info_list: non-empty list of numbers.

    Returns:
        ``(max, min, median, sample standard deviation, mean)``. For a
        single value the standard deviation is reported as 0.0, because
        ``statistics.stdev`` needs at least two data points.

    Raises:
        ValueError: if ``info_list`` is empty.
    """
    if len(info_list) == 0:
        raise ValueError("get_stats() needs at least one value")

    spread = statistics.stdev(info_list) if len(info_list) > 1 else 0.0
    return max(info_list), min(info_list), statistics.median(info_list), spread, statistics.mean(info_list)



In [None]:
def get_stats_dataset(dataset):
    """Compute the FlowDroid, VSA and combined runtime of every app in ``dataset``.

    Args:
        dataset: iterable of ``CompleteData`` objects.

    Returns:
        Three parallel lists ``(FlowDroid times, VSA times, total times)``,
        where each total is the app's VSA time plus its FlowDroid time.
    """
    flowdroid_times, vsa_times, combined_times = [], [], []

    for entry in dataset:
        flowdroid_seconds = get_app_flowdroid_stats(entry)
        # index 2 of get_vsa_time_app is the summed run time
        vsa_seconds = get_vsa_time_app(entry.iotScope)[2]

        flowdroid_times.append(flowdroid_seconds)
        vsa_times.append(vsa_seconds)
        combined_times.append(vsa_seconds + flowdroid_seconds)

    return flowdroid_times, vsa_times, combined_times

In [None]:
# get_stats_dataset returns three lists: FlowDroid, VSA and total runtimes

In [None]:
# Resolve the FlowDroid result paths of every app in the verified dataset.
verified_complete =  get_complete_runs(verified, "/iotflow/")

In [None]:
# Resolve the FlowDroid result paths of every app in the general dataset.
gp_2022_complete =  get_complete_runs(generalDataset, "/iotflow/gp_2022")

In [None]:
# Per-app runtimes of both datasets: (FlowDroid times, VSA times, total times).
gp_stats = get_stats_dataset(gp_2022_complete)
verified_stats = get_stats_dataset(verified_complete)

In [None]:
# General dataset: (max, min, median, stdev, mean) of the FlowDroid, VSA and total runtimes.
gp_stats_flowdroid = get_stats(gp_stats[0])
gp_stats_vsa = get_stats(gp_stats[1])
gp_stats_total = get_stats(gp_stats[2])

In [None]:
# Verified dataset: (max, min, median, stdev, mean) of the FlowDroid, VSA and total runtimes.
ver_stats_flowdroid = get_stats(verified_stats[0])
ver_stats_vsa = get_stats(verified_stats[1])
ver_stats_total = get_stats(verified_stats[2])

In [None]:
# Order of the values returned by get_stats: (max, min, median, stdev, mean)


In [None]:
def seconds_to_minutes(time_in_s):
    """Format a duration given in seconds as ``minutes:seconds``.

    Fractional seconds are truncated and the seconds part is always two
    digits, e.g. ``61`` -> ``"1:01"``. Integer arithmetic is used because the
    float remainder ``(t/60 - int(t/60)) * 60`` can land just below the true
    value and truncate to the wrong second (61 s used to render as "1:0").

    Args:
        time_in_s: duration in seconds (int or float).

    Returns:
        The formatted string; negative durations get a leading "-".
    """
    whole_seconds = int(time_in_s)
    sign = "-" if whole_seconds < 0 else ""
    minutes, seconds = divmod(abs(whole_seconds), 60)
    return f"{sign}{minutes}:{seconds:02d}"

In [None]:
data = []



# Runtime table: one row per dataset. After the label and the app count, each row holds the
# median, mean and standard deviation (indices 2, 4 and 3 of the get_stats tuples) of the
# VSA, FlowDroid and total runtimes, formatted with seconds_to_minutes.
# NOTE(review): the app counts ("9,889" and "947") are hard-coded strings, not derived from the datasets.
row = ['IoT-VER',"9,889", seconds_to_minutes(ver_stats_vsa[2]) , seconds_to_minutes(ver_stats_vsa[4]), seconds_to_minutes(ver_stats_vsa[3]), seconds_to_minutes(ver_stats_flowdroid[2]) , seconds_to_minutes(ver_stats_flowdroid[4]), seconds_to_minutes(ver_stats_flowdroid[3]), seconds_to_minutes(ver_stats_total[2]) , seconds_to_minutes(ver_stats_total[4]), seconds_to_minutes(ver_stats_total[3])]
data.append(row)

row = ['General apps',"947", seconds_to_minutes(gp_stats_vsa[2]) , seconds_to_minutes(gp_stats_vsa[4]), seconds_to_minutes(gp_stats_vsa[3]), seconds_to_minutes(gp_stats_flowdroid[2]) , seconds_to_minutes(gp_stats_flowdroid[4]), seconds_to_minutes(gp_stats_flowdroid[3]), seconds_to_minutes(gp_stats_total[2]) , seconds_to_minutes(gp_stats_total[4]), seconds_to_minutes(gp_stats_total[3])]
data.append(row)


In [None]:
import pandas as pd
# Column order matches the rows built above: dataset label, app count, then
# median / mean / standard deviation for VSA, FlowDroid and the combined runtime.
df = pd.DataFrame(data, columns = ['Dataset', 'number_apps', 'Median-VSA', 'Avg-VSA', 'STD-VSA', 'Median-FD', 'Average-FD', 'STD-FD','Median-Total', 'Average-Total', "STD-Total"])


In [None]:
# Use the dataset label as the row index. This rebinds df, so re-running the cell fails because the 'Dataset' column is gone.
df = df.set_index('Dataset')

In [None]:
 # NOTE(review): every column built above holds strings (seconds_to_minutes output and the
 # app counts), so this rounding presumably changes nothing -- confirm it is still needed.
 df = df.round(decimals=2)

In [None]:
# Display the runtime table.
df

In [None]:
# Print the table as LaTeX source.
print(df.to_latex())

In [None]:
# Write the table to a CSV file in the current working directory.
df.to_csv("runtime_stats_ccs.csv")

In [None]:
def timeoutBackwardUsed(dataJson):
    """Tell whether the stored init time of one result exceeds its backward timeout.

    Args:
        dataJson: mapping that may contain 'initTime' (divided by 1000 to get
            seconds) and 'timeoutbackward' (seconds; 900 when the key is
            missing). ``None`` and ``""`` mean "no data".

    Returns:
        True only if 'initTime' is present and, in seconds, greater than the
        timeout. A value of -1 for either quantity counts as "unknown" and
        yields False.
    """
    if dataJson is None or dataJson == "":
        return False

    init_seconds = int(dataJson['initTime'])/1000 if 'initTime' in dataJson else -1
    limit_seconds = int(dataJson['timeoutbackward']) if 'timeoutbackward' in dataJson else 900

    if init_seconds == -1 or limit_seconds == -1:
        return False

    return limit_seconds - init_seconds < 0


def timeoutForwardUsed(dataJson):
    """Tell whether the stored solve time of one result exceeds its forward timeout.

    Args:
        dataJson: mapping that may contain 'solveTime' (divided by 1000 to
            get seconds) and 'timeoutforward' (seconds). ``None`` and ``""``
            mean "no data".

    Returns:
        True only if 'solveTime' is present and, in seconds, greater than the
        timeout. A value of -1 for either quantity counts as "unknown" and
        yields False.
    """
    timeoutForwardUsed = False
    if dataJson != None and dataJson != "":
        solveTime = -1
        timeoutforward = -1

        if 'solveTime' in dataJson:
            solveTime = int(dataJson['solveTime'])/1000 # -> also seconds if div 1k

        if 'timeoutforward' in dataJson:
            timeoutforward = int(dataJson['timeoutforward']) # seconds = TIMEOUT_FORWARD
        else:
            # Default forward timeout. The original assigned this value to
            # `timeoutbackward`, so it was never applied and results without
            # the key could not be detected as timed out.
            timeoutforward = 1800

        if timeoutforward != -1 and solveTime != -1:
            if timeoutforward - solveTime < 0:
                timeoutForwardUsed = True

    return timeoutForwardUsed

def getTimeouts(dataset):
    """Count the apps whose VSA results hit a backward or a forward timeout.

    An app counts once per direction if at least one of its ten result sets
    (amqp, mqtt, coap, endpoints, xmpp, crypto, sources, udp, webview, sinks)
    is flagged by ``timeoutBackwardUsed`` or ``timeoutForwardUsed``.

    Returns:
        ``(backward, forward)`` -- the number of apps per direction.
    """
    result_sets = ("amqp", "mqtt", "coap", "endpoints", "xmpp",
                   "crypto", "sources", "udp", "webview", "sinks")
    backward = 0
    forward = 0

    for app in dataset:
        if any(timeoutBackwardUsed(getattr(app, name)) for name in result_sets):
            backward += 1

        if any(timeoutForwardUsed(getattr(app, name)) for name in result_sets):
            forward += 1

    return backward, forward



In [None]:
# (backward, forward) VSA timeout counts for the general dataset.
gen_vsa_timeouts = getTimeouts(generalDataset)

In [None]:
# (backward, forward) VSA timeout counts for the verified dataset.
ver_vsa_timeouts = getTimeouts(verified)

In [None]:

def get_flowdoird_timeouts_dataset(dataset, trashhold):
    """Count the apps that hit a FlowDroid timeout.

    Args:
        dataset: iterable of ``CompleteData`` objects.
        trashhold: limit handed to ``used_call_graph_timeout``.

    Returns:
        ``(call graph timeouts, data flow timeouts)`` -- the number of apps
        flagged by ``used_call_graph_timeout`` and by ``used_timeout``.
    """
    call_graph_hits = 0
    data_flow_hits = 0

    for entry in dataset:
        if used_timeout(entry):
            data_flow_hits += 1
        if used_call_graph_timeout(entry, trashhold):
            call_graph_hits += 1

    return call_graph_hits, data_flow_hits

In [None]:
# (call graph, data flow) timeout counts for the general dataset. 450 is the limit compared
# against the CallgraphConstructionSeconds value of each result file.
gen_flow_analysis_timeouts = get_flowdoird_timeouts_dataset(gp_2022_complete, 450)

In [None]:
# (call graph, data flow) timeout counts for the verified dataset, with the same limit of 450.
ver_flow_analysis_timeouts = get_flowdoird_timeouts_dataset(verified_complete, 450)

In [None]:
# Timeout counts of the general dataset and their share of all apps in percent:
# VSA as "backward - forward", flow analysis as "call graph - data flow".
# The app count is taken from the dataset itself; the previous hard-coded 949
# disagreed with the 947 used for the same dataset elsewhere in this notebook.
gen_app_count = len(gp_2022_complete)
print(gen_vsa_timeouts)
print(f"{gen_vsa_timeouts[0]/gen_app_count*100:.2f} - {gen_vsa_timeouts[1]/gen_app_count*100:.2f}")
print(gen_flow_analysis_timeouts)
print(f"{gen_flow_analysis_timeouts[0]/gen_app_count*100:.2f} - {gen_flow_analysis_timeouts[1]/gen_app_count*100:.2f}")




In [None]:
# Timeout counts of the verified dataset and their share of all apps in percent:
# VSA as "backward - forward", flow analysis as "call graph - data flow".
# NOTE(review): 9889 is a hard-coded app count -- confirm it equals the size of the loaded dataset.
print(ver_vsa_timeouts)
print(f"{ver_vsa_timeouts[0]/9889*100:.2f} - {ver_vsa_timeouts[1]/9889*100:.2f}")
print(ver_flow_analysis_timeouts)

print(f"{ver_flow_analysis_timeouts[0]/9889*100:.2f} - {ver_flow_analysis_timeouts[1]/9889*100:.2f}")


In [None]:
# Forward VSA timeouts of both datasets combined.
print(gen_vsa_timeouts[1] + ver_vsa_timeouts[1])


In [None]:
# Percentage of all apps (9889 + 947). 304 is hard-coded -- presumably the sum printed by the previous cell, TODO confirm.
304/(9889+947) * 100



In [None]:
# Percentage of all apps (9889 + 947). 11 is hard-coded -- presumably the combined backward VSA timeouts, TODO confirm.
11/(9889+947) * 100


In [None]:
# Call graph timeouts of both datasets combined.
ver_flow_analysis_timeouts[0] + gen_flow_analysis_timeouts[0]

In [None]:
# Percentage of all apps (9889 + 947). 2432 is hard-coded -- presumably the sum shown by the previous cell, TODO confirm.
2432/(9889+947) * 100

In [None]:
# Data flow timeouts of both datasets combined.
ver_flow_analysis_timeouts[1] + gen_flow_analysis_timeouts[1]

In [None]:
# Percentage of all apps (9889 + 947). 3004 is hard-coded -- presumably the sum shown by the previous cell, TODO confirm.
3004/(9889+947) * 100