In [13]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

from constants import EXTENSION_METHODS, RVSEC_ERRORS, REPETITIONS, TIMEOUTS, TOOLS, SUMMARY, METHOD_COVERAGE, \
    METHODS_JCA_COVERAGE, ACTIVITIES_COVERAGE

In [14]:
# Path to the F-Droid spreadsheet (CSV).
fdroid_spreadsheet = "fdroid/final_apps_to_download.csv"

# Experiment 01 input files.
# https://drive.google.com/drive/folders/1wcA_9Z7GBtWxcRUaUTkbMu_O_fCorQuw?usp=drive_link
exp01_generic_instrument_errors_file = "exp01_generic_instrument_errors.json"
exp01_generic_results_file = "exp01_generic_results.json"

exp01_jca_instrument_errors_file = "exp01_jca_instrument_errors.json"
exp01_jca_results_file = "exp01_jca_results.json"



# Experiment 02 input files.
# https://drive.google.com/drive/folders/1vmSQ6JAohG8FRCOL1CXUR6EV1rwvEK9m?usp=drive_link
# The "generic" paths are empty; get_totals() returns None when given an empty path.
exp02_generic_instrument_errors_file = ""
exp02_generic_results_file = ""

exp02_jca_instrument_errors_file = "exp02_jca_instrument_errors.json"
exp02_jca_results_file = "exp02_jca_results.json"

# TOTALS

In [None]:
def get_totals(results_file: str, instrument_errors_file: str, print_debug=False):
    """Summarize one experiment from its results file and its instrumentation-errors file.

    Args:
        results_file: path to a JSON object keyed by APK; each APK is walked as
            REPETITIONS -> TIMEOUTS -> TOOLS and every tool entry's RVSEC_ERRORS is inspected.
        instrument_errors_file: path to a JSON object keyed by APK whose values carry a
            "tool" entry ("dex2jar", "ajc" or "d8") naming the tool that failed.
        print_debug: when True, also print the totals and percentages.

    Returns:
        None when either path is empty; otherwise a dict with the keys "total",
        "instrumented", "instrumentation_errors" (itself a dict with "total",
        "dex2jar", "ajc", "d8") and "apks_with_errors".

    Raises:
        KeyError: when an instrumentation error names a tool other than the three known ones.
    """
    if not results_file or not instrument_errors_file:
        return None

    def _pct(part, whole):
        # Percentage of `part` in `whole`; 0 when `whole` is 0 so that an experiment
        # with no APKs (or no instrumented APKs) does not raise ZeroDivisionError.
        return 0 if whole == 0 else (part * 100) / whole

    with open(results_file, "r") as f:
        result = json.load(f)
    total_instrumented = len(result)

    # An APK counts once, no matter how many repetitions/timeouts/tools reported errors.
    apks_with_errors = set()
    for apk, apk_result in result.items():
        for rep_result in apk_result[REPETITIONS].values():
            for timeout_result in rep_result[TIMEOUTS].values():
                for tool_result in timeout_result[TOOLS].values():
                    if len(tool_result[RVSEC_ERRORS]) > 0:
                        apks_with_errors.add(apk)

    with open(instrument_errors_file, "r") as f:
        instrument_errors = json.load(f)
    total_instrument_errors = len(instrument_errors)

    instrumentation_tool_error = {"dex2jar": 0, "ajc": 0, "d8": 0}
    for apk in instrument_errors:
        instrumentation_tool_error[instrument_errors[apk]["tool"]] += 1

    total_experiment_apks = total_instrumented + total_instrument_errors
    total_apks_with_errors = len(apks_with_errors)
    instrumented_pct = _pct(total_instrumented, total_experiment_apks)
    apks_com_erros_pct = _pct(total_apks_with_errors, total_experiment_apks)
    instrumented_apks_com_erros_pct = _pct(total_apks_with_errors, total_instrumented)

    inst_tool_error_dex2jar_total = instrumentation_tool_error["dex2jar"]
    inst_tool_error_dex2jar_pct = _pct(inst_tool_error_dex2jar_total, total_instrument_errors)
    inst_tool_error_ajc_total = instrumentation_tool_error["ajc"]
    inst_tool_error_ajc_pct = _pct(inst_tool_error_ajc_total, total_instrument_errors)
    inst_tool_error_d8_total = instrumentation_tool_error["d8"]
    inst_tool_error_d8_pct = _pct(inst_tool_error_d8_total, total_instrument_errors)

    if print_debug:
        # NOTE(review): the F-Droid total below is hard-coded, not computed from any input.
        print("FDROID: 4162")
        print("TOTAL: {} (apks que usam jca)".format(total_experiment_apks))
        print("INSTRUMENTADOS: {}".format(total_instrumented))
        print("INSTRUMENTADOS (%): {}".format(instrumented_pct))
        print("ERROS_INSTRUMENTACAO: {}".format(total_instrument_errors))
        print("  - dex2jar: {} ({} %)".format(inst_tool_error_dex2jar_total, inst_tool_error_dex2jar_pct))
        print("  - ajc: {} ({} %)".format(inst_tool_error_ajc_total, inst_tool_error_ajc_pct))
        print("  - d8: {} ({} %)".format(inst_tool_error_d8_total, inst_tool_error_d8_pct))
        print("APKS_COM_ERROS_ENCONTRADOS: {}".format(total_apks_with_errors))
        print("APKS_COM_ERROS_ENCONTRADOS (% do TOTAL): {}".format(apks_com_erros_pct))
        print("APKS_COM_ERROS_ENCONTRADOS (% do INSTRUMENTADOS): {}".format(instrumented_apks_com_erros_pct))

    return {
        "total": total_experiment_apks,
        "instrumented": total_instrumented,
        "instrumentation_errors": {
            "total": total_instrument_errors,
            "dex2jar": inst_tool_error_dex2jar_total,
            "ajc": inst_tool_error_ajc_total,
            "d8": inst_tool_error_d8_total
        },
        "apks_with_errors": total_apks_with_errors
    }

def get_totals_df():
    """Build a DataFrame with one row of totals per configured experiment.

    Experiments for which get_totals() returns None contribute no row.
    """
    experiments = (
        ("exp01_jca", exp01_jca_results_file, exp01_jca_instrument_errors_file),
        ("exp01_generic", exp01_generic_results_file, exp01_generic_instrument_errors_file),
        ("exp02_jca", exp02_jca_results_file, exp02_jca_instrument_errors_file),
    )

    rows = []
    for experiment_name, results_path, errors_path in experiments:
        add_to_data(experiment_name, get_totals(results_path, errors_path), rows)

    columns = [
        "name",
        "total",
        "instrumented",
        "inst_err_total",
        "inst_err_dex2jar",
        "inst_err_ajc",
        "inst_err_d8",
        "apks_with_errors",
    ]
    return pd.DataFrame(rows, columns=columns)

    
def add_to_data(name: str, total_dict: dict, data: list):
    """Append one summary row for experiment `name` to `data`.

    Does nothing when `total_dict` is falsy (None or empty). The row order is:
    name, total, instrumented, instrumentation errors (total, dex2jar, ajc, d8),
    apks_with_errors.
    """
    if not total_dict:
        return

    row = [name, total_dict["total"], total_dict["instrumented"]]
    instrumentation = total_dict["instrumentation_errors"]
    row.extend(instrumentation[key] for key in ("total", "dex2jar", "ajc", "d8"))
    row.append(total_dict["apks_with_errors"])
    data.append(row)

In [None]:
# Per-experiment debug printouts; uncomment a line to inspect a single experiment.
# get_totals(exp01_jca_results_file, exp01_jca_instrument_errors_file, print_debug=True)
# get_totals(exp01_generic_results_file, exp01_generic_instrument_errors_file, print_debug=True)
# get_totals(exp02_jca_results_file, exp02_jca_instrument_errors_file, print_debug=True)

# Summary table of totals, one row per experiment.
df = get_totals_df()
print(df)

# COVERAGE

In [None]:
# Path to the F-Droid spreadsheet (CSV) read by read_fdroid() / read_csv().
planilha_fdroid_path = "fdroid/final_apps_to_download.csv"

# TODO: manually copy these files to the directory the notebook is run from
#results_file = "merged_results_analysis.json"
results_file = "final_results_analysis_jca.json"
# results_file = "final_results_analysis_generic.json"
instrument_errors_file = "merged_instrument_errors.json"
# True for the experiment with the JCA specs; False for the experiment with the new specs.
# Selects parse_jca() vs parse_nova_spec() in the cells below.
is_jca = True

# Totals

In [None]:
# Totals for the experiment selected by `results_file` / `instrument_errors_file`.
# Later cells read `total_experiment_apks`, `total_instrumented` and
# `total_apks_with_errors`, so this cell has to run before them.
total_instrumented = 0
total_instrument_errors = 0

apks_with_errors = set()

with open(results_file, "r") as f:
    result = json.load(f)
    total_instrumented = len(result)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    if len(errors) > 0:
                        # A set: the APK counts once however many runs reported errors.
                        apks_with_errors.add(apk)

intstrumentation_tool_error = {"dex2jar": 0, "ajc": 0, "d8": 0}
with open(instrument_errors_file, "r") as f:
    result = json.load(f)
    total_instrument_errors = len(result)
    for apk in result:
        tool = result[apk]["tool"]
        intstrumentation_tool_error[tool] += 1

total_experiment_apks = total_instrumented + total_instrument_errors
total_apks_with_errors = len(apks_with_errors)

# Every percentage falls back to 0 when its denominator is 0 (same convention as
# the per-tool percentages in get_totals), instead of raising ZeroDivisionError.
instrumented_pct = 0 if total_experiment_apks == 0 else (total_instrumented * 100) / total_experiment_apks
apks_com_erros_pct = 0 if total_experiment_apks == 0 else (total_apks_with_errors * 100) / total_experiment_apks
instrumented_apks_com_erros_pct = 0 if total_instrumented == 0 else (total_apks_with_errors * 100) / total_instrumented

inst_tool_error_dex2jar_total = intstrumentation_tool_error["dex2jar"]
inst_tool_error_dex2jar_pct = 0 if total_instrument_errors == 0 else (inst_tool_error_dex2jar_total * 100) / total_instrument_errors
inst_tool_error_ajc_total = intstrumentation_tool_error["ajc"]
inst_tool_error_ajc_pct = 0 if total_instrument_errors == 0 else (inst_tool_error_ajc_total * 100) / total_instrument_errors
inst_tool_error_d8_total = intstrumentation_tool_error["d8"]
inst_tool_error_d8_pct = 0 if total_instrument_errors == 0 else (inst_tool_error_d8_total * 100) / total_instrument_errors

# NOTE(review): the F-Droid total below is hard-coded, not computed from any input.
print("FDROID: 4162")
print("TOTAL: {} (apks que usam jca)".format(total_experiment_apks))
print("INSTRUMENTADOS: {}".format(total_instrumented))
print("INSTRUMENTADOS (%): {}".format(instrumented_pct))
print("ERROS_INSTRUMENTACAO: {}".format(total_instrument_errors))
print("  - dex2jar: {} ({} %)".format(inst_tool_error_dex2jar_total, inst_tool_error_dex2jar_pct))
print("  - ajc: {} ({} %)".format(inst_tool_error_ajc_total, inst_tool_error_ajc_pct))
print("  - d8: {} ({} %)".format(inst_tool_error_d8_total, inst_tool_error_d8_pct))
print("APKS_COM_ERROS_ENCONTRADOS: {}".format(total_apks_with_errors))
print("APKS_COM_ERROS_ENCONTRADOS (% do TOTAL): {}".format(apks_com_erros_pct))
print("APKS_COM_ERROS_ENCONTRADOS (% do INSTRUMENTADOS): {}".format(instrumented_apks_com_erros_pct))

In [4]:
def parse_nova_spec(texto: str):
    """Split an error string from the new-spec experiment into its parts.

    The text before the first ":::" is read as "<class>.<method>(...)"; the
    segment after it is the message, whose first space-separated word is the spec.

    Returns:
        (clazz, method, spec, msg), all stripped. `clazz` is "" when the
        signature contains no '.'.

    Raises:
        IndexError: when `texto` contains no ":::" separator.
    """
    s01 = texto.split(":::")
    signature = s01[0].split("(")[0]

    # rpartition yields ("", "", signature) when there is no '.', so the whole name
    # becomes the method. Slicing with rfind's -1 used to chop the last character
    # off the name and report the remainder as the class.
    clazz, _, method = signature.rpartition('.')
    clazz = clazz.strip()
    method = method.strip()

    msg = s01[1].strip()
    spec = msg.split(" ")[0].strip()

    return clazz, method, spec, msg

def parse_jca(texto: str):
    """Split a comma-separated error string from the JCA experiment into its parts.

    Field 0 is the spec, field 1 the class and field 3 the method; everything
    after the fifth field is the message, which may itself contain commas.
    Fields 2 and 4 are not used here.

    Returns:
        (clazz, method, spec, msg), all stripped. `msg` is "" when there is
        nothing after the fifth field.

    Raises:
        IndexError: when `texto` has fewer than five comma-separated fields.
    """
    # Split at most five times so the message stays in one piece. Locating the
    # message with rfind() on the fifth field's text cut the message short
    # whenever that text (e.g. a short number) appeared again inside it.
    parts = texto.split(",", 5)
    if len(parts) < 5:
        raise IndexError(
            "expected at least 5 comma-separated fields, got {}: {!r}".format(len(parts), texto))

    spec = parts[0].strip()
    clazz = parts[1].strip()
    method = parts[3].strip()
    msg = parts[5].strip() if len(parts) > 5 else ""

    return clazz, method, spec, msg

# Quantidade de erros por SPEC

In [None]:
# Group every reported RVSec error by the spec that raised it, keeping the raw
# occurrence count ("total") and the set of distinct messages ("errors").
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        # The error string is parsed differently for each spec set.
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)
                        
                        # A spec gets an entry only once it has reported an error.
                        if spec not in data:
                            data[spec] = {"errors": set(),
                                         "total": 0}
                        data[spec]["total"] = data[spec]["total"] + 1
                        data[spec]["errors"].add(msg)

print("Quantidade de SPECS que encontraram algum erro: {}".format(len(data)))

# One row per spec: [spec, total occurrences, number of distinct messages].
specs_totals = []
for spec in data:
    specs_totals.append([spec, data[spec]["total"], len(data[spec]["errors"]) ])

df = pd.DataFrame(specs_totals, columns=['Spec', 'Cont', 'Errors'])
df = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df)

# NOTE(review): title and axis labels are still placeholders ('titulo', 'xxx', 'yyy').
df.plot(x="Spec", y='Cont', kind='bar', title='titulo', grid=True, xlabel="xxx", ylabel="yyy", legend=False)
plt.show()


# Distinct messages per spec, specs with the most distinct messages first.
print("**** Erros unicos por spec:")
df = df.sort_values(by=['Errors'], ascending=False, ignore_index=True)
print(df)
for ind in df.index:
    spec =  df['Spec'][ind]
    print("{} ({})".format(spec, len(data[spec]["errors"])))
    errors = sorted(list(data[spec]["errors"]))
    for err in errors:
        print("\t- {}".format(err))

# Quantidade de erros por APK

In [None]:
# Group every reported RVSec error by APK, keeping the raw occurrence count
# ("total") and the set of distinct messages ("errors"). Every APK gets an
# entry, including those with no errors.
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        if apk not in data:
            data[apk] = {"errors": set(), "total": 0}
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        # The error string is parsed differently for each spec set.
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)

                        data[apk]["total"] = data[apk]["total"] + 1
                        data[apk]["errors"].add(msg)

# `data` holds every APK, so count only those that actually reported an error;
# len(data) would report the number of APKs in the results file instead.
apks_with_any_error = sum(1 for apk_data in data.values() if apk_data["total"] > 0)
print("Quantidade de APKS que encontraram algum erro: {}".format(apks_with_any_error))

# One row per APK: [apk, total occurrences, number of distinct messages].
apks_totals = []
for apk in data:
    apks_totals.append([apk, data[apk]["total"], len(data[apk]["errors"])])

# The first column holds APK names, so it is labelled 'APK' (it used to say 'Spec').
df = pd.DataFrame(apks_totals, columns=['APK', 'Cont', 'Errors'])
df = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df)

# Distinct messages per APK, APKs with the most distinct messages first.
print("**** Erros unicos por APK:")
df = df.sort_values(by=['Errors'], ascending=False, ignore_index=True)
print(df)
for ind in df.index:
    apk = df['APK'][ind]
    print("{} ({})".format(apk, len(data[apk]["errors"])))
    errors = sorted(list(data[apk]["errors"]))
    for err in errors:
        print("\t- {}".format(err))

### Quantidade de erros por ferramenta

In [None]:
# Group every reported RVSec error by testing tool, keeping the raw occurrence
# count ("total") and the set of distinct messages ("errors"). Every tool seen
# in the results gets an entry, including tools that reported nothing.
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data:
                        data[tool] = {"errors": set(), "total": 0}
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        # The error string is parsed differently for each spec set.
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)

                        data[tool]["total"] = data[tool]["total"] + 1
                        data[tool]["errors"].add(msg)

# `data` holds every tool, so count only those that actually reported an error;
# len(data) would report the number of tools in the results file instead.
tools_with_any_error = sum(1 for tool_data in data.values() if tool_data["total"] > 0)
print("Quantidade de Ferramentas que encontraram algum erro: {}".format(tools_with_any_error))

# One row per tool: [tool, total occurrences, number of distinct messages].
tools_totals = []
for tool in data:
    tools_totals.append([tool, data[tool]["total"], len(data[tool]["errors"])])

df = pd.DataFrame(tools_totals, columns=['Tool', 'Cont', 'Errors'])
df = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df)

# NOTE(review): title and axis labels are still placeholders ('titulo', 'xxx', 'yyy').
df.plot(x="Tool", y='Cont', kind='bar', title='titulo', grid=True, xlabel="xxx", ylabel="yyy", legend=False)
plt.show()

# TODO: analyse this result: "UnsafeAlgorithm,expecting one of PKIX,SunX509 but found PKIX."
# Distinct messages per tool, tools with the most distinct messages first.
print("**** Erros unicos por ferramenta:")
df = df.sort_values(by=['Errors'], ascending=False, ignore_index=True)
print(df)
for ind in df.index:
    tool = df['Tool'][ind]
    print("{} ({})".format(tool, len(data[tool]["errors"])))
    errors = sorted(list(data[tool]["errors"]))
    for err in errors:
        print("\t- {}".format(err))

### Erros por ferramenta/timeout

In [None]:
# Collect errors per repetition -> timeout -> tool: the raw occurrence count
# ("total") and the set of distinct raw error strings ("errors").
data = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            if rep not in data:
                data[rep] = {}
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                if timeout not in data[rep]:
                    data[rep][timeout] = {}
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data[rep][timeout]:
                        data[rep][timeout][tool] = {"errors": set(), "total": 0}
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        data[rep][timeout][tool]["total"] = data[rep][timeout][tool]["total"] + 1
                        data[rep][timeout][tool]["errors"].add(error)

# Flatten into rows: [tool, rep, timeout, distinct errors, total occurrences].
# rep and timeout are JSON keys, hence the int() conversions.
tmp = []
for rep in data:
    for timeout in data[rep]:
        for tool in data[rep][timeout]:
            tmp.append([str(tool), int(rep), int(timeout), len(data[rep][timeout][tool]["errors"]), data[rep][timeout][tool]["total"]])

df = pd.DataFrame(tmp, columns=['tool', 'rep', 'timeout', 'errors', 'total'])
df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)
# print(df)
# df.query("rep == 1 and tool == 'droidbot' ").plot(x="timeout", y=["errors"])

df.set_index('timeout', inplace=True)
# group data by tool and display errors as line chart
# Only repetition 1 is plotted; from here on `df` is a groupby object, not a DataFrame.
df = df.query("rep == 1").groupby('tool', group_keys=True)['errors']
print(df.describe())
# df = df.query("rep == '1'").groupby(['timeout'],  group_keys=True)['errors']
#df.plot(title='titulo', grid=True, xlabel="timeout", ylabel="quantidade", legend=True)
df.plot(grid=True, xlabel="timeout", ylabel="quantidade de erros únicos", legend=True)
# NOTE(review): tick positions are hard-coded; presumably the timeouts used in the experiment - confirm.
plt.xticks([60, 120, 180, 300])
plt.show()


# Same chart for the total number of error occurrences instead of distinct errors.
print("TOTAL")
df = pd.DataFrame(tmp, columns=['tool', 'rep', 'timeout', 'errors', 'total'])
df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)
df.set_index('timeout', inplace=True)
# group data by tool and display errors as line chart
df = df.query("rep == 1").groupby('tool', group_keys=True)['total']
print(df.describe())
# df = df.query("rep == '1'").groupby(['timeout'],  group_keys=True)['errors']
#df.plot(title='titulo', grid=True, xlabel="timeout", ylabel="quantidade", legend=True)
df.plot(grid=True, xlabel="timeout", ylabel="quantidade total de erros", legend=True)
plt.xticks([60, 120, 180, 300])
plt.show()


#df = pd.DataFrame(tmp, columns=['tool', 'rep', 'timeout', 'errors'])
#df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)
#pd.pivot_table(df.reset_index(),
#                index='timeout', columns='rep', values='errors'
#                ).plot(subplots=True)
#plt.xticks([60, 120, 180, 300])
#plt.show()

In [None]:
# Variation of the per-repetition tool/timeout cell: errors are aggregated over
# all repetitions, keyed by timeout -> tool.
data = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                if timeout not in data:
                    data[timeout] = {}
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data[timeout]:
                        data[timeout][tool] = {"errors": set(), "total": 0}
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        data[timeout][tool]["total"] = data[timeout][tool]["total"] + 1
                        data[timeout][tool]["errors"].add(error)

# Flatten into rows: [tool, timeout, distinct errors, total occurrences].
# `data` is keyed by timeout only. The extra outer `for rep in data` loop that
# used to wrap this one iterated the timeouts a second time and appended every
# row len(data) times, inflating describe() and overplotting the charts.
tmp = []
for timeout in data:
    for tool in data[timeout]:
        tmp.append([str(tool), int(timeout), len(data[timeout][tool]["errors"]), data[timeout][tool]["total"]])

df = pd.DataFrame(tmp, columns=['tool', 'timeout', 'errors', 'total'])
df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)

df.set_index('timeout', inplace=True)
# Group by tool and draw the number of distinct errors as one line per tool.
# From here on `df` is a groupby object, not a DataFrame.
df = df.groupby('tool', group_keys=True)['errors']
print(df.describe())
df.plot(grid=True, xlabel="timeout", ylabel="quantidade de erros únicos", legend=True)
# NOTE(review): tick positions are hard-coded; presumably the timeouts used in the experiment - confirm.
plt.xticks([60, 120, 180, 300])
plt.show()


# Same chart for the total number of error occurrences instead of distinct errors.
print("TOTAL")
df = pd.DataFrame(tmp, columns=['tool', 'timeout', 'errors', 'total'])
df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)
df.set_index('timeout', inplace=True)
df = df.groupby('tool', group_keys=True)['total']
print(df.describe())
df.plot(grid=True, xlabel="timeout", ylabel="quantidade total de erros", legend=True)
plt.xticks([60, 120, 180, 300])
plt.show()

### Erros apk/ferramenta (true/false)

In [None]:
# For every APK and tool, record whether the tool reported at least one RVSec
# error for that APK in any repetition/timeout.
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        data[apk] = {}
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data[apk]:
                        data[apk][tool] = False
                    errors = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][RVSEC_ERRORS]
                    if len(errors) > 0:
                        data[apk][tool] = True

# Columns are the sorted union of the tools seen for any APK. Taking them from
# the last APK only (as before) misaligns or breaks the table as soon as one
# APK was run with a different set of tools.
sorted_tools = sorted({tool_name for apk_tools in data.values() for tool_name in apk_tools})

apks_totals = []
for apk in data:
    line = [apk]
    for tool in sorted_tools:
        # A tool that never ran for this APK cannot have reported an error.
        line.append(data[apk].get(tool, False))
    apks_totals.append(line)
print(apks_totals)

headers = ['APK'] + sorted_tools
df = pd.DataFrame(apks_totals, columns=headers)

print(df)

In [10]:
### Erros apk/ferramenta (heatmap erros unicos)

In [None]:
# For every APK and tool, accumulate the number of reported RVSec errors
# ("total") and the set of distinct messages ("errors") over all
# repetitions/timeouts, then draw an APK x tool heatmap.
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        data[apk] = {}
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data[apk]:
                        data[apk][tool] = {"total": 0, "errors": set()}
                    # (this assignment used to appear twice in a row)
                    errors = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][RVSEC_ERRORS]
                    for error in errors:
                        # The error string is parsed differently for each spec set.
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)

                        data[apk][tool]["total"] = data[apk][tool]["total"] + 1
                        data[apk][tool]["errors"].add(msg)

# Columns are the sorted union of the tools seen for any APK. Taking them from
# the last APK only (as before) misaligns or breaks the table as soon as one
# APK was run with a different set of tools.
sorted_tools = sorted({tool_name for apk_tools in data.values() for tool_name in apk_tools})

apks_totals = []
for apk in data:
    line = []
    for tool in sorted_tools:
        # NOTE(review): the section title says "unique errors" but the heatmap shows
        # total occurrences; the unique-count alternative is kept below for reference.
        #line.append(len(data[apk][tool]["errors"]))
        line.append(data[apk][tool]["total"] if tool in data[apk] else 0)
    apks_totals.append(line)

headers = list(sorted_tools)
df = pd.DataFrame(apks_totals, columns=headers)

# fig = px.imshow(df, aspect="auto", template="ggplot2")
fig = px.imshow(df,
                labels=dict(x="Tool", y="APK", color="Errors"),
                aspect="auto")#, color_continuous_scale='blues')
fig.show()

# FDROID

In [6]:
from csv import DictReader

def read_fdroid(planilha_fdroid_path):
    """Read the F-Droid spreadsheet (CSV with a header row) into a dict keyed by the 'file' column.

    Each value is the row as a dict, with two columns converted to booleans:
    'mop' is False only for the text 'No', and 'package' is False only for the
    text 'False'; any other text becomes True.

    Raises:
        KeyError: when the CSV lacks a 'file', 'mop' or 'package' column.
    """
    apps = {}
    # newline='' leaves line-ending handling to the csv module, as its documentation
    # requires; otherwise newlines inside quoted fields are not read back correctly.
    with open(planilha_fdroid_path, 'r', newline='') as f:
        for app in DictReader(f):
            app['mop'] = app['mop'] != 'No'
            app['package'] = app['package'] != 'False'

            apps[app['file']] = app
    return apps

### Aplicativos que usam o mesmo pacote declarado no manifest

In [None]:
# Count spreadsheet apps whose 'package' flag is True (the section title calls
# these apps that use the same package declared in the manifest).
# Requires `total_experiment_apks` and `total_instrumented` from the Totals cell.
cont_same_package = 0

fdroid = read_fdroid(planilha_fdroid_path)

for apk in fdroid:    
    if fdroid[apk]['package']:
        cont_same_package += 1

print("TOTAL APKS: {}".format(total_experiment_apks))
print("APKS_SAME_PACKAGE: {}".format(cont_same_package))

# NOTE(review): the count above runs over every spreadsheet row, while the denominator
# is the number of APKs in the experiment - confirm both refer to the same population.
pct = (cont_same_package * 100) / total_experiment_apks
print("APKS_SAME_PACKAGE (%): {}".format(pct))


# Instrumented APKs only (those present in the results file).
cont_same_package = 0
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        if fdroid[apk]['package']:
            cont_same_package += 1

print("INSTRUMENTADOS: {}".format(total_instrumented))
print("APKS_INSTRUMENTADOS_SAME_PACKAGE: {}".format(cont_same_package))
#pct_total = (cont_same_package * 100) / total_experiment_apks
pct = (cont_same_package * 100) / total_instrumented
#print("APKS_INSTRUMENTADOS_SAME_PACKAGE (% do TOTAL): {}".format(pct_total))
print("APKS_INSTRUMENTADOS_SAME_PACKAGE (% do INSTRUMENTADOS): {}".format(pct))
        
    

### Aplicativos por categoria

In [None]:
def read_csv(csv_path):
    """Read a CSV file with a header row and return its rows as a list of dicts."""
    # newline='' leaves line-ending handling to the csv module, as its documentation
    # requires; otherwise newlines inside quoted fields are not read back correctly.
    with open(csv_path, 'r', newline='') as f:
        return list(DictReader(f))


def get_categories(app):
    """Return the distinct category names of a spreadsheet row, in first-seen order.

    `app["categories"]` is text such as "['Internet', 'Security']": brackets and
    single quotes are removed and the remainder is split on commas.

    Returns:
        A list of stripped, non-empty, de-duplicated names; [] for an empty list.
    """
    categories = []
    cat_str = app["categories"].replace('[', '').replace(']', '').replace("'", "")
    for cat in cat_str.split(','):
        # Strip before the membership test: comparing the unstripped " Internet"
        # against the stored "Internet" let duplicates through.
        cat = cat.strip()
        # An empty list ("[]") splits into one empty string; skip it so it does
        # not become a phantom "" category.
        if cat and cat not in categories:
            categories.append(cat)
    return categories


# Per category: number of apps ("cont"), total error occurrences
# ("errors_total") and the set of distinct raw error strings ("errors").
data = {}

apps_fdroid = read_csv(planilha_fdroid_path)
apps_fdroid_by_apk = {}
for app in apps_fdroid:
    apps_fdroid_by_apk[app["file"]] = app
    # Apps whose 'mop' column is 'No' are indexed by file but not counted per category.
    if app["mop"] == 'No':
        continue
    categories = get_categories(app)
    for c in categories:
        if c not in data:
            data[c] = {"cont": 0, "errors_total": 0, "errors": set()}
        data[c]["cont"] = data[c]["cont"] + 1

with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    # errors holds a list of unique errors, e.g. error XXX may appear N times in the log but counts only once
                    for error in errors:
                        categories = get_categories(apps_fdroid_by_apk[apk])
                        for cat in categories:
                            # NOTE(review): raises KeyError if the category was only seen on
                            # apps skipped above (mop == 'No') - confirm that cannot happen.
                            data[cat]["errors_total"] = data[cat]["errors_total"] + 1
                            data[cat]["errors"].add(error)

# One row per category: [category, apps, total occurrences, distinct errors].
# Note that `categories` is reused here as the list of rows.
categories = []
for cat in data:
    categories.append([cat, data[cat]["cont"], data[cat]["errors_total"], len(data[cat]["errors"])])

print("Quantidade de categorias: {}".format(len(categories)))

df = pd.DataFrame(categories, columns=['Category', 'Cont', 'Errors_total', 'errors'])

df1 = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df1)

df2 = df.sort_values(by=['Errors_total'], ascending=False, ignore_index=True)
print(df2)

# NOTE(review): the plot title is still the placeholder 'titulo'.
df2.plot(x="Category", y='Errors_total', kind='bar', title='titulo', grid=True, xlabel="category", ylabel="quantidade total de erros", legend=False)
plt.show()

### Aplicativos por min_sdk, target_sdk, lastUpdated

In [None]:
# Descriptive statistics of min_sdk, target_sdk and lastUpdated over the APKs
# whose APK-level RVSEC_ERRORS entry is non-empty.
# `fdroid` is also read by the min_sdk and lastUpdated cells that follow.
fdroid = read_fdroid(planilha_fdroid_path)

data = []
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        # Here the errors are read directly under the APK, not under each tool run.
        if len(result[apk][RVSEC_ERRORS]) > 0:
            data.append([int(fdroid[apk]["min_sdk"]), int(fdroid[apk]["target_sdk"]), pd.to_datetime(fdroid[apk]["lastUpdated"]) ])

df = pd.DataFrame(data, columns=['min', 'target', 'update'])

print(df['min'].describe())
print(df['target'].describe())
print(df['update'].describe())
#print("lastUpdated (median): {}".format(df['update'].median()))
#print("lastUpdated (median): {}".format(df['update'].median()))

### Aplicativos (com erro) por min_sdk

In [None]:
# Number of APKs with errors per min_sdk value.
# Requires `fdroid` (from the sdk/lastUpdated describe cell) and
# `total_apks_with_errors` (from the Totals cell).
min_sdk_bruto = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        # The errors are read directly under the APK, not under each tool run.
        if len(result[apk][RVSEC_ERRORS]) > 0:
            tmp = int(fdroid[apk]["min_sdk"])
            if tmp not in min_sdk_bruto:
                min_sdk_bruto[tmp] = 0
            min_sdk_bruto[tmp] = min_sdk_bruto[tmp] + 1


# One row per min_sdk: [min_sdk, count, percentage of all APKs with errors].
min_sdk = []
for tmp in min_sdk_bruto:
    value = min_sdk_bruto[tmp]
    min_sdk.append([tmp, value, ((value*100)/total_apks_with_errors)])
df = pd.DataFrame(min_sdk, columns=['min', 'cont', 'pct'])
# print(df.describe())

# The x axis shows min_sdk values; it was labelled "category" (copied from the
# categories chart).
# NOTE(review): the plot titles are still the placeholder 'titulo'.
df1 = df.sort_values(by=['cont'], ascending=False, ignore_index=True)
df1.plot(x="min", y='cont', kind='bar', title='titulo', grid=True, xlabel="min_sdk",
         ylabel="quantidade", legend=False)

df2 = df.sort_values(by=['pct'], ascending=False, ignore_index=True)
df2.plot(x="min", y='pct', kind='bar', title='titulo', grid=True, xlabel="min_sdk",
         ylabel="percentual (do total com erro)", legend=False)

df3 = df.sort_values(by=['min'], ascending=False, ignore_index=True)
df3.plot(x="min", y='pct', kind='bar', title='titulo', grid=True, xlabel="min_sdk",
         ylabel="percentual (do total com erro)", legend=False)

plt.show()

### Aplicativos (com erro) por lastUpdated

In [None]:
# Number of APKs with errors per year of their 'lastUpdated' date.
# Requires `fdroid` and `total_apks_with_errors` from earlier cells.
updated_bruto = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        # The errors are read directly under the APK, not under each tool run.
        if len(result[apk][RVSEC_ERRORS]) > 0:
            tmp = pd.to_datetime(fdroid[apk]["lastUpdated"])
            year = tmp.year
            if year not in updated_bruto:
                updated_bruto[year] = 0
            updated_bruto[year] = updated_bruto[year] + 1

# One row per year: [year, count, percentage of all APKs with errors].
updated = []
for tmp in updated_bruto:
    value = updated_bruto[tmp]
    updated.append([tmp, value, ((value * 100) / total_apks_with_errors)])
df = pd.DataFrame(updated, columns=['updated', 'cont', 'pct'])
# print(df.describe())
# The same data three times: sorted by count, by percentage, and by year.
# NOTE(review): the plot titles are still the placeholder 'titulo'.
df1 = df.sort_values(by=['cont'], ascending=False, ignore_index=True)
df1.plot(x="updated", y='cont', kind='bar', title='titulo', grid=True, xlabel="ano",
         ylabel="quantidade", legend=False)
df2 = df.sort_values(by=['pct'], ascending=False, ignore_index=True)
df2.plot(x="updated", y='pct', kind='bar', title='titulo', grid=True, xlabel="ano",
         ylabel="percentual (do total com erro)", legend=False)
df3 = df.sort_values(by=['updated'], ascending=False, ignore_index=True)
df3.plot(x="updated", y='pct', kind='bar', title='titulo', grid=True, xlabel="ano",
         ylabel="percentual (do total com erro)", legend=False)

plt.show()

# Cobertura

### Media total de coberturas ...

In [None]:
# Mean of each coverage metric over every (apk, repetition, timeout, tool) run.
activity = []
method = []
mop = []
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    # Each tool run carries a SUMMARY entry with the three coverage values.
                    summary = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][SUMMARY]
                    activity.append(summary[ACTIVITIES_COVERAGE])
                    method.append(summary[METHOD_COVERAGE])
                    mop.append(summary[METHODS_JCA_COVERAGE])
print("ATIVIDADE: {}".format(np.mean(np.array(activity))))
print("METODO: {}".format(np.mean(np.array(method))))
print("METODO_JCA: {}".format(np.mean(np.array(mop))))

### Media de cobertura por aplicativo

In [None]:
# Mean of each coverage metric per APK, over all of its (repetition, timeout, tool) runs.
data = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        data[apk] = {'activity': [],
                     'method': [],
                     'mop': []}
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    # Each tool run carries a SUMMARY entry with the three coverage values.
                    summary = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][SUMMARY]
                    data[apk]['activity'].append(summary[ACTIVITIES_COVERAGE])
                    data[apk]['method'].append(summary[METHOD_COVERAGE])
                    data[apk]['mop'].append(summary[METHODS_JCA_COVERAGE])

# One row per APK: [apk, mean activity, mean method, mean JCA-method coverage].
coverage_by_apk = []
for apk in data:
    coverage_by_apk.append([apk, np.mean(np.array(data[apk]['activity'])), np.mean(np.array(data[apk]['method'])), np.mean(np.array(data[apk]['mop']))])

df = pd.DataFrame(coverage_by_apk, columns=['apk', 'activity', 'method', 'mop'])

df = df.sort_values(by=['activity'], ascending=False, ignore_index=True)

# Plot a small random sample of the APKs with non-zero activity coverage.
# The sample size is capped at the number of available rows (sample(n=10) raised
# ValueError with fewer than 10) and the seed is fixed so a re-run shows the same APKs.
SAMPLE_SIZE = 10
SAMPLE_SEED = 42
covered_apks = df.query('activity > 0')
if covered_apks.empty:
    print("Nenhum APK com cobertura de atividade > 0")
else:
    covered_apks.sample(n=min(SAMPLE_SIZE, len(covered_apks)), random_state=SAMPLE_SEED).plot(
        x="apk", y=['activity', 'method', 'mop'], kind='bar')

plt.show()

In [20]:
### Media de cobertura por ferramenta

In [None]:
# Mean of each coverage metric per tool, over all (apk, repetition, timeout) runs.
data = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:        
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    if tool not in data:
                        data[tool] = {'activity': [],
                         'method': [],
                         'mop': []}
                    # Each tool run carries a SUMMARY entry with the three coverage values.
                    summary = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][SUMMARY]
                    data[tool]['activity'].append(summary[ACTIVITIES_COVERAGE])
                    data[tool]['method'].append(summary[METHOD_COVERAGE])
                    data[tool]['mop'].append(summary[METHODS_JCA_COVERAGE])
# One row per tool (the list keeps the name `coverage_by_apk` used by the per-APK cell).
coverage_by_apk = []
for tool in data:
    coverage_by_apk.append([tool, np.mean(np.array(data[tool]['activity'])), np.mean(np.array(data[tool]['method'])), np.mean(np.array(data[tool]['mop']))])
#    coverage_by_apk.append([tool, np.median(np.array(data[tool]['activity'])), np.median(np.array(data[tool]['method'])), np.median(np.array(data[tool]['mop']))])

df = pd.DataFrame(coverage_by_apk, columns=['tool', 'activity', 'method', 'mop'])

df = df.sort_values(by=['activity'], ascending=False, ignore_index=True)
#print(df)

df.plot(x="tool", y=['activity', 'method', 'mop'], kind='bar')

plt.show()

### Media de cobertura por timeout

In [None]:
# Mean of each coverage metric per timeout, over all (apk, repetition, tool) runs.
data = {}
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                if timeout not in data:
                    data[timeout] = {'activity': [],
                                  'method': [],
                                  'mop': []}
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    # Each tool run carries a SUMMARY entry with the three coverage values.
                    summary = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool][SUMMARY]
                    #if summary[ACTIVITIES_COVERAGE] > 0:
                    data[timeout]['activity'].append(summary[ACTIVITIES_COVERAGE])
                    data[timeout]['method'].append(summary[METHOD_COVERAGE])
                    data[timeout]['mop'].append(summary[METHODS_JCA_COVERAGE])
# One row per timeout (the list keeps the name `coverage_by_apk` used by the per-APK cell).
# Timeouts are JSON keys, hence the int() conversion.
coverage_by_apk = []
for timeout in data:
    coverage_by_apk.append(
        [int(timeout), np.mean(np.array(data[timeout]['activity'])), np.mean(np.array(data[timeout]['method'])),
         np.mean(np.array(data[timeout]['mop']))])

df = pd.DataFrame(coverage_by_apk, columns=['timeout', 'activity', 'method', 'mop'])

df = df.sort_values(by=['timeout'], ascending=False, ignore_index=True)
# print(df)
# print(df.dtypes)

df.plot(x="timeout", y=['activity', 'method', 'mop'],grid=True, xlabel="timeout", ylabel="media de cobertura (%)", legend=True)
# plt.xticks([60, 120, 180, 300])

plt.show()