In [4]:
import json
import pandas as pd
import matplotlib.pyplot as plt

from constants import EXTENSION_METHODS, RVSEC_ERRORS, REPETITIONS, TIMEOUTS, TOOLS, SUMMARY, METHOD_COVERAGE, \
    METHODS_JCA_COVERAGE, ACTIVITIES_COVERAGE

In [5]:
results_file = "merged_results_analysis.json"
instrument_errors_file = "merged_instrument_errors.json"
is_jca = True

# Totals

In [15]:
total_instrumented = 0
total_instrument_errors = 0

apks_with_errors = set()

with open(results_file, "r") as f:
    result = json.load(f)
    total_instrumented = len(result)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    if len(errors) > 0:
                        apks_with_errors.add(apk)
                        continue
                    

with open(instrument_errors_file, "r") as f:
    result = json.load(f)
    total_instrument_errors = len(result)

total_experiment_apks = total_instrumented + total_instrument_errors
instrumented_pct = (total_instrumented * 100) / total_experiment_apks
apks_com_erros_pct = (len(apks_with_errors) * 100) / total_experiment_apks

print("FDROID: 4162")
print("TOTAL: {} (apks que usam jca)".format(total_experiment_apks))
print("INSTRUMENTADOS: {}".format(total_instrumented))
print("INSTRUMENTADOS (%): {}".format(instrumented_pct))
print("ERROS_INSTRUMENTACAO: {}".format(total_instrument_errors))
print("APKS_COM_ERROS_ENCONTRADOS: {}".format(len(apks_with_errors)))
print("APKS_COM_ERROS_ENCONTRADOS (%): {}".format(apks_com_erros_pct))

FDROID: 4162
TOTAL: 557 (apks que usam jca)
INSTRUMENTADOS: 193
INSTRUMENTADOS (%): 34.64991023339318
ERROS_INSTRUMENTACAO: 364
APKS_COM_ERROS_ENCONTRADOS: 88
APKS_COM_ERROS_ENCONTRADOS (%): 15.798922800718133


In [7]:
def parse_nova_spec(texto: str):
    s01 = texto.split(":::")
    tmp = s01[0].split("(")[0]

    last_dot_idx = tmp.rfind('.')
    clazz = tmp[:last_dot_idx].strip()
    method = tmp[last_dot_idx+1:].strip()

    a = s01[1].strip()
    spec = a.split(" ")[0].strip()
    msg = a.strip()

    return clazz, method, spec, msg

def parse_jca(texto: str):
    s01 = texto.split(",")
    spec = s01[0].strip()
    clazz = s01[1].strip()
    method = s01[3].strip()

    x = s01[4]
    idx = texto.rfind(x)
    msg = texto[idx+len(x)+1:].strip()

    return clazz, method, spec, msg

# Quantidade de erros por SPEC

In [8]:
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)
                        
                        if spec not in data:
                            data[spec] = {"errors": set(),
                                         "total": 0}
                        data[spec]["total"] = data[spec]["total"] + 1
                        data[spec]["errors"].add(msg)

print("Quantidade de SPECS que encontraram algum erro: {}".format(len(data)))

specs_totals = []
for spec in data:
    specs_totals.append([spec, data[spec]["total"], len(data[spec]["errors"]) ])

df = pd.DataFrame(specs_totals, columns=['Spec', 'Cont', 'Errors'])
df = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df)

print("**** Erros unicos por spec:")
df = df.sort_values(by=['Errors'], ascending=False, ignore_index=True)
print(df)
for ind in df.index:
    spec =  df['Spec'][ind]
    print("{} ({})".format(spec, len(data[spec]["errors"])))
    errors = sorted(list(data[spec]["errors"]))
    for err in errors:
        print("\t- {}".format(err))

Quantidade de SPECS que encontraram algum erro: 15
                       Spec  Cont  Errors
0            SSLContextSpec  4410       3
1         MessageDigestSpec  4343       7
2         SecretKeySpecSpec  1766       2
3                CipherSpec  1012       1
4              KeyStoreSpec   823       2
5       IvParameterSpecSpec   482       2
6          SecureRandomSpec   248       1
7                   MacSpec   182       1
8                PBEKeySpec   160       4
9   TrustManagerFactorySpec   154       2
10            SignatureSpec    63       1
11         PBEParameterSpec    40       2
12    KeyManagerFactorySpec    39       1
13              KeyPairSpec    20       1
14           PBEKeySpecSpec    20       1
**** Erros unicos por spec:
                       Spec  Cont  Errors
0         MessageDigestSpec  4343       7
1                PBEKeySpec   160       4
2            SSLContextSpec  4410       3
3         SecretKeySpecSpec  1766       2
4              KeyStoreSpec   823      

# Quantidade de erros por APK

In [9]:
with open(results_file, "r") as f:
    data = {}
    result = json.load(f)
    for apk in result:
        if apk not in data:
            data[apk] = {"errors": set(),
                                         "total": 0}
        for rep in result[apk][REPETITIONS]:
            for timeout in result[apk][REPETITIONS][rep][TIMEOUTS]:
                for tool in result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS]:
                    tool_result = result[apk][REPETITIONS][rep][TIMEOUTS][timeout][TOOLS][tool]
                    errors = tool_result[RVSEC_ERRORS]
                    for error in errors:
                        if is_jca:
                            clazz, method, spec, msg = parse_jca(error)
                        else:
                            clazz, method, spec, msg = parse_nova_spec(error)
               
                        data[apk]["total"] = data[apk]["total"] + 1
                        data[apk]["errors"].add(msg)

print("Quantidade de APKS que encontraram algum erro: {}".format(len(data)))

specs_totals = []
for spec in data:
    specs_totals.append([spec, data[spec]["total"], len(data[spec]["errors"]) ])

df = pd.DataFrame(specs_totals, columns=['Spec', 'Cont', 'Errors'])
df = df.sort_values(by=['Cont'], ascending=False, ignore_index=True)
print(df)

print("**** Erros unicos por APK:")
df = df.sort_values(by=['Errors'], ascending=False, ignore_index=True)
print(df)
for ind in df.index:
    spec =  df['Spec'][ind]
    print("{} ({})".format(spec, len(data[spec]["errors"])))
    errors = sorted(list(data[spec]["errors"]))
    for err in errors:
        print("\t- {}".format(err))

Quantidade de APKS que encontraram algum erro: 193
                                       Spec  Cont  Errors
0    org.mosad.seil0.projectlaogai_6000.apk  1634       6
1      io.github.domi04151309.home_1100.apk  1216       4
2           eu.bubu1.fdroidclassic_1110.apk   420       3
3        net.sf.andhsli.hotspotlogin_20.apk   400       3
4            fr.kwiatkowski.ApkTrack_24.apk   355       3
..                                      ...   ...     ...
188  com.soumikshah.investmenttracker_3.apk     0       0
189      com.Bisha.TI89EmuDonation_1133.apk     0       0
190     com.andybotting.tramhunter_1300.apk     0       0
191     com.aidinhut.simpletextcrypt_14.apk     0       0
192        net.momodalo.app.vimtouch_25.apk     0       0

[193 rows x 3 columns]
**** Erros unicos por APK:
                                            Spec  Cont  Errors
0         org.mosad.seil0.projectlaogai_6000.apk  1634       6
1                          com.akop.bach_120.apk   280       6
2        org.

# FDROID

In [16]:
from csv import DictReader

def read_fdroid(planilha_fdroid_path):
    apps = {}
    with open(planilha_fdroid_path, 'r') as f:
        dict_reader = DictReader(f)
        list_of_dict = list(dict_reader)
        for app in list_of_dict:
            app['mop'] = False if app['mop'] == 'No' else True
            app['package'] = False if app['package'] == 'False' else True

            apps[app['file']] = app
    return apps

### Aplicativos que usam o mesmo pacote declarado

In [17]:
planilha_fdroid_path = "fdroid/final_apps_to_download.csv"

fdroid = read_fdroid(planilha_fdroid_path)

cont_same_package = 0
with open(results_file, "r") as f:
    result = json.load(f)
    for apk in result:
        if fdroid[apk]['package']:
            cont_same_package += 1

print("TOTAL APKS: {}".format(total_experiment_apks))
print("APKS_SAME_PACKAGE: {}".format(cont_same_package))

pct = (cont_same_package * 100) / total_experiment_apks
print("APKS_SAME_PACKAGE (%): {}".format(pct))
    

TOTAL APKS: 557
APKS_SAME_PACKAGE: 172
APKS_SAME_PACKAGE (%): 30.879712746858168
