# Result analysis

You cannot run this notebook yourself because the dataset file it reads is not distributed with it.


This Jupyter notebook aims to parse and aggregate the results of our experiments.
Due to our agreements with the data providers, we can provide neither the source data of the experiments nor the results, which contain proprietary data.

## Imports

In [2]:
import enum
import pandas as pd
import json
from collections import Counter

## Utility functions and class

In [3]:
# Path of the dataset file read when importing the results; every line of the
# file is handed to data_from_line, which parses it with json.loads.
FILEPATH_TO_DATASET = "../.local/sanitized-and-merged-dnsviz.txt"

class instr_enum(enum.Enum):
    """Command categories assigned to fix instructions by ``extract_instruction``.

    Three members share the value ``"UPLOAD_DS"``.  With ``enum.Enum`` a
    repeated value turns the later members into aliases of the first member
    declared with that value, so ``UPLOAD_DS`` and ``REGENERATE_DS`` are
    aliases of ``GENERATE_DS`` and all three report ``.value == "UPLOAD_DS"``.
    NOTE(review): presumably intentional, so that DS generation and upload are
    reported under one label -- confirm.
    """
    SYNC_SERVERS = "SYNC_SERVERS"
    GENERATE_KSK = "GENERATE_KSK"
    GENERATE_ZSK = "GENERATE_ZSK"
    GENERATE_KEY_PAIR = "GENERATE_KEY_PAIR"
    # First member with the value "UPLOAD_DS": this is the canonical member.
    GENERATE_DS = "UPLOAD_DS"
    # Alias of GENERATE_DS (same value).
    UPLOAD_DS = "UPLOAD_DS"
    # Not produced by extract_instruction in this notebook.
    SIGN_PARENT = "SIGN_PARENT"
    REMOVE_DS = "REMOVE_DS"
    # The member name and its value differ: the label that ends up in the
    # results is "REMOVE_REVOKED_KEY".
    REVOKE_KEY = "REMOVE_REVOKED_KEY"
    SIGN_ZONE = "SIGN_ZONE"
    # Alias of GENERATE_DS (same value).
    REGENERATE_DS = "UPLOAD_DS"
    REDUCING_TTL = "REDUCING_TTL"

class bind_cmd():
    """A command category together with the position it was found at.

    ``cmd`` is an ``instr_enum`` member and ``priority`` an integer; the
    string form joins the member's value and the priority with an underscore.
    """

    def __init__(self, cmd:instr_enum, priority:int):
        self.priority = priority
        self.cmd = cmd

    def __str__(self):
        """Return ``"<cmd value>_<priority>"``."""
        return "{}_{}".format(self.cmd.value, self.priority)

def extract_instruction(gt_instrs):
    """Translate free-text fix instructions into ordered command labels.

    Each instruction is tested against a fixed sequence of known phrases and
    the first phrase that matches decides the command.  Instructions starting
    with "Parent zone", the "Your record TTL is ... Your signature TTL is ..."
    notice, and anything unrecognised produce no command.

    Args:
        gt_instrs: Sequence of instruction strings.

    Returns:
        list[str]: One ``"<command value>_<index>"`` label per recognised
        command, where ``index`` is the position of the originating
        instruction in ``gt_instrs``.  A TTL-reduction instruction yields two
        labels (``REDUCING_TTL`` and ``SIGN_ZONE``) carrying the same index.
    """
    res = []
    for i, instr in enumerate(gt_instrs):
        lowered = instr.lower()

        if instr.startswith("Parent zone"):
            continue

        if "Configure the erroneous servers to pull from the master" in instr:
            res.append(bind_cmd(instr_enum.SYNC_SERVERS, i))
        elif "Generate a new KSK key pair" in instr:
            res.append(bind_cmd(instr_enum.GENERATE_KSK, i))
        elif "Generate a new ZSK key pair" in instr:
            # The BIND command text embedded in the instruction is not needed
            # here; parsing it only risked an IndexError when it was absent.
            res.append(bind_cmd(instr_enum.GENERATE_ZSK, i))
        elif "Generate a new key pair" in instr:
            # Must be looked up on the enum, not on the instruction string
            # (a str has no GENERATE_KEY_PAIR attribute).
            res.append(bind_cmd(instr_enum.GENERATE_KEY_PAIR, i))
        elif "Generate the corresponding DS record" in instr:
            res.append(bind_cmd(instr_enum.GENERATE_DS, i))
        elif (
            "Upload DS record in the parent zone" in instr
            or "Upload the DS record(s)" in instr
        ):
            res.append(bind_cmd(instr_enum.UPLOAD_DS, i))
        elif (
            "Remove the DS record(s)" in instr
            or "Remove these extraneous DS record(s)" in instr
            or "Remove these incorrect DS record(s)" in instr
        ):
            res.append(bind_cmd(instr_enum.REMOVE_DS, i))
        elif "remove the revoked dnskey(s)" in lowered:
            res.append(bind_cmd(instr_enum.REVOKE_KEY, i))
        elif (
            "Resign the zone" in instr
            or "Sign the zone" in instr
            or "Resigning the zone should resolve the issue" in instr
            or "Resigning the zone by explicitly setting the iteration count to 0"
            in instr
            # NOTE(review): a parent-zone re-sign is reported as SIGN_ZONE
            # although instr_enum also has SIGN_PARENT -- confirm intent.
            or "resign the parent zone which should typically resolve the issue"
            in instr
        ):
            res.append(bind_cmd(instr_enum.SIGN_ZONE, i))
        elif "Generate the correct DS record(s)" in instr:
            res.append(bind_cmd(instr_enum.REGENERATE_DS, i))
        elif "Upload the correct DS record(s)" in instr:
            res.append(bind_cmd(instr_enum.UPLOAD_DS, i))
        elif "Your record TTL is" in instr and "Your signature TTL is" in instr:
            # Informational line only: no command is derived from it.
            pass
        elif "reducing your zone/record ttl" in lowered:
            res.append(bind_cmd(instr_enum.REDUCING_TTL, i))
            res.append(bind_cmd(instr_enum.SIGN_ZONE, i))

    return [str(r) for r in res]

In [4]:
# Accumulates every number exported for the paper; filled by the cells below.
latex_export = {}

In [5]:
### Identical to src/utils/commons.py

In [6]:
# Numeric key-algorithm identifier -> algorithm mnemonic.
# NOTE(review): these look like the IANA DNSSEC algorithm numbers -- confirm.
KEY2ALGO_MAPPING = {
    1: "RSAMD5",
    3: "DSASHA1",
    5: "RSASHA1",
    6: "DSANSEC3SHA1",
    7: "NSEC3RSASHA1",
    8: "RSASHA256",
    10: "RSASHA512",
    13: "ECDSAP256SHA256",
    14: "ECDSAP384SHA384",
    15: "ED25519",
    16: "ED448",
}
# Default algorithm, given both as mnemonic and as number; the pair matches
# the entry for 13 in KEY2ALGO_MAPPING.
DEFAULT_ALGORITHM_TEXT = "ECDSAP256SHA256"
DEFAULT_ALGORITHM_NUMBER = 13
# Algorithm mnemonics that need an explicit key size (going by the name of the
# set -- it is not consumed in this notebook).  Listed alphabetically, each
# mnemonic once.
keysize_required_algorithms = {
    "DSANSEC3SHA1",
    "DSASHA1",
    "NSEC3RSASHA1",
    "RSAMD5",
    "RSASHA1",
    "RSASHA256",
    "RSASHA512",
}
# Error codes grouped by category.  The mapping is filled one category at a
# time; the categories keep the insertion order used here.
CAT = {}

CAT["DoE"] = {
    "MISSING_NSEC_FOR_NODATA",
    "MISSING_NSEC_FOR_NXDOMAIN",
    "MISSING_NSEC_FOR_WILDCARD",
    "NO_NSEC_MATCHING_SNAME",
    "NO_NSEC3_MATCHING_SNAME",
    "SNAME_COVERED",
    "SNAME_NOT_COVERED",
    "WILDCARD_COVERED",
    "WILDCARD_NOT_COVERED",
    "EXISTING_NAME_COVERED",
    "INCONSISTENT_NXDOMAIN_ANCESTOR",
    "NO_CLOSEST_ENCLOSER",
    "NEXT_CLOSEST_ENCLOSER_NOT_COVERED",
    "OPT_OUT_FLAG_NOT_SET",
    "EXISTING_TYPE_NOT_IN_BITMAP",
    "REFERRAL_WITHOUT_NS",
    "REFERRAL_WITH_DS",
    "REFERRAL_WITH_SOA",
    "INVALID_NSEC3_HASH",
    "INVALID_NSEC3_OWNER_NAME",
    "LAST_NSEC_NEXT_NOT_ZONE",
    "STYPE_IN_BITMAP",
    "NONZERO_NSEC3_ITERATION_COUNT",
    "UNSUPPORTED_NSEC3_ALGORITHM",
}

CAT["RRSIG"] = {
    "RRSIG_BAD_LENGTH_ECDSA256",
    "RRSIG_BAD_LENGTH_ECDSA384",
    "NO_SEP",
    "SIGNER_NOT_ZONE",
    "RRSIG_LABELS_EXCEED_RRSET_OWNER_LABELS",
    "SIGNATURE_INVALID",
    "MISSING_RRSIG",
    "MISSING_RRSIG_FOR_ALG_DNSKEY",
    "MISSING_RRSIG_FOR_ALG_DS",
}

CAT["Server"] = {
    "SERVER_UNRESPONSIVE_TCP",
    "SERVER_UNRESPONSIVE_UDP",
    "UNABLE_TO_RETRIEVE_DNSSEC_RECORDS",
    "DNSSEC_DOWNGRADE_DO_CLEARED",
    "DNSSEC_DOWNGRADE_EDNS_DISABLED",
    "ERROR_WITH_EDNS",
    "ERROR_WITH_EDNS_FLAG",
    "ERROR_WITH_EDNS_OPTION",
    "ERROR_WITHOUT_REQUEST_FLAG",
    "RECURSION_NOT_AVAILABLE",
    "SERVER_INVALID_RESPONSE_TCP",
    "SERVER_INVALID_RESPONSE_UDP",
    "NOT_AUTHORITATIVE",
    "SERVER_NOT_AUTHORITATIVE",
    "UPWARD_REFERRAL",
    "REFERRAL_FOR_DS_QUERY",
}

CAT["TTL"] = {
    "ORIGINAL_TTL_EXCEEDED_RRSET",
    "ORIGINAL_TTL_EXCEEDED_RRSIG",
    "TTL_BEYOND_EXPIRATION",
}

# "DNSKEY_REVOKED_RRSIG" is written once here; a set holds it once anyway.
CAT["DNSKEY"] = {
    "DNSKEY_REVOKED_RRSIG",
    "DNSKEY_BAD_LENGTH_ECDSA256",
    "DNSKEY_BAD_LENGTH_ECDSA384",
    "DNSKEY_MISSING_FROM_SERVERS",
    "DNSKEY_REVOKED_DS",
    "REVOKED_NOT_SIGNING",
    "DNSKEY_ZERO_LENGTH",
    "NO_TRUST_ANCHOR_SIGNING",
}

CAT["Timing"] = {"INCEPTION_IN_FUTURE", "EXPIRATION_IN_PAST"}
CAT["DS"] = {"MISSING_SEP_FOR_ALG", "DIGEST_INVALID"}

# Categories that currently have no error code assigned.
CAT["NS"] = set()
CAT["SOA"] = set()
CAT["CNAME"] = set()

# Codes of the "Server" category, subtracted again when the per-line
# difference is computed in data_from_line.
codes_to_ignore = CAT["Server"]

# Flat list of the error codes of every category except "Server", with
# NO_TRUST_ANCHOR_SIGNING left out.
DNSSECRelatedErrors = []
for key in CAT:
    if key != "Server":
        for err in CAT[key]:
            if err != "NO_TRUST_ANCHOR_SIGNING":
                DNSSECRelatedErrors.append(err)

def _ordered_unique_commands(fixes):
    """Reduce the fixes of one transition to an ordered list of command names.

    The instruction lists of all fixes are labelled by ``extract_instruction``
    (``"<command>_<index>"``), duplicates are dropped, the labels are ordered
    by their index (stable, so ties keep their order of appearance) and the
    index is then stripped, dropping duplicates once more.
    """
    labels = [
        label
        for fix in fixes
        for label in extract_instruction(fix.get("instructions", []))
    ]
    by_index = sorted(
        dict.fromkeys(labels),  # order-preserving de-duplication
        key=lambda label: int(label.rsplit("_", 1)[1]),
    )
    return list(dict.fromkeys(label.rsplit("_", 1)[0] for label in by_index))


def data_from_line(line,src=""):
    """Turn one JSON line of the dataset into a flat record for the DataFrame.

    Args:
        line: JSON document describing one zone.  The keys read are ``id``,
            ``zone_name``, ``intended_errcodes``, ``generated_errcodes``,
            ``exception``, ``fix_itterations`` and ``fix_transition_errcodes``.
        src: Free-form label stored unchanged under ``"src"``.

    Returns:
        dict: ``id``, ``name``, the sorted ``intended`` / ``generated`` codes
        (restricted to ``DNSSECRelatedErrors``; ``None`` when empty), their
        ``difference`` (``None`` when nothing is missing and no exception was
        recorded, ``["EXCEPTION_IN_CODE"]`` when nothing is missing but an
        exception was), ``exceptions``, ``diff_str`` (JSON text of the
        difference), ``number_of_steps_to_fix`` and ``src``; plus, for the
        n-th fix transition, ``after_fix_n`` and, when at least one command
        was recognised, ``cmd_n`` (string form of the command list).
    """
    js = json.loads(line)
    relevant = set(DNSSECRelatedErrors)  # built once per line instead of twice
    intended = relevant.intersection(js.get("intended_errcodes", []))
    generated = relevant.intersection(js.get("generated_errcodes", []))
    exception = js.get("exception", None)

    diff = intended - generated - codes_to_ignore
    if not diff:
        # Nothing is missing: either flag the recorded exception or report
        # "no difference".
        diff = {"EXCEPTION_IN_CODE"} if exception is not None else None

    if exception is None:
        exceptions = None
    elif isinstance(exception, str):
        # Sorting a bare string would explode it into sorted characters;
        # keep the message intact as a one-element list instead.
        exceptions = [exception]
    else:
        exceptions = sorted(exception)

    res = {
        "id": int(js.get("id")),
        "name": js.get("zone_name"),
        "intended": sorted(intended) if intended else None,
        "generated": sorted(generated) if generated else None,
        "difference": sorted(diff) if diff is not None else None,
        "exceptions": exceptions,
        "diff_str": json.dumps(sorted(diff) if diff is not None else []),
        "number_of_steps_to_fix": js.get("fix_itterations", None),
        "src": src,
    }

    for step, transition in enumerate(js.get("fix_transition_errcodes", []), start=1):
        res[f"after_fix_{step}"] = transition["errors_after_fix"] or None
        commands = _ordered_unique_commands(transition["fixes"])
        if commands:
            res[f"cmd_{step}"] = str(commands)
    return res


# Importing dataset results

In [7]:
# Parse the dataset: one record per non-blank line.  nb_line counts the
# records that were parsed and is exported at the end of the notebook.
datas = []
nb_line = 0
# Explicit encoding so the result does not depend on the platform default.
with open(FILEPATH_TO_DATASET, encoding="utf-8") as f:
    for line in f:
        if not line.strip():
            # A blank line is not a record and json.loads would raise on it.
            continue
        datas.append(data_from_line(line, src="dnsviz-full"))
        nb_line += 1

df = pd.DataFrame(datas)

## DataFrame manipulations

In [8]:
# String renderings of the list-valued and command columns.
df["generated_str"] = df["generated"].apply(str)
df["intended_str"] = df["intended"].apply(str)
for _cmd_col in ("cmd_1", "cmd_2", "cmd_3", "cmd_4"):
    df[f"{_cmd_col}_str"] = df[_cmd_col].apply(str)
# Not applied: normalising "['SYNC_SERVERS', 'SIGN_ZONE']" to
# "['SIGN_ZONE', 'SYNC_SERVERS']".

# These command lists are treated as the same, as we sign the zone when we
# remove a DS.
df.replace("['REMOVE_DS', 'SIGN_ZONE']", "['REMOVE_DS']", inplace=True)
df.replace("['REMOVE_DS', 'SIGN_ZONE', 'SYNC_SERVERS']", "['REMOVE_DS', 'SYNC_SERVERS']", inplace=True)

# Split the rows that have intended errors into "only
# NONZERO_NSEC3_ITERATION_COUNT" and "anything else".  The previous only_nzic
# predicate was True for every row with intended errors; it now requires the
# intended list to be exactly that single code.
_NZIC_ONLY = "['NONZERO_NSEC3_ITERATION_COUNT']"
df["only_nzic"] = df["intended"].apply(
    lambda x: x is not None and str(x) == _NZIC_ONLY
).astype(bool)
df["not_only_nzic"] = df["intended"].apply(
    lambda x: x is not None and str(x) != _NZIC_ONLY
).astype(bool)

df_nzic_only = df[df["only_nzic"]]
df_not_nzic_only = df[df["not_only_nzic"]]

## Extracted information used in Dynamic Numbers

### Extract simple numbers (not complex DataFrame manipulation)

In [9]:
# Row masks shared by the counters below.
has_intended = df["intended"].notna()
has_generated = df["generated"].notna()
no_difference = df["difference"].isna()

# len() keeps these plain Python ints, which json.dump can serialise.
total_with_error = len(df[has_intended])
total_with_generation = len(df[has_generated])
total_where_all_generated = len(df[has_generated & no_difference])
total_with_error_nssic = len(df_not_nzic_only)
total_with_generation_nssic = len(
    df_not_nzic_only[df_not_nzic_only["generated"].notna()]
)
total_where_all_generated_nssic = len(
    df_not_nzic_only[
        df_not_nzic_only["generated"].notna() & df_not_nzic_only["difference"].isna()
    ]
)

# Distinct error codes seen among the intended / generated lists.
set_of_errors = {err for errors in df["intended"].dropna() for err in errors}
set_of_generation_errors = {
    err for errors in df["generated"].dropna() for err in errors
}


def _count_unfixable(frame):
    """Count fully generated rows whose fix sequence stalls.

    A row stalls when it has no first command, when errors remain after step
    n (n = 1..3) but there is no command for step n + 1, or when errors still
    remain after step 4.  Only rows with intended errors and no difference
    are considered.
    """
    stalled = frame["cmd_1"].isna()
    for step in (1, 2, 3):
        stalled = stalled | (
            frame[f"after_fix_{step}"].notna() & frame[f"cmd_{step + 1}"].isna()
        )
    stalled = stalled | frame["after_fix_4"].notna()
    eligible = frame["intended"].notna() & frame["difference"].isna()
    return len(frame[eligible & stalled])


current_df = df
cannot_fix_with_nzsic = _count_unfixable(current_df)

current_df = df_not_nzic_only
cannot_fix_without_nzsic = _count_unfixable(current_df)

latex_export["nb_files_with_errors"] = total_with_error
latex_export["nb_were_we_generated_errors"] = total_with_generation
latex_export["nb_where_generated_errors_covers_intended"] = total_where_all_generated
latex_export["nb_files_with_errors_nnsic"] = total_with_error_nssic
latex_export["nb_were_we_generated_errors_nnsic"] = total_with_generation_nssic
latex_export["nb_where_generated_errors_covers_intended_nnsic"] = total_where_all_generated_nssic
latex_export["errors_type_encountered"] = len(set_of_errors)
latex_export["errors_type_generated"] = len(set_of_generation_errors)

latex_export["case_cannot_be_fixed"] = cannot_fix_with_nzsic
latex_export["case_cannot_be_fixed_without_nzic"] = cannot_fix_without_nzsic
latex_export["errors_combination"] = len(set(df[has_intended]["intended"].apply(str)))
# "Fully covered" means nothing of the intended errors is missing, i.e. the
# difference is empty.  The previous filter kept the rows whose difference
# was *not* empty, which are the combinations that are not fully covered.
# NOTE(review): this changes the exported value -- check it against the paper.
latex_export["errors_combination_fully_covered"] = len(
    set(df[has_intended & no_difference]["intended"].apply(str))
)
latex_export["errors_combination_covered"] = len(set(df[has_generated]["intended"].apply(str)))


### Complex DataFrame manipulation


### DFixer evaluation (Table 6)

In [10]:
# pandas cannot compare list cells (lists are not hashable), so the command
# lists are kept as strings and parsed back into lists here.

def str_to_list(l):
    """Parse the string form of a list (as produced by ``str(list)``).

    The text is read as a Python literal first, which copes with elements
    that contain quote characters.  Text that is not a Python literal falls
    back to the former behaviour: single quotes are swapped for double quotes
    and the result is parsed as JSON.

    Args:
        l: String such as ``"['SIGN_ZONE', 'REMOVE_DS']"``.

    Returns:
        list: The parsed elements, in order.

    Raises:
        json.JSONDecodeError: If the text is neither a Python literal nor,
            after quote replacement, valid JSON.
    """
    import ast

    try:
        parsed = ast.literal_eval(l)
    except (ValueError, SyntaxError):
        parsed = json.loads(l.replace("'", '"'))
    return list(parsed)

# Work on the rows that have intended errors other than only
# NONZERO_NSEC3_ITERATION_COUNT and for which no difference was recorded.
current_df = df_not_nzic_only.copy()
current_df = current_df[current_df["difference"].isna()]


def _command_counter(frame, gate_col, cmd_col):
    """Count individual commands of ``cmd_col`` over the rows where both
    ``gate_col`` and ``cmd_col`` are set."""
    selected = frame[frame[gate_col].notna() & frame[cmd_col].notna()]
    return Counter(selected[cmd_col].apply(str_to_list).explode().tolist())


# Step 1 is gated on generated errors; step n > 1 on errors remaining after
# step n - 1.
cc1 = _command_counter(current_df, "generated", "cmd_1")
cc2 = _command_counter(current_df, "after_fix_1", "cmd_2")
cc3 = _command_counter(current_df, "after_fix_2", "cmd_3")
cc4 = _command_counter(current_df, "after_fix_3", "cmd_4")

# Per step: command name (as string) -> count, in most-common order, together
# with the total number of counted commands.
c1 = {str(cmd): count for cmd, count in cc1.most_common()}
nb_c1 = sum(cc1.values())

c2 = {str(cmd): count for cmd, count in cc2.most_common()}
nb_c2 = sum(cc2.values())

c3 = {str(cmd): count for cmd, count in cc3.most_common()}
nb_c3 = sum(cc3.values())

c4 = {str(cmd): count for cmd, count in cc4.most_common()}
nb_c4 = sum(cc4.values())

# Commands shown in the table: the (at most) ten most common first-step
# commands.  The matching selections for steps 2-4 were disabled in the
# notebook and stay disabled, as does the explicit "nan" entry.
to_show = {str(cmd) for cmd, _count in cc1.most_common(10)}

# Every command name seen at any step.  For step 1 a None key is recorded as
# "nan"; for the other steps the plain string form is used.
total = {"nan" if cmd is None else str(cmd) for cmd in cc1}
for later_counter in (cc2, cc3, cc4):
    total = total.union(str(cmd) for cmd in later_counter)



c = {"cmd_1": c1, "cmd_2": c2, "cmd_3": c3, "cmd_4": c4}

# res: command name -> {step column -> count or None} for the commands that
# are shown; other: per-step sum of the counts of all remaining commands.
res = dict()
other = {"cmd": "other", "cmd_1": 0, "cmd_2": 0, "cmd_3": 0, "cmd_4": 0}
for command, counts in c.items():
    for k in total:
        if k in to_show:
            # A command absent from this step is stored as None; the table
            # code replaces it with 0 later on.
            res.setdefault(k, dict())[command] = counts.get(k)
        else:
            other[command] += counts.get(k, 0)

# One table row per shown command: the per-step counts plus a "cmd" label.
# The dictionaries held in res are labelled in place and reused as rows.
datas = []
for k, tmp in res.items():
    tmp["cmd"] = "No command to apply" if (k is None or k == "nan") else k
    datas.append(tmp)


# Build the table: missing counts become 0 and the step columns are integers.
df2 = pd.DataFrame(datas).fillna(0)

step_columns = ["cmd_1", "cmd_2", "cmd_3", "cmd_4"]
for step_column in step_columns:
    df2[step_column] = df2[step_column].astype(int)

# Per-step totals over the shown commands.
sc1, sc2, sc3, sc4 = (df2[step_column].sum() for step_column in step_columns)

# Share of each command within its step, in percent with two decimals.
for pct_column, step_column, step_total in zip(
    ("c1p", "c2p", "c3p", "c4p"), step_columns, (sc1, sc2, sc3, sc4)
):
    df2[pct_column] = df2[step_column].apply(
        lambda x, denom=step_total: round((x / denom * 100), 2)
    )

# Most frequent first-step command first, then a closing "total" row (which
# has no percentage columns).
df2 = df2.sort_values("cmd_1", ascending=False)
df3 = pd.concat(
    [
        df2,
        pd.DataFrame(
            [{"cmd": "total", "cmd_1": sc1, "cmd_2": sc2, "cmd_3": sc3, "cmd_4": sc4}]
        ),
    ]
)
df3.to_csv("generated/steps_with_fixes.csv")

# Report the per-step totals.
for step_number, step_total in enumerate((sc1, sc2, sc3, sc4), start=1):
    print(f"NumberOfStep{step_number} = ", step_total)

# Export the share of SIGN_ZONE and REMOVE_DS for the first three steps.
for key_part, command_name in (("sign", "SIGN_ZONE"), ("removeds", "REMOVE_DS")):
    command_rows = df3[df3["cmd"] == command_name]
    for ordinal, pct_column in (("first", "c1p"), ("second", "c2p"), ("third", "c3p")):
        latex_export[f"pct_{key_part}_{ordinal}_iter"] = list(command_rows[pct_column])[0]


NumberOfStep1 =  149773
NumberOfStep2 =  15387
NumberOfStep3 =  1846
NumberOfStep4 =  36


In [11]:
# Display the final table: one row per shown command plus the "total" row.
df3

Unnamed: 0,cmd_1,cmd_2,cmd_3,cmd_4,cmd,c1p,c2p,c3p,c4p
0,62406,13845,1148,7,SIGN_ZONE,41.67,89.98,62.19,19.44
6,46242,1319,668,29,REMOVE_DS,30.87,8.57,36.19,80.56
1,14066,117,12,0,UPLOAD_DS,9.39,0.76,0.65,0.0
4,13148,83,0,0,GENERATE_KSK,8.78,0.54,0.0,0.0
5,11391,0,0,0,SYNC_SERVERS,7.61,0.0,0.0,0.0
7,1491,0,0,0,GENERATE_ZSK,1.0,0.0,0.0,0.0
2,947,1,0,0,REDUCING_TTL,0.63,0.01,0.0,0.0
3,82,22,18,0,REMOVE_REVOKED_KEY,0.05,0.14,0.98,0.0
0,149773,15387,1846,36,total,,,,


# Export to the file LaTexData.json for use in DynamicNumbers notebook

In [12]:
# Record how many dataset lines were parsed, then write every exported number
# as JSON indented by four spaces.
latex_export["nb_line"] = nb_line

with open("generated/LaTexData.json", "w") as fp:
    fp.write(json.dumps(latex_export, indent=4))