In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import json
import time
import glob 
import os
import pandas as pd
import numpy as np
import ray
import h2o
# Initialise H2O for the decision-tree analysis further down.
# NOTE(review): nthreads and max_mem_size are hard-coded here; adjust them to the machine
# that runs this notebook.
h2o.init(nthreads=70, max_mem_size="250G")
#ray.init(num_cpus=29, ignore_reinit_error=True)
import qgrid

from tree import generalize, create_tree_dirs

# Analysis

In [None]:
# Load the most recently created result pickle matching data/res-*.
result_files = glob.glob("data/res-*")
if not result_files:
    # Without this guard max() fails with an opaque "arg is an empty sequence" error.
    raise FileNotFoundError("no result files matching 'data/res-*' were found")
latest_res = max(result_files, key=os.path.getctime)
# NOTE(review): unpickling can execute arbitrary code; only load result files that were
# produced by this project.
res = pd.read_pickle(latest_res)
res.info()

In [None]:
#qgrid.show_grid(res, show_toolbar=True)

## Error analysis

## Generalize data

In [None]:
# Show every result row whose url_id is 0.
res[res["url_id"].eq(0)]

In [None]:
# Remove all retested rows. The error analysis is done above; two identical tests with
# different outcomes are a problem for the decision trees, as they can no longer reach
# 100% accuracy.
not_retested = res["retest"].eq(False)
res = res.loc[not_retested]

In [None]:
# Random 10% sample of the img results for browser_id 1, restricted to the response
# properties plus two observed outcomes.
img_browser1_mask = (res["inc_method"] == "img") & (res["browser_id"] == 1)
img_sample_columns = [
    'Status-Code',
    'body',
    'X-Content-Type-Options',
    'X-Frame-Options',
    'Content-Type',
    'Content-Disposition',
    'Cross-Origin-Resource-Policy',
    'Cross-Origin-Opener-Policy',
    'Location',
    'op_el_width',
    'event_set',
]
res.loc[img_browser1_mask, img_sample_columns].sample(frac=0.1)

In [None]:
# img results of browser_id 1 that were served as an attachment with the body below.
attachment_mask = (
    (res["inc_method"] == "img")
    & (res["browser_id"] == 1)
    & (res["Content-Disposition"] == "attachment")
    & (res["body"] == "ecocnt_img=width=50,height=50,type=png")
)
res.loc[attachment_mask, ["url_id", "op_el_width"]]

In [None]:
# Downloadbar height is buggy in Chromium (the automation bar is flaky). The size of the
# automation bar is 44 max, so the height is reduced to a binary feature.
# NOTE(review): the cut-off used below is "> 45" while the note above says 44 max;
# confirm the extra slack is intended.
res["gp_download_bar_height"] = res["gp_download_bar_height"].astype("int")
# Vectorised comparison instead of a per-element lambda; it yields the same boolean column.
res["gp_download_bar_height_bin"] = res["gp_download_bar_height"] > 45
res["gp_download_bar_height_bin"].describe()

In [None]:
# Smooth securitypolicyviolation: in Firefox the current URL is included, so every value
# containing "noauth" is collapsed into one canonical string.
smoothed_violation = "URL: https://172.17.0.1:44300/leaks/<redacted>/noauth/ Directive: default-src StatusCode: 0"
res["gp_securitypolicyviolation"] = res["gp_securitypolicyviolation"].apply(
    lambda violation: violation if "noauth" not in violation else smoothed_violation
)

In [None]:
# Shut down ray.
# NOTE(review): the ray.init call in the import cell is commented out, so this looks like
# a leftover from the parallel variant; confirm it is still needed.
ray.shutdown()

In [None]:
# Count the body entries per inclusion method and browser.
(
    res
    .groupby(["inc_method", "browser"])["body"]
    .count()
)

In [None]:
# Firefox "error/suspend" is buggy: merge it with "error", as an attacker can simply
# listen only to the error event, which is distinct from loadedmetadata in Firefox.
res["event_set_smooth"] = res["event_set"].apply(
    lambda event_set: event_set if event_set != "['error', 'suspend']" else "['error']"
)
(
    res
    .groupby("browser")["event_set_smooth"]
    .value_counts()
)

In [None]:
# Only use the most important StatusCodes: https://www.ionos.com/digitalguide/hosting/technical-matters/the-most-important-http-status-codes-at-a-glance/
#res = res.loc[res["Status-Code"].isin([200, 301, 302, 400, 403, 404, 500, 502, 503, 504])]

# Settings that deviate from the H2O DRF defaults, plus the locations used for the trees.
# https://docs.h2o.ai/h2o/latest-stable/h2o-docs/data-science/drf.html
# NOTE(review): h2o_jar and base_dir are absolute, machine-specific paths.
config = dict(
    h2o_jar="/home/xsleaker/h2o-3.32.1.3/h2o.jar",
    base_dir="/data/data/main/analysis/trees/group_size",
    ntrees=1,
    max_depth=0,  # Limit the depth of the tree (0: unlimited)
    min_rows=1,  # Minimum number of rows for a leaf node
    stopping_rounds=0,
    seed=29,
    mtries=-2,
    sample_rate=1,
    min_split_improvement=0,
)

In [None]:
# Create decision trees and the info df/json for every test_property with the given
# prediction properties: one global tree per test_property, then one per inclusion
# method, then one per inclusion method x browser.
import warnings

# Constant columns are simply ignored, so the matching warning is only noise.
warnings.filterwarnings("ignore", message="Dropping bad")
# Not having a test dataset is exactly what we want here.
warnings.filterwarnings("ignore", message="Sample rate")

create_tree_dirs(res.browser_id.unique(), config)

test_properties = [
    "event_set",
    "gp_window_onerror",
    "gp_window_onblur",
    "gp_window_postMessage",
    "gp_window_getComputedStyle",
    "gp_window_hasOwnProperty",
    "gp_download_bar_height_bin",
    "gp_securitypolicyviolation",
    "op_el_height",
    "op_el_width",
    "op_el_naturalHeight",
    "op_el_naturalWidth",
    "op_el_videoWidth",
    "op_el_videoHeight",
    "op_el_duration",
    "op_el_networkState",
    "op_el_readyState",
    "op_el_buffered",
    "op_el_paused",
    "op_el_seekable",
    "op_el_sheet",
    "op_el_media_error",
    "op_el_contentDocument",
    "op_frame_count",
    "op_win_window",
    "op_win_CSS2Properties",
    "op_win_origin",
    "op_win_opener",
    "op_win_history_length",
    "event_set_smooth",
    "event_list",
]
prediction_properties = [
    "Status-Code",
    "body",
    "X-Content-Type-Options",
    "X-Frame-Options",
    "Content-Type",
    "Content-Disposition",
    "Cross-Origin-Resource-Policy",
    "Cross-Origin-Opener-Policy",
    "Location",
    "browser_id",
    "inc_method",
]
inc_methods = res.inc_method.cat.categories
# inc_methods = ["window.open"]

start = time.time()
# The properties are processed one after another; the earlier ray-based variant
# (ray.put / generalize.remote / ray.get) is disabled.
gen_ids = []
for test_property in test_properties:
    tree_info = generalize(
        res,
        [test_property],
        prediction_properties,
        inc_methods,
        overwrite=True,
        log=True,
        config=config,
    )
    gen_ids.append(tree_info)

data = gen_ids
took = time.time() - start
print(f"took {took} seconds, dot to svg might still be running!")

In [None]:
# Merge the per-property result dicts into one dict, taking the first entry of each.
gen_dict = {}
for property_result in data:
    first_key, first_value = list(property_result.items())[0]
    gen_dict[first_key] = first_value

In [None]:
# Pretty-print the merged results as indented JSON.
gen_json = json.dumps(gen_dict, indent=4)
print(gen_json)

In [None]:
# Flatten the three nesting levels of gen_dict into tuple keys so that pandas builds a
# three-level row index from them.
reform = {}
for outer_key, middle_dict in gen_dict.items():
    for middle_key, inner_dict in middle_dict.items():
        for inner_key, values in inner_dict.items():
            reform[(outer_key, middle_key, inner_key)] = values

df = pd.DataFrame(reform).T
df.index.names = ["test_property", "inc_method", "browser"]
#qgrid.show_grid(df.reset_index(), show_toolbar=True)  # qgrid has some problems with multiindex
df

In [None]:
# Build the LaTeX overview table: one row per (test_property, browser) and one column per
# inclusion method, filled from the unique_count values.
tab = df["unique_count"].unstack("inc_method").reset_index()
tab = tab.loc[tab["browser"].isin([1, 2])]
tab["browser"] = tab["browser"].replace({1: "Firefox", 2: "Chrome"})
tab = tab.set_index(["test_property", "browser"])
#display(tab)
# Raw strings are required for the LaTeX macros: in a normal string literal "\t" is a TAB
# character, so "\times" would end up in the .tex file as <TAB>imes, and "\c" is an
# invalid escape sequence.
# Every cell that is not < 2 becomes \checkmark, every remaining cell equal to 1 becomes \times.
tab = tab.where(tab < 2, r"\checkmark")
tab = tab.replace({1: r"\times"})
#display(tab)
tab = tab.drop("event_set_smooth")
tab = tab.reset_index()
# Replace underscores literally with dashes (escape=False is used for the LaTeX export below).
tab["test_property"] = tab["test_property"].str.replace("_", "-", regex=False)
tab.columns = tab.columns.str.replace("_", "-", regex=False)
tab = tab.set_index(["test-property", "browser"])

display(tab)
with open("tables/leak_tests.tex", "w") as f:
    f.write(tab.to_latex(escape=False ,header=['\\rotatebox{90}{' + c + '}' for c in tab.columns]))

## Value counts

- Investigate how many different outcomes every leak channel (test_property + inc_method per browser) has
- How likely are the different outcomes?
- How likely is it that two random responses belong to different groups/outcomes?
    - Just 1 - max value? (Every group has a different result, so the probability of not landing in the largest group would be the probability of getting two different responses when choosing two??)
    - Problem: our response space has nothing to do with the real distribution of responses, so this does not make a lot of sense
    - However, it can give more insight into stable/unstable leaks and into which leaks work the same way??

In [None]:
def expand_frame(row):
    """Expand the nested value counts of one leak channel into one flat row per outcome.

    Parameters
    ----------
    row
        The group handed over by ``groupby(...).apply``. Only its first entry
        (``row.iloc[0]``) is read, via the keys ``test_property``, ``inc_method``,
        ``browser`` and ``value_counts``.

    Returns
    -------
    pandas.DataFrame
        Columns ``t``, ``i``, ``b`` (test property, inclusion method, browser, repeated
        on every row) and ``v``, ``c``, ``p`` (the first three columns of the value
        counts table, read by position). Rows are numbered from 0.
    """
    first = row.iloc[0]
    test_property = first["test_property"]
    inc_method = first["inc_method"]
    browser = first["browser"]
    # The last row of the value counts table is dropped.
    # NOTE(review): presumably a summary/total row - confirm against `generalize`.
    value_counts = pd.DataFrame(first["value_counts"]).iloc[:-1, :]
    # Collect all rows first and build the frame once, instead of growing it row by row.
    records = [
        [test_property, inc_method, browser, row_v.iloc[0], row_v.iloc[1], row_v.iloc[2]]
        for _, row_v in value_counts.iterrows()
    ]
    return pd.DataFrame(records, columns=["t", "i", "b", "v", "c", "p"])

# Expand the nested value_counts of every (browser, test_property, inc_method) group
# into flat rows.
value_count_cells = df["value_counts"].reset_index()
channel_groups = value_count_cells.groupby(
    ["browser", "test_property", "inc_method"], group_keys=False
)
val_counts = channel_groups.apply(expand_frame)

In [None]:
# Number of value-count rows per (t, i, b) combination.
piv = val_counts.pivot_table(index=["t", "i", "b"], values=["c", "p"], aggfunc="count")
display(piv)
# Keep only the combinations that have exactly one row; they are excluded below.
piv = piv.loc[piv["c"] == 1]
# Throw away all rows with only 1 value as they cannot work?
# Throw away all "all" browsers as not really interesting (except for distinguishing browsers)
vals = val_counts.loc[val_counts["b"] != "all"]
vals = vals.set_index(["t", "i", "b"])
vals = vals.loc[~vals.index.isin(piv.index)].reset_index()
# Use the fully qualified option name: the bare pattern "max_rows" is ambiguous on pandas
# versions that also define "styler.render.max_rows" and then raises an OptionError.
with pd.option_context("display.max_rows", 1000):
    display(vals.set_index(["b", "i", "t"]))

piv = vals.pivot_table(index=["t", "i", "b"], values=["c", "p"], aggfunc=["max", "min", "count"]).sort_values(("max", "p"))
with pd.option_context("display.max_rows", 1000):
    display(piv)

In [None]:
# reform = {(level1_key, level2_key, level3_key, level4_key): values
#          for level1_key, level2_dict in check_dict.items()
#          for level2_key, level3_dict in level2_dict.items()
#          for level3_key, level4_dict in level3_dict.items()
#          for level4_key, values      in level4_dict.items()}
# df = pd.DataFrame(reform).T
# df.index = df.index.set_names(["test_property", "inc_method", "browser", "pred_property"])
# qgrid.show_grid(df.reset_index(), show_toolbar=True)  # qgrid has some problems with multiindex

- 

## Playground

In [None]:
# Investigate strange results (browsers just behave strangely and ignore all kinds of headers).
width_50_browser1 = (res["op_el_width"] == "50") & (res["browser_id"] == 1)
res.loc[width_50_browser1, ["op_el_width", "Content-Type", "url_id", "test_id"]]

In [None]:
# op_el_width recorded for test 35515.
res.loc[res["test_id"] == 35515, "op_el_width"]

In [None]:
# Summary statistics of the url_id column.
res.loc[:, "url_id"].describe()

In [None]:
# Chromium-based browsers have some problem with about:blank, window.open?
# Count the observed op_win_history_length values per browser and inclusion method.
history_length_counts = (
    res
    .groupby(["browser_id", "inc_method"])["op_win_history_length"]
    .value_counts()
)
his = history_length_counts.to_frame().rename(columns={"op_win_history_length": "count"})

# qgrid.show_grid(his, show_toolbar=True)
his

In [None]:
#res.query("op_win_history_length == 'error occured' and browser_id == 2 and inc_method == 'window.open'")