In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

## Auto scraping data 

In [2]:
import json
import os

import requests
from bs4 import BeautifulSoup

<IPython.core.display.Javascript object>

In [3]:
# Get the file name of the latest schema version.
all_version_url = "https://github.com/osquery/osquery-site/tree/source/src/data/osquery_schema_versions"
req = requests.get(all_version_url, timeout=30)
req.raise_for_status()  # fail here instead of parsing an HTTP error page

# Name the parser explicitly: the generic "html" feature lets bs4 choose
# whichever parser is installed, so results could differ between machines.
soup = BeautifulSoup(req.text, "html.parser")
all_versions = soup.find_all("a", class_="js-navigation-open Link--primary")
if not all_versions:
    raise RuntimeError(
        "No schema version links found; the page markup may have changed."
    )


def _version_key(link):
    """Return a numeric sort key for a version link, e.g. "5.10.0.json" -> (5, 10, 0)."""
    stem = link.text.strip().rsplit(".", 1)[0]  # drop the file extension
    return tuple(int(part) if part.isdigit() else -1 for part in stem.split("."))


# Do not rely on the listing order: compared as text, "5.9.1.json" sorts after
# "5.10.0.json", so the last link is not necessarily the newest release.
last = max(all_versions, key=_version_key)

<IPython.core.display.Javascript object>

In [4]:
# Anchor text scraped from HTML can carry surrounding whitespace; strip it so
# the name is safe to embed in the URL below and in the output file path.
file_name = last.text.strip()
data_url = f"https://raw.githubusercontent.com/osquery/osquery-site/source/src/data/osquery_schema_versions/{file_name}"

<IPython.core.display.Javascript object>

In [5]:
# Download the schema JSON for the selected version.
response = requests.get(data_url, timeout=30)
response.raise_for_status()  # surface HTTP errors (e.g. 404) instead of a JSON decode error
data = response.json()

<IPython.core.display.Javascript object>

In [8]:
# Save the latest schema version as a local JSON file.
os.makedirs("tables_data", exist_ok=True)  # the folder may not exist on a fresh checkout
with open(f"tables_data/new_schema-{file_name}", "w") as f:
    json.dump(data, f)

<IPython.core.display.Javascript object>

# Process graph format data

In [9]:
from functools import reduce

<IPython.core.display.Javascript object>

In [10]:
def process_data(data, os_options):
    """Build the node and edge lists of the table/column relation graph.

    A column is identified by its name together with its description, so
    columns that share both across several tables collapse into one node that
    links those tables.

    Args:
        data: Iterable of table dicts. Each table must provide "name",
            "platforms" and "columns"; each column must provide "name" and
            "description".
        os_options: Platform name. Only tables whose "platforms" contains this
            value are included.

    Returns:
        A ``(nodes, edges)`` tuple.
        ``nodes`` holds one ``{"id": "t-<table>", "name": <table>}`` dict per
        table, followed by one blue-styled ``{"id": "c-<name>-<description>",
        "name": <name>, "itemStyle": ...}`` dict per distinct column.
        ``edges`` holds one ``{"source": <column id>, "target": <table id>}``
        dict per distinct column/table pair.
        Both lists are duplicate-free and in first-seen order, so the result
        is identical from run to run.

    Raises:
        KeyError: If a table or column lacks one of the required keys.
    """
    nodes = []
    edges = []
    seen_nodes = set()
    seen_edges = set()

    def add_node(node):
        # (id, name) distinguishes nodes exactly as whole-dict equality does,
        # because every other field is constant for a given node type.
        key = (node["id"], node["name"])
        if key not in seen_nodes:
            seen_nodes.add(key)
            nodes.append(node)

    # Keep only the tables available on the requested platform.
    os_tables = [table for table in data if os_options in table["platforms"]]

    # Table nodes come first, ahead of every column node.
    for table in os_tables:
        add_node({"id": "t-" + table["name"], "name": table["name"]})

    # A single pass over the columns yields both the column nodes and the
    # column -> table edges; the sets keep the duplicate checks O(1).
    for table in os_tables:
        table_id = "t-" + table["name"]
        for col in table["columns"]:
            col_id = "c-" + col["name"] + "-" + col["description"]
            add_node(
                {
                    "id": col_id,
                    "name": col["name"],
                    "itemStyle": {"normal": {"color": "blue"}},
                }
            )
            edge_key = (col_id, table_id)
            if edge_key not in seen_edges:
                seen_edges.add(edge_key)
                edges.append({"source": col_id, "target": table_id})

    return nodes, edges

<IPython.core.display.Javascript object>

# Converting & Output
### Convert the data to pyecharts format and output it as HTML

In [11]:
import pyecharts.options as opts
from pyecharts.charts import Graph

<IPython.core.display.Javascript object>

In [12]:
# Load the schema file saved earlier in this notebook. The name follows
# `file_name` rather than a pinned "5.3.0" so it tracks the scraped version.
with open(f"tables_data/new_schema-{file_name}") as f:
    data = json.load(f)

<IPython.core.display.Javascript object>

In [13]:
# Build the graph data once; calling process_data separately for nodes and
# links would repeat the same work on identical input.
nodes, links = process_data(data, "windows")

# Render the force-directed graph to an HTML file. The chained expression is
# left as the cell's last statement so the output path is displayed.
(
    Graph(init_opts=opts.InitOpts(width="80vw", height="50vw"))
    .add(
        series_name="",
        nodes=nodes,
        links=links,
        layout="force",
        is_roam=True,
        is_focusnode=True,
        is_draggable=True,
        label_opts=opts.LabelOpts(is_show=False),
        linestyle_opts=opts.LineStyleOpts(width=0.5, curve=0.3, opacity=0.7),
        gravity=0.3,
        repulsion=60,
    )
    .set_global_opts(title_opts=opts.TitleOpts(title="osquery tables -Windows"))
    .render("osq_tables_windows(git).html")
)

'C:\\Users\\darri\\Desktop\\Projects\\osquery_tables\\osquery_tables-relation\\osq_tables_windows(git).html'

<IPython.core.display.Javascript object>