# VirusTotal Behavior with Microsoft Sysmon Detonation

This notebook uses the VirusTotal API to retrieve detonation information
for a file ID (the SHA256 hash of the file).

There are 5 main sections:
- Setup
- Retrieve basic information about the file ID
- Retrieve and browse denotation data for the file
- Build and view the process tree for the suspect behavior
- Generate KQL filter clauses for subsets of the data to use in Azure Sentinel queries.

In [None]:
# If you don't have msticpy (with the vt3 option) installed, you'll need that
%pip install --upgrade msticpy[vt3]

In [1]:
# initialize msticpy
REQ_PYTHON_VER=(3, 6)
REQ_MSTICPY_VER=(1, 0, 0)

from msticpy.nbtools import nbinit
nbinit.init_notebook(
    namespace=globals(),
);

In [3]:
from msticpy.sectools.vtlookupv3 import VTLookupV3
from msticpy.common.provider_settings import get_provider_settings
import nest_asyncio

nest_asyncio.apply()

# Initialize VT Client
vt_settings = get_provider_settings("TIProviders").get("VirusTotal")

vt = VTLookupV3(vt_settings.args.get("AuthKey"))

# VirusTotal Basic File Data

This section retrieves basic VT data for a file hash.

Running the following cell will show a form in which you can enter
a file ID (the SHA256 hash of the file).

Clicking the **Lookup** button will search VirusTotal for a match and 
populate the summary details in the form. Selecting a result attribute (on the left)
shows the data for that attribute.

The file ID entered here is used in subsequent parts of the notebook
to download the behavior information.

In [75]:
import pprint
import ipywidgets as widgets
import pandas as pd

vt_df: pd.DataFrame = None
_NOT_FOUND = "Not found"

border_layout = widgets.Layout(
    **{
        "width": "90%",
        "border": "solid gray 1px",
        "margin": "1pt",
        "padding": "5pt",
    }
)


def _ts_to_pydate(data):
    """Replace Unix timestamps in VT data with Py/pandas Timestamp."""
    for date_col in (col for col in data.columns if col.endswith("_date")):
        data[date_col] = pd.to_datetime(data[date_col], unit="s", utc=True)
    return data


def get_summary(data=None):
    data = data if data is not None else vt_df
    def_dict = {"sha256": "", "meaningful_name": "", "names": "", "magic": ""}
    if data is None:
        return def_dict
    if (
        "first_submission_date" not in data.columns
        or data.iloc[0]["first_submission_date"] == _NOT_FOUND
    ):
        def_dict["sha256"] = _NOT_FOUND
        return def_dict
    return data[["sha256", "meaningful_name", "names", "magic"]].iloc[0].to_dict()


def summary_html(title, summary):
    return f"""
    <h3>{title}</h3>
    <table>
    <tr>
        <td>ID</td><td>{summary.get('sha256')}</td>
    </tr>
    <tr>
        <td>Names</td><td>{summary.get('names')}</td>
    </tr>
    <tr>
        <td>File Type</td><td>{summary.get('magic')}</td>
    </tr>
    </table>
    """


def lookup_file_id(btn):
    del btn
    global vt_df
    vt_df = vt.get_object(txt_file_id.value, vt_type="file").T
    html_header.value = summary_html(basic_title, get_summary(vt_df))
    if (
        "first_submission_date" not in vt_df.columns
        or vt_df.iloc[0]["first_submission_date"] == _NOT_FOUND
    ):
        data_sel.options = []
        return
    vt_df = _ts_to_pydate(vt_df)
    data_sel.options = vt_df.columns
    

data_sel = widgets.Select(
    description="Attribute",
    layout=widgets.Layout(height="400px")
)

data_view = widgets.Textarea(
    description="Value",
    layout=widgets.Layout(height="400px", width="60%")
)


def _display_attribute(change):
    item = change.get("new")
    if item in vt_df.columns:
        data = vt_df.iloc[0][item]
        data_view.value = pprint.pformat(data)
    else:
        data_view.value = ""
    
data_sel.observe(_display_attribute, names="value")


txt_file_id = widgets.Text(
    description="Enter file ID (hash)",
    layout=widgets.Layout(width="70%"),
    style={"description_width": "150px"},
)

btn_lookup = widgets.Button(description="Lookup")
btn_lookup.on_click(lookup_file_id)

basic_title = "VirusTotal File hash lookup"
html_header = widgets.HTML(summary_html(
    basic_title,
    get_summary()
), layout=border_layout)

hb_file_lookup = widgets.HBox([txt_file_id, btn_lookup], layout=border_layout)
hb_vt_attribs = widgets.HBox([data_sel, data_view], layout=border_layout)
display(widgets.VBox([html_header, hb_file_lookup, hb_vt_attribs]))

VBox(children=(HTML(value='\n    <h3>VirusTotal File hash lookup</h3>\n    <table>\n    <tr>\n        <td>ID</â€¦

# Get Microsoft Detonation Details from Virus Total

This section tries to retrieve detonation behavior details from the
file ID selected in the previous cell.

The output is split into groups of related data items such as file operations, registry operations, etc.
Within each tabbed group, there are individual data sets in their own tabs.

The process tree data is included in this data set but is easier to view in the following
section with the process tree viewer.

In [86]:
# Retrieve Behavior results
import requests
import re
from pprint import pformat

vt_root = "https://www.virustotal.com/api/v3"

file_behavior_uri = f"{vt_root}/files/{{id}}/behaviour_summary"
ms_file_behavior_uri = f"{vt_root}/file_behaviours/{{id}}_Microsoft Sysinternals"

hdr = {"headers": {"X-Apikey": vt_settings.args.get("AuthKey")}}


def get_vt_file_behavior(file_id):
    resp = requests.get(file_behavior_uri.format(id=file_id), **hdr)
    if resp.status_code == 200:
        return resp.json().get("data")


def get_vt_file_behavior_mssi(file_id):
    resp = requests.get(ms_file_behavior_uri.format(id=file_id), **hdr)
    if resp.status_code == 200:
        return resp.json().get("data", {})

# Use the Microsoft/Sysmon data
file_behavior = get_vt_file_behavior_mssi(txt_file_id.value)
# Use the aggregated VT behavior
# file_behavior = get_vt_file_behavior(txt_file_id.value)

if "attributes" in file_behavior:
    categories = file_behavior.get("attributes")
    behavior_links = file_behavior.get("links")
    analysis_date = pd.Timestamp(categories["analysis_date"])
else:
    categories = file_behavior
    behavior_links = file_behavior.get("links")
    analysis_date = vt_df.iloc[0].last_analysis_date

_CAT_PATTERNS = {
    "File": "file.*",
    "Process": "process.*|command.*|module.*",
    "Registry": "registry.*",
    "Network": ".*ips|dns.*|.*urls|ip.*|http.*|tls",
    "System": "mutex.*|calls.*|permissions.*|text.*",
    "Other": ".*"
}


def extract_subcats(pattern, categs):
    return {cat for cat in categs if re.match(pattern, cat)}


def data_format(data_item):
    if not data_item:
        return ""
    if isinstance(data_item, list):
        if isinstance(data_item[0], dict):
            return pd.DataFrame(data_item).style.hide_index().render()
        if isinstance(data_item[0], str):
            return pd.DataFrame(pd.Series(data_item)).style.hide_index().render()
    return f"<pre>{pformat(data_item)}</pre>"
        

groupings = {}
remaining_categories = set(categories)
for name, pattern in _CAT_PATTERNS.items():
    groupings[name] = extract_subcats(pattern, remaining_categories)
    remaining_categories = remaining_categories - groupings[name]

main_tab = widgets.Tab()
child_tabs = {}
for group, sub_cats in groupings.items():

    sub_cat_tab = widgets.Tab()
    tab_content = {
        section: widgets.HTML(value=data_format(items))
        for section, items in categories.items()
        if items and section in sub_cats
    }
    sub_cat_tab.children = list(tab_content.values())
    for idx, section in enumerate(tab_content):
        sub_cat_tab.set_title(idx, section)
    child_tabs[group] = sub_cat_tab

main_tab.children = list(child_tabs.values())
for idx, group_name in enumerate(child_tabs):
    main_tab.set_title(idx, group_name)

html_title = widgets.HTML(summary_html(
    "VT Detonation Details",
    get_summary()
), layout=border_layout)
display(widgets.VBox([html_title, main_tab]))


VBox(children=(HTML(value="\n    <h3>VT Detonation Details</h3>\n    <table>\n    <tr>\n        <td>ID</td><tdâ€¦

# Display Process Tree

## The first cell extracts the behavior tree and builds the process tree dataframe.

In [83]:
from copy import deepcopy, copy
import attr
from pathlib import Path


@attr.s(auto_attribs=True)
class SIProcess:

    process_id: str
    name: str
    cmd_line: str
    parent_id: int = -1
    proc_key: str = None
    parent_key: str = None
    path: str = None
    IsRoot: bool = False
    IsLeaf:bool = False
    IsBranch: bool = False
    children: list = []
    # proc_children: list = []
    time_offset: int = 0


def create_si_proc(raw_proc):
    """Return an SIProcess Object from a raw VT proc definition."""
    # raw_proc = copy(raw_proc)
    name = raw_proc.get("name")
    raw_proc["cmd_line"] = name
    for proc in procs_created:
        if name.lower().endswith(proc):
            raw_proc["name"] = procs_created[proc]
            break
    raw_proc["proc_key"] = raw_proc["process_id"] + "|" + raw_proc["name"]
    # print(name, raw_proc.keys())
    return SIProcess(**raw_proc)

    
def extract_processes(process_data, parent=None):
    """Convert processes_tree attribute to SIProcessObjects."""
    procs = []
    for process in process_data:
        si_proc = create_si_proc(process)
        if parent:
            si_proc.parent_key = parent.proc_key
            si_proc.IsBranch = True
        else:
            si_proc.IsRoot = True
        child_procs_raw = process.get("children", [])
        if child_procs_raw:
            si_proc.children = extract_processes(child_procs_raw, parent=si_proc)
        else:
            si_proc.IsLeaf = True
            si_proc.IsBranch = False
        procs.append(si_proc)
    return procs


# Convert to DF
def procs_to_df(procs):
    """Convert the SIProcess objects to a list."""
    df_list = []
    for proc in procs:
        df_list.append(attr.asdict(proc))
        if proc.children:
            df_list.extend(procs_to_df(proc.children))
    return df_list


proc_tree_raw = deepcopy(categories["processes_tree"])
procs_created = {Path(proc).parts[-1].lower(): proc for proc in categories["processes_created"]}

si_procs = extract_processes(proc_tree_raw)
proc_tree_df = pd.DataFrame(procs_to_df(si_procs)).drop(columns="children")
proc_tree_df.head(4)

Unnamed: 0,process_id,name,cmd_line,parent_id,proc_key,parent_key,path,IsRoot,IsLeaf,IsBranch,time_offset
0,1180,"%CONHOST% ""63884270-409785765-1124711401-907951297-11692469461968206932-2068757506-1933156310","%CONHOST% ""63884270-409785765-1124711401-907951297-11692469461968206932-2068757506-1933156310",-1,"1180|%CONHOST% ""63884270-409785765-1124711401-907951297-11692469461968206932-2068757506-1933156310",,,True,True,False,0
1,1136,"%CONHOST% ""-16106042451564202799-1064727954-1124093528-15773684341340790361861259252-869253858","%CONHOST% ""-16106042451564202799-1064727954-1124093528-15773684341340790361861259252-869253858",-1,"1136|%CONHOST% ""-16106042451564202799-1064727954-1124093528-15773684341340790361861259252-869253858",,,True,True,False,0
2,2804,"%CONHOST% ""-40542629874978953113508502101955653726-2142237983-15632340143136557-1740783057","%CONHOST% ""-40542629874978953113508502101955653726-2142237983-15632340143136557-1740783057",-1,"2804|%CONHOST% ""-40542629874978953113508502101955653726-2142237983-15632340143136557-1740783057",,,True,True,False,0
3,2204,"%CONHOST% ""-291157030-44689599519763476846006265941132675325-6938727871027061752-846573854","%CONHOST% ""-291157030-44689599519763476846006265941132675325-6938727871027061752-846573854",-1,"2204|%CONHOST% ""-291157030-44689599519763476846006265941132675325-6938727871027061752-846573854",,,True,True,False,0


In [87]:
# Try to Match up 'command_executions' commandline data with
# processes.
def try_match_commandlines(command_executions, procs_cmds):
    procs_cmd = procs_cmds.copy()
    procs_cmd["cmd_line"] = np.nan
    weak_matches = 0
    for cmd in command_executions:
        for idx, row in procs_cmd.iterrows():
            # print(row["name"], cmd, row["cmd_line"], isinstance(row["cmd_line"], str))
            if (
                not isinstance(row["cmd_line"], str)
                and np.isnan(row["cmd_line"])
                and row["name"] in cmd
            ):
                # print("Found match:", row["name"], "==", cmd)
                procs_cmd.loc[idx, "cmd_line"] = cmd
                break
    for cmd in command_executions:
        for idx, row in procs_cmd.iterrows():
            # print(row["name"], cmd, row["cmd_line"], isinstance(row["cmd_line"], str))
            if (
                not isinstance(row["cmd_line"], str)
                and np.isnan(row["cmd_line"])
                and Path(row["name"]).stem.lower() in cmd.lower()
            ):
                weak_matches += 1
                # print("Found weak match:", row["name"], "~=", cmd)
                procs_cmd.loc[idx, "cmd_line"] = cmd
                break

    if weak_matches:
        print(
            f"WARNING: {weak_matches} of the {len(command_executions)} commandlines",
            "were weakly matched - some commandlines could be attributed",
            "to the wrong process.",
            end="\n"
        )
    return procs_cmd

proc_tree_cmd_df = try_match_commandlines(categories["command_executions"], proc_tree_df)



## Display the tree

In [85]:
from msticpy.sectools.proc_tree_builder import ProcSchema, _build_proc_tree


# Define a schema to map Df names on to internal ProcSchema
proc_schema = {
    "process_name": "name",
    "process_id": "process_id",
    "parent_id": "parent_id",
    "cmd_line": "cmd_line",
    "time_stamp": "time_stamp",
    "logon_id": "logon_id",
    "path_separator": "\\",
    "user_name": "user_name",
    "host_name_column": "host",
    "event_id_column": "event_id",
}

processes = proc_tree_cmd_df
processes["path"] = np.nan
processes.loc[processes.IsRoot, "path"] = processes[processes.IsRoot].index.astype("str")

# Fill in some required fields with placeholder data
processes["time_stamp"] = analysis_date
processes["host"] = "sandbox"
processes["logon_id"] = "na"
processes["event_id"] = "na"
processes["source_index"] = processes.index.astype("str")

proc_tree = processes.set_index("proc_key")

first_unique = proc_tree.index.duplicated()
proc_tree = proc_tree[~first_unique]
# msticpy function to build the tree
process_tree_df = _build_proc_tree(proc_tree)
process_tree_df

process_tree_df.mp_plot.process_tree(schema=ProcSchema(**proc_schema), legend_col="name", hide_legend=True)

(Figure(id='1457', ...), Row(id='1571', ...))

# KQL Query Clause generator
Use this to generate filter clauses to search for the behaviors.
For example, you can extract the IP addresses or command lines and build
a query to search for matches in your organization.

## Select the category of data that you want to use to generate the query.

In [89]:
sel_category = widgets.Select(
    description="Select a category",
    options=list(categories.keys()),
    layout=widgets.Layout(width="30%", height="200px"),
    style={"description_width": "150px"}
)
txt_values = widgets.HTML(layout=widgets.Layout(width="60%", height="300px"),)
vb_sel_category = widgets.HBox([sel_category, txt_values], layout=border_layout)


def update_cat_values(change):
    cat = change.get("new")
    data = categories.get(cat)
    txt_values.value = data_format(data)


sel_category.observe(update_cat_values, names="value")
update_cat_values({"new": sel_category.value})
display(widgets.VBox([
    widgets.HTML("<h3>Select a category to use in the query builder</h3>"),
    vb_sel_category
]))


VBox(children=(HTML(value='<h3>Select a category to use in the query builder</h3>'), HBox(children=(Select(desâ€¦

## Connect to Azure Sentinel

We need to load an Azure Sentinel query provider and authenticate
in order to retrieve the schema.

In [17]:
qry_prov = QueryProvider("AzureSentinel")
qry_prov.connect(WorkspaceConfig())

Please wait. Loading Kqlmagic extension...done
Connecting... 

connected


## Select table/column and the behavior data values for the query

In [91]:
import re


sel_wgt_layout = widgets.Layout(
    height="300px",
    width="30%",
)
sel_table = widgets.Select(
    description="Table",
    options=list(qry_prov.schema.keys()),
    layout=sel_wgt_layout
)

sel_column = widgets.Select(
    description="Column",
    layout=sel_wgt_layout,
)

def update_columns(change):
    table_name = change.get("new")
    table_schema = qry_prov.schema.get(table_name)
    sel_column.options = list(table_schema.keys())


sel_table.observe(update_columns, names="value")
update_columns({"new": sel_table.value})
lbl_table_column = widgets.Label(value="Choose the table and column to use.")


def get_cat_items(cat):
    """Return data items from the raw data."""
    cat_items = categories.get(cat)
    if not isinstance(cat_items, list):
        return [cat_items]
    if cat == "command_executions":
        return [
            item.strip()[1:-1] if (item.strip()[0] == '"' and item.strip()[-1] == '"') else item.strip()
            for item in cat_items
        ]
    if cat == "files_dropped":
        return [item["path"] for item in cat_items]
    if cat == "ip_traffic":
        return [item["destination_ip"] for item in cat_items]
    if cat == "dns_lookups":
        items = [item["hostname"] for item in cat_items]
        for item in cat_items:
            items.extend(item["resolved_ips"])
        return items
    if not isinstance(cat_items[0], str):
        return [str(item) for item in cat_items]
    return cat_items


sel_subset = nbwidgets.SelectSubset(
    source_items=get_cat_items(sel_category.value),
    default_selected=get_cat_items(sel_category.value),
    auto_display=False
)


KQL_TEMPLATE = """
{table}
| where TimeGenerated > ago(1d)
| where {column} in (\n  {values}\n)
"""

text_query = widgets.Textarea(
    description="KQL query",
    layout=widgets.Layout(width="90%", height="300px"),
)

def create_query_filter(btn):
    del btn
    q_values = [re.sub(r"\\", r"\\\\", val) for val in sel_subset.selected_values]
    text_query.value = KQL_TEMPLATE.format(
        table=sel_table.value,
        column=sel_column.value,
        values=',\n  '.join(f"'{item}'" for item in q_values)
    )

btn_query = widgets.Button(description="Gen KQL query")
btn_query.on_click(create_query_filter)

# Create the widget layout
vb_table_column = widgets.VBox(
    [lbl_table_column, widgets.HBox([sel_table, sel_column])],
    layout=border_layout,
)
lbl_select_items = widgets.Label(value="Select the file detonation values to query")
vb_select_items = widgets.VBox([lbl_select_items, sel_subset.layout], layout=border_layout)

lbl_query = widgets.Label(value="Click the 'Gen KQL query' to generate the template query.")
vb_query = widgets.VBox([lbl_query, btn_query, text_query], layout=border_layout)

hdr_html = widgets.HTML(f"<h3>Create query for '{sel_category.value}' detonation data</h3>")
txt_html = widgets.HTML("To choose data from a different category, return to previous cell to select the category")
display(widgets.VBox([hdr_html, txt_html, vb_table_column, vb_select_items, vb_query]))

VBox(children=(HTML(value="<h3>Create query for 'ip_traffic' detonation data</h3>"), HTML(value='To choose datâ€¦

In [None]:
# paste the query between the quotes
hunting_query = """
// replace this text with the query copied from the KQL query box
"""

results_df = qry_prov.exec_query(hunting_query)
display(results_df)

# TI Additional Lookups

You can use the MSTICPy TI provider to do additional lookups on
values that you see in the behavior data.

By default, TILookup uses all configured providers, including VirusTotal

In [12]:
ioc_txt = widgets.Text(
    description="Enter IoC to lookup",
    layout=widgets.Layout(width="70%"),
    style={"description_width": "150px"},
)

ioc_txt

Text(value='', description='Enter IoC to lookup', layout=Layout(width='70%'), style=DescriptionStyle(descriptiâ€¦

In [13]:
ti = TILookup()
TILookup.browse(TILookup.result_to_df(ti.lookup_ioc(ioc_txt.value)))

Using Open PageRank. See https://www.domcop.com/openpagerank/what-is-openpagerank


VBox(children=(Text(value='', description='Filter:', style=DescriptionStyle(description_width='initial')), Selâ€¦

0,1
OTX,
pulse_count,2
names,"['Baton Rouge Louisiana HoneyPot Tier 2 Indicators:April:2020', 'Baton Rouge Louisiana TPOT 19 Honeypot Events:Jan to April ,2020']"
tags,"[['tsec', 'tpot19', 'honeypot'], ['tsec', 'tpot19', 'honeypot', 'sensor2', 'site2']]"
references,"[['https://github.com/dtag-dev-sec/tpotce'], ['https://github.com/dtag-dev-sec/tpotce']]"

0,1
VirusTotal,
verbose_msg,IP address in dataset
response_code,1
positives,2646
detected_urls,"['https://sanctam.net:58899/assets/txt/resource_url.php?type=xmrig', 'http://sanctam.net/', 'https://sanctam.net:58899/assets/txt/reource_url.php?type=xrig']"
detected_downloaded_samples,[]
detected_communicating_samples,"['174baeb61d7db21bc0034a6add46e2c8b1579b9e0be8bedea96f612debd7b14d', 'a7cfc747d2b20fbbd20f8121c26b46041101e14b0c912afa3e220239f146685e', 'b265df126fbce8762eb7994ef86bbf43569b0d944929e4224631ff50516d6600', 'b2b84dd3a613ca919450f54411dfedd78d9f0d639a63cfa2e3950e24b1905a9c', '81ac2c8a8ff0dc876818fa519f8d68aefa09664895ad0d3d5fa7264c2098cd4c', 'bf6dd18f2b0e3cee849139736f62782e21a3fe612e3624ed83bb6e942e124189', '1ebfb40c2ede81dd3b183fd6dd1d87062f4c259d702cf418775842f47fbc4a70', 'd502159676ef455cea4e4e7eab9041a712cb44ee10d5e46855b679904de0f215', '6e782f3c64ff86c929ca83e6bd1eff5643827f8bdacbe14b837fc3da50fb3af1', 'a2ad1123b64fd19b3baf71bdeed831a285f5f15e659ed5b086e0a5c1305a636b', 'f37be720cca1d26594c0da72ea88097b163d63a82a007a88da84ffd66257f226', '4ea0dcbe01ee1c3cfe00f27b3387718f2bf5e653e5ff5bb1c9f4560cb3ac772d', '494d44992f758d2bf7e7467c4d80cc6e177bfa31c2d3ea0586070ecedc140013', '3cbe259b1979e643b96a54b9cf42c481ea990637b95cddd2ea2678553d655c81', '6c00e0c637807f08989b686166ab69c7c250f2cab452b5a6d88f69cfb79d87b0', '4357e99c0b16f031a460b4f7029d543645b57ef81894ba95a3971db8ca484a74', '620ba6e668c917fa58eca1d431c5fb4387a05efdb7f6ee3d35a5b5b191f56005', 'f5c8076d0500e6ebe329642859ccdfcf9b5a52744b0fdfd6d0f4f373c0d082dc', 'ca2a15f1b30ffcfa8380cd81592efba6a2656cca52ae140dab829c729c4226af', '0ed30d6e1917aacb4c153772048cee51355c4613d8f751f4a106c96bdbe87bb4', 'c3251412361f3c8f1feb1f8714d610e173bee5ee76b528b3ce5b08f5d4e33d42', '1a07287098f02998ba620742af8c93378021c4906fa147221e71261f1d3525c2', '2aae7951764a2b07b3ab0c8dbbe9f9a078aa91423eff13d803d1c0c2b55dd62e', '527240866205fb9efc15176ed80a121949f9fb8653d5c6595dd340784f940fe2', '90d9ff497fbfd8930fa70d8b51a8713f1aba8c14cb67b9974d4ec100e0cdbd40', 'e2f4e09dddeb17cb983b456600baf7911b12f2517a3566ab6bbccd41068e18a7', '11d153d67f1fd06d25efa71acbfec80873cd9ee0e3b39d1c4bb0828dc7a79e3b', '93559ae882e906b2d4742852ee35e8e6ace12466e388bdee96a9823b80c9d3e3', '4382456ae92fe1a07766e5d4e71ddb2438d1df15bf240abd69c3e8d25650fb7a', 'b51d0e4cfd46a35c11b148b290576447696d32b6a5e71c7f1a1f446848a12122', 'fdaef51c05fc328d01d7b447d6b328b663c87420cfb3acb97052a12134c63be4', 'ecee94a7381d376a04a7053a96b705f50801351ff47b1bd1526b4de65f495ea9', 'e44ea3867d4f177bb2a78af566933b4eca8c108231032abc17836c45499f9c7c', '007920ee156050e07b6a26a4a646153a2787cc861d979b3b149dda5efdbb8c9d', '5f9b9420d41c276669212757fc04a562a2823345eec3a64b4a50ce53012e75c3', '13036cccb2f4b3e665d4c44e96294055cc1f9a346bb74f31a12838a4254dc8c6', '485b010335a5694eb91b16a428b4cd1630fb755e32ab91fbff3abbd9777c9ae7', '4bb1227a558f5446811ccbb15a7bfe3e1f93fce5a87450b2f2ea05a0bca36bb2', '206c1935126d36df4168449e3b86cb589dd2ae49d690c8db25336ecefbb9f375', '312ca78402849deb85b5c4cade6f953d462b04b4cb49ceb7cb609ebfd778bca3', '14b6388d1818b598970fe23fd7a402a67f6da55c71eb4d7623ef78aeb22c7c33', 'e9668cbccbe3a79c32e460fc8c776e683cf7684d340f9526d206e83e1f4e3a38', '601a93183e5f8bee310f00649441342d1c92bcbf86a00f277c6c52d94d140ebf', '48f9b4bc2bf75c03449857ff0d6b47c35ab97ed8ba444890ccdd4128d9ae1027', 'af6f6518a5b9d2e63caff650593a465184dd48ccc9a853d9dac06029a98f302a', '694265b2dc50bce5062d298872ff396dfc0e054017ecc209a6083faf8ddee107', '280b30297879e8ab7ee85fa5bf7d569603bb99af2a73b118eb702f90fea910b3', 'f8e357a31685dc13a8e918d0a8c6055816bdefbf39c469ba98b3133a26fed7ee', '7ea1f2272d26497da0feb39526db2d06f93bb378904b6e05f87dd3e256ce9423', 'da374bf0e3e22b934b4f6d0e355887e836a3ddac2a1ded8a186559de69894289', 'c4d056886b6b496a6e19fcf51ff9eac345db8a84185ab98db52d97d39abdb0c9', '783ac8c4306209dbd92f0325cda9fd7b1509ced2c206fe51f613a3395f4a850e', '250f889b8543dd398441cb75a8598856c5857d7d3a68b7510d1016003f13c319', '882996255a6d96033281b6fd3f1a9e159d7e3b2c06d812a49f7ea071f59cd959', '86cc39af1068e4e2ed264a7ca4af13e3e276b36daae7e41cbf8463599bdad74c', '6e33b0e60eb96fa217ace0e1e5c553b761695e10e6dbe45876f15d3ce70f282f', 'fdf690f110af024700212208a25348887b154bea87a942ed98cd3686c8e76794', 'a1ea604ac5c962b538fc3c457c7e48c0aa5d73afd93ba8dca1373649841aba99', '6b6aff74a21bea9a72ddd500ddd5e2cb6eb90225e0a1416e8feef1a3c41b48e4', '910be60f44245852fe795794a3d2b594a59e4d8e46cf9d16162c62bac67c1e71', '31e999c770fd71a221782413099351ebb4dc44aaa458b1ec41d832402ea7a067', '25c424f82dd42aab060068b2919577be3a88bf06ac9e23c077ee27da70477142', '24aceb2c1aca71552f4ee03707f79e9ed0ae5b54dc7441b7d2d5756f6a25960c', '942d28bbc70e857af3241d4e99a95a3c2a255df788dcdf58d02e1da836fbe55d', '71376a715858cc45fc3d088d019ec16785612b4cda2e854dc9cc875a7a07b836', 'a6d26eb10d46076a0e596ef4c627b03f144521bbb05285a8339a82fcfba07a09', 'd2830f6a887ba7bb6edc25caf6d9dc1b6e6e4cc274da2c018e3865a7d9a21818', 'ab4cc6797723718109fca0f1eb08aef86553847d84e5b730ee6b424402b43cf5', 'b75cd2a2c2763a2c90d8934eb9612182a78560858c798dc5f2de55122e2c6629', '0e5d0a682c791b08816c3e68c8727705e67b8ba5aa3448ed165aec095a44362f', 'f0233bd4dad9eb68de18af308af49ba00de09c7b495cacaa9ba6b1e13366d9ea', '849cd964eb748c9858c75534d8a186a69dad9f87b5e9b61b09f86759b653274d', '0ee3e8073450967b218d3d91f6bc6a99092489093151f570d7b6d690cb5081d1', '86d1b027977887b940811a6b0484d285abe41ecae9168984b4270f272ae5b116', 'f14145e38669516955928c4eb58c94f59512ab8dc056a9ae6c3b38eefc2eef20', 'c5483b2acbb352dc5c9a811d9616c4519f0e07c13905552be5ec869613ada775', 'fa32af37f4adac04d84c4cef7d9a8452814e76478dcc943b85d78f1a3833f005', '2d83f9a423fe88ccfceecc4a1a23a03cb1ffde7dddceb62f090e8179231aae02', 'c599b311e39018a16b966c63e7d286114a948b4157123c9ffb3edf27fe101c84', '0f7ceca9ef07e451d3a7a899b52d0d80db5bf69ce5c81f12daa8db9929596eac', '0af56701d1bdaa5ca5b549754160e3dde8bc8e68f77beb9372935b8ce8199aaf', 'f8a06503b503eae44aaaefe4965fc981c9a18a56ddd8a7bcb317f2303c705da9', '8cafe1879f1e830a1b1a9f43a097bf115dfbda91fd5477649fc4feb7f4a949e1', '839d3e4247d50093294e28263bbaeee7deea7391c87e84a36e05b3a5250c350a', 'a974f439ba8be535ef1ecb094dec74d7a956cee6c4d942207d2d2a954078b122', '2932b67f4e78850f22be9c3fbff3807f5feaaab0ea00becb6d2ecd23e6223a31', 'd2114daef0ed7c1835617256f8f30ea168dd913dad066ba00b71f911e746c829', '03bd9a94482f180bb047626cb2f27ccf8daa0e201345480b43585580e09c311b', '18ee2db9dd06c129c62584178d27f6a2d818f50a9080ca3b7783e73e20d32366', '7d1232d4776a3a53d9342e89f2dad41b4aa6e66571da27a5c16f2fcd728ae339', 'c240d2a507221dbb8ecd61600108ca675d892c27ee091f71553d82ed3a816d13', '638442315859fac82d98c297108b2ff26584777ad5ea34471a7d4f326b8d9839', '26b811af99902c4ff20a99b5fad73f2bcd65dcf62b9579fe3fe4cfdfd23a3582', '92b5e70d470d9de26b8503a6b669cc9856b0db6543d545d535cc52e6374bd7cb', '57c2bec81a11c13579ec31fb1299d600300cf4530257a5692e4033968091659b', 'cbd2cbc060ed471c08b0bbea6aa48cb5062b70d61f56b3f79b0de249c62628d7', '6a5ca25caefcc91c0b424ab227b8745ad6357c75fdfebf6eb7ddcf0a2f30606b', 'f36a07ddb65181a86f39879a92e28ee6e35847fb224b3fbbf6303368e19a4ace', '0272f51918c6d57496ae1c9379922282240c3e3c43880c55b432ca5b05eeb0e1', '1fc969a2b0b4f845307d3adb317b1f730ec3ac7cd9c28953dcdb9f40373aa096']"


# Appendix - some supplementary VirusTotal lookups

In [34]:
vt_uri = "https://www.virustotal.com/api/v3/files/{id}/contacted_domains"

hdr = {"headers": {"X-Apikey": vt_settings.args.get("AuthKey")}}
resp = requests.get(vt_uri.format(id=txt_file_id.value), **hdr)

display(HTML("<h2>Contacted domains</h2>"))
for item in resp.json()["data"]:
    for attr in ("whois", "last_dns_records", "last_analysis_stats", "last_analysis_date"):
        print(attr, end=": ")
        if attr.endswith("_data"):
            pd.to_datetime(item["attributes"].get(attr))
        else:
            pprint.pprint(item["attributes"].get(attr))
        

whois: ('Administrative city: REDACTED FOR PRIVACY\n'
 'Administrative country: REDACTED FOR PRIVACY\n'
 'Administrative state: REDACTED FOR PRIVACY\n'
 'Create date: 2021-06-17\n'
 'Domain name: sanctam.net\n'
 'Domain registrar id: 69\n'
 'Domain registrar url: http://domainhelp.opensrs.net\n'
 'Expiry date: 2024-06-17\n'
 'Query time: 2021-06-21 15:30:16\n'
 'Registrant address: ff3a4678d9c7a906\n'
 'Registrant city: ff3a4678d9c7a906\n'
 'Registrant company: ff3a4678d9c7a906\n'
 'Registrant country: Saint Kitts and Nevis\n'
 'Registrant email: 3267309318f7846cs@\n'
 'Registrant fax: ff3a4678d9c7a906\n'
 'Registrant name: ff3a4678d9c7a906\n'
 'Registrant phone: ff3a4678d9c7a906\n'
 'Registrant state: 347e121c541b794d\n'
 'Registrant zip: ff3a4678d9c7a906\n'
 'Technical city: REDACTED FOR PRIVACY\n'
 'Technical country: REDACTED FOR PRIVACY\n'
 'Technical state: REDACTED FOR PRIVACY\n'
 'Update date: 2021-06-17')
last_dns_records: [{'ttl': 21600, 'type': 'NS', 'value': '2-nest.njalla.

In [35]:
vt_uri = "https://www.virustotal.com/api/v3/files/{id}/contacted_urls"

hdr = {"headers": {"X-Apikey": vt_settings.args.get("AuthKey")}}
resp = requests.get(vt_uri.format(id=txt_file_id.value), **hdr)

display(HTML("<h2>Contacted URLs</h2>"))
for item in resp.json()["data"]:
    for attr in ("url", "threat_names", "last_final_url", "last_analysis_stats", "last_analysis_date"):
        print(attr, end=": ")
        if attr.endswith("_data"):
            pd.to_datetime(item["attributes"].get(attr))
        else:
            pprint.pprint(item["attributes"].get(attr))
        

url: 'https://sanctam.net:58899/assets/txt/resource_url.php?type=xmrig'
threat_names: []
last_final_url: 'https://sanctam.net:58899/assets/txt/resource_url.php?type=xmrig'
last_analysis_stats: {'harmless': 74,
 'malicious': 2,
 'suspicious': 2,
 'timeout': 0,
 'undetected': 11}
last_analysis_date: 1633430508
