In [None]:
import pandas as pd
import numpy as np

In [None]:
#pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)
#pd.set_option('display.width', 900)
pd.set_option('display.max_rows', None)
pd.set_option('display.min_rows', None)
pd.set_option('display.html.use_mathjax', False)

In [None]:
from surianalytics.connectors import RESTSciriusConnector
from surianalytics.datamining import min_max_scaling

In [None]:
c = RESTSciriusConnector()
c.set_page_size(1000)

In [None]:
if c.tls_verify is False:
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

In [None]:
import ipywidgets as widgets

## Shared things

### Data containers

In [None]:
import copy

In [None]:
# Core event columns. reorder_columns() places these first, in this order, and
# the initial empty DF_EVENTS / DF_FILTER frames are created with this schema.
DEFAULT_COLUMNS = ["timestamp", "host", "community_id", "flow_id", "event_type", "proto", "app_proto", "src_ip", "flow.src_ip", "src_port","dest_ip", "flow.dest_ip", "dest_port", "direction",]

In [None]:
DF_EVENTS = pd.DataFrame(columns=DEFAULT_COLUMNS)

In [None]:
DF_FILTER = pd.DataFrame(columns=DEFAULT_COLUMNS)

In [None]:
# Field names reported by the connector, excluding any that start with "stats.".
# Used as the options of the unique-values and graph node comboboxes.
UNIQ_FIELDS = [f for f in c.get_unique_fields() if not f.startswith("stats.")]

### Functions

In [None]:
def reorder_columns():
    """Reorder the columns of the global ``DF_EVENTS`` frame.

    Columns named in ``DEFAULT_COLUMNS`` come first, in ``DEFAULT_COLUMNS``
    order; every other column follows in sorted order. ``DF_EVENTS`` is
    rebound to the reordered selection.
    """
    global DF_EVENTS
    present = list(DF_EVENTS.columns.values)
    leading = [name for name in DEFAULT_COLUMNS if name in present]
    trailing = sorted(name for name in present if name not in leading)
    DF_EVENTS = DF_EVENTS[leading + trailing]

In [None]:
def col_names():
    """Return the ``DF_EVENTS`` column names that are worth offering to the user.

    Columns that are entirely NA are dropped first; names starting with
    ``"stats"`` are then left out of the returned list.
    """
    populated = DF_EVENTS.dropna(how="all", axis=1)
    names = []
    for name in populated.columns.values:
        if name.startswith("stats"):
            continue
        names.append(name)
    return names

In [None]:
def col_names_subset():
    """Return the usable column names of the filtered frame ``DF_FILTER``.

    Same rule as for the full event frame: all-NA columns are dropped and
    names starting with ``"stats"`` are excluded.
    """
    candidates = DF_FILTER.dropna(how="all", axis=1).columns.values
    return list(filter(lambda name: not name.startswith("stats"), candidates))

### Widgets

In [None]:
DROPDOWN_EVENT_TYPE = widgets.Dropdown(
    options=[""] + sorted(list(DF_EVENTS.event_type.unique())),
)

In [None]:
SELECT_AGG_COLUMN = widgets.Combobox(description="Group by")

In [None]:
TEXT_QUERY = widgets.Textarea(
    description='Query filter:',
    value="*",
    continuous_update=True
)

In [None]:
# Multi-select of event columns to show; handler_pull_events() refreshes the
# options after every download.
# Only real SelectMultiple traits are passed: the widget has no `values` or
# `ensure_option` trait, and traitlets deprecates unrecognized constructor
# keywords, so the selection is left at the widget default.
SELECT_COLUMNS = widgets.SelectMultiple(
    options=col_names(),
    description="Columns",
    rows=20,
)

In [None]:
# Single-column picker (used as the timeline "group by" column);
# handler_pull_events() refreshes the options after every download.
# Combobox has no `values` trait (its `value` is a single string), so no
# initial value is passed and the text box starts at the widget default.
SELECT_COLUMN = widgets.Combobox(
    options=col_names(),
    description="Columns",
)

In [None]:
# Multi-select of columns passed to handler_show_eve() as `sort`;
# handler_pull_events() refreshes the options after every download.
# `ensure_option` is a Combobox trait, not a SelectMultiple one, so it is
# not passed here.
SORT_COLUMNS = widgets.SelectMultiple(
    options=col_names(),
    description="Columns",
    rows=10,
)

In [None]:
# Multi-select of filtered-frame columns to run IoC extraction on;
# handler_show_eve() refreshes the options whenever the filter changes.
# `ensure_option` is a Combobox trait, not a SelectMultiple one, so it is
# not passed here.
COLUMN_SELECTION = widgets.SelectMultiple(
    options=col_names(),
    description="Columns subset",
    rows=20,
)

In [None]:
SLIDER_MAX_DOCS = widgets.IntSlider(
    value=1000,
    min=100,
    max=10000,
    description="Number of documents to pull"
)

In [None]:
from msticpy.transform import IoCExtract

In [None]:
ioc_extractor = IoCExtract()

### Outputs

In [None]:
OUTPUT_DEBUG = widgets.Output()

## Timepicker

In [None]:
from datetime import datetime, timedelta

In [None]:
def update_time(time_from, time_to):
    """Apply the picked time range to the connector.

    Clears ``OUTPUT_DEBUG`` and forwards both bounds to
    ``c.set_query_timeframe``. A ``ValueError`` raised by that call is printed
    into ``OUTPUT_DEBUG`` instead of propagating.

    Args:
        time_from: Value of the "From" picker.
        time_to: Value of the "To" picker.
    """
    OUTPUT_DEBUG.clear_output()
    with OUTPUT_DEBUG:
        try:
            c.set_query_timeframe(time_from, time_to)
        except ValueError as exc:
            print("ERROR: %s" % exc)

In [None]:
INTERACT_TIMEPICKER = widgets.interactive(update_time,
                                          time_from=widgets.DatetimePicker(description='From', 
                                                                           disabled=False),
                                          time_to=widgets.DatetimePicker(description='To', 
                                                                         disabled=False))

## Set up data ingest

In [None]:
# Columns that handler_pull_events() parses with pd.to_datetime and that
# handler_aggregate_events() aggregates with min/max rather than unique.
TIME_COLS = ["@timestamp", "timestamp", "http.date"]

In [None]:
def handler_pull_events(args):
    """Button callback: download events and refresh the dependent widgets.

    Sets the connector page size from ``SLIDER_MAX_DOCS``, downloads events
    matching ``TEXT_QUERY`` into the global ``DF_EVENTS``, reorders its columns,
    parses the ``TIME_COLS`` columns as datetimes and then publishes the same
    frame as ``DF_FILTER``. Column / event-type widgets get fresh options and a
    per-event-type row count is displayed in ``OUTPUT_DEBUG``.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    global DF_EVENTS, DF_FILTER
    c.set_page_size(SLIDER_MAX_DOCS.value)
    OUTPUT_DEBUG.clear_output()
    with OUTPUT_DEBUG:
        DF_EVENTS = c.get_events_df(qfilter=TEXT_QUERY.value)
        reorder_columns()
        columns = list(DF_EVENTS.columns.values)
        for ts in TIME_COLS:
            if ts in columns:
                DF_EVENTS[ts] = pd.to_datetime(DF_EVENTS[ts])
        # Bind DF_FILTER only now: reorder_columns() rebinds DF_EVENTS to a new
        # frame, so binding earlier would leave DF_FILTER pointing at the
        # un-reordered frame with unparsed timestamps.
        DF_FILTER = DF_EVENTS

        with SELECT_COLUMNS.hold_trait_notifications():
            SELECT_COLUMNS.options = col_names()
        with SELECT_COLUMN.hold_trait_notifications():
            SELECT_COLUMN.options = col_names()

        has_event_type = "event_type" in columns
        if has_event_type:
            # dropna() keeps sorted() from comparing str with float NaN.
            event_types = sorted(DF_EVENTS["event_type"].dropna().unique())
            with DROPDOWN_EVENT_TYPE.hold_trait_notifications():
                DROPDOWN_EVENT_TYPE.options = [""] + event_types
        else:
            print("event_type not in DF columns")

        print("downloaded {} rows and {} columns".format(len(DF_EVENTS), len(columns)))
        SORT_COLUMNS.options = columns

        # The summary groups by event_type, so it only makes sense (and only
        # avoids a KeyError) when that column exists.
        if has_event_type:
            df_agg = DF_EVENTS.groupby("event_type").agg({
                "event_type": ["count"],
            }).reset_index()
            df_agg.columns = ["event_type", "count"]
            display(df_agg)

In [None]:
BUTTON_EVE_PULL = widgets.Button(description="Download EVE")
BUTTON_EVE_PULL.on_click(handler_pull_events)

In [None]:
BOX_DOWNLOAD_EVE = widgets.HBox(
    [
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
            SELECT_COLUMNS,
        ]),
        widgets.VBox([
            BUTTON_EVE_PULL,
            SLIDER_MAX_DOCS,
            OUTPUT_DEBUG
        ])
    ]
)

## Explore EVE

In [None]:
OUTPUT_EVE_DF = widgets.Output()

In [None]:
import re

In [None]:
def handler_show_eve(limit: int, event_type: str, columns: list, sort: list, find_col: str, find_val: str):
    """Filter ``DF_EVENTS`` according to the explorer widgets and display it.

    The result is stored in the global ``DF_FILTER``, the widgets whose options
    depend on the filtered columns are refreshed, and the frame is rendered
    into ``OUTPUT_EVE_DF``. Problems with the search input are reported in
    ``OUTPUT_DEBUG`` instead of raising.

    Args:
        limit: Value applied to pandas ``display.max_rows`` / ``display.min_rows``.
        event_type: Event type to keep; an empty value keeps every row.
        columns: Columns to show; empty or ``None`` keeps all columns.
        sort: Columns to sort by; names missing from the frame are ignored.
        find_col: Column searched for ``find_val``; ignored when empty.
        find_val: Search value. Treated as a case-insensitive regular
            expression for object columns and as an integer for int64 /
            float64 columns.
    """
    global DF_FILTER

    def report(message):
        # All user-facing diagnostics of this handler go to the debug output.
        OUTPUT_DEBUG.clear_output()
        with OUTPUT_DEBUG:
            print(message)

    pd.set_option('display.max_rows', limit)
    pd.set_option('display.min_rows', limit)

    df_filter = DF_EVENTS.dropna(how="all", axis=1)

    # The dropdown offers exact event types, so compare for equality; a regex
    # substring match would also fail on rows whose event_type is NaN.
    if event_type and "event_type" in df_filter.columns:
        df_filter = df_filter.loc[df_filter["event_type"] == event_type]

    # Sort before projecting so a sort column need not be among the shown ones.
    sort_by = [name for name in (sort or ()) if name in df_filter.columns]
    if sort_by:
        try:
            df_filter = df_filter.sort_values(by=sort_by)
        except TypeError as err:
            report("cannot sort by {}: {}".format(sort_by, err))

    if columns is not None and len(columns) > 0:
        keep = [name for name in columns if name in df_filter.columns]
        df_filter = df_filter[keep]

    if find_col != "" and find_val != "":
        if find_col in df_filter.columns:
            df_filter = df_filter.loc[pd.notna(df_filter[find_col])]
            col = df_filter[find_col]
            mask = None
            try:
                if col.dtype == "object":
                    # na=False: non-string cells yield NaN, which cannot be
                    # used as a boolean mask.
                    mask = col.str.contains(find_val, flags=re.IGNORECASE, na=False)
                elif col.dtype == "int64":
                    mask = col == int(find_val)
                elif col.dtype == "float64":
                    mask = col.astype(int) == int(find_val)
                else:
                    report("col {} is {}, supported are string an int".format(find_col, col.dtype))
            except (ValueError, re.error) as err:
                # Half-typed numbers or regexes are normal while the user types.
                report("invalid search value {!r} for col {}: {}".format(find_val, find_col, err))
            if mask is not None:
                df_filter = df_filter.loc[mask]
        else:
            report("col {} not in dataframe".format(find_col))

    # Publish the new filter first: col_names_subset() reads DF_FILTER, so
    # refreshing the options earlier would offer the previous filter's columns.
    DF_FILTER = df_filter
    subset = col_names_subset()
    SELECT_AGG_COLUMN.options = subset
    COLUMN_SELECTION.options = subset

    OUTPUT_EVE_DF.clear_output()
    with OUTPUT_EVE_DF:
        display(df_filter)

In [None]:
# Wire handler_show_eve() to its widgets: every keyword below maps to the
# handler parameter of the same name. The shared DROPDOWN_EVENT_TYPE,
# SELECT_COLUMNS and SORT_COLUMNS widgets are reused rather than created here.
INTERACTIVE_EXPLORE_EVE = widgets.interactive(
    handler_show_eve,
    limit=widgets.IntSlider(
        min=10,
        max=1000,
    ),
    event_type=DROPDOWN_EVENT_TYPE,
    columns=SELECT_COLUMNS,
    sort=SORT_COLUMNS,
    # NOTE(review): these options are taken from DF_EVENTS.columns at the time
    # this cell runs; nothing visible here refreshes them after a download —
    # confirm whether that is intended.
    find_col=widgets.Combobox(options=list(DF_EVENTS.columns.values), description="Find column"),
    find_val="",
)

In [None]:
BOX_EVE_EXPLORE = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
        ]),
        INTERACTIVE_EXPLORE_EVE,
        widgets.VBox([
            OUTPUT_DEBUG
        ])
    ]),
    OUTPUT_EVE_DF
])

## Aggregate

In [None]:
OUTPUT_AGG = widgets.Output()

In [None]:
DF_AGG = pd.DataFrame()

In [None]:
DROPDOWN_AGG_COLS = widgets.Dropdown(description="Explode column")

In [None]:
SLIDER_LIMIT_AGG = widgets.IntSlider(
    min=10,
    max=150,
    description="limit"
)

In [None]:
def handler_aggregate_events(args):
    """Button callback: aggregate ``DF_FILTER`` by the selected column.

    Groups by ``SELECT_AGG_COLUMN.value``. Columns listed in ``TIME_COLS`` are
    reduced to their min and max, every other column to its unique values. The
    result is stored in the global ``DF_AGG``, offered column by column in
    ``DROPDOWN_AGG_COLS`` and displayed in ``OUTPUT_AGG``.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    global DF_AGG
    pd.set_option('display.max_rows', SLIDER_LIMIT_AGG.value)
    pd.set_option('display.min_rows', SLIDER_LIMIT_AGG.value)
    OUTPUT_AGG.clear_output()
    with OUTPUT_AGG:
        group_col = SELECT_AGG_COLUMN.value

        # Drop all-NA columns BEFORE filling: once fillna("") has run nothing
        # is NA any more and dropna(how="all") would never remove a column.
        df = DF_FILTER.dropna(axis=1, how="all")
        available = list(df.columns.values)
        if group_col not in available:
            print("group by col {} not in filtered set, available are {}".format(group_col, available))
            return

        # Build the spec from the frame actually being aggregated so that it
        # never names a column that was just dropped.
        agg_spec = {
            item: ["min", "max"] if item in TIME_COLS else ["unique"]
            for item in available
            if item != group_col
        }
        if not agg_spec:
            print("no columns to aggregate besides {}".format(group_col))
            return

        DF_AGG = df.fillna("").groupby(group_col).agg(agg_spec)
        # Columns are (source column, aggregation) tuples; the dropdown shows
        # the source column and carries the full tuple as its value.
        DROPDOWN_AGG_COLS.options = [(t[0], t) for t in list(DF_AGG.columns.values)]
        display(DF_AGG)

In [None]:
BUTTON_EVE_AGG = widgets.Button(description="Aggregate EVE")
BUTTON_EVE_AGG.on_click(handler_aggregate_events)

In [None]:
def handler_explode_agg(args):
    """Button callback: explode one aggregated column into separate rows.

    When ``DF_AGG`` is empty a hint is printed into ``OUTPUT_DEBUG`` and
    nothing else happens. Otherwise ``DF_AGG`` exploded on the column chosen
    in ``DROPDOWN_AGG_COLS`` is displayed in ``OUTPUT_AGG``.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    OUTPUT_DEBUG.clear_output()
    if DF_AGG.empty:
        with OUTPUT_DEBUG:
            print("Please call aggregate EVE first, dataframe is empty")
        return

    OUTPUT_AGG.clear_output()
    with OUTPUT_AGG:
        display(DF_AGG.explode(DROPDOWN_AGG_COLS.value))

In [None]:
BUTTON_EVE_AGG_EXP = widgets.Button(description="Explode column")
BUTTON_EVE_AGG_EXP.on_click(handler_explode_agg)

In [None]:
BOX_AGGREGATE = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
        ]),
        widgets.VBox([
            SLIDER_LIMIT_AGG,
            SELECT_COLUMNS,
            SELECT_AGG_COLUMN,
            DROPDOWN_AGG_COLS,
            BUTTON_EVE_AGG,
            BUTTON_EVE_AGG_EXP
        ]),
        widgets.VBox([
            OUTPUT_DEBUG
        ])
    ]),
    OUTPUT_AGG
])

## Uniq values

In [None]:
SELECT_FIELD = widgets.Combobox(options=UNIQ_FIELDS, value="flow_id", description="Select")

In [None]:
DF_UNIQ = pd.DataFrame()

In [None]:
OUTPUT_UNIQUE = widgets.Output()

In [None]:
DROPDOWN_SORT_UNIQ = widgets.Dropdown(description="Sort", options=["key", "doc_count"], value="key")

In [None]:
SLIDER_SHOW_UNIQ = widgets.IntSlider(
    min=10,
    max=1000,
    description="Limit"
)

In [None]:
TICKBOX_UNIQ_IOC = widgets.Checkbox(
    value=False,
    description='Extract IoC',
)

In [None]:
def handler_pull_data(args):
    """Button callback: pull unique values of the selected field.

    Queries the connector for the unique values (with counts) of
    ``SELECT_FIELD.value`` under the ``TEXT_QUERY`` filter and stores them in
    the global ``DF_UNIQ``. When ``TICKBOX_UNIQ_IOC`` is ticked, IoCs extracted
    from each string ``key`` are appended as extra columns. The frame is shown
    in ``OUTPUT_UNIQUE`` sorted by ``DROPDOWN_SORT_UNIQ`` (descending for
    ``doc_count``, ascending otherwise).

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    global DF_UNIQ
    pd.set_option('display.max_rows', SLIDER_SHOW_UNIQ.value)
    pd.set_option('display.min_rows', SLIDER_SHOW_UNIQ.value)
    DF_UNIQ = pd.DataFrame(c.get_eve_unique_values(counts="yes", field=SELECT_FIELD.value, qfilter=TEXT_QUERY.value))
    OUTPUT_UNIQUE.clear_output()
    with OUTPUT_UNIQUE:
        # Check for emptiness first: an empty result has no "key" column, so
        # the IoC branch below would fail with a KeyError.
        if len(DF_UNIQ) == 0:
            print("no data")
            return

        if TICKBOX_UNIQ_IOC.value is True:
            # Only strings are handed to the extractor (same guard as
            # apply_extract_ioc); other keys, e.g. numeric ids, yield no IoCs.
            extracted = DF_UNIQ["key"].apply(
                lambda key: ioc_extractor.extract(key) if isinstance(key, str) else {}
            )
            ioc_columns = extracted.apply(pd.Series)
            DF_UNIQ = pd.concat([DF_UNIQ, ioc_columns], axis=1)

        sort = DROPDOWN_SORT_UNIQ.value
        display(DF_UNIQ.sort_values(by=sort, ascending=(sort != "doc_count")))

In [None]:
BUTTON_PULL_UNIQUE = widgets.Button(description="Pull unique values")
BUTTON_PULL_UNIQUE.on_click(handler_pull_data)

In [None]:
def handler_show_simple_uniq(args):
    """Button callback: print the pulled unique keys as plain sorted text.

    Keys of ``DF_UNIQ`` are converted to strings, de-duplicated, sorted and
    printed one per line into ``OUTPUT_UNIQUE``. When no unique values have
    been pulled yet a hint is printed instead.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    OUTPUT_UNIQUE.clear_output()
    with OUTPUT_UNIQUE:
        # DF_UNIQ starts as an empty frame without a "key" column.
        if "key" not in DF_UNIQ.columns:
            print("Please pull unique values first, dataframe is empty")
            return
        print("\n".join(sorted(DF_UNIQ["key"].astype(str).unique())))

In [None]:
BUTTON_SHOW_SIMPLE = widgets.Button(description="Show simple values")
BUTTON_SHOW_SIMPLE.on_click(handler_show_simple_uniq)

In [None]:
BOX_UNIQUE = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
        ]),
        widgets.VBox([
            SLIDER_SHOW_UNIQ,
            SELECT_FIELD,
            DROPDOWN_SORT_UNIQ,
            TICKBOX_UNIQ_IOC,
            BUTTON_PULL_UNIQUE,
            BUTTON_SHOW_SIMPLE
        ]),
    ]),
    OUTPUT_UNIQUE
])

## Graph mining

In [None]:
OUTPUT_GRAPH_DEBUG = widgets.Output()

In [None]:
OUTPUT_GRAPH = widgets.Output()

In [None]:
# Initialise only when absent, so that re-running this cell keeps any
# FIELDS / GRAPH values already present in the kernel namespace.
if "FIELDS" not in globals():
    FIELDS = []
if "GRAPH" not in globals():
    GRAPH = None

In [None]:
GRAPH_RESOLUTIONS = [
        "480x360",
        "960x540",
        "1280x720",
        "1600x900",
        "1920x1080",
        "2048x1080",
        "2540x1440",
        "3840x2160"
    ]

In [None]:
DROPDOWN_GRAPH_REZ = widgets.Dropdown(
    description="Graph size",
    options=GRAPH_RESOLUTIONS,
    value=GRAPH_RESOLUTIONS[1]
)

In [None]:
SLIDER_SIZE_GRAPH_COL = widgets.IntSlider(
    value=500,
    min=100,
    max=5000,
    description='Aggregation max size',
    orientation='horizontal',
    readout=True,
)
SLIDER_THRESH_GRAPH_DEGREE = widgets.FloatRangeSlider(
    value=[0, 1],
    min=0,
    max=1,
    step=0.001,
    description="Degree: ",
    continuous_update=False,
    orientation='horizontal',
    readout=True,
    readout_format='.2f',
)

In [None]:
DROPDOWN_FIELDS_GRAPH_SRC = widgets.Combobox(description="Src node", options=UNIQ_FIELDS)
DROPDOWN_FIELDS_GRAPH_DEST = widgets.Combobox(description="Dest node", options=UNIQ_FIELDS)

In [None]:
def handler_pull_graph_data(args):
    """Button callback: fetch the field graph from the connector.

    Calls ``c.get_eve_fields_graph_nx`` with the source / destination fields,
    the aggregation size and the ``TEXT_QUERY`` filter, and stores the result
    in the global ``GRAPH``. An ``event_type`` argument is added only when
    ``DROPDOWN_EVENT_TYPE`` holds something other than ``None``, ``""``,
    ``"all"`` or ``"any"``. Progress is reported in ``OUTPUT_GRAPH_DEBUG``.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    global GRAPH
    OUTPUT_GRAPH_DEBUG.clear_output()
    with OUTPUT_GRAPH_DEBUG:
        # Plain print: wrapping it in display() would also render the None
        # that print() returns.
        print("Calling scirius")
        size = SLIDER_SIZE_GRAPH_COL.value
        kwargs = {
            "col_src": DROPDOWN_FIELDS_GRAPH_SRC.value,
            "col_dest": DROPDOWN_FIELDS_GRAPH_DEST.value,
            "size_src": size,
            "size_dest": size,
            "qfilter": TEXT_QUERY.value
        }
        event_type = DROPDOWN_EVENT_TYPE.value
        if event_type not in (None, "", "all", "any"):
            kwargs["event_type"] = event_type
        GRAPH = c.get_eve_fields_graph_nx(**kwargs)
        display("call done, got {} nodes and {} edges".format(len(GRAPH.nodes()), len(GRAPH.edges())))

In [None]:
BUTTON_LOAD_GRAPH_DATA = widgets.Button(description="Pull graph data")
BUTTON_LOAD_GRAPH_DATA.on_click(handler_pull_graph_data)

In [None]:
import networkx as nx
import hvplot.networkx as hvnx
import holoviews as hv
from holoviews import opts
from surianalytics.datamining import min_max_scaling

In [None]:
def handler_draw_graph(args):
    """Button callback: render the pulled graph into ``OUTPUT_GRAPH``.

    Works on a copy of the global ``GRAPH``: removes the empty-string node,
    attaches a log2 + min-max scaled ``doc_count`` to every edge, removes nodes
    whose scaled degree lies outside ``SLIDER_THRESH_GRAPH_DEGREE``, then draws
    the edges plus the source and destination nodes at the resolution chosen
    in ``DROPDOWN_GRAPH_REZ``. The total edge document count and the number of
    multi-node connected components are printed above the plot.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    OUTPUT_GRAPH.clear_output()
    with OUTPUT_GRAPH:
        # GRAPH is None until "Pull graph data" has stored a graph.
        if GRAPH is None or len(GRAPH.edges) == 0:
            display("Missing graph data, please run query first")
            return

        # use local graph object
        g = GRAPH.copy()

        # drop empty nodes (and connected edges)
        # means missing eve field, no connection can be made
        if g.has_node(""):
            g.remove_node("")

        doc_counts = [attr["doc_count"] for (src, dst, attr) in g.edges(data=True)]
        doc_counts = np.log2(doc_counts)
        doc_counts = min_max_scaling(pd.Series(doc_counts))

        # add scaled doc counts to edges to serve as weights
        for i, (src, dst, attr) in enumerate(g.edges(data=True)):
            attr["scaled_doc_count"] = doc_counts[i]

        # discover node degree and scale the values
        degree = [g.degree(n) for n in g.nodes()]
        degree = min_max_scaling(pd.Series(degree))

        # drop nodes that do not match filtering criteria; collect first,
        # because the graph must not change while it is being iterated
        degree_min, degree_max = SLIDER_THRESH_GRAPH_DEGREE.value
        to_remove = [
            n for i, n in enumerate(g.nodes())
            if degree[i] > degree_max or degree[i] < degree_min
        ]
        g.remove_nodes_from(to_remove)

        # calculate total number of docs for edges
        doc_count_total_edge = np.sum([attr["doc_count"] for (src, dst, attr) in g.edges(data=True)])

        # locate source / destination nodes; nodelist takes node identifiers
        # (as in networkx), not positions within g.nodes()
        n_src = [u for (u, a) in g.nodes(data=True) if a.get("kind") == "source"]
        n_dst = [u for (u, a) in g.nodes(data=True) if a.get("kind") == "destination"]

        # generate layout
        pos = nx.layout.spring_layout(g)

        # parse resolution, formatted as "<width>x<height>"
        rez = DROPDOWN_GRAPH_REZ.value.split("x")
        width = int(rez[0])
        height = int(rez[1])

        # generate nodes per kind
        nodes_src = hvnx.draw_networkx_nodes(g, pos, nodelist=n_src, node_color='#A0CBE2').opts(width=width, height=height)
        nodes_dst = hvnx.draw_networkx_nodes(g, pos, nodelist=n_dst, node_color="Orange").opts(width=width, height=height)

        # use kwargs to make parameter handling easier
        edge_params = {
            "alpha": 1,
            "edge_color": 'scaled_doc_count',
            "edge_cmap": 'viridis',
            "edge_width": hv.dim('scaled_doc_count')*5
        }

        # generate edges
        edges = hvnx.draw_networkx_edges(g, pos, **edge_params).opts(width=width, height=height)

        # overlay nodes and edges
        res = edges * nodes_src * nodes_dst

        component_sizes = [len(c) for c in sorted(nx.connected_components(g), key=len, reverse=True) if len(c) > 1]

        # plain print: display(print(...)) would also render the None
        # returned by print()
        print("Number of documents for edges: {}".format(doc_count_total_edge))
        print("Number of clusters: {}".format(len(component_sizes)))
        display(res)

In [None]:
BUTTON_DRAW_GRAPH = widgets.Button(description="Draw graph")
BUTTON_DRAW_GRAPH.on_click(handler_draw_graph)

In [None]:
BOX_GRAPH_MINE = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
        ]),
        widgets.VBox([
            DROPDOWN_GRAPH_REZ,
            DROPDOWN_EVENT_TYPE,
            DROPDOWN_FIELDS_GRAPH_SRC,
            DROPDOWN_FIELDS_GRAPH_DEST,
            SLIDER_SIZE_GRAPH_COL,
            SLIDER_THRESH_GRAPH_DEGREE,
            BUTTON_LOAD_GRAPH_DATA,
            BUTTON_DRAW_GRAPH
        ]),
        widgets.VBox([
            OUTPUT_GRAPH_DEBUG
        ])
    ]),
    OUTPUT_GRAPH
])

## Timeline

In [None]:
OUTPUT_TIMELINE = widgets.Output()

In [None]:
from msticpy.vis.timeline import display_timeline, display_timeline_values
from msticpy.vis.timeline_duration import display_timeline_duration

In [None]:
# Split each "<width>x<height>" entry of GRAPH_RESOLUTIONS into separate
# width and height option lists; the values stay strings.
GRAPH_W = [r.split("x")[0] for r in GRAPH_RESOLUTIONS]
GRAPH_H = [r.split("x")[1] for r in GRAPH_RESOLUTIONS]

In [None]:
DROPDOWN_TIMELINE_W = widgets.Dropdown(
    description="Width",
    options=GRAPH_W,
    value=GRAPH_W[1]
)
DROPDOWN_TIMELINE_H = widgets.Dropdown(
    description="Height",
    options=GRAPH_H,
    value=GRAPH_H[0]
)

In [None]:
def handler_draw_timeline(args):
    """Button callback: draw event and duration timelines for ``DF_FILTER``.

    Both plots use the ``timestamp`` column as the time axis, are grouped by
    ``SELECT_COLUMN.value`` and sized from the width / height dropdowns. A
    message is printed into ``OUTPUT_TIMELINE`` instead when the group-by or
    the time column is not part of the filtered frame.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    group_by = SELECT_COLUMN.value
    time_column = "timestamp"
    OUTPUT_TIMELINE.clear_output()
    with OUTPUT_TIMELINE:
        available = list(DF_FILTER.columns.values)
        if group_by not in available:
            print("group by col {} not in filtered set, available are {}".format(group_by, available))
            return
        # The explorer can narrow DF_FILTER to a column subset that no longer
        # contains the time column.
        if time_column not in available:
            print("time col {} not in filtered set, available are {}".format(time_column, available))
            return

        shared = {
            "group_by": group_by,
            "time_column": time_column,
            "width": int(DROPDOWN_TIMELINE_W.value),
            "height": int(DROPDOWN_TIMELINE_H.value),
        }
        display_timeline(DF_FILTER.fillna(""), legend="right", **shared)
        display_timeline_duration(DF_FILTER.fillna(""), **shared)

In [None]:
BUTTON_DRAW = widgets.Button(description="Draw timeline")
BUTTON_DRAW.on_click(handler_draw_timeline)

In [None]:
BOX_TIMELINE = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
            SELECT_COLUMNS
        ]),
        widgets.VBox([
            DROPDOWN_TIMELINE_W,
            DROPDOWN_TIMELINE_H,
            SELECT_COLUMN,
            BUTTON_DRAW,
        ]),
    ]),
    OUTPUT_TIMELINE
])

## IoC Extract

In [None]:
DF_IOC = pd.DataFrame()

In [None]:
OUTPUT_EXTRACT = widgets.Output()

In [None]:
def apply_extract_ioc(x: str) -> dict:
    """Run the IoC extractor on a single cell value.

    Args:
        x: Cell value to inspect.

    Returns:
        ``{"raw": x, "ioc": <extractor result>}`` when ``x`` is a string,
        otherwise an empty dict.
    """
    if isinstance(x, str):
        return {"raw": x, "ioc": ioc_extractor.extract(x)}
    return {}

In [None]:
SLIDER_LIMIT_IOC = widgets.IntSlider(
    min=25,
    max=1000,
    description="limit"
)

In [None]:
SLIDER_LIMIT_IOC_LEN = widgets.IntSlider(
    min=50,
    max=1000,
    description="limit IoC length"
)

In [None]:
TICKBOX_RAW_COL = widgets.Checkbox(
    value=False,
    description='Show Raw column',
)

In [None]:
TICKBOX_AGG_VALUES = widgets.Checkbox(
    value=False,
    description='Aggregate values',
)

In [None]:
def extract_ioc() -> pd.DataFrame:
    """Extract IoCs from the selected columns of ``DF_FILTER``.

    Every cell of every column chosen in ``COLUMN_SELECTION`` is passed through
    ``apply_extract_ioc``; each extracted value becomes one output row.
    Selected columns that are not present in ``DF_FILTER`` are skipped.

    Returns:
        A frame with the columns ``field`` (source column), ``kind`` (key of
        the extractor result) and ``value``, plus ``raw`` (the original cell)
        when ``TICKBOX_RAW_COL`` is ticked. The columns are present even when
        nothing was extracted, so callers can always access ``value``.
    """
    include_raw = TICKBOX_RAW_COL.value is True
    out_columns = ["field", "kind", "value"]
    if include_raw:
        out_columns.append("raw")

    # The selection can be stale relative to the current filter.
    selected = [col for col in COLUMN_SELECTION.value if col in DF_FILTER.columns]

    extractions = []
    for col in selected:
        for cell in DF_FILTER[col]:
            item = apply_extract_ioc(cell)
            # Non-string cells give {} and strings without hits an empty result.
            if not item.get("ioc"):
                continue
            for kind, dataset in item["ioc"].items():
                for val in dataset:
                    record = {
                        "field": col,
                        "kind": kind,
                        "value": val,
                    }
                    if include_raw:
                        record["raw"] = item["raw"]
                    extractions.append(record)
    return pd.DataFrame(extractions, columns=out_columns)

In [None]:
def handler_extract_ioc(args):
    """Button callback: run IoC extraction and show the result.

    Stores the output of ``extract_ioc()`` in the global ``DF_IOC``, keeps the
    rows whose ``value`` is at most ``SLIDER_LIMIT_IOC_LEN`` characters long and
    displays them in ``OUTPUT_EXTRACT`` — grouped per value when
    ``TICKBOX_AGG_VALUES`` is ticked, otherwise sorted by kind and value.

    Args:
        args: The clicked button, supplied by ``Button.on_click``; unused.
    """
    global DF_IOC
    pd.set_option('display.max_rows', SLIDER_LIMIT_IOC.value)
    pd.set_option('display.min_rows', SLIDER_LIMIT_IOC.value)
    OUTPUT_EXTRACT.clear_output()
    with OUTPUT_EXTRACT:
        print("starting, please be patient")

        DF_IOC = extract_ioc()

        print("done extracting")

        # Nothing selected or nothing found: there may be no "value" column
        # to filter on.
        if DF_IOC.empty or "value" not in DF_IOC.columns:
            print("no IoCs found")
            return

        df = DF_IOC.loc[DF_IOC["value"].str.len() <= SLIDER_LIMIT_IOC_LEN.value]
        if TICKBOX_AGG_VALUES.value is True:
            display(df.groupby("value").agg({
                "field": "unique",
                "kind": "unique"
            }).reset_index().sort_values(by="value"))
        else:
            display(df.sort_values(by=["kind", "value"]))

In [None]:
BUTTON_EXTRACT_IOC = widgets.Button(description="Extract IoCs")
BUTTON_EXTRACT_IOC.on_click(handler_extract_ioc)

In [None]:
def handler_show_ioc(limit: int):
    """Re-render the already extracted IoCs with a new row limit.

    Uses the global ``DF_IOC`` as is (no new extraction), keeps the rows whose
    ``value`` is at most ``SLIDER_LIMIT_IOC_LEN`` characters long and displays
    them in ``OUTPUT_EXTRACT`` — grouped per value when ``TICKBOX_AGG_VALUES``
    is ticked, otherwise sorted by kind and value.

    Args:
        limit: Value applied to pandas ``display.max_rows`` / ``display.min_rows``.
    """
    pd.set_option('display.max_rows', limit)
    pd.set_option('display.min_rows', limit)
    OUTPUT_EXTRACT.clear_output()
    with OUTPUT_EXTRACT:
        # DF_IOC starts as an empty frame without a "value" column.
        if DF_IOC.empty or "value" not in DF_IOC.columns:
            print("no IoCs extracted yet")
            return

        df = DF_IOC.loc[DF_IOC["value"].str.len() <= SLIDER_LIMIT_IOC_LEN.value]
        if TICKBOX_AGG_VALUES.value is True:
            display(df.groupby("value").agg({
                "field": "unique",
                "kind": "unique"
            }).reset_index().sort_values(by="value"))
        else:
            display(df.sort_values(by=["kind", "value"]))

In [None]:
INTERACTIVE_SLIDER = widgets.interactive(handler_show_ioc, limit=SLIDER_LIMIT_IOC)

In [None]:
BOX_IOC_EXTRACT = widgets.VBox([
    widgets.HBox([
        widgets.VBox([
            INTERACT_TIMEPICKER,
            TEXT_QUERY,
            SELECT_COLUMNS
        ]),
        widgets.VBox([
            INTERACTIVE_SLIDER,
            SLIDER_LIMIT_IOC_LEN,
            COLUMN_SELECTION,
            TICKBOX_RAW_COL,
            TICKBOX_AGG_VALUES,
            BUTTON_EXTRACT_IOC
        ]),
    ]),
    OUTPUT_EXTRACT
])

In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('max_colwidth', 150)

In [None]:
from surianalytics.widgets.explorer import Explorer

In [None]:
explorer = Explorer()

In [None]:
explorer.display()