# Imports

In [None]:
import json
from datetime import datetime, timedelta, timezone as dt_tz
from pprint import pprint

import ipywidgets as widgets
import mercury as mr
import pandas as pd
import plotly.express as px
from django.conf import settings
from django.utils import timezone
from IPython.display import JSON, display

from surianalytics.connectors import ESQueryBuilder, escape as es_escape

# django.utils.timezone reads settings.USE_TZ, so a minimal settings object
# has to exist before timezone.now() is called further down.
# settings.configure() raises RuntimeError when settings are already configured
# (for instance when this cell is re-run), so test `settings.configured` first
# instead of hiding every possible error behind a bare `except`.
if not settings.configured:
    settings.configure(USE_TZ=True)

# Silence urllib3's InsecureRequestWarning so it does not flood the cell outputs.
# NOTE(review): presumably the requests sent by ESQueryBuilder skip TLS
# certificate verification — confirm this is intended outside of a lab setup.
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Notebook-wide pandas display settings; a value of None removes the limit.
# Keep 'display.max_rows' bounded: rendering too many rows can freeze the
# browser or even the OS.
for option_name, option_value in (
    ('display.max_colwidth', None),
    ('display.max_columns', None),
    ('display.max_rows', 100),
    ('display.width', None),
):
    pd.set_option(option_name, option_value)

# Build ES query example

In [8]:
# Demonstration only: assemble a query and render it as JSON.
# No request is sent from this cell (post() is never called).
builder = ESQueryBuilder()
builder.set_index('logstash-tls-*')
builder.set_page_size(0)
builder.set_tenant(0)

# Join the two escaped proto terms with OR, then join the event_type term
# with that result using AND.
proto_filter = ESQueryBuilder.filter_join(
    [f'proto: {es_escape(proto)}' for proto in ('UDP', 'TCP')],
    operator='OR',
)
qfilter = ESQueryBuilder.filter_join(['event_type: tls', proto_filter], operator='AND')
builder.set_qfilter(qfilter)

# Aggregations are added in this order, each ordered on '_count' with size 5.
for field, direction in (
    ('tls.sni.keyword', 'asc'),
    ('tls.cipher_security.keyword', 'asc'),
    ('tls.toto', 'desc'),
):
    builder.add_aggs(field, order='_count', sort=direction, size=5)

# ESQueryBuilder exposes __dict__ as a callable; its result is what gets rendered.
JSON(builder.__dict__(), expanded=True)

<IPython.core.display.JSON object>

# Old TLS versions

In [17]:
# Cipher suite count on degraded / insecure
builder = ESQueryBuilder()
builder.set_index('logstash-tls-*')
builder.set_page_size(0)
builder.set_tenant(0)

filters = [f'tls.cipher_security: {es_escape("insecure")}', f'tls.cipher_security: {es_escape("degraded")}']
filter2 = 'event_type: tls'
qfilter = ESQueryBuilder.filter_join(filters, operator='OR')
qfilter = ESQueryBuilder.filter_join([filter2, qfilter], operator='AND')
builder.set_qfilter(qfilter)

builder.add_aggs('tls.cipher_suite.keyword', order='_count', sort='desc', size=5)

builder.set_from_date('2023-04-10T09:42:49Z')
builder.set_to_date('2023-06-14T09:42:49Z')

r = builder.post()
content = r.json()

# One (cipher suite, doc_count) row per bucket of every returned aggregation.
# A response without an 'aggregations' key simply yields no rows.
rows = [
    (bucket['key'], bucket['doc_count'])
    for aggregation in content.get('aggregations', {}).values()
    for bucket in aggregation.get('buckets', [])
]
df = pd.DataFrame(rows, columns=['Cipher Suite', 'Count'])

# Only draw the pie chart when there is something to draw: with no bucket the
# frame has no numeric data and pandas cannot plot it.
plot = None
if not df.empty:
    plot = df.groupby(['Cipher Suite']).sum().plot(kind='pie', y='Count', autopct='%1.0f%%')
df

{'took': 0, 'timed_out': False, '_shards': {'total': 0, 'successful': 0, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 0, 'relation': 'eq'}, 'max_score': 0.0, 'hits': []}}


In [None]:
# Global number of assets using insecure and degraded ciphers
# NOTE(review): the figures are the aggregation buckets' doc_count; confirm
# that one document corresponds to one asset.
builder = ESQueryBuilder()
builder.set_index('logstash-tls-*')
builder.set_page_size(0)
builder.set_tenant(0)

filters = [f'tls.cipher_security: {es_escape("insecure")}', f'tls.cipher_security: {es_escape("degraded")}']
filter2 = 'event_type: tls'
qfilter = ESQueryBuilder.filter_join(filters, operator='OR')
qfilter = ESQueryBuilder.filter_join([filter2, qfilter], operator='AND')
builder.set_qfilter(qfilter)

builder.add_aggs('tls.cipher_security.keyword', order='_count', sort='desc', size=5)

builder.set_from_date('2023-05-10T09:42:49Z')
builder.set_to_date('2023-05-31T09:42:49Z')

r = builder.post()
content = r.json()

# One (bucket key, doc_count) row per bucket of every returned aggregation.
rows = [
    (bucket['key'], bucket['doc_count'])
    for aggregation in content.get('aggregations', {}).values()
    for bucket in aggregation.get('buckets', [])
]

# The aggregation runs on tls.cipher_security.keyword, so the bucket keys are
# security levels, not cipher suites: label the column accordingly.
df = pd.DataFrame(rows, columns=['Cipher Security', 'Count'])
df

In [10]:
# Two aggregations on insecure/degraded TLS events: cipher security first,
# cipher suite second. The response is flattened into two row lists:
#   sunburst - every row carries the parent bucket's key and count
#   tree     - only the first row of each parent bucket carries them
builder = ESQueryBuilder()
builder.set_index('logstash-tls-*')
builder.set_page_size(0)
builder.set_tenant(0)

security_filter = ESQueryBuilder.filter_join(
    [f'tls.cipher_security: {es_escape(level)}' for level in ('insecure', 'degraded')],
    operator='OR',
)
qfilter = ESQueryBuilder.filter_join(['event_type: tls', security_filter], operator='AND')
builder.set_qfilter(qfilter)

for field in ('tls.cipher_security.keyword', 'tls.cipher_suite.keyword'):
    builder.add_aggs(field, order='_count', sort='desc', size=5)

builder.set_from_date('2023-05-14T09:42:49Z')
builder.set_to_date('2023-05-31T09:42:49Z')

r = builder.post()
content = r.json()

sunburst = []
tree = []
for aggregation in content.get('aggregations', {}).values():
    for bucket in aggregation.get('buckets', []):
        # The nested aggregation is read from the bucket's '2' key.
        for position, sub_bucket in enumerate(bucket['2'].get('buckets', [])):
            full_row = (bucket['key'], bucket['doc_count'], sub_bucket['key'], sub_bucket['doc_count'])
            sunburst.append(full_row)
            if position == 0:
                tree.append(full_row)
            else:
                # Blank out the repeated parent cells for a tree-like table.
                tree.append(('', '', sub_bucket['key'], sub_bucket['doc_count']))

df = pd.DataFrame(tree, columns=['Cipher Security', 'Security Count', 'Cipher Suite', 'Suite Count'])
df

Unnamed: 0,Cipher Security,Security Count,Cipher Suite,Suite Count


In [None]:
# Draw the fully populated rows as a sunburst chart; all four columns are
# passed as hierarchy levels, in this order.
sunburst_columns = ['Cipher Security', 'Security Count', 'Cipher Suite', 'Suite Count']
df = pd.DataFrame(sunburst, columns=sunburst_columns)
px.sunburst(df, path=sunburst_columns, width=1000, height=1000)

# Filter out host insight requests (scirius#6112)

In [None]:
# Query host_id documents on a service first_seen date and app_proto, then
# pass each hit's services through ESQueryBuilder.clean_host_id with the same
# two criteria before tabulating them.
builder = ESQueryBuilder()
builder.set_index('host_id-1')
builder.set_page_size(50)
builder.set_tenant(1)
builder.set_time_filter('host_id.services.values.first_seen')
builder.set_from_date('2023-05-10T09:42:49Z')
builder.set_to_date('2023-05-31T09:42:49Z')

date_filter = '2023-05-10T15:06:11.857378+0200'
app_proto_filter = 'dns'

# Query-side filter: the two service terms joined with AND, then the tenant term.
query_terms = [
    f'host_id.services.values.first_seen: "{date_filter}"',
    f'host_id.services.values.app_proto.keyword: {app_proto_filter}',
]
qfilter = ESQueryBuilder.filter_join(query_terms, operator='AND')
qfilter = ESQueryBuilder.filter_join([qfilter, 'host_id.tenant: 1'], operator='AND')
builder.set_qfilter(qfilter)

r = builder.post()
content = r.json()

# Keyword arguments handed to clean_host_id for every hit.
filters = {'values.first_seen': date_filter, 'values.app_proto': app_proto_filter}

dataframe = []
for hit in content.get('hits', {}).get('hits', []):
    host_id = hit.get('_source', {}).get('host_id', {})
    services = ESQueryBuilder.clean_host_id(host_id.get('services', []), **filters)

    for service in services:
        for position, entry in enumerate(service['values']):
            # Proto/port are shown on the first row of a service only;
            # the following rows leave those two cells blank.
            if position == 0:
                leading = (service['proto'], service['port'])
            else:
                leading = ('', '')
            dataframe.append(leading + (entry['first_seen'], entry['last_seen'], entry['app_proto']))

df = pd.DataFrame(dataframe, columns=['Proto', 'Port', 'First Seen', 'Last Seen', 'App Proto'])
df

In [None]:
# Query host_id documents for one IP with udp services, pass each hit's
# services through clean_host_id(proto='udp') and list one row per value.
builder = ESQueryBuilder()
builder.set_index('host_id-1')
builder.set_page_size(50)
builder.set_tenant(1)
builder.set_time_filter('host_id.services.values.first_seen')
builder.set_from_date('2023-05-10T09:42:49Z')
builder.set_to_date('2023-05-31T09:42:49Z')

query_terms = ['host_id.services.proto: udp', 'ip: 10.10.21.1', 'host_id.tenant: 1']
qfilter = ESQueryBuilder.filter_join(query_terms, operator='AND')
builder.set_qfilter(qfilter)

r = builder.post()
content = r.json()

# Keyword arguments handed to clean_host_id for every hit.
filters = {'proto': 'udp'}

dataframe = []
for hit in content.get('hits', {}).get('hits', []):
    host_id = hit.get('_source', {}).get('host_id', {})
    services = ESQueryBuilder.clean_host_id(host_id.get('services', []), **filters)

    # Every row repeats the hit's IP plus the service's proto/port.
    dataframe.extend(
        (hit['_source']['ip'], service['proto'], service['port'],
         entry['first_seen'], entry['last_seen'], entry['app_proto'])
        for service in services
        for entry in service['values']
    )

df = pd.DataFrame(dataframe, columns=['IP', 'Proto', 'Port', 'First Seen', 'Last Seen', 'App Proto'])
df.sort_values('IP')



In [None]:
# Query host_id documents on proto, ja3 hash and ja3 first_seen, then collect
# two row lists from the hits:
#   dataframe_proto - services after clean_host_id(proto='udp', port=53)
#   dataframe_ja3   - 'tls.ja3' entries after clean_host_id(hash=..., first_seen=...)
builder = ESQueryBuilder()
builder.set_index('host_id-1')
builder.set_page_size(50)
builder.set_tenant(1)
builder.set_time_filter('host_id.services.values.first_seen')
builder.set_from_date('2023-05-10T09:42:49Z')
builder.set_to_date('2023-05-31T09:42:49Z')

query_terms = [
    'host_id.services.proto: udp',
    'host_id.tls.ja3.hash: 6734f37431670b3ab4292b8f60f29984',
    'host_id.tls.ja3.first_seen: "2023-05-10T15:34:15.577078+02:00"',
    'host_id.tenant: 1',
]
qfilter = ESQueryBuilder.filter_join(query_terms, operator='AND')
builder.set_qfilter(qfilter)

r = builder.post()
content = r.json()

# Keyword arguments handed to clean_host_id for services and ja3 entries.
filter_proto = {'proto': 'udp', 'port': 53}
filter_ja3 = {'hash': '6734f37431670b3ab4292b8f60f29984', 'first_seen': '2023-05-10T15:34:15.577078+02:00'}

dataframe_proto = []
dataframe_ja3 = []
for hit in content.get('hits', {}).get('hits', []):
    host_id = hit.get('_source', {}).get('host_id', {})

    services = ESQueryBuilder.clean_host_id(host_id.get('services', []), **filter_proto)
    # 'tls.ja3' is looked up as a single key of the host_id dict.
    tls_ja3 = ESQueryBuilder.clean_host_id(host_id.get('tls.ja3', []), **filter_ja3)

    dataframe_proto.extend(
        (hit['_source']['ip'], service['proto'], service['port'],
         entry['first_seen'], entry['last_seen'], entry['app_proto'])
        for service in services
        for entry in service['values']
    )
    dataframe_ja3.extend(
        (hit['_source']['ip'], ja3['agent'], ja3['hash'], ja3['first_seen'], ja3['last_seen'])
        for ja3 in tls_ja3
    )

df = pd.DataFrame(dataframe_proto, columns=['IP', 'Proto', 'Port', 'First Seen', 'Last Seen', 'App Proto'])
df.sort_values('IP')



In [None]:
# Tabulate the ja3 rows collected alongside the services, ordered by IP.
ja3_columns = ['IP', 'Agent', 'Hash', 'First Seen', 'Last Seen']
df = pd.DataFrame(dataframe_ja3, columns=ja3_columns)
df.sort_values('IP')

# Widgets

In [3]:
# Time-range selection: a datetime picker per bound, each followed by a text
# box linked to the picker's value.
title = widgets.Label(value='Select time range')
from_date, to_date = (
    widgets.DatetimePicker(description=label) for label in ('Start date', 'End date')
)
text_from_date, text_to_date = widgets.Text(), widgets.Text()

display(title, from_date, text_from_date, to_date, text_to_date)

# The links are assigned to names so the cell does not end on a bare
# expression, which would be echoed as an extra output.
link_from = widgets.jslink((from_date, 'value'), (text_from_date, 'value'))
link_to = widgets.jslink((to_date, 'value'), (text_to_date, 'value'))

Label(value='Select time range')

DatetimePicker(value=None, description='Start date')

Text(value='')

DatetimePicker(value=None, description='End date')

Text(value='')

In [None]:
# Compare the query built from the widget-selected time range with the one
# built from a hard-coded range.

# A picker's value stays None until a date is chosen; fail with an explicit
# message instead of an AttributeError on `None.isoformat()`.
if from_date.value is None or to_date.value is None:
    raise ValueError('Select both a start date and an end date before running this cell')

builder = ESQueryBuilder()
builder.set_index('host_id-1')
builder.set_page_size(50)
builder.set_tenant(1)
builder.set_time_filter('host_id.services.values.first_seen')
builder.set_from_date(from_date.value.isoformat())
builder.set_to_date(to_date.value.isoformat())

builder2 = ESQueryBuilder()
builder2.set_index('host_id-1')
builder2.set_page_size(50)
builder2.set_tenant(1)
builder2.set_time_filter('host_id.services.values.first_seen')
builder2.set_from_date('2023-05-10T09:00:00Z')
builder2.set_to_date('2023-05-30T22:00:00Z')

# Dict forms of both queries (ESQueryBuilder exposes __dict__ as a callable).
d = builder.__dict__()
d2 = builder2.__dict__()

# True only when both builders produce exactly the same query.
display(d == d2)

In [None]:
# Slider-based time range: both sliders span [now - 365 weeks, now], expressed
# as integer epoch seconds.
# NOTE(review): weeks=365 is roughly seven years — confirm this was not meant
# to be days=365.
now = timezone.now()
from_date_ts = int((now - timedelta(weeks=365)).timestamp())
to_date_ts = int(now.timestamp())

print(from_date_ts)

title = widgets.Label(value='Select time range')
from_date = widgets.IntSlider(description='Start date', min=from_date_ts, max=to_date_ts, continuous_update=True)
to_date = widgets.IntSlider(description='End date', min=from_date_ts, max=to_date_ts, continuous_update=True)

output = widgets.Label()


def update(change):
    """Show the moved slider's value as an ISO 8601 UTC timestamp in `output`.

    `change` is the dict passed by `observe`: 'owner' is the slider that moved
    and 'new' is its new value (epoch seconds).
    """
    # Build an aware UTC datetime directly. Going through a naive
    # utcfromtimestamp() and then astimezone() would reinterpret the value as
    # local time and shift it by the local UTC offset.
    moment = datetime.fromtimestamp(change["new"], tz=dt_tz.utc)
    # A Label is not a context manager, so write to its value instead of
    # printing inside a `with output:` block.
    output.value = f'{change["owner"].description}: {moment.isoformat()}'


from_date.observe(update, names="value")
to_date.observe(update, names="value")

display(title, from_date, to_date, output)

In [11]:
# IPython help lookup (trailing `?`): prints the signature, docstring and
# source file of builder.get_data. Development aid only.
builder.get_data?

[0;31mSignature:[0m [0mbuilder[0m[0;34m.[0m[0mget_data[0m[0;34m([0m[0mapi[0m[0;34m:[0m [0mstr[0m[0;34m,[0m [0mqParams[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m [0mignore_time[0m[0;34m=[0m[0;32mFalse[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      ~/.pyenv/versions/suricata-analytics/lib/python3.11/site-packages/surianalytics/connectors.py
[0;31mType:[0m      method