In [None]:
import os, json
import esparto
import seaborn as sns
import pandas as pd
from datetime import date
from azure_notebook_reporting import KQL, BlobPath

# Merge the key/value pairs stored in ~/.nbenv.json into the process
# environment (presumably this is where the AZURE_* settings read below come
# from - confirm). The context manager closes the file handle deterministically
# instead of leaving it open until garbage collection.
with open(f"{os.environ['HOME']}/.nbenv.json") as env_file:
    os.environ.update(json.load(env_file))

# Notebook-wide chart defaults: small figures and fonts, legend pinned to the
# upper right, constrained layout enabled.
sns.set_theme(
    style="white",
    context="paper",
    font_scale=0.7,
    rc={
        "figure.figsize": (4, 3),
        "figure.constrained_layout.use": True,
        "legend.loc": "upper right",
    },
)

# Query helper used by every later cell; built from the storage container and
# subscription named in the environment, then bound to one agency.
kp = KQL(BlobPath(os.environ["AZURE_STORAGE_CONTAINER"], os.environ["AZURE_SUBSCRIPTION"])).set_agency("...")


In [None]:
# Run the saved SIEM-health queries and keep each result as a DataFrame.
# The first three use the helper's default arguments and are issued in the
# order listed.
incidents, alerts, devices = (
    kp.kql2df(query_path)
    for query_path in (
        "siemhealth/incidentdetail.kql",
        "siemhealth/alertdetail.kql",
        "siemhealth/operatingsystems.kql",
    )
)
# The usage query is the only one given an explicit timespan
# ("P7D" - presumably an ISO-8601 duration of seven days; confirm against kql2df).
usage = kp.kql2df("siemhealth/usage.kql", timespan="P7D")


In [None]:
from textwrap import shorten
import matplotlib.dates as mdates


def df2fig(dataframe, title, x, y, split, maxsplit=10, kind="area", quantile=0.9, yclip=10):
    """Plot ``y`` summed per ``x`` with one series per ``split`` label.

    Only the ``maxsplit`` labels with the largest total ``y`` keep their own
    series; every other label is folded into a single "Other" series. When the
    largest label total exceeds the ``quantile`` of all label totals by more
    than a factor of ``yclip``, the labels above that quantile are drawn on a
    separate "Outliers" figure so they do not flatten the remaining series.

    Args:
        dataframe: Source data; it is copied and never modified.
        title: Title of the main figure (the outlier figure derives its title
            from it).
        x: Column used for the x axis.
        y: Numeric column that is summed per (x, split) pair.
        split: Column whose values become the individual series.
        maxsplit: Number of labels that keep their own series.
        kind: Plot kind forwarded to ``DataFrame.plot``.
        quantile: Quantile of the label totals used as the outlier threshold.
        yclip: Max-to-threshold ratio above which outliers are split off.

    Returns:
        A list of figures, outlier figure first when there is one. The list is
        empty when there is nothing to plot.
    """
    df = dataframe.copy(deep=True)
    if df.empty:
        # Nothing to aggregate; plotting an empty frame would raise.
        return []

    totals = df.groupby(split)[y].sum().sort_values(ascending=False)
    minor_labels = totals.index[maxsplit:]
    if len(minor_labels):
        df[split] = df[split].replace({label: "Other" for label in minor_labels})
        # Re-total after relabelling so the outlier split below classifies the
        # labels that are actually present in df. Using the pre-relabel totals
        # would leave "Other" in neither partition and silently drop its rows.
        totals = df.groupby(split)[y].sum().sort_values(ascending=False)

    upper = totals.quantile(quantile)
    yspread = totals.max() / upper
    if yspread > yclip:
        is_outlier = totals > upper
        outlier_labels = set(totals[is_outlier].index)
        regular_labels = set(totals[~is_outlier].index)
        frames = {
            title: df[df[split].isin(regular_labels)],
            f"{title} (Outliers > {quantile})": df[df[split].isin(outlier_labels)],
        }
    else:
        frames = {title: df}

    figures = []
    for figure_title, part in frames.items():
        if part.empty:
            # A partition can be empty (e.g. yclip below 1); skip it rather
            # than let the plot call fail.
            continue
        wide = part.groupby([x, split])[y].sum().unstack()
        # Order the columns by total so the largest series is plotted first.
        wide = wide[wide.sum(numeric_only=True).sort_values(ascending=False).index]
        ax = wide.plot(kind=kind, title=figure_title)
        handles, labels = ax.get_legend_handles_labels()
        # Reverse the legend entries (as concrete lists, not one-shot
        # iterators) relative to the plotting order.
        ax.legend(list(reversed(handles)), list(reversed(labels)), title=split)
        figures.append(ax.figure)
    figures.reverse()
    return figures


In [None]:
# Sample the date once so the heading and the summary file name can never
# disagree (two separate today() calls could straddle midnight).
today = date.today()
report_date, month = today.strftime("%B %Y"), today.strftime("%b%Y")

# Prefer the agency's executive summary for this month; fall back to the
# shared default text when no such file exists.
summary_dir = kp.nbpath / "exec_summaries"
exec_summary = summary_dir / f"{kp.agency}-{month}.md"
if not exec_summary.exists():
    exec_summary = summary_dir / "default.md"
# Read through a context manager so the file handle is closed promptly.
with exec_summary.open() as summary_file:
    summary_text = summary_file.read()

report = esparto.Page(title="SIEM Health Report")
report += f"#### {kp.agency_name} - {report_date}\n{summary_text}"

report["Incidents and Alerts"] = df2fig(incidents, "Detections by Tactic", "TimeGenerated", "incidents", "Tactics")
report["Incidents and Alerts"] += df2fig(incidents, "Detections by Rule", "TimeGenerated", "incidents", "Title")

# Vectorised division instead of a per-element lambda.
# NOTE(review): dividing by 1000 assumes IngestionVolume is reported in MB - confirm.
usage["GB"] = pd.to_numeric(usage.IngestionVolume) / 1000
report["Usage"] = df2fig(usage, "Ingestion (GB) by Table", "TimeGenerated", "GB", "Table")
# Billable is compared against the string "True", not a boolean.
report["Usage"] += df2fig(usage[usage["Billable"] == "True"], "Billable Ingestion (GB) by Table", "TimeGenerated", "GB", "Table")

report.save_html("report.html")
report.save_pdf("report.pdf")
