%pip install --quiet git+https://github.com/wagov/wasoc-notebook.git

General imports and setup. Convert the cell above to a Python code cell and run it once to install all the required libraries.

In [None]:
import os, json
import pandas as pd
from string import Template
from textwrap import shorten, dedent
from azure_notebook_reporting import KQL, BlobPath

# Setup for running in Azure ML
# os.environ.update(json.load(open(f"{os.environ['HOME']}/cloudfiles/code/nbenv.json")))
# path = BlobPath(os.environ["AZURE_STORAGE_CONTAINER"], os.environ["AZURE_SUBSCRIPTION"])

# Setup for running using local filesystem
path = BlobPath("..")

report_date, month = pd.Timestamp("today").strftime("%B %Y"), pd.Timestamp("today").strftime("%b%Y")

kp = KQL(path).set_agency(os.environ["REPORT_AGENCY"])
kp_sample = KQL(path).set_agency(os.environ["REPORT_SAMPLE_AGENCY"])
sample_only = False # if True, build report with only mock data
# if False, build report as usual, only substituting missing data with sample data
# sections should 'anonymise' sample data prior to rendering

report = kp.Page(
    title=f"WA SOC Preview Sentinel Report", 
    background = "https://raw.githubusercontent.com/wagov/wasoc-notebook/main/notebooks/background.svg",
    entity = kp.agency_name,
    date = report_date,
    body = "#121212",
    links = "#084595",
    titles = "#CC5733",
    footer = "#808080", 
)
%matplotlib agg

Execute queries against external APIs and gather some statistics about the results.

In [None]:
%%capture
queries = {
    "Incident Details": "siemhealth/incidentdetail.kql",
    "Alert Details": "siemhealth/alertdetail.kql",
    "Local Admins": "siemhealth/localadminlogons.kql",
    "Email Delivery": "siemhealth/emaildelivery.kql",
    "External Files": "siemhealth/externaldownloads.kql",
    "Guest Tenants": "siemhealth/guestdownloads.kql",
    "On Premise Logons": "siemhealth/identitylogonevents.kql",
    "Azure AD Logons": "siemhealth/signins.kql",
    "Operating Systems": "siemhealth/operatingsystems.kql",
    "Ingestion Detail": "siemhealth/usage.kql",
}
querystats = {}
for key, kql in queries.items():
    if sample_only:
        queries[key] = kp.kql2df((kp.kql / kql).open().read() + "| take 0")
    else:
        queries[key] = kp.kql2df(kql)
    if queries[key].count().max() == 1:
        querystats[key] = [0, f"{queries[key].columns[0]} - {queries[key].iloc[0,0]}"]
        queries[key] = kp_sample.kql2df(kql)
    else:
        querystats[key] = [queries[key].count().max(), len(queries[key].columns)]
querystats = pd.DataFrame(querystats).T.rename(columns={0: "Rows", 1: "Columns"}).sort_values("Rows")

The two cells below build an easy-to-edit set of markdown templates that are used to populate the report.

In [None]:
# Display the `path` attribute of the KQL helper as this cell's output.
# NOTE(review): presumably this is the BlobPath passed to KQL(path) — confirm.
kp.path

In [None]:
section = "Executive Summary"
report[section] = f"*{kp.agency_name} - {report_date}*"
# Prefer a summary written for this agency and month; fall back to the shared
# default.md when no specific file exists.
exec_summary = kp.nbpath / "exec_summaries" / f"{kp.agency}-{month}.md"
if not exec_summary.exists():
    exec_summary = kp.nbpath / "exec_summaries" / "default.md"
# Read inside a `with` block so the file handle is closed once the text is loaded.
with exec_summary.open() as summary_file:
    report[section] += summary_file.read()

# dedent() reduces the whitespace-only line to a bare newline, so this appends
# two newlines before the statistics table.
report += dedent("""
    
""")

# Per-query row/column statistics gathered when the queries were executed.
report += KQL.minitable(querystats)

section = "Tactics and Rules"
report[section] = dedent("""
    
""")

# querystats["Rows"] is 0 for a query whose results were replaced by sample data;
# in that case "Columns" holds the description recorded for the missing data.
if querystats["Rows"]["Incident Details"] == 0:
    report[section] += "*MISSING DATA: Please confirm there are [analytics rules](https://learn.microsoft.com/en-us/azure/sentinel/detect-threats-built-in) configured on the Analytics page.*"
    # Keep the notice in its own paragraph so its "**" markers do not fuse with the
    # closing "*" above, and close the parenthesis opened before the description.
    report[section] += f"\n\n**SAMPLE DATASET BELOW (substituting for {querystats['Columns']['Incident Details']})**\n\n"

# Count distinct incidents per Severity/Tactics/Title row, with one column per
# (Status, Classification) pair; all-empty columns are dropped.
incident_groups = queries["Incident Details"].groupby(["Status", "Classification", "Severity", "Tactics", "Title"])
incident_summary = incident_groups["IncidentNumber"].nunique().unstack(level=[0,1]).dropna(axis=1, how="all").fillna(0)
incident_summary["Total"] = incident_summary.sum(numeric_only=True, axis=1)
# Keep the five rows with the highest totals; zeros are blanked for readability.
incident_summary = incident_summary.sort_values("Total", ascending=False).replace({0: ""}).head(5)
report[section] += KQL.minitable(incident_summary)

# NOTE(review): dfago(frame, "7D") presumably keeps only the last 7 days — confirm.
df, interval = KQL.dfago(queries["Incident Details"], "7D"), "3H"
# Distinct incidents per 3-hour bucket, split by tactic and then by rule title.
tactics = df.groupby("Tactics").resample(interval, on="TimeGenerated")["IncidentNumber"].nunique().reset_index()
report[section] += KQL.df2fig(tactics, "Detections by Tactic", "TimeGenerated", "IncidentNumber", "Tactics")
rules = df.groupby("Title").resample(interval, on="TimeGenerated")["IncidentNumber"].nunique().reset_index().rename(columns={"Title": "Rule"})
report[section] += KQL.df2fig(rules, "Detections by Rule", "TimeGenerated", "IncidentNumber", "Rule")

In [None]:
section = "Cost Optimisation"
# Start the section with two newlines; the figure is appended below.
report[section] = """

"""
# NOTE(review): dfago(frame, "5D") presumably keeps only the last 5 days — confirm.
df = KQL.dfago(queries["Ingestion Detail"], "5D")
# Build the GB column with assign() and vectorised division: this yields a new
# frame instead of writing into what dfago returned (which may be a filtered
# slice) and avoids a Python-level call per row.
# NOTE(review): assumes IngestionVolume is in MB so /1000 gives GB — confirm.
df = df.assign(GB=df["IngestionVolume"] / 1000)
# NOTE(review): Billable is compared with the string "True"; if the column is a
# real boolean this filter matches nothing — confirm the column's dtype.
report[section] += KQL.df2fig(df[df["Billable"] == "True"], "Billable Ingestion (GB) by Table", "TimeGenerated", "GB", "Table")

In [None]:

from IPython.display import IFrame

# Alternative outputs (disabled): save HTML/PDF and copy them to the agency's reports folder.
# report.save_html("report.html")
# (kp.reports / kp.agency / f"{kp.agency}-{month}-siemhealth.html").write_text(open("report.html").read())
# report.save_pdf("report.pdf")
# (kp.reports / kp.agency / f"{kp.agency}-{month}-siemhealth.pdf").write_bytes(open("report.pdf", "rb").read())

# Render the PDF and keep the HTML it returns next to it.
html = report.save_pdf("report.pdf", return_html=True)
# Write inside a `with` block so the file is flushed and closed before the cell ends.
with open("report.pdf.html", "w") as html_file:
    html_file.write(html)
# Last expression of the cell: embed the generated PDF in the notebook output.
IFrame("report.pdf", width=1200, height=800)