## Usage
As a first step, make sure `azure_notebook_reporting` is installed by running the command below once in a code cell.

```python
%pip install --quiet git+https://github.com/wagov/wasoc-notebook.git
```

To run in Azure ML
```python
import os, json, pandas as pd
from azure_notebook_reporting import KQL, BlobPath

# Load the notebook environment settings into os.environ. The context manager
# closes nbenv.json as soon as it has been parsed instead of leaking the handle.
with open(f"{os.environ['HOME']}/cloudfiles/code/nbenv.json") as env_file:
    os.environ.update(json.load(env_file))

# Build the storage path from the container and subscription settings loaded above.
path = BlobPath(os.environ["AZURE_STORAGE_CONTAINER"], os.environ["AZURE_SUBSCRIPTION"])
kp = KQL(path, template="markdown/report-sentinel.md")
```

To run locally
```python
import os, json, pandas as pd
from azure_notebook_reporting import KQL, BlobPath
# Local runs pass the relative path ".." to BlobPath instead of Azure storage settings.
path = BlobPath("..")
# Same report template as the Azure ML example.
kp = KQL(path, template="markdown/report-sentinel.md")
```

In [None]:
import os, json, pandas as pd
from azure_notebook_reporting import KQL, BlobPath
from IPython import display

def show(rpitem):
    """Render a report item inline in the notebook.

    Calls ``rpitem.to_html(notebook_mode=True)`` and returns the resulting
    markup wrapped in an ``IPython.display.HTML`` object.
    """
    markup = rpitem.to_html(notebook_mode=True)
    return display.HTML(markup)

%matplotlib agg

path = BlobPath("..")
template = "markdown/report-sentinel.md"
queries = {
    "Users and Azure AD Logins": "siemhealth/signins.kql",
    "Email Delivery": "siemhealth/emaildelivery.kql",
    "Incident Details": "siemhealth/incidentdetail.kql",
    "On Premise Logins": "siemhealth/identitylogonevents.kql",
    "External Files": "siemhealth/externaldownloads.kql",
    "Guest Tenants": "siemhealth/guestdownloads.kql",
    "Operating Systems": "siemhealth/operatingsystems.kql",
    "Admin Logins (device)": "siemhealth/localadminlogons.kql",
    "Ingestion Detail": "siemhealth/usage.kql",
}

kp = KQL(path, template)
kp.set_agency(os.environ["REPORT_AGENCY"], sample_agency=os.environ["REPORT_SAMPLE_AGENCY"], sample_only=False)
kp.load_queries(queries)

kp.init_report(
    background = "https://raw.githubusercontent.com/wagov/wasoc-notebook/main/notebooks/background.svg",
    entity = kp.agency_name,
    date = kp.today.strftime("%B %Y"),
    body = "#121212",
    links = "#084595",
    titles = "#CC5733",
    footer = "#808080"
)

rp = kp.report

section = "Executive Summary"
# Prefer an agency- and month-specific summary file; otherwise use default.md.
exec_summary = kp.nbpath / f"markdown/exec_summaries/{kp.agency}-{kp.today.strftime('%b%Y')}.md"
if not exec_summary.exists():
    exec_summary = kp.nbpath / "markdown/exec_summaries/default.md"

# Read the summary inside a context manager so the file handle is closed
# deterministically rather than left for the garbage collector.
with exec_summary.open() as summary_file:
    summary_text = summary_file.read()

rp[section] = kp.report_sections[section].substitute(
    agency = kp.agency_name,
    date = kp.today.strftime("%B %Y"),
    summary = summary_text
)

# Append the per-query statistics below the summary text.
rp[section] += kp.querystats
show(rp[section])

## Composing a report

Set up each section, loading markdown fragments from the `kp.report_sections` dict and dataframes from the `kp.queries` dict.

In [None]:
section = "Users and Azure AD Logins"
signins = kp.queries[section][1]
# NOTE(review): count() tallies non-null rows, not unique values; this is only a
# distinct-user figure if the query returns one row per user — confirm.
distinct_users = signins["UserPrincipalName"].count()
rp[section] = kp.report_sections[section].substitute(users=distinct_users)

if kp.querystats["Rows"][section] == 0:
    rp[section] += "*MISSING DATA: Please verify the [Microsoft Defender for Endpoint connection](https://learn.microsoft.com/en-us/azure/sentinel/connect-microsoft-365-defender?tabs=MDE)*"
    rp[section] += f"** SAMPLE DATASET BELOW (replacing {kp.querystats['Columns'][section]}) **"


# Null out zero cells so count() below only tallies non-zero entries. mask() is
# used instead of replace(0, None) because the meaning of a None replacement
# value has differed between pandas releases (older ones padded/forward-filled).
nonzero = signins.mask(signins == 0)
df = nonzero.rename(columns={"TableName": "SignIns"}).groupby("SignIns").count()
# Transpose so each counted column becomes a row, then drop the user-name row.
df = df.T.rename(columns={"PrimaryResult": "Percent of Users"}).sort_values("Percent of Users").drop("UserPrincipalName")
# Express each count as a percentage of the user total computed above.
df = df / distinct_users * 100
rp[section] += df.plot(kind="barh", title="Azure AD SignIn types over past 30 days").figure
show(rp[section])

In [None]:
section = "Email Delivery"
rp[section] = ""
emails = kp.queries[section][1]

query_was_empty = kp.querystats["Rows"][section] == 0
if query_was_empty:
    rp[section] += "*MISSING DATA: Please verify the [Microsoft Defender for Office 365 connection](https://learn.microsoft.com/en-us/microsoft-365/security/office-365-security/step-by-step-guides/connect-microsoft-defender-for-office-365-to-microsoft-sentinel?view=o365-worldwide)*"
    rp[section] += f"** SAMPLE DATASET BELOW (Sentinel Table {kp.querystats['Columns'][section]}) **"
    # Swap every DeliveryAction value for its hash() in the sample rows.
    emails["DeliveryAction"] = emails["DeliveryAction"].apply(hash)


def _delivery_over_time(frame, interval):
    """Sum Count per direction/action in `interval` buckets of TimeGenerated.

    Returns a wide frame with DeliveryAction and EmailDirection unstacked
    into columns, ready for an area plot.
    """
    grouped = frame.groupby(["EmailDirection", "DeliveryAction"])
    counts = grouped.resample(interval, on="TimeGenerated").agg({"Count": "sum"})
    counts = counts.sort_values("Count")
    return counts.unstack("DeliveryAction").unstack("EmailDirection")


# Overall totals per delivery action and direction, expressed in thousands.
totals = emails.groupby(["DeliveryAction", "EmailDirection"]).agg({"Count": "sum"})
totals["Count"] = totals["Count"] / 1000
df = totals.unstack("EmailDirection").sort_values(("Count", "Inbound"), ascending=False)
rp[section] += df.plot(kind="barh", y="Count", stacked=True, title="Total email delivered (thousands) in the past 30 days").figure

# One colormap instance shared by both area charts.
viridis = KQL.sns.color_palette("viridis", as_cmap=True)

df = _delivery_over_time(emails, "6H")
rp[section] += df.plot(kind="area", y="Count", colormap=viridis, title="Email Delivery over past 30 days, 6 hour intervals").figure

# Same chart restricted to the rows KQL.latest_data returns for "7D".
df = _delivery_over_time(KQL.latest_data(emails, "7D"), "1H")
rp[section] += df.plot(kind="area", y="Count", colormap=viridis, title="Email Delivery over past 7 days, 1 hour intervals").figure

show(rp[section])

In [None]:
section = "Tactics and Rules"
rp[section] = kp.report_sections[section].substitute()

if kp.querystats["Rows"]["Incident Details"] == 0:
    rp[section] += "*MISSING DATA: Please confirm there are [analytics rules](https://learn.microsoft.com/en-us/azure/sentinel/detect-threats-built-in) configured on the Analytics page.*"
    rp[section] += f"** SAMPLE DATASET BELOW (substituting for {kp.querystats['Columns']['Incident Details']}) **"

incidents = kp.queries["Incident Details"][1]
# TriageHours may contain the literal string "None"; treat it as 0 before converting.
incidents["TriageHours"] = pd.to_numeric(incidents["TriageHours"].replace({"None": '0'}))

group_keys = ["Status", "Classification", "Severity", "Tactics", "Rule"]
df = incidents.groupby(group_keys)[["IncidentNumber", "TriageHours", "OpenHours"]]
df = df.agg({"IncidentNumber": "nunique", "TriageHours": "sum", "OpenHours": "sum"})
# Divide the summed hours by the number of distinct incidents in each group.
df["TriageHours"] = df["TriageHours"] / df["IncidentNumber"]
df["OpenHours"] = df["OpenHours"] / df["IncidentNumber"]
df = df.rename(columns={"IncidentNumber": "Incidents"}).convert_dtypes()
df = df.sort_values("Incidents", ascending=False)

rp[section] += "### Top 10 created incidents over past 30 days"
rp[section] += df.head(10).round(2)

# If the busiest group has more than five times the incidents of the runner-up,
# leave it out of the charts so it does not flatten every other bar. Positional
# access goes through .iloc (integer keys on a labelled index are deprecated)
# and the length guard avoids an IndexError when fewer than two groups exist.
noisyrule = ""
incident_counts = df["Incidents"]
if len(incident_counts) > 1 and incident_counts.iloc[0] / incident_counts.iloc[1] > 5:
    # Element 4 of the index tuple is the "Rule" level of the grouping above.
    noisyrule = f' excluding outlier "{df.index.values[0][4]}"'
    df = df.iloc[1:]

rp[section] += df["TriageHours"].head(10).unstack(level=[0,1,2]).plot(kind="barh", stacked=False, width=1, title=f'Average Triage Hours over last 30 days{noisyrule}').figure
rp[section] += df["OpenHours"].head(10).unstack(level=[0,1,2]).plot(kind="barh", stacked=False, width=1, title=f'Average Open Hours over last 30 days{noisyrule}').figure
rp[section] += df["Incidents"].head(10).unstack(level=[0,1,2]).plot(kind="barh", stacked=False, width=1, title=f'Average Detections over last 30 days{noisyrule}').figure
show(rp[section])

In [None]:
section = "Admin Logins (device)"
admins = kp.queries[section][1]

# Count the distinct account names whose row reports more than five devices.
over_five_devices = admins["Devices"] > 5
admincount = admins.loc[over_five_devices, "AccountName"].nunique()
rp[section] = kp.report_sections[section].substitute(admincount = admincount)

query_was_empty = kp.querystats["Rows"][section] == 0
if query_was_empty:
    rp[section] += "*MISSING DATA: Please verify the [Microsoft Defender for Endpoint connection](https://learn.microsoft.com/en-us/azure/sentinel/connect-microsoft-365-defender?tabs=MDE)*"
    rp[section] += f"** SAMPLE DATASET BELOW (replacing {kp.querystats['Columns'][section]}) **"

# Horizontal bar chart of the first ten rows, labelled by account name.
first_ten = admins.head(10)
chart = first_ten.plot(kind="barh", x="AccountName", title="Top 10 admin user signins over past 30 days")
rp[section] += chart.figure
show(rp[section])



In [None]:
section = "External Files"
rp[section] = ""
spfiles = kp.queries[section][1]

query_was_empty = kp.querystats["Rows"][section] == 0
if query_was_empty:
    rp[section] += "*MISSING DATA: Please verify the [Microsoft Defender for Endpoint connection](https://learn.microsoft.com/en-us/azure/sentinel/connect-microsoft-365-defender?tabs=MDE)*"
    rp[section] += f"** SAMPLE DATASET BELOW (replacing {kp.querystats['Columns'][section]}) **"

# First ten rows, every column except the last, indexed by FileUrl.
top_files = spfiles.iloc[:10, :-1].set_index("FileUrl")
rp[section] += top_files
show(rp[section])

In [None]:
section = "Guest Tenants"
rp[section] = ""
tenants = kp.queries[section][1]

query_was_empty = kp.querystats["Rows"][section] == 0
if query_was_empty:
    rp[section] += "*MISSING DATA: Please verify the [Microsoft Defender for Endpoint connection](https://learn.microsoft.com/en-us/azure/sentinel/connect-microsoft-365-defender?tabs=MDE)*"
    rp[section] += f"** SAMPLE DATASET BELOW (replacing {kp.querystats['Columns'][section]}) **"

# First ten rows, every column except the last, indexed by guest domain.
top_tenants = tenants.iloc[:10, :-1].set_index("Guest Domain")
rp[section] += top_tenants
show(rp[section])

In [None]:
section = "Cost Optimisation"
rp[section] = kp.report_sections[section].substitute()

ingestion = kp.queries["Ingestion Detail"][1]
# Vectorised division instead of a per-element lambda.
# NOTE(review): presumably IngestionVolume is in MB, making this GB — confirm against usage.kql.
ingestion["GB"] = ingestion["IngestionVolume"] / 1000

# Total GB per table: pivot the tables into columns, then sum each column.
df = pd.DataFrame(ingestion.pivot(columns="Table")["GB"].sum())
# Column 0 holds the total; the daily figure divides it by a fixed 30 days.
df["Daily GB"] = df[0] / 30
df["30 days"] = df[0]
df = df.drop(0, axis=1)
df.loc["Total"] = df.sum()
df = df.sort_values(by="Daily GB", ascending=False)
# Eleven rows: the "Total" row (largest, so sorted first) plus the top ten tables.
rp[section] += df.head(11).round(1)

title = "Top 10 Ingestion tables over the past 30 days, daily average"
# Skip the first row ("Total") so the chart shows only individual tables.
rp[section] += pd.DataFrame(df["Daily GB"].head(11)[1:]).plot(kind="barh", title=title, rot=0).figure

df = KQL.latest_data(ingestion, "4D")
# label_size supplies the boolean "oversized" column used below.
df = KQL.label_size(df, "Table", "GB")

oversized_tables = ", ".join(df[df.oversized]["Table"].unique())
title = "Ingestion by table over the past 4 days, 1 hour intervals"
if oversized_tables:
    # Chart the high-volume tables separately so they do not dwarf the rest.
    oversized = df[df.oversized].groupby(["TimeGenerated", "Table"])["GB"].agg("sum").unstack("Table").iloc[:, ::-1]
    rp[section] += oversized.plot(kind="area", stacked=True, title=f"High Volume {title}").figure
    # Elementwise comparison on the Series keeps only the rows not flagged as oversized.
    df = df[df.oversized == False]
df = df.groupby(["TimeGenerated", "Table"])["GB"].agg("sum").unstack("Table").iloc[:, ::-1]
rp[section] += df.plot(kind="area", title=title, stacked=True).figure
show(rp[section])

In [None]:
# Final step: call report_pdf() on the KQL helper.
# NOTE(review): presumably this renders the sections collected in kp.report to a PDF — confirm in azure_notebook_reporting.
kp.report_pdf()