#  MITRE ATT&CK for Azure Sentinel
 <details>
     <summary> <u>Details...</u></summary>

 **Notebook Version:** 1.0<br>
 **Python Version:** Python 3.6 (including Python 3.6 - AzureML)<br>
 **Required Packages**: msticpy, plotly, ruamel.yaml, requests_html, attackcti, pandas, numpy, matplotlib, ipywidgets, ipython<br>
 **Platforms Supported**:
 - Azure Notebooks ML Studio
 - Azure Notebooks DSVM
 - OS Independent

 <br> 

 </details>

This notebook brings together analysis and visualization of all analytics queries (hunting and detection) in the Azure Sentinel community GitHub repository. Optionally, you can also connect to your own workspace and use the REST API to gather similar data for the queries enabled in your environment. The notebook then cleans, processes, and enriches the data and converts it into structured tabular form. The final data can be visualized in a number of ways; this notebook demonstrates heatmaps, radar charts, and an embedded ATT&CK Navigator view.

## Setup

In [None]:
#imports 
import requests
import zipfile
import io
import re
import glob
import IPython
import pandas as pd
from ipywidgets import widgets, Layout
from IPython.display import display, HTML
from pathlib import Path

import json
import csv
import os

import plotly.graph_objects as go
import matplotlib.pyplot as plt
import sys


REQ_PYTHON_VER=(3, 6)
REQ_MSTICPY_VER=(0, 6, 0)

display(HTML("<h3>Starting Notebook setup...</h3>"))
if Path("./utils/nb_check.py").is_file():
    from utils.nb_check import check_python_ver, check_mp_ver

    check_python_ver(min_py_ver=REQ_PYTHON_VER)
    try:
        check_mp_ver(min_msticpy_ver=REQ_MSTICPY_VER)
    except ImportError:
        !pip install --upgrade msticpy
        if "msticpy" in sys.modules:
            importlib.reload(sys.modules["msticpy"])
        else:
            import msticpy
        check_mp_ver(REQ_MSTICPY_VER)

# If not using Azure Notebooks, install msticpy with
# !pip install msticpy

from msticpy.nbtools import nbinit
extra_imports = [
    "msticpy.data.uploaders.loganalytics_uploader, LAUploader",
    "plotly.subplots, make_subplots",
    "pandas, json_normalize",
    "ruamel.yaml, YAML",
    "requests_html, HTMLSession",
    "datetime, date",
    "attackcti, attack_client",
]
nbinit.init_notebook(
    namespace=globals(),
    additional_packages=["plotly", "ruamel.yaml", "requests_html", "attackcti", "qgrid"],
    extra_imports=extra_imports,
);

WIDGET_DEFAULTS = {
    "layout": Layout(width="95%"),
    "style": {"description_width": "initial"},
}

#Set pandas options
pd.get_option('max_rows',10)
pd.set_option('max_colwidth',50)

## Data Acquisition

You can retrieve detections and hunting queries using either of the two methods below.

### Download From Github
In the first method, you download the templates directly from the public Azure Sentinel GitHub repository. The templates are available within the Analytics pane and must be explicitly enabled and onboarded. 

Alternatively, you can point it to your private GitHub repository if one is available. To read more about the Azure Sentinel DevOps process, refer to the blog post "Deploying and Managing Azure Sentinel as Code".

In [None]:
def get_sentinel_queries_from_github(git_url, outputdir):
    """Download a repository ZIP and extract the Sentinel query folders.

    Only archive members under ``Azure-Sentinel-master/Detections/`` and
    ``Azure-Sentinel-master/Hunting Queries/`` are extracted.

    Args:
        git_url: URL of the ZIP archive to download.
        outputdir: Directory the selected members are extracted into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a ZIP archive.
    """
    r = requests.get(git_url)
    # Fail with a clear HTTP error instead of trying to unzip an error page.
    r.raise_for_status()

    wanted_prefixes = (
        "Azure-Sentinel-master/Detections/",
        "Azure-Sentinel-master/Hunting Queries/",
    )

    # The context manager guarantees the archive is closed even if
    # extraction fails part-way through.
    with zipfile.ZipFile(io.BytesIO(r.content), mode="r") as archive:
        for member in archive.namelist():
            if member.startswith(wanted_prefixes):
                archive.extract(member, path=outputdir)
    print("Downloaded and Extracted Files successfully")


def parse_yaml(parent_dir, child_dir):
    """Load every ``*.yaml`` query under ``parent_dir/child_dir`` into a DataFrame.

    Each YAML document is flattened with ``json_normalize`` and gets a
    ``GithubURL`` column derived from its local path.

    Args:
        parent_dir: Root folder of the extracted repository.
        child_dir: Sub-folder to scan recursively; also stored verbatim in
            the ``DetectionType`` column.

    Returns:
        DataFrame with one row per YAML file plus the ``GithubURL``,
        ``DetectionType`` and ``DetectionService`` columns. It has no rows
        when no YAML file is found.
    """
    sentinel_repourl = "https://github.com/Azure/Azure-Sentinel/blob/master"

    # Collect the list of files recursively under the folder
    yaml_queries = glob.glob(f"{parent_dir}/{child_dir}/**/*.yaml", recursive=True)

    # Safe loader: the files are plain data, no arbitrary object construction
    parsed_yaml = YAML(typ="safe")

    # Gather the per-file frames and concatenate once; DataFrame.append in a
    # loop copies the accumulated data every time and no longer exists in
    # pandas 2.x.
    frames = []
    for query in yaml_queries:
        with open(query, "r", encoding="utf-8", errors="ignore") as f:
            parsed_yaml_df = json_normalize(parsed_yaml.load(f))
        # glob returns OS-native separators below parent_dir; URLs need "/".
        parsed_yaml_df["GithubURL"] = query.replace(
            parent_dir, sentinel_repourl
        ).replace("\\", "/")
        frames.append(parsed_yaml_df)

    if frames:
        df = pd.concat(frames, ignore_index=True, sort=True)
    else:
        df = pd.DataFrame()

    df["DetectionType"] = child_dir
    df["DetectionService"] = "Azure Sentinel Community Github"

    return df


def clean_and_preprocess_data(df):
    """Flatten the parsed query metadata into one row per combination.

    Queries that declare ``requiredDataConnectors`` ("built-in") are exploded
    by tactic, technique, connector and data type. Queries without that field
    ("custom") are exploded by tactic and technique; their connector id is
    taken from the parent folder in ``GithubURL`` and the data type is that
    id plus ``_CL``. Both groups get a ``Platform`` column looked up in the
    module-level ``platform_mapping`` and exploded to one platform per row.

    Args:
        df: DataFrame containing at least the columns listed in ``columns``
            below.

    Returns:
        Concatenation of both groups without ``requiredDataConnectors``.
    """
    columns = [
        "DetectionType",
        "DetectionService",
        "id",
        "name",
        "description",
        "query",
        "queryFrequency",
        "queryPeriod",
        "triggerOperator",
        "triggerThreshold",
        "tactics",
        "relevantTechniques",
        "requiredDataConnectors",
        "severity",
        "GithubURL",
        "IngestedDate",
    ]

    # Reordering columns
    df = df[columns]

    # Derived columns are created below; register them at fixed positions
    # so both groups end up with the same column order.
    columns.insert(5, "connectorId")
    columns.insert(6, "dataTypes")

    # Separate dataframes for Built-in and Custom DataConnectors
    has_connectors = df.requiredDataConnectors.notnull()
    builtin_dataconnectors = df[has_connectors].reset_index(drop=True)
    custom_dataconnectors = df[~has_connectors].reset_index(drop=True)

    ## Pre-Processing Built-in Data Connectors dataframe
    # Exploding columns to flatten the table
    columns_to_expand = ["tactics", "relevantTechniques", "requiredDataConnectors"]
    for column in columns_to_expand:
        builtin_dataconnectors = builtin_dataconnectors.explode(column).reset_index(
            drop=True
        )
    # Each requiredDataConnectors entry is a mapping; expand its keys
    # into columns (the selection below expects connectorId and dataTypes).
    builtin_dataconn_expanded = pd.DataFrame(
        builtin_dataconnectors["requiredDataConnectors"].values.tolist()
    )
    # Concatenate the two dataframes side by side (row-aligned)
    builtin_dataconn_df = pd.concat(
        [builtin_dataconnectors, builtin_dataconn_expanded], axis=1
    )
    # Exploding dataTypes column
    builtin_dataconn_df = builtin_dataconn_df.explode("dataTypes").reset_index(
        drop=True
    )
    # .copy() so the Platform assignment below writes to an independent frame
    builtin_dataconn_df = builtin_dataconn_df[columns].copy()

    # Populate new column Platform based on custom mapping
    builtin_dataconn_df["Platform"] = builtin_dataconn_df.connectorId.map(
        platform_mapping
    )
    builtin_dataconn_df = builtin_dataconn_df.explode("Platform").reset_index(drop=True)

    ## Pre-Processing Custom or Missing Data Connectors dataframe
    # Exploding columns to flatten the table
    columns_to_expand = ["tactics", "relevantTechniques"]
    for column in columns_to_expand:
        custom_dataconnectors = custom_dataconnectors.explode(column).reset_index(
            drop=True
        )

    # The folder that contains the YAML file names the connector
    custom_dataconnectors["connectorId"] = [
        x.split("/")[-2] for x in custom_dataconnectors.GithubURL
    ]
    custom_dataconnectors["dataTypes"] = (
        custom_dataconnectors.connectorId.map(str) + "_CL"
    )

    # Populate new column Platform based on custom mapping
    custom_dataconnectors["Platform"] = custom_dataconnectors.connectorId.map(
        platform_mapping
    )
    custom_dataconnectors = custom_dataconnectors.explode("Platform").reset_index(
        drop=True
    )

    # Keep Platform here as well. Selecting only `columns` would discard the
    # value that was just computed and leave one duplicate row per platform.
    custom_dataconn_df = custom_dataconnectors[columns + ["Platform"]]

    # Stack both groups on top of each other
    result = pd.concat([builtin_dataconn_df, custom_dataconn_df], axis=0)
    result = result.drop(["requiredDataConnectors"], axis=1)

    return result


def get_mitre_matrix_flat():
    """Return the 'mitre-attack' matrix techniques, one row per tactic.

    Returns:
        DataFrame with the columns ``matrix``, ``tactic``, ``technique``,
        ``technique_id`` and ``technique_id_parsed`` (the technique id with
        everything after the first "." removed).
    """
    client = attack_client()

    # Request plain dictionaries rather than STIX objects
    raw_techniques = client.get_techniques(stix_format=False)

    wanted_columns = ["matrix", "tactic", "technique", "technique_id"]
    techniques = json_normalize(raw_techniques).reindex(columns=wanted_columns)

    # Keep only rows that belong to the "mitre-attack" matrix
    flat = techniques[techniques["matrix"] == "mitre-attack"].reset_index(drop=True)

    # A technique can list several tactics; give each its own row
    flat = flat.explode("tactic").reset_index(drop=True)

    # Strip the sub-technique suffix, keeping the part before the first "."
    id_parts = flat["technique_id"].str.split(".", n=1, expand=True)
    flat["technique_id_parsed"] = id_parts[0]

    return flat


# Platform groups shared by several connectors. Every mapping value below is
# built as its own list, so no two connectors share a list object.
_AZURE_HOSTS = ("Azure", "Windows", "Linux")
_AZURE_SAAS = ("Azure", "SaaS")
_DESKTOP_OS = ("Windows", "Linux", "macOS")
_THREAT_INTEL = (
    "Windows",
    "Linux",
    "macOS",
    "Azure",
    "AWS",
    "Azure AD",
    "Office 365",
)

# Connector id -> list of platforms the connector's data covers.
platform_mapping = {
    "AWS": ["AWS"],
    "AWSS3": ["AWS", "SaaS"],
    "AzureActiveDirectory": ["Azure", "Azure AD"],
    "AzureActivity": list(_AZURE_SAAS),
    "AzureDevOpsAuditing": list(_AZURE_SAAS),
    "AzureMonitor": ["SaaS"],
    "AzureMonitor(IIS)": ["Azure"],
    "AzureMonitor(Keyvault)": ["Azure"],
    "AzureMonitor(Query Audit)": ["Azure"],
    "AzureMonitor(VMInsights)": list(_AZURE_HOSTS),
    "AzureMonitor(WindowsEventLogs)": ["Azure", "Windows"],
    "AzureMonitor(WireData)": list(_AZURE_HOSTS),
    "AzureNetworkWatcher": ["Azure"],
    "AzureSecurityCenter": list(_AZURE_SAAS),
    "Barracuda": list(_AZURE_HOSTS),
    "BehaviorAnalytics": ["Azure AD", "Azure", "Windows"],
    "CEF": list(_AZURE_HOSTS),
    "CheckPoint": list(_AZURE_HOSTS),
    "CiscoASA": list(_AZURE_HOSTS),
    "CustomConnector": ["Unknown"],
    "DNS": list(_AZURE_HOSTS),
    "EsetSMC": list(_AZURE_HOSTS),
    "F5": list(_AZURE_HOSTS),
    "Fortinet": list(_AZURE_HOSTS),
    "GitHub": ["SaaS", "Windows", "Linux"],
    "InfobloxNIOS": list(_AZURE_HOSTS),
    "MicrosoftCloudAppSecurity": ["Azure", "AWS", "GCP", "SaaS"],
    "MicrosoftDefenderAdvancedThreatProtection": ["Windows", "Linux"],
    "Office365": ["Office 365"],
    "OktaSSO": ["Azure AD", "AWS", "GCP", "SaaS"],
    "PaloAltoNetworks": list(_AZURE_HOSTS),
    "ProofpointTAP": ["Office 365"],
    "PulseConnectSecure": list(_AZURE_HOSTS),
    "QualysVulnerabilityManagement": ["Azure", "Windows", "Linux", "macOS"],
    "SecurityEvents": ["Windows"],
    "SophosXGFirewall": list(_AZURE_HOSTS),
    "SymantecProxySG": list(_AZURE_HOSTS),
    "SymantecVIP": list(_AZURE_HOSTS),
    "Syslog": ["Linux"],
    "ThreatIntelligence": list(_THREAT_INTEL),
    "ThreatIntelligenceTaxii": list(_THREAT_INTEL),
    "TeamsLogs": list(_DESKTOP_OS),
    "TrendMicro": ["Azure", "Windows", "Linux", "macOS"],
    "VMwareCarbonBlack": list(_DESKTOP_OS),
    "WAF": list(_AZURE_SAAS),
    "Zscaler": list(_AZURE_HOSTS),
    "ZoomLogs": ["SaaS"],
}

In [None]:
# Default extraction target: a "mitre-attack" folder under the current
# working directory. The user can edit the value in the text box.
def_path = Path(os.getcwd()) / "mitre-attack"
path_wgt = widgets.Text(
    value=str(def_path),
    description="Path to extract to zipped repo files: ",
    layout=Layout(width="50%"),
    style={"description_width": "initial"},
)
path_wgt

In [None]:
# ZIP snapshot of the master branch of the public Azure Sentinel repository
azsentinel_git_url = "https://github.com/Azure/Azure-Sentinel/archive/master.zip"

# Extract the Detections and Hunting Queries folders to the chosen path
get_sentinel_queries_from_github(azsentinel_git_url, path_wgt.value)

In [None]:
QUERIES_PATH = 'Azure-Sentinel-master'
sentinel_root = Path(path_wgt.value) / QUERIES_PATH

display(HTML("<h3>Listings under Detections...</h2>"))
%ls {sentinel_root}/Detections/

display(HTML("<h3>Listings under Hunting Queries...</h2>"))
%ls {sentinel_root}/'Hunting Queries'/

### Retrieve via Azure Sentinel API - Experimental

**Known API Limitations:**
- Hunting queries are not currently part of the generally available (GA) REST API, but you can retrieve saved searches through the Log Analytics REST API and then filter for hunting queries.
- In both cases (savedSearches and alertRuleTemplates), the retrieved data contains only tactics, not techniques. 


In [None]:
#!az login

In [None]:
# from azure.identity import DefaultAzureCredential
# from adal import AuthenticationContext
# from azure.common.credentials import get_azure_cli_credentials
# from datetime import datetime

# # azure.mgmt.resource is an Azure SDK management library
# from azure.mgmt.resource import SubscriptionClient

# from azure.common.credentials import get_cli_profile 

# from urllib.parse import urlparse

# az_creds, _ = get_azure_cli_credentials()

# credential = DefaultAzureCredential()
# subscription_client = SubscriptionClient(az_creds)

In [None]:
# url = urlparse('https://api.loganalytics.io')
# _resource = f"{url.scheme}://{url.hostname}"

# sub_info = {
#     "tenant_id": "<tenant-id>",
#     "subscription_id": "<sub-id>",
#     "resource_group": "<resource-group>",
#     "workspace_id": "<workspace-id>",
#     "workspace_name": "<workspace-name>",
# }

# profile = get_cli_profile()
# credential, _subscription, _tenant = profile.get_raw_token(resource=_resource, subscription=sub_info["subscription_id"], tenant=None)
# token_type, access_token, token = credential


# subscription = sub_info["subscription_id"]
# tenant = sub_info["tenant_id"]
# rt = profile.get_refresh_token(resource=_resource, subscription=subscription)

In [None]:
# Helper methods to refresh token if current one has expired
# def get_token_refresh(credential, res_url):
#     """Get token closure - keeps az_api_creds cached"""
#     az_api_creds = None
#     context = AuthenticationContext(authority=f"https://login.microsoftonline.com/{sub_info['tenant_id']}")
    
#     def _get_token():
#         nonlocal az_api_creds
#         if (
#             az_api_creds is None or
#             pd.to_datetime(az_api_creds["expiresOn"]) < datetime.now()
#         ):
#             az_api_creds = context.acquire_token_with_refresh_token(
#                 credential[2]["refreshToken"],
#                 credential[2]["_clientId"],
#                 res_url
#             )
#         return az_api_creds.get("accessToken")
#     return _get_token


# mgmt_url = 'https://management.azure.com/'
# get_mgmt_token = get_token_refresh(credential, mgmt_url)

# def get_api_headers():
#     """Return authorization header with current token."""
#     return {
#         "Authorization": f"Bearer {get_mgmt_token()}",
#         "Content-Type": "application/json",
#     }
                                    
# def as_api_result_to_df(response):
#     j_resp = response.json()
#     if response.status_code != 200 or not j_resp or "value" not in j_resp:
#         raise ValueError("No valid JSON result in response")
#     queries_raw_df = pd.DataFrame(j_resp["value"])
#     query_props_df = pd.json_normalize(queries_raw_df["properties"])
#     return pd.concat([queries_raw_df, query_props_df], axis=1).drop(columns="properties")

In [None]:
# res_group_url = f"https://management.azure.com/subscriptions/{sub_info['subscription_id']}/resourceGroups/{sub_info['resource_group']}"
# prov_path = f"/providers/Microsoft.OperationalInsights/workspaces/{sub_info['workspace_name']}"
# ss_path = "/savedSearches"
# saved_searches_url = res_group_url + prov_path + ss_path

# params = {'api-version': '2017-04-26-preview'}

# r = requests.get(saved_searches_url, headers=get_api_headers(), params=params)

# queries_df = as_api_result_to_df(r)
# hunting_queries = queries_df[queries_df["Category"] == "Hunting Queries"].copy()
# invalid_chars = re.compile(r'[^A-Za-z0-9_]', flags=re.IGNORECASE)
# hunting_queries["query_name"] = hunting_queries["DisplayName"].str.replace(invalid_chars, "_")

# hunting_queries.head()

In [None]:
# res_group_url = f"https://management.azure.com/subscriptions/{sub_info['subscription_id']}/resourceGroups/{sub_info['resource_group']}"
# prov_path = f"/providers/Microsoft.OperationalInsights/workspaces/{sub_info['workspace_name']}"
# ops_path = "/providers/Microsoft.SecurityInsights/operations"

# alert_rules = "/providers/Microsoft.SecurityInsights/alertRuleTemplates"
# alert_rules_url = res_group_url + prov_path + alert_rules
# params = {'api-version': '2020-01-01'}

# r = requests.get(alert_rules_url, headers=get_api_headers(), params=params)

# alert_rules_df = as_api_result_to_df(r)
# alert_rules_df.head(5)

## Data Cleaning and Enrichment

### YAML Parsing

In [None]:
base_dir = path_wgt.value + "/Azure-Sentinel-master"
detections_df = parse_yaml(parent_dir=base_dir, child_dir="Detections")
hunting_df = parse_yaml(parent_dir=base_dir, child_dir="Hunting Queries")

frames = [detections_df, hunting_df]
# drop=True: renumber the rows without keeping the old per-frame index as
# an extra "index" column.
sentinel_github_df = pd.concat(frames).reset_index(drop=True)
# Literal replacement (no regex needed) so the links are valid URLs.
sentinel_github_df["GithubURL"] = sentinel_github_df["GithubURL"].str.replace(
    " ", "%20", regex=False
)
sentinel_github_df["IngestedDate"] = date.today()

# Displaying basic statistics of yaml files
display(HTML("<h3>Azure Sentinel Github Stats...</h3>"))
print(
    f"""Total Queries in Azure Sentinel Github:: {len(sentinel_github_df)} 
    No of Detections :: {len(detections_df)} 
    No of Hunting Queries:: {len(hunting_df)}
    """
)

### Clean and Preprocess Data

In [None]:
# Flatten the combined GitHub query metadata (tactics, techniques,
# connectors, data types and platforms are exploded to one value per row).
result = clean_and_preprocess_data(df=sentinel_github_df)
# Preview the first rows
result.head()

### Enrich with MITRE Matrix

In [None]:
# Fetch the ATT&CK techniques and flatten them to one row per tactic
mitre_attack_df = get_mitre_matrix_flat()
# Preview the first rows
mitre_attack_df.head()

### Merge preprocessed dataset with MITRE Dataset

In [None]:
# Outer join on the parent technique id so that rows without a match on
# either side (queries or ATT&CK techniques) are kept.
newdf = result.merge(
    mitre_attack_df,
    how="outer",
    left_on=["relevantTechniques"],
    right_on=["technique_id_parsed"],
)

# Source column name -> presentation column name
column_renames = {
    "matrix": "MITREMatrix",
    "platform": "Platform",
    "id": "DetectionId",
    "name": "DetectionName",
    "description": "DetectionDescription",
    "connectorId": "ConnectorId",
    "dataTypes": "DataTypes",
    "severity": "DetectionSeverity",
    "tactics": "Tactic",
    "relevantTechniques": "TechniqueId",
    "technique": "TechniqueName",
    "query": "Query",
    "queryFrequency": "QueryFrequency",
    "queryPeriod": "QueryPeriod",
    "triggerOperator": "TriggerOperator",
    "triggerThreshold": "TriggerThreshold",
    "GithubURL": "DetectionUrl",
}
newdf = newdf.rename(columns=column_renames)

# Column selection and ordering of the final dataset
columns = [
    "MITREMatrix", "Tactic", "TechniqueId", "TechniqueName", "Platform",
    "DetectionType", "DetectionService", "DetectionId", "DetectionName",
    "DetectionDescription", "ConnectorId", "DataTypes", "Query",
    "QueryFrequency", "QueryPeriod", "TriggerOperator", "TriggerThreshold",
    "DetectionSeverity", "DetectionUrl", "IngestedDate",
]
newdf = newdf[columns]

# Preview the first rows (head() with no argument)
newdf.head()

In [None]:
# Export the whole dataset with a header row; later cells read this file back
newdf.to_csv("output-with-header.csv", index=False)

# Export the same rows again without the header row
newdf.to_csv("output.csv", header=False, index=False)

## Scraping Detection list from Microsoft Platform Services

In [None]:
def get_azure_defender_alerts():
    """Scrape the Azure Security Center alerts reference into one DataFrame.

    The HTML tables on the page are paired, in page order, with the provider
    names listed below. Each table gets a ``Provider`` column, and the tables
    are stacked. Rows containing any missing value are dropped.

    Returns:
        DataFrame of alerts with the added ``Provider`` and
        ``Detection Service`` columns; the ``Intent(Learn more)`` column is
        renamed to ``Tactic``.

    Raises:
        ValueError: If the page yields no tables that can be labelled.
    """
    alerts_url = (
        "https://docs.microsoft.com/en-us/azure/security-center/alerts-reference"
    )
    list_of_df = pd.read_html(alerts_url)
    providers = [
        "Windows",
        "Linux",
        "Azure App Service",
        "Azure Kubernetes Service clusters",
        "Containers- Host Level",
        "SQL Database and SQL Warehouse",
        "Azure Storage",
        "Azure Cosmos DB (Preview)",
        "Azure Network Layer",
        "Azure Resource Manager (Preview)",
        "Azure Key Vault (Preview)",
        "Azure DDoS Protection",
    ]

    # zip() pairs tables with providers by position and stops at the shorter
    # sequence, so a page with fewer tables does not raise IndexError.
    frames = []
    for table, provider in zip(list_of_df, providers):
        labelled = table.assign(Provider=provider)
        # Normalise the alternative alert column title in every table. A
        # table left with a differently named alert column would make the
        # dropna() below discard every row of the stacked result.
        frames.append(labelled.rename(columns={"Alert (Alert Type)": "Alert"}))

    if not frames:
        raise ValueError(f"No alert tables found at {alerts_url}")

    # Merge all the tables and drop rows with missing values
    azdefender_df = pd.concat(frames).reset_index().dropna().drop("index", axis=1)

    # Add and rename columns
    azdefender_df["Detection Service"] = "Azure Defender"
    azdefender_df = azdefender_df.rename(columns={"Intent(Learn more)": "Tactic"})

    return azdefender_df


def get_azure_ipc_alerts():
    """Scrape the Identity Protection risk detections into a DataFrame.

    The first three tables of the page are stacked and rows with missing
    values are dropped. Tactic, severity and provider are filled with the
    placeholder "N.A.".

    Returns:
        DataFrame whose ``Risk detection`` column is renamed to ``Alert``
        and whose ``Detection type`` column is removed.
    """
    alerts_url = "https://docs.microsoft.com/en-us/azure/active-directory/identity-protection/concept-identity-protection-risks"
    tables = pd.read_html(alerts_url)

    # Stack the first three tables of the page
    stacked = pd.concat([tables[position] for position in range(3)])
    aip_df = stacked.reset_index().dropna().drop("index", axis=1)

    # Constant columns, added in this order
    constant_columns = {
        "Tactic": "N.A.",
        "Severity": "N.A.",
        "Provider": "N.A.",
        "Detection Service": "Azure Identity Proptection Center (IPC)",
    }
    for column_name, constant in constant_columns.items():
        aip_df[column_name] = constant

    renamed = aip_df.rename(columns={"Risk detection": "Alert"})
    return renamed.drop("Detection type", axis=1)


def get_azure_defender_identity_alerts():
    """Scrape the Defender for Identity security alert list into a DataFrame.

    Only the first table of the page is used. Rows with missing values are
    dropped, and description and provider are filled with "N.A.".

    Returns:
        DataFrame with the columns ``Alert``, ``Description``, ``Tactic``,
        ``Severity``, ``Provider`` and ``Detection Service``.
    """
    alerts_url = "https://docs.microsoft.com/en-us/azure-advanced-threat-protection/suspicious-activity-guide?tabs=external"

    first_table = pd.read_html(alerts_url)[0]
    atp_df = first_table.reset_index().dropna().drop("index", axis=1)
    for placeholder_column in ("Description", "Provider"):
        atp_df[placeholder_column] = "N.A."

    renamed = atp_df.rename(
        columns={"Security alert name": "Alert", "MITRE ATT&CK Matrix™": "Tactic"}
    )
    atp_df = renamed.drop("Unique external ID", axis=1)
    atp_df["Detection Service"] = "Microsoft Defender for Identity"

    ordered_columns = [
        "Alert",
        "Description",
        "Tactic",
        "Severity",
        "Provider",
        "Detection Service",
    ]
    return atp_df[ordered_columns]


def get_mcas_alerts():
    """Scrape the MCAS anomaly alert headings into a DataFrame.

    Every ``<h3 id=...>`` heading of the page becomes one alert row. Tactics
    are assigned purely by row position, using the fixed ranges below.

    Returns:
        DataFrame with the columns ``Alert``, ``Description``, ``Tactic``,
        ``Severity``, ``Provider`` and ``Detection Service``.
    """
    alerts_url = (
        "https://docs.microsoft.com/en-us/cloud-app-security/investigate-anomaly-alerts"
    )

    # The context manager closes the HTTP session once the page is fetched.
    with HTMLSession() as session:
        r = session.get(alerts_url)

    mcas_df = pd.DataFrame(
        re.findall(r"<h3 id=.*>(.*)</h3>", r.text), columns=["Alert"]
    )

    mcas_df["Description"] = "N.A."

    mcas_df["Tactic"] = "N.A."

    # (first row, one-past-last row, tactic), positional like range().
    # NOTE(review): these positions presumably mirror the section order of
    # the page when the notebook was written; verify against the live page.
    tactic_ranges = [
        (0, 6, "InitialAccess"),
        (6, 9, "Execution"),
        (9, 13, "Persistence"),
        (13, 14, "PrivilegeEscalation"),
        (14, 15, "CredentialAccess"),
        (15, 18, "Collection"),
        (18, 21, "Exfiltration"),
        (21, 24, "Impact"),
    ]
    # One positional write per range instead of chained indexing
    # (df["Tactic"][i] = ...), which may write to a temporary copy.
    tactic_col = mcas_df.columns.get_loc("Tactic")
    for start, stop, tactic in tactic_ranges:
        mcas_df.iloc[start:stop, tactic_col] = tactic

    mcas_df["Severity"] = "N.A."
    mcas_df["Provider"] = "N.A."
    mcas_df["Detection Service"] = "Microsoft Cloud Application Security (MCAS)"

    return mcas_df

### Azure Defender

In [None]:
# Scrape the Azure Defender alert reference tables and show the result
az_defender_alerts = get_azure_defender_alerts()
az_defender_alerts

### Azure Identity Protection Center

In [None]:
# Scrape the Identity Protection risk detections and show the result
az_ipc_alerts = get_azure_ipc_alerts()
az_ipc_alerts

### Azure Defender for Identity

In [None]:
# Scrape the Defender for Identity security alerts and show the result
az_defender_for_identity_alerts = get_azure_defender_identity_alerts()
az_defender_for_identity_alerts

### Microsoft Cloud Application Security (MCAS)

In [None]:
# Scrape the MCAS anomaly alert headings and show the result
mcas_df = get_mcas_alerts()
mcas_df

### Merge all datasets

In [None]:
# Stack the four scraped alert tables; each table keeps its own row labels,
# so index values can repeat in the combined frame.
frames = [az_defender_alerts, az_ipc_alerts, az_defender_for_identity_alerts, mcas_df]
msft_df = pd.concat(frames)
msft_df

## Data Visualization

In [None]:
# Tactic names used to filter rows and to order the axes of the charts below.
Tactics = [
    "InitialAccess", "Execution", "Persistence", "PrivilegeEscalation",
    "DefenseEvasion", "CredentialAccess", "Discovery", "LateralMovement",
    "Collection", "CommandAndControl", "Exfiltration", "Impact",
]

### Azure Sentinel Alerts Table Viewer - Jupyter Widget

In [None]:
import qgrid

# Columns shown in the interactive grid
columns = [
    "Tactic", "TechniqueId", "Platform", "DetectionType",
    "DetectionName", "ConnectorId", "DataTypes",
]

# rename_axis(None) clears the index name before the frame goes to the grid
df = newdf[columns].rename_axis(None)
qgrid_widget = qgrid.show_grid(df, show_toolbar=True)
qgrid_widget

### HeatMap - Tactics vs Data Connectors

In [None]:
# Subset selection for valid Tactic valuies
# Keep only rows whose Tactic is one of the known tactic names
heatmap = newdf.loc[newdf["Tactic"].isin(Tactics)]

# Count rows per (connector, tactic) and shade the cells by count
connector_by_tactic = pd.crosstab(heatmap["ConnectorId"], heatmap["Tactic"])
connector_by_tactic.style.background_gradient(cmap="PuBuGn")

### HeatMap - Tactics vs Techniques

In [None]:
# Plot HeatMap
# Count rows per (technique name, tactic) and shade the cells by count
technique_by_tactic = pd.crosstab(heatmap["TechniqueName"], heatmap["Tactic"])
technique_by_tactic.style.background_gradient(cmap="PuBuGn")

### ATT&CK Navigator

#### Generate Navigator Layer Json file

In [None]:
# Read csv file as input
# Read the exported dataset back as plain strings (empty cells become "").
# newline="" lets the csv module handle line breaks inside quoted fields;
# utf-8 matches the default encoding DataFrame.to_csv wrote the file with.
with open("output-with-header.csv", "r", encoding="utf-8", newline="") as f:
    a = list(csv.DictReader(f))

# Unique, non-empty platforms in first-seen order. Filtering here replaces
# list.remove(""), which raises ValueError when no row has an empty platform.
platforms = list(dict.fromkeys(t["Platform"] for t in a if t["Platform"]))

# platform -> {techniqueId -> [unique {"name", "value"} entries]}, built in
# one pass over the rows; dicts preserve the first-seen order.
layer_techniques = {platform: {} for platform in platforms}
for t in a:
    technique_mappings = layer_techniques.get(t["Platform"])
    if technique_mappings is None:
        # Row without a platform: not part of any layer
        continue
    metadata = {"name": t["DetectionName"], "value": t["DataTypes"]}
    entries = technique_mappings.setdefault(t["TechniqueId"], [])
    if metadata not in entries:
        entries.append(metadata)

# Generate one ATT&CK Navigator layer json file per platform
VERSION = "3.0"
DOMAIN = "mitre-enterprise"

# Make sure the output folder exists before any file is written
os.makedirs("./Json", exist_ok=True)

for platform, technique_mappings in layer_techniques.items():
    NAME = "{} Layer Json File for Azure Sentinel".format(platform)
    DESCRIPTION = "{} ATT&CK Matrix Coverage for Azure Sentinel".format(platform)
    platform_layer = {
        "description": DESCRIPTION,
        "name": NAME,
        "domain": DOMAIN,
        "version": VERSION,
        "filters": {"stages": ["act"], "platforms": [platform]},
        "techniques": [
            {"score": 1, "techniqueID": technique_id, "metadata": technique_metadata}
            for technique_id, technique_metadata in technique_mappings.items()
        ],
        "gradient": {
            "colors": ["#ffffff", "#66fff3"],
            "minValue": 0,
            "maxValue": 1,
        },
        "legendItems": [{"label": "Techniques researched", "color": "#66fff3"}],
    }
    with open("./Json/{0}.json".format(platform), "w", encoding="utf-8") as f:
        f.write(json.dumps(platform_layer))

####  ATT&CK Navigator Embedded View

In [None]:
import warnings
import IPython

# Suppress every warning from this point on
warnings.filterwarnings("ignore")

# Embedded ATT&CK Navigator showing the published Azure layer
navigator_url = "https://mitre-attack.github.io/attack-navigator/enterprise/"
layer_url = "https://raw.githubusercontent.com/ashwin-patil/msft-mitreattack/main/Azure%20Sentinel/ATT%26CK-Navigator-layers/Azure.json"
iframe = (
    '<iframe src="'
    + navigator_url
    + "#layerURL="
    + layer_url
    + '&tabs=false&selecting_techniques=false" width="1000" height="400"></iframe>'
)
IPython.display.HTML(iframe)

### Radar Plots using Plotly

#### Data Wrangling

In [None]:
# Reload the exported dataset from disk
output_df = pd.read_csv("output-with-header.csv")

# Tactic names used to filter rows and to order the chart axes
Tactics = [
    "InitialAccess", "Execution", "Persistence", "PrivilegeEscalation",
    "DefenseEvasion", "CredentialAccess", "Discovery", "LateralMovement",
    "Collection", "CommandAndControl", "Exfiltration", "Impact",
]

# Number of distinct detection names per (tactic, detection type)
distinct_names = (
    output_df[["Tactic", "DetectionType", "DetectionName"]]
    .dropna()
    .groupby(["Tactic", "DetectionType"])["DetectionName"]
    .nunique()
)
df_plot = distinct_names.reset_index(name="Count")
df_plot = df_plot[df_plot["Tactic"].isin(Tactics)]

# Tactics as rows, detection types as columns
df_stats = df_plot.pivot(index="Tactic", columns="DetectionType", values="Count")
df_stats

In [None]:
# Align the pivot with the full tactic list and both detection types, so a
# tactic or type without any query counts as 0 instead of raising KeyError
# or yielding NaN.
_type_counts = (
    df_stats.reindex(index=Tactics, columns=["Detections", "Hunting Queries"])
    .fillna(0)
    .astype(int)
)
detection_count = _type_counts["Detections"].tolist()
hunting_count = _type_counts["Hunting Queries"].tolist()

print(detection_count)
print(hunting_count)

#### Radar Plot Per Detection Type

In [None]:
fig = go.Figure()

# One filled polar trace per detection type, sharing the tactic axis
for trace_name, counts in (
    ("Detections", detection_count),
    ("Hunting Queries", hunting_count),
):
    fig.add_trace(
        go.Scatterpolar(r=counts, theta=Tactics, fill="toself", name=trace_name)
    )

fig.update_layout(
    polar={"radialaxis": {"visible": True}},
    height=600,
    width=800,
    showlegend=True,
    title="MITRE ATT&CK Coverage for Azure Sentinel",
    title_x=0.5,
)

fig.show()

### MITRE ATT&CK Stats by Platform

#### Data Wrangling

In [None]:
# Number of distinct detection names per (tactic, platform)
distinct_per_platform = (
    output_df[["Tactic", "Platform", "DetectionName"]]
    .dropna()
    .groupby(["Tactic", "Platform"])["DetectionName"]
    .nunique()
)
df_platform = distinct_per_platform.reset_index(name="Count")
df_platform = df_platform[df_platform["Tactic"].isin(Tactics)]

# Tactics as rows, platforms as columns; missing combinations become 0
platform_pivot = df_platform.pivot(index="Tactic", columns="Platform", values="Count")
df_platform = platform_pivot.fillna(0).astype(int)
df_platform

In [None]:
# Align the pivot with the full tactic list and the six plotted platforms,
# so a missing tactic or platform counts as 0 instead of raising KeyError.
_plotted_platforms = ["AWS", "Azure", "Azure AD", "Office 365", "Windows", "Linux"]
_platform_counts = df_platform.reindex(
    index=Tactics, columns=_plotted_platforms, fill_value=0
)

aws = _platform_counts["AWS"].tolist()
azure = _platform_counts["Azure"].tolist()
azuread = _platform_counts["Azure AD"].tolist()
o365 = _platform_counts["Office 365"].tolist()
windows = _platform_counts["Windows"].tolist()
linux = _platform_counts["Linux"].tolist()

# Show the per-platform counts built in this cell. Printing the
# detection/hunting lists from the earlier cell was a copy-paste slip.
for _name in _plotted_platforms:
    print(f"{_name}: {_platform_counts[_name].tolist()}")

#### Radar plot with Subplots per Platform

In [None]:
# 3 x 2 grid of polar subplots, one per platform
fig = make_subplots(
    rows=3,
    cols=2,
    specs=[[{"type": "polar"}, {"type": "polar"}] for _ in range(3)],
)

# Each trace is labelled with its own platform. The Windows and Linux traces
# were previously both labelled 'AWS'.
fig.add_traces(
    [
        go.Scatterpolar(r=aws, theta=Tactics, fill="toself", name="AWS"),
        go.Scatterpolar(r=azure, theta=Tactics, fill="toself", name="Azure"),
        go.Scatterpolar(r=azuread, theta=Tactics, fill="toself", name="Azure AD"),
        go.Scatterpolar(r=o365, theta=Tactics, fill="toself", name="Office 365"),
        go.Scatterpolar(r=windows, theta=Tactics, fill="toself", name="Windows"),
        go.Scatterpolar(r=linux, theta=Tactics, fill="toself", name="Linux"),
    ],
    rows=[1, 1, 2, 2, 3, 3],
    cols=[1, 2, 1, 2, 1, 2],
)

fig.update_layout(
    polar=dict(radialaxis=dict(visible=True)),
    height=1200,
    width=1000,
    showlegend=True,
    title="MITRE ATT&CK Coverage Per Platform",
    title_x=0.5,
)

fig.show()

####  Radar plot (Consolidated for All Platforms)

In [None]:
fig = go.Figure()

# One filled polar trace per platform, all drawn on the same axes
platform_series = (
    ("AWS", aws),
    ("Azure", azure),
    ("Azure AD", azuread),
    ("Office 365", o365),
    ("Windows", windows),
    ("Linux", linux),
)
for platform_name, platform_values in platform_series:
    fig.add_trace(
        go.Scatterpolar(
            r=platform_values, theta=Tactics, fill="toself", name=platform_name
        )
    )

fig.update_layout(
    polar={"radialaxis": {"visible": True}},
    height=600,
    width=800,
    showlegend=True,
    title="MITRE ATT&CK Coverage Per Platform",
    title_x=0.5,
)

fig.show()

## Visualizing Detections from Microsoft Platform Services

In [None]:
# Number of distinct alerts per detection service
alerts_per_service = msft_df.groupby("Detection Service")["Alert"].nunique()
msft_df_tactics = alerts_per_service.reset_index(name="count")

labels = msft_df_tactics["Detection Service"]
values = msft_df_tactics["count"]

# `hole` turns the pie into a donut chart
donut = go.Pie(labels=labels, values=values, hole=0.4)
fig = go.Figure(data=[donut])
fig.show()

In [None]:
# Number of distinct alerts per provider, skipping the "N.A." placeholder
with_provider = msft_df[msft_df["Provider"] != "N.A."]
alerts_per_provider = with_provider.groupby("Provider")["Alert"].nunique()
msft_df_providers = alerts_per_provider.reset_index(name="count")

labels = msft_df_providers["Provider"]
values = msft_df_providers["count"]

donut = go.Pie(labels=labels, values=values, hole=0.3)
fig = go.Figure(data=[donut])
fig.show()

In [None]:
# Export the consolidated Microsoft alert list; the row index is written as
# the first CSV column because index=False is not passed.
msft_df.to_csv('msft-consolidated.csv')

## Upload Results to Azure Sentinel

In [None]:
# Input boxes for the Log Analytics workspace credentials; the key uses a
# password widget
la_ws_id = widgets.Text(description="Workspace ID:")
la_ws_key = widgets.Password(description="Workspace Key:")
display(la_ws_id, la_ws_key)

In [None]:
# Instantiate our Uploader with the workspace id/key entered in the widgets
la_up = LAUploader(workspace=la_ws_id.value, workspace_secret=la_ws_key.value, debug=True)

# Upload both result sets, each to its own table:
# the merged Sentinel/ATT&CK dataset and the scraped Microsoft alerts
la_up.upload_df(data=newdf, table_name='AzSentinelMITRE')
la_up.upload_df(data=msft_df, table_name='MSFTAlerts')