# Hunting - Automated Data Query and Ingestion to Custom Table

__Notebook Version:__ 1.0<br>
__Python Version:__ Python 3.8<br>
__Apache Spark Version:__ 3.1<br>
__Required Packages:__ azure-monitor-query, azure-monitor-ingestion, azure-mgmt-loganalytics, azure-identity<br>
__Platforms Supported:__  Azure Synapse Analytics
     
__Data Source Required:__ Log Analytics custom table defined
    
### Description
This notebook provides step-by-step instructions and sample code to query various data from Azure Log Analytics and then store the results back into a pre-defined Log Analytics custom table.<br>
*** Please run the cells sequentially to avoid errors.  Please do not use "run all cells". *** <br>
Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/azure/data-explorer/kusto/concepts/).

## Table of Contents
1. Warm-up
2. Azure Log Analytics Data Queries
3. Save result to Azure Log Analytics Custom Table

## 1. Warm-up

In [None]:
%pip install azure.monitor.ingestion

In [None]:
# Load Python libraries that will be used in this notebook
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus
#from azure.identity.aio import DefaultAzureCredential
from azure.monitor.ingestion import LogsIngestionClient

from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential
from azure.core.exceptions import  HttpResponseError 

from datetime import datetime, timezone, timedelta
import requests
import pandas as pd
import numpy
import json
import ipywidgets
from IPython.display import display, HTML, Markdown

In [None]:
# User inputs: fill in every value below before running the remaining cells.

# Azure tenant and subscription identifiers.
tenant_id = ""
subscription_id = ""

# Log Analytics workspace: resource group, name, and the workspace id used when querying.
resource_group_name = ""
workspace_name = ""
workspace_id = ""
# NOTE(review): location is not referenced by the cells visible here — presumably the Azure region; confirm.
location = ""

# Values passed to mssparkutils.credentials.getSecret to read the service principal's secrets:
# the vault name, the linked-service name, and the names of the two secrets
# holding the client id and the client secret.
akv_name = ""
akv_link_name = ""
client_id_name = ""
client_secret_name = ""

In [None]:
# User inputs
# Parameters for provisioning resources

# Full ARM resource id of the Log Analytics workspace, built from the
# subscription, resource group and workspace name entered in the user-input cell.
workspace_resource_id = (
    f"/subscriptions/{subscription_id}"
    f"/resourceGroups/{resource_group_name}"
    f"/providers/Microsoft.OperationalInsights/workspaces/{workspace_name}"
)
dataCollection_endpoint_name = ""
dataCollection_rule_name = ""
# Stream name and immutable rule id are passed to LogsIngestionClient.upload when saving results.
stream_name = ""
immutable_rule_id = ""
# Endpoint URL handed to LogsIngestionClient when saving results.
dce_endpoint = ""

In [None]:
# Resource URI used to build the token scope; change it for other cloud environments.
resource_uri = "https://api.loganalytics.io"

# Read the service principal's client id and client secret through mssparkutils.
client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)
client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)

# This credential object is reused by the query and ingestion clients created in later cells.
credential = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)

# Request a token for the "<resource_uri>/.default" scope and keep the raw token string.
access_token = credential.get_token(f"{resource_uri}/.default")
token = access_token.token

## 2. Azure Log Analytics Data Queries

In [None]:
# Client used to run KQL queries, authenticated with the credential created in the warm-up section.
la_data_client = LogsQueryClient(credential=credential)

# The timespan covers the last 5 days; the KQL filter below narrows the rows to the last 3 days.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(days=5)
query = "DynamicSummary | where TimeGenerated > ago(3d) | project TimeGenerated, MyData = SummaryStatus"
query_result = la_data_client.query_workspace(
    workspace_id=workspace_id,
    query=query,
    timespan=(start_time, end_time),
)

if query_result.status == LogsQueryStatus.SUCCESS:
    result_tables = query_result.tables
else:
    # A partial result exposes the error and whatever data was returned;
    # report the error and keep the partial data so later cells still have a DataFrame.
    print(f"Query returned partial results: {query_result.partial_error}")
    result_tables = query_result.partial_data

# Always define df_la_query so the cells that follow never fail with a NameError.
if result_tables:
    df_la_query = pd.DataFrame(data=result_tables[0].rows, columns=result_tables[0].columns)
else:
    df_la_query = pd.DataFrame()
print(df_la_query)

In [None]:
# Calling the Microsoft MDTI API; the same template can be used for calling other REST APIs with different parameters.
# For different environments, such as national clouds, you may need to use a different root_url; please contact your admins.
# It can be ---.azure.us, ---.azure.microsoft.scloud, ---.azure.eaglex.ic.gov, etc.
def call_mdti_api_for_read(token, resource, timeout=30):
    """Send a GET request to the Microsoft MDTI API and return the response.

    Args:
        token: value placed verbatim in the ``Authorization`` header
            (callers in this notebook pass ``"Bearer <token>"``).
        resource: path appended after ``/beta/security/threatIntelligence/``.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The ``requests.Response`` returned by the GET call.
    """
    headers = {"Authorization": token, "content-type": "application/json"}
    root_url = "https://graph.microsoft.com"
    mdti_url = f"{root_url}/beta/security/threatIntelligence/{resource}"
    # Echo the final URL so the request being made is visible in the cell output.
    print(mdti_url)
    return requests.get(mdti_url, headers=headers, verify=True, timeout=timeout)

def get_token_for_graph():
    """Acquire a Microsoft Graph access token for the service principal.

    The client id and client secret are read through
    ``mssparkutils.credentials.getSecret`` using the notebook's user inputs.

    Returns:
        The access token string (without any ``"Bearer "`` prefix).
    """
    # With a client secret the scope must be "<resource>/.default"; individual
    # permissions such as ThreatIntelligence.Read.All are granted to the app
    # registration and cannot be part of the scope string.
    resource_uri = "https://graph.microsoft.com"
    client_id = mssparkutils.credentials.getSecret(akv_name, client_id_name, akv_link_name)
    client_secret = mssparkutils.credentials.getSecret(akv_name, client_secret_name, akv_link_name)

    credential = ClientSecretCredential(
        tenant_id=tenant_id,
        client_id=client_id,
        client_secret=client_secret,
    )
    access_token = credential.get_token(f"{resource_uri}/.default")
    return access_token.token

In [None]:
# Calling the Microsoft MDTI API with a Microsoft Graph token.
# The token is sent as a "Bearer" Authorization value; the resource requested is hosts('riskiq.net').
header_token_value = f"Bearer {get_token_for_graph()}"
response_mdti = call_mdti_api_for_read(header_token_value, "hosts('riskiq.net')")

In [None]:
# Show the MDTI response object (its printed form includes the HTTP status code).
if response_mdti is not None:
    print(response_mdti)

In [None]:
# Calling the Microsoft Sentinel API for List; the same template can be used for calling other Azure REST APIs with different parameters.
# For different environments, such as national clouds, you may need to use a different root_url; please contact your admins.
# It can be ---.azure.us, ---.azure.microsoft.scloud, ---.azure.eaglex.ic.gov, etc.
def call_azure_rest_api_for_get_watchlist_items(token, resource_group_name, sentinel_workspace_name, resource_alias, api_version, timeout=30):
    """Send a GET request for the items of a Microsoft Sentinel watchlist.

    Args:
        token: value placed verbatim in the ``Authorization`` header
            (callers in this notebook pass ``"Bearer <token>"``).
        resource_group_name: resource group segment of the request URL.
        sentinel_workspace_name: workspace segment of the request URL.
        resource_alias: watchlist alias segment of the request URL.
        api_version: value of the ``api-version`` query parameter.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot block the notebook indefinitely.

    Returns:
        The ``requests.Response`` returned by the GET call.
    """
    headers = {"Authorization": token, "content-type": "application/json"}
    root_url = "https://management.azure.com"
    # subscription_id is the module-level value from the notebook's user-input cell.
    arm_rest_url = (
        f"{root_url}/subscriptions/{subscription_id}"
        f"/resourceGroups/{resource_group_name}"
        f"/providers/Microsoft.OperationalInsights/workspaces/{sentinel_workspace_name}"
        f"/providers/Microsoft.SecurityInsights/watchlists/{resource_alias}/watchlistItems"
        f"?api-version={api_version}"
    )
    return requests.get(arm_rest_url, headers=headers, verify=True, timeout=timeout)

def get_token_for_azure():
    """Acquire an access token for https://management.azure.com/ as the service principal.

    Returns:
        The access token string (without any ``"Bearer "`` prefix).
    """
    def _read_secret(secret_name):
        # Both secrets come from the same vault / linked service given in the user inputs.
        return mssparkutils.credentials.getSecret(akv_name, secret_name, akv_link_name)

    arm_resource = "https://management.azure.com/"
    sp_credential = ClientSecretCredential(
        tenant_id=tenant_id,
        client_id=_read_secret(client_id_name),
        client_secret=_read_secret(client_secret_name),
    )
    # arm_resource already ends with "/", so the requested scope contains "//.default".
    return sp_credential.get_token(arm_resource + "/.default").token


In [None]:
def convert_dataframe_to_list_of_dictionaries(df, hasTimeGeneratedColumn):
    """Convert a DataFrame into a list of row dictionaries.

    Args:
        df: pandas DataFrame to convert.
        hasTimeGeneratedColumn: when true, every non-missing ``TimeGenerated``
            value is replaced by a string formatted as
            ``YYYY-MM-DDTHH:MM:SS.ffffffZ``.

    Returns:
        A list with one dictionary per DataFrame row (``df.to_dict('records')``).

    Raises:
        KeyError: if ``hasTimeGeneratedColumn`` is true but the DataFrame has
            rows and no ``TimeGenerated`` column.
    """
    records = df.to_dict('records')
    if not hasTimeGeneratedColumn:
        return records

    for row in records:
        # The dataframe may have more than one datetime column; add every
        # datetime column inside this loop to render it as ISO 8601.
        timestamp = row['TimeGenerated']
        # Missing timestamps (NaT, NaN or None) cannot be formatted; leave them untouched.
        if pd.isna(timestamp):
            continue
        # NOTE(review): the literal "Z" suffix assumes the value is already in UTC — confirm.
        row['TimeGenerated'] = timestamp.strftime("%Y-%m-%dT%H:%M:%S.%fZ")

    return records


In [None]:
# Calling the Microsoft Sentinel Watchlist API.
# If you don't have a Watchlist, you may create one, or try to access different features, such as Bookmarks.
# The resource group and workspace come from the user-input cell; set the alias to one of your own watchlists.
watchlist_alias = "zz20220801"
watchlist_api_version = "2023-02-01"
header_token_value = f"Bearer {get_token_for_azure()}"
response_watchlist = call_azure_rest_api_for_get_watchlist_items(
    header_token_value,
    resource_group_name,
    workspace_name,
    watchlist_alias,
    watchlist_api_version,
)

In [None]:
# Load the watchlist items returned by the REST call into a DataFrame.
if response_watchlist is not None:
    # Raise on 4xx/5xx so a failed call reports its HTTP error instead of a KeyError on "value".
    response_watchlist.raise_for_status()
    df_api_data = pd.DataFrame(response_watchlist.json()["value"])

In [None]:
# Stack the Log Analytics query rows and the watchlist rows into a single DataFrame.
df_union = pd.concat(objs=[df_la_query, df_api_data])
# Uncomment to inspect the combined data.
#display(df_union)

In [None]:
# Row dictionaries for the Log Analytics query result, with TimeGenerated rendered as text.
final_result1 = convert_dataframe_to_list_of_dictionaries(
    df=df_la_query,
    hasTimeGeneratedColumn=True,
)
display(final_result1)

In [None]:
# Row dictionaries for the watchlist API data; no TimeGenerated formatting is requested.
final_result2 = convert_dataframe_to_list_of_dictionaries(
    df=df_api_data,
    hasTimeGeneratedColumn=False,
)
# Uncomment to inspect the converted rows.
#display(final_result2)

In [None]:
# Row dictionaries for the combined data, with TimeGenerated rendered as text.
final_result = convert_dataframe_to_list_of_dictionaries(
    df=df_union,
    hasTimeGeneratedColumn=True,
)
# Uncomment to inspect the converted rows.
#display(final_result)

## 3. Save result to Azure Log Analytics Custom Table

In [None]:
from azure.core.exceptions import HttpResponseError
from azure.identity import DefaultAzureCredential
from azure.monitor.ingestion import LogsIngestionClient

# Sample records to upload. Keys must be spelled exactly (no trailing whitespace);
# they are presumably matched against the column names of the target stream — confirm against your DCR.
# To upload the data prepared above instead of this sample, pass final_result as `logs`.
body = [
    {
        "TimeGenerated": "2023-08-04T14:51:14.1104269Z",
        "MyData": "Computer1",
    },
    {
        "TimeGenerated": "2023-08-04T14:41:14.1104269Z",
        "MyData": "Computer2",
    },
]


# Ingestion client bound to the data collection endpoint, reusing the service principal credential.
client = LogsIngestionClient(endpoint=dce_endpoint, credential=credential, logging_enable=True)

try:
    ingestion_result = client.upload(rule_id=immutable_rule_id, stream_name=stream_name, logs=body)
except HttpResponseError as e:
    # Report the failure in the cell output instead of stopping the notebook.
    print(f"Upload failed: {e}")