# Automation Gallery - Credential Scan on Azure Log Analytics

__Notebook Version:__ 1.0<br>
__Python Version:__ Python 3.8<br>
__Apache Spark Version:__ 3.1<br>
__Required Packages:__ azure-monitor-query, azure-mgmt-loganalytics<br>
__Platforms Supported:__  Azure Synapse Analytics
     
__Data Source Required:__ Log Analytics tables 
    
### Description
This notebook provides step-by-step instructions and sample code to detect credentials leaked into Azure Log Analytics using the Azure SDK for Python and KQL.<br>
*** No need to download and install any other Python modules. ***<br>
*** Please run the cells sequentially to avoid errors.  Please do not use "run all cells". *** <br>
Need to know more about KQL? [Getting started with Kusto Query Language](https://docs.microsoft.com/azure/data-explorer/kusto/concepts/).

## Table of Contents
1. Warm-up
2. Azure Authentication
3. Azure Log Analytics Data Queries

## 1. Warm-up

In [None]:
# If you need to know what Python modules are available, you may run this:
# help("modules")
# Install the two Azure SDK packages that the import cell below loads
# (azure.monitor.query and azure.mgmt.loganalytics).
%pip install azure-monitor-query
%pip install azure-mgmt-loganalytics

In [None]:
# Load Python libraries that will be used in this notebook
from azure.mgmt.loganalytics import LogAnalyticsManagementClient
from azure.monitor.query import LogsQueryClient, MetricsQueryClient, LogsQueryStatus
from azure.identity import AzureCliCredential, DefaultAzureCredential, ClientSecretCredential
from azure.core.exceptions import  HttpResponseError 

from datetime import datetime, timezone, timedelta
import pandas as pd
import json
import ipywidgets
from IPython.display import display, HTML, Markdown

In [None]:
# Functions that will be used in this notebook
def get_credscan_kql_where_clause(column_name, time_range="7d"):
    """Build the KQL filter that flags rows whose content looks like a credential.

    The returned text is meant to be appended directly after a table name. It
    consists of two piped ``where`` operators: one restricting ``TimeGenerated``
    to the last ``time_range``, and one combining every credential pattern in
    the list below through ``matches regex``.

    Args:
        column_name: Text placed in front of each ``matches regex`` operator.
            The notebook passes "*".
        time_range: KQL timespan literal used inside ``ago(...)``.
            Defaults to "7d", the value that used to be hard-coded.

    Returns:
        A string of the form
        `` | where TimeGenerated > ago(<time_range>) | where <conditions> ``.
    """
    # The patterns are raw Python strings with doubled backslashes and
    # backslash-escaped double quotes because each one is embedded verbatim
    # inside a double-quoted KQL string literal further down.
    regex_list = [
        r"(?i)(ida:password|IssuerSecret|(api|client|app(lication)?)[_\\- ]?(key|secret)[^,a-z]|\\.azuredatabricks\\.net).{0,10}(dapi)?[a-z0-9/+]{22}",
        r"(?i)(x-api-(key|token).{0,10}[a-z0-9/+]{40}|v1\\.[a-z0-9/+]{40}[^a-z0-9/+])",
        r"(?-i)\\WAIza(?i)[a-z0-9_\\\\\\-]{35}\\W",
        r"(?i)(\\Wsig\\W|Secret(Value)?|IssuerSecret|(\\Wsas|primary|secondary|management|Shared(Access(Policy)?)?).?Key|\\.azure\\-devices\\.net|\\.(core|servicebus|redis\\.cache|accesscontrol|mediaservices)\\.(windows\\.net|chinacloudapi\\.cn|cloudapi\\.de|usgovcloudapi\\.net)|New\\-AzureRedisCache).{0,100}([a-z0-9/+]{43}=)",
        r"(?i)visualstudio\\.com.{1,100}\\W(?-i)[a-z2-7]{52}\\W",
        # NOTE(review): the next pattern only matches values beginning with
        # "se=2021" - confirm the fixed year is still intended.
        r"(?i)se=2021.+sig=[a-z0-9%]{43,63}%3d",
        r"(?i)(x-functions-key|ApiKey|Code=|\\.azurewebsites\\.net/api/).{0,100}[a-z0-9/\\+]{54}={2}",
        r"(?i)code=[a-z0-9%]{54,74}(%3d){2}",
        r"(?i)(userpwd|publishingpassword).{0,100}[a-z0-9/\\+]{60}\\W",
        r"(?i)[^a-z0-9/\\+][a-z0-9/\\+]{86}==",
        r"(?-i)\\-{5}BEGIN( ([DR]SA|EC|OPENSSH|PGP))? PRIVATE KEY( BLOCK)?\\-{5}",
        r"(?i)(app(lication)?|client)[_\\- ]?(key(url)?|secret)([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})[^\\-]",
        r"(?i)refresh[_\\-]?token([\\s=:>]{1,10}|[\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2})(\"data:text/plain,.+\"|[a-z0-9/+=_.-]{20,200})",
        r"(?i)AccessToken(Secret)?([\\s\"':=|>\\]]{3,15}|[\"'=:\\(]{2}|[\\s=:>]{1,10})[a-z0-9/+=_.-]{20,200}",
        r"(?i)[a-z0-9]{3,5}://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+",
        r"(?i)snmp(\\-server)?\\.exe.{0,100}(priv|community)",
        r"(?i)(ConvertTo\\-?SecureString\\s*((\\(|\\Wstring)\\s*)?['\"]+)",
        r"(?i)(Consumer|api)[_\\- ]?(Secret|Key)([\\s=:>]{1,10}|[\\s\"':=|>,\\]]{3,15}|[\"'=:\\(]{2})[^\\s]{5,}",
        r"(?i)authorization[,\\[:= \"']+([dbaohmnsv])",
        r"(?i)-u\\s+.{2,100}-p\\s+[^\\-/]",
        r"(?i)(amqp|ssh|(ht|f)tps?)://[^%:\\s\"'/][^:\\s\"'/\\$]+[^:\\s\"'/\\$%]:([^%\\s\"'/][^@\\s\"'/]{0,100}[^%\\s\"'/])@[\\$a-z0-9:\\.\\-_%\\?=/]+",
        r"(?i)(\\Waws|amazon)?.{0,5}(secret|access.?key).{0,10}\\W[a-z0-9/\\+]{40}",
        r"(?-i)(eyJ0eXAiOiJKV1Qi|eyJhbGci)",
        r"(?i)@(\\.(on)?)?microsoft\\.com[ -~\\s]{1,100}?(\\w?pass\\w?)",
        r"(?i)net(\\.exe)?.{1,5}(user\\s+|share\\s+/user:|user-?secrets? set)\\s+[a-z0-9]",
        r"(?i)xox[pbar]\\-[a-z0-9]",
        r"(?i)[\":\\s=]((x?corp|extranet(test)?|ntdev)(\\.microsoft\\.com)?|corp|redmond|europe|middleeast|northamerica|southpacific|southamerica|fareast|africa|exchange|extranet(test)?|partners|parttest|ntdev|ntwksta)\\W.{0,100}(password|\\Wpwd|\\Wpass|\\Wpw\\W|userpass)",
        r"(?i)(sign_in|SharePointOnlineAuthenticatedContext|(User|Exchange)Credentials?|password)[ -~\\s]{0,100}?@([a-z0-9.]+\\.(on)?)?microsoft\\.com['\"]?",
        r"(?i)(\\.database\\.azure\\.com|\\.database(\\.secure)?\\.windows\\.net|\\.cloudapp\\.net|\\.database\\.usgovcloudapi\\.net|\\.database\\.chinacloudapi\\.cn|\\.database.cloudapi.de).{0,100}(DB_PASS|(sql|service)?password|\\Wpwd\\W)",
        r"(?i)(secret(.?key)?|password)[\"']?\\s*[:=]\\s*[\"'][^\\s]+?[\"']",
        r"(?i)[^a-z\\$](DB_USER|user id|uid|(sql)?user(name)?|service\\s?account)\\s*[^\\w\\s,]([ -~\\s]{2,120}?|[ -~]{2,30}?)([^a-z\\s\\$]|\\s)\\s*(DB_PASS|(sql|service)?password|pwd)",
        r"(?i)(password|secret(key)?)[ \\t]*[=:]+[ \\t]*([^:\\s\"';,<]{2,200})",
    ]

    # NOTE(review): the pattern at this index is attached to its predecessor
    # with "and" while every other pattern is attached with "or". The reason
    # is not documented anywhere in this notebook; the behavior is preserved
    # as-is - confirm whether it is intentional.
    and_joined_index = 27

    conditions = []
    for i, re_str in enumerate(regex_list):
        condition = " " + column_name + " matches regex \"" + re_str + "\""
        if i == 0:
            conditions.append(condition)
        elif i == and_joined_index:
            conditions.append(" and " + condition)
        else:
            conditions.append(" or " + condition)

    regex_string = "".join(conditions)
    where_clause = " | where TimeGenerated > ago({0}) | where {1} "
    return where_clause.format(time_range, regex_string)


## 2. Azure Authentication

In [None]:
# Notebook parameters: fill these in before running the cells below.
# Passed to ClientSecretCredential as tenant_id.
tenant_id = ''
# NOTE(review): subscription_id is not referenced by any other cell visible in this notebook.
subscription_id = ''
# The next four values are the arguments given to mssparkutils.credentials.getSecret
# when fetching client_id and client_secret.
akv_name = ''
client_id_name = ''
client_secret_name = ''
akv_link_name = ''
# Passed to query_workspace as workspace_id for every query in section 3.
workspace_id = ''

In [None]:
# Fetch the client id first, then the client secret, using the same vault and link settings for both.
client_id, client_secret = [
    mssparkutils.credentials.getSecret(akv_name, secret_name, akv_link_name)
    for secret_name in (client_id_name, client_secret_name)
]

In [None]:
# Credential object handed to the LogsQueryClient created in the next cell.
credential = ClientSecretCredential(tenant_id=tenant_id, client_id=client_id, client_secret=client_secret)

In [None]:
# Initialize the LogsQueryClient, which is used to access Microsoft Sentinel log data in Azure Log Analytics.
# You may need to change resource_uri for various cloud environments.
resource_uri = "https://api.loganalytics.io"
la_data_client = LogsQueryClient(credential=credential, endpoint=resource_uri)

## 3. Azure Log Analytics Data Queries

In [None]:
# Get all tables available using Kusto query language.  If you need to know more about KQL, please check out the link provided at the introductory section.
tables_result = None
table_list = None

# Query window passed as the timespan of every query below: the last 24 hours.
end_time = datetime.now(timezone.utc)
start_time = end_time - timedelta(days=1)

# To scan a fixed window instead, uncomment and adjust:
#start_time=datetime(2022, 1, 1, tzinfo=timezone.utc)
#end_time=datetime(2022, 12, 31, tzinfo=timezone.utc)

all_tables_query = "union withsource = SentinelTableName * | distinct SentinelTableName | sort by SentinelTableName asc"
tables_result = la_data_client.query_workspace(
    workspace_id=workspace_id,
    query=all_tables_query,
    timespan=(start_time, end_time))

if tables_result.status != LogsQueryStatus.SUCCESS:
    # Report the problem instead of silently finishing without scanning anything.
    print("Could not list the workspace tables; query status: {0}".format(tables_result.status))
else:
    df_table = pd.DataFrame(data=tables_result.tables[0].rows, columns=tables_result.tables[0].columns)
    table_list = list(df_table["SentinelTableName"])
    column_name = "*"

    # The where clause does not depend on the table, so build it once.
    kql_where_clause = get_credscan_kql_where_clause(column_name)

    for table_name in table_list:
        print(table_name)
        query = "{0}  {1}".format(table_name, kql_where_clause)

        # Run query
        try:
            result = la_data_client.query_workspace(
                workspace_id=workspace_id,
                query=query,
                timespan=(start_time, end_time))
        except HttpResponseError as error:
            # str() guards against message/reason being None, which would
            # otherwise raise TypeError from inside this handler.
            print("==============================")
            print(" This table got http error:")
            print(" message:" + str(error.message))
            print(" reason:" + str(error.reason))
            print("==============================")
            continue

        if result.status == LogsQueryStatus.SUCCESS:
            result_tables = result.tables
        else:
            # A partial result carries its rows in partial_data rather than tables.
            print(" Query returned partial data: {0}".format(result.partial_error))
            result_tables = result.partial_data

        if not result_tables:
            print("Empty")
            continue

        # Display Result
        df = pd.DataFrame(data=result_tables[0].rows, columns=result_tables[0].columns)
        if not df.empty:
            print(df)
        else:
            print("Empty")
