In [None]:
import itertools
import random as random
import re
from collections import Counter

import numpy as np
import pandas as pd
import scipy.stats as stats
import tensorflow as tf
from keras.activations import relu, sigmoid
from keras.layers import Input, Dense, Dropout
from keras.models import Model, Sequential
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.wrappers.scikit_learn import KerasClassifier
from numpy import mean, std
from scipy.stats import sem, t
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from tensorflow.keras import layers, losses
from tensorflow.keras.losses import mse
from user_agents import parse

<b>Feature Engineering - Illicit Consent Grant</b>

In [None]:
'''
Create the features "ConsentCount" (counts the number of consents in a given day), 
"RecentConsentCount" (counts the number of consents in a 14 day period)
"recentConsent" (boolean feature stating whether the consent was recent, i.e., less than 14 days).

'''

def check_recent_consent(act):
    """Tell whether a consent event closely follows the creation of its app's service principal.

    Parameters
    ----------
    act : mapping or pandas.Series
        One audit-log row. Reads 'Activity', 'Target1DisplayName' and 'Date (UTC)'.

    Returns
    -------
    bool
        True only for a "Consent to application" row whose date lies 0-13 whole days
        after the first "Add service principal" row found in the module-level
        ``df_audit`` for the same 'Target1DisplayName'. False in every other case.
    """
    if act['Activity'] != "Consent to application":
        return False

    same_app = df_audit['Target1DisplayName'] == act['Target1DisplayName']
    sp_added = df_audit['Activity'] == "Add service principal"
    sp_dates = df_audit.loc[same_app & sp_added, 'Date (UTC)']
    if sp_dates.empty:
        return False

    # Only the first matching service-principal row (in frame order) is compared.
    elapsed = pd.to_datetime(act['Date (UTC)']) - pd.to_datetime(sp_dates.iloc[0])
    return 0 <= elapsed.days < 14

# Evaluate check_recent_consent once per audit row (axis=1) and store the boolean result.
df_audit['recentConsent'] = df_audit.apply(check_recent_consent, axis=1)

############## The number of consents to apps recently added ##################

def count_recent_consent(df_audit):
    """Attach, to every row, how many recent consents its application received.

    Parameters
    ----------
    df_audit : pandas.DataFrame
        Audit log with 'Activity', 'Target1DisplayName' and 'recentConsent' columns.

    Returns
    -------
    pandas.DataFrame
        A new frame (left merge, so the row order is kept and the index is reset)
        with an integer 'RecentConsentCount' column: the number of
        "Consent to application" rows flagged ``recentConsent`` for the row's
        'Target1DisplayName', or 0 when there are none.
    """
    # Drop a count left over from a previous run; otherwise the merge would emit
    # RecentConsentCount_x / RecentConsentCount_y and the lookup below would fail.
    base = df_audit.drop(columns=['RecentConsentCount'], errors='ignore')

    is_recent_consent = (base['Activity'] == 'Consent to application') & base['recentConsent'].eq(True)
    consent_counts = (
        base[is_recent_consent]
        .groupby('Target1DisplayName')
        .size()
        .reset_index(name='RecentConsentCount')
    )

    merged = pd.merge(base, consent_counts, on='Target1DisplayName', how='left')

    # Applications without any recent consent have no match in the merge.
    merged['RecentConsentCount'] = merged['RecentConsentCount'].fillna(0).astype(int)

    return merged


def count_consent(df_audit):
    """Attach, to every row, the number of same-day consents given to its application.

    Parameters
    ----------
    df_audit : pandas.DataFrame
        Audit log with 'Activity', 'Target1DisplayName' and 'Date (UTC)' columns.
        The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame (left merge, index reset) with an integer 'ConsentCount' column.
        For "Consent to application" rows it holds the number of such rows sharing
        the same application and calendar day; for every other row it is 0.
        The helper 'Date' column used for the daily binning is not returned.
    """
    merge_keys = ['Target1DisplayName', 'Date', 'Activity']

    # Drop a count left over from a previous run so the merge cannot create
    # suffixed ConsentCount_x / ConsentCount_y columns.
    base = df_audit.drop(columns=['ConsentCount'], errors='ignore')

    # Bin by day on a new frame, so the caller's DataFrame does not gain a 'Date' column.
    base = base.assign(Date=pd.to_datetime(base['Date (UTC)']).dt.date)

    consent_counts = (
        base[base['Activity'] == 'Consent to application']
        .groupby(merge_keys)
        .size()
        .reset_index(name='ConsentCount')
    )

    merged = pd.merge(base, consent_counts, on=merge_keys, how='left')

    # Non-consent rows have no match in the merge.
    merged['ConsentCount'] = merged['ConsentCount'].fillna(0).astype(int)

    return merged.drop(columns=['Date'])

# Rebind df_audit to the merged frames returned by the two counting helpers
# (adds 'RecentConsentCount', then 'ConsentCount').
df_audit = count_recent_consent(df_audit)
df_audit = count_consent(df_audit)

In [None]:
'''
For every "Add delegated permission grant" event type, create a feature that contains the consent scope (consentScope)
Create a variable with the name riskyPermissionsInScope that represents the number of risky permissions that 
are in the scope of the consent. Risky permissions are: Mail.* (including Mail.Send*, but not Mail.ReadBasic*), Contacts.*, 
MailboxSettings.*, People.*, Files.*, Notes.*, Directory.AccessAsUser.All, User_Impersonation. 
(https://learn.microsoft.com/en-us/security/operations/incident-response-playbook-app-consent)
Also check for application permissions, as all of them should be considered high risk.
'''
df_audit_copy = df_audit.copy()


def _extract_quoted(frame, column, activities, default='Unknown'):
    """Return, per row, the first double-quoted substring found in `column`.

    Only rows whose 'Activity' is one of `activities` are parsed; every other row
    gets `default`. As in the per-column lambdas this helper replaces, a selected
    row whose value holds no quoted substring raises IndexError, and a non-string
    value raises TypeError.
    """
    def pick(row):
        if row['Activity'] in activities:
            return re.findall('"([^"]*)"', row[column])[0]
        return default

    return frame.apply(pick, axis=1)


# Scope string of delegated permission grants (added or removed).
df_audit_copy['delegatedPermissionsScope'] = _extract_quoted(
    df_audit_copy,
    'Target1ModifiedProperty1NewValue',
    ('Add delegated permission grant', 'Remove delegated permission grant'),
)

# Scope string of application (app role) permission assignments (added or removed).
df_audit_copy['appPermissionsScope'] = _extract_quoted(
    df_audit_copy,
    'Target1ModifiedProperty2NewValue',
    ('Add app role assignment to service principal',
     'Remove app role assignment to service principal'),
)

#Create a column that holds the name of the app that requires consent. 
# Create list with all event types that have the app name in the same variable
operation_names = ['Add application', 'Add service principal', 'Update application',
                   'Update service principal', 'Consent to application', 'Add app role assignment grant to user']

df_audit_copy['appName'] = _extract_quoted(
    df_audit_copy, 'Target1ModifiedProperty2NewValue', operation_names)

#Create a column that shows the onBehalfOfAll value of the "Consent to application" operation
df_audit_copy['onBehalfOfAll'] = _extract_quoted(
    df_audit_copy, 'Target1ModifiedProperty3NewValue', ('Consent to application',))

#Create a column that shows the isRiskyApp value of the "Consent to application" operation
is_consent = df_audit_copy['Activity'] == 'Consent to application'
risk_flagged = df_audit_copy['Target1ModifiedProperty4NewValue'].map(
    lambda value: "Risky application detected" in str(value))
df_audit_copy['isRiskyApp'] = is_consent & risk_flagged

# Also create the feature Weekend 
# Earlier cells parse 'Date (UTC)' with pd.to_datetime before using it, so do the same here:
# the .dt accessor raises AttributeError on a column that is still stored as strings.
df_audit_copy['Weekend'] = pd.to_datetime(df_audit_copy['Date (UTC)']).dt.weekday >= 5

In [None]:
# Columns carried from the enriched audit log into the per-event working frame.
# 'Weekend' has to be part of this list: it is computed on df_audit_copy and is selected
# again from df_audit_selected_events when the anomaly-detection feature set is built,
# which raised a KeyError while it was missing here.
# NOTE(review): 'consentType', 'isAppOnly', 'simulatedAttack', 'Label_Role_Management' and
# 'Label_o365' are not created in the cells shown in this notebook - presumably they come
# with the loaded audit data; confirm.
features = ['CorrelationId', 'Activity', 'Date (UTC)', 'Result', 'ActorUserPrincipalName', 'delegatedPermissionsScope',
            'appPermissionsScope', 'isRiskyApp', 'Weekend', 'ConsentCount', 'RecentConsentCount', 'recentConsent',
            'onBehalfOfAll', 'consentType', 'isAppOnly', 'appName', 'simulatedAttack', 'Label_Role_Management', 'Label_o365']

# Independent copy, so later column assignments do not touch df_audit_copy.
df_audit_selected_events = df_audit_copy[features].copy()

def propagate_values(group):
    mask_consent = group['Activity'] == 'Consent to application'
    mask_delegated = (group['Activity'] == 'Remove delegated permission grant') | 
    (group['Activity'] == 'Add delegated permission grant') 
    mask_app_role = (group['Activity'] == 'Remove app role assignment to service principal') | 
    (group['Activity'] == 'Add app role assignment to service principal')
    
    # Create a grouping using the masks for defined above
    
    if mask_1.any():
        values_row = grouping.loc[mask_consent].iloc[0]
        grouping['onBehalfOfAll'] = values_row['onBehalfOfAll']
        grouping['appName'] = values_row['appName']
    if mask_2.any():
        values_row = grouping.loc[mask_delegated].iloc[0]
        grouping['consentType'] = values_row['consentType']
    if mask_3.any():
        values_row = grouping.loc[mask_app_role].iloc[0]
        grouping['consentType'] = values_row['consentType']
    return group

# Group by correlation ID and apply the function to each grouping.
# group_keys=False: the CorrelationId is not added as an extra index level of the result.
df_audit_selected_events = df_audit_selected_events.groupby('CorrelationId', group_keys=False).apply(propagate_values)

In [None]:
'''
Create features "numberDelegatedRiskyPermissionsPerApp", "numberDelegatedNonRiskyPermissionsPerApp" that track 
the number of risky/non-risky delegated permissions an app currently has in scope.
Define a list of risky permissions according to Microsoft.
(https://learn.microsoft.com/en-us/security/operations/incident-response-playbook-app-consent)
'''

# Substrings that mark a permission as risky, shared by the delegated and the AppRole checks.
risky_permissions_del_app = [
    'Mail.',
    'Contacts.',
    'MailboxSettings.',
    'People.',
    'Files.',
    'Notes.',
    'Directory.AccessAsUser.All',
    'User_Impersonation',
    'Application.ReadWrite.All',
    'Directory.ReadWrite.All',
    'Domain.ReadWrite.All',
    'EduRoster.ReadWrite.All',
    'Group.ReadWrite.All',
    'Member.Read.Hidden',
    'RoleManagement.ReadWrite.Directory',
    'User.ReadWrite.All',
    'User.ManageIdentities.All',
]

# App permissions are more dangerous, so any app permission with write access also counts as risky.
risky_write_app = 'Write'

# Per-CorrelationId caches of the risky / non-risky delegated permission counts.
risky_permissions = dict()
non_risky_permissions = dict()

# Function used for counting the number of delegated risky permissions
def count_delegated_risky_perms(act):
    """Count the risky delegated permissions granted within one consent flow.

    The result is cached per 'CorrelationId' in the module-level ``risky_permissions``
    dict, so every event of a flow reports the count derived from the flow's
    delegated-permission-grant event(s).

    Parameters
    ----------
    act : mapping or pandas.Series
        One event row. Reads 'CorrelationId', 'Activity' and, for delegated
        permission grant events, the space-separated 'delegatedPermissionsScope'.

    Returns
    -------
    int
        Number of permissions in the scope that contain one of the substrings in
        ``risky_permissions_del_app``, excluding Mail.ReadBasic* permissions, plus
        one for 'offline_access' when at least one risky permission is present.
        0 while no delegated grant event has been seen for the flow.
    """
    corrID = act['CorrelationId']

    # Add the correlation ID to the dictionary if not present already
    if corrID not in risky_permissions:
        risky_permissions[corrID] = 0

    # Access only the activities related to the delegated permission scope of a consent grant
    is_delegated_grant = act['Activity'] in ('Add delegated permission grant',
                                             'Remove delegated permission grant')
    # A non-zero cached value means the flow was already counted; do not count it twice.
    if is_delegated_grant and risky_permissions[corrID] == 0:
        delegated_perms_scope = act['delegatedPermissionsScope'].split(" ")

        # The Mail.ReadBasic exclusion has to be tested on the granted permission itself
        # (it matches the 'Mail.' pattern); any() counts each permission at most once
        # even if it matches several patterns.
        risky_count = sum(
            1
            for perm in delegated_perms_scope
            if 'Mail.ReadBasic' not in perm
            and any(pattern in perm for pattern in risky_permissions_del_app)
        )

        # offline_access only adds risk when combined with another risky permission.
        if risky_count and 'offline_access' in delegated_perms_scope:
            risky_count += 1

        risky_permissions[corrID] = risky_count

    return risky_permissions[corrID]

# Function used for counting the number of delegated non-risky permissions
def count_delegated_non_risky_perms(act):
    """Count the non-risky delegated permissions granted within one consent flow.

    The result is cached per 'CorrelationId' in the module-level
    ``non_risky_permissions`` dict.

    Parameters
    ----------
    act : mapping or pandas.Series
        One event row. Reads 'CorrelationId' and 'Activity'; for delegated permission
        grant events it also reads the space-separated 'delegatedPermissionsScope'
        and the already computed 'numberDelegatedRiskyPermissionsPerApp'.

    Returns
    -------
    int
        Number of permissions in the scope minus the row's risky count, or the
        value cached for the flow (0 if none) for any other activity.
    """
    corrID = act['CorrelationId']

    # Add the correlation ID to the dictionary if not present already
    if corrID not in non_risky_permissions:
        non_risky_permissions[corrID] = 0

    if act['Activity'] in ('Add delegated permission grant', 'Remove delegated permission grant'):
        # Leading, trailing or repeated spaces produce empty tokens; none of them is a permission.
        granted = [perm for perm in act['delegatedPermissionsScope'].split(" ") if perm]
        non_risky_permissions[corrID] = len(granted) - act['numberDelegatedRiskyPermissionsPerApp']

    return non_risky_permissions[corrID]

# Create new features
# Order matters: count_delegated_non_risky_perms reads the
# 'numberDelegatedRiskyPermissionsPerApp' column written by the first apply.
df_audit_selected_events['numberDelegatedRiskyPermissionsPerApp'] = df_audit_selected_events.apply(count_delegated_risky_perms, axis=1)
df_audit_selected_events['numberDelegatedNonRiskyPermissionsPerApp'] = df_audit_selected_events.apply(count_delegated_non_risky_perms, axis=1)


In [None]:
'''
Create features "numberAppRoleRiskyPermissionsPerApp", "numberAppRoleNonRiskyPermissionsPerApp" that track 
the number of risky/non-risky app role permissions an app currently has in scope.
Define a list of risky permissions according to Microsoft.
(https://learn.microsoft.com/en-us/security/operations/incident-response-playbook-app-consent)
'''

# Running count of risky / non-risky app role permissions currently assigned to each application.
# (These are the dictionaries the loop below reads and updates; they were never initialised before.)
num_app_risky_perms_dict = {}
num_app_non_risky_perms_dict = {}

# Effect of each app-role activity on the running counts; any other activity leaves them unchanged.
app_role_delta = {
    'Add app role assignment to service principal': 1,
    'Remove app role assignment to service principal': -1,
}

risky_counts = []
non_risky_counts = []

# Check each row in the dataset for app role permissions
for _, row in df_audit_selected_events.iterrows():
    app_name = row['appName']
    app_permission_scope = row['appPermissionsScope']

    # Make sure the application has an entry in both dictionaries
    num_app_risky_perms_dict.setdefault(app_name, 0)
    num_app_non_risky_perms_dict.setdefault(app_name, 0)

    delta = app_role_delta.get(row['Activity'], 0)
    if delta:
        # A permission is risky if it grants write access or matches ANY risky pattern.
        # (The previous loop left after testing only the first pattern in the list.)
        is_risky = risky_write_app in app_permission_scope or any(
            perm in app_permission_scope for perm in risky_permissions_del_app)
        if is_risky:
            num_app_risky_perms_dict[app_name] += delta
        else:
            num_app_non_risky_perms_dict[app_name] += delta

    # Snapshot of the application's counts as of this event
    risky_counts.append(num_app_risky_perms_dict[app_name])
    non_risky_counts.append(num_app_non_risky_perms_dict[app_name])

# Assign positionally in one go: avoids a per-row .loc write and is safe with a non-unique index.
df_audit_selected_events['numberAppRoleRiskyPermissionsPerApp'] = risky_counts
df_audit_selected_events['numberAppRoleNonRiskyPermissionsPerApp'] = non_risky_counts

# The features required for the anomaly detection task.
# Note that "onBehalfOfAll" is the same as "ConsentType"
features = ['ActorUserPrincipalName', 'isRiskyApp', 'numberAppRoleRiskyPermissionsPerApp', 'numberAppRoleNonRiskyPermissionsPerApp',
            'numberDelegatedRiskyPermissionsPerApp', 'numberDelegatedNonRiskyPermissionsPerApp', 'Weekend', 'ConsentCount',
            'RecentConsentCount', 'recentConsent', 'onBehalfOfAll', 'simulatedAttack', 'Label_Role_Management', 'Label_o365']

# One row per consent flow: keep only the "Consent to application" events and the model features.
is_consent_event = df_audit_selected_events['Activity'] == 'Consent to application'
df_audit_flows = df_audit_selected_events.loc[is_consent_event, features].reset_index(drop=True)

# Ground truth labeling: everything that is not 'Normal' is a simulated attack.
df_audit_flows['Attack'] = (df_audit_flows['simulatedAttack'] != 'Normal').astype(int)

# The labelling statements and the data-generation cell refer to this frame as `df_audit_flow`;
# bind both names to the same object so either spelling works.
df_audit_flow = df_audit_flows


<b>Synthetic Data Generation - Illicit Consent Grant</b>

In [None]:
def get_probabilities(data):
    """Estimate the empirical distributions used to sample synthetic consent flows.

    Parameters
    ----------
    data : pandas.DataFrame
        Observed flows. Must contain 'RecentConsentCount' and 'recentConsent'.

    Returns
    -------
    dict
        * one entry per column of `data`: ``(values, probs)``, two parallel lists
          with the distinct values (most frequent first) and their relative
          frequencies;
        * ``'joint_consent_count'``: a Series indexed by
          (RecentConsentCount, recentConsent) holding the joint probability of each
          observed pair.
    """
    # Marginal distribution of every feature
    probabilities = {}
    for feature in data.columns:
        frequencies = data[feature].value_counts(normalize=True)
        probabilities[feature] = (frequencies.index.tolist(), frequencies.values.tolist())

    # Joint distribution of the two consent-recency features, which are strongly dependent.
    # Normalising by the number of grouped rows (groupby skips rows with missing keys)
    # guarantees the probabilities sum to 1, as np.random.choice requires.
    pair_counts = data.groupby(['RecentConsentCount', 'recentConsent']).size()
    probabilities['joint_consent_count'] = pair_counts / pair_counts.sum()
    return probabilities

def generate_synthetic_data(n_samples, probs=None):
    """Sample synthetic legitimate consent flows.

    Parameters
    ----------
    n_samples : int
        Number of rows to generate.
    probs : dict, optional
        Distributions as returned by ``get_probabilities``. Must provide
        'ActorUserPrincipalName', 'onBehalfOfAll' and 'joint_consent_count'.
        Defaults to the module-level ``probabilities``.

    Returns
    -------
    pandas.DataFrame
        `n_samples` rows. Users and 'onBehalfOfAll' follow the estimated marginals,
        ('RecentConsentCount', 'recentConsent') are drawn jointly, the permission
        counts are uniform over fixed small ranges, and 'isRiskyApp', 'Weekend' and
        'ConsentCount' are the constants False, False and 1.
    """
    if probs is None:
        probs = probabilities

    user_values, user_probs = probs['ActorUserPrincipalName']
    behalf_values, behalf_probs = probs['onBehalfOfAll']

    data = {
        'ActorUserPrincipalName': np.random.choice(user_values, n_samples, p=user_probs),
        'isRiskyApp': [False] * n_samples,
        'numberAppRoleRiskyPermissionsPerApp': np.random.choice([0, 1], n_samples),
        'numberAppRoleNonRiskyPermissionsPerApp': np.random.choice([0, 1, 2, 3], n_samples),
        'numberDelegatedRiskyPermissionsPerApp': np.random.choice([0, 1, 2, 3, 4, 5], n_samples),
        'numberDelegatedNonRiskyPermissionsPerApp': np.random.choice([0, 1, 2, 3, 4, 5, 6], n_samples),
        'Weekend': [False] * n_samples,
        'ConsentCount': [1] * n_samples,
        'onBehalfOfAll': np.random.choice(behalf_values, n_samples, p=behalf_probs),
    }

    # Handling joint probabilities: draw an index into the list of observed
    # (RecentConsentCount, recentConsent) pairs, because np.random.choice cannot
    # sample from a list of tuples directly.
    joint = probs['joint_consent_count']
    pair_choices = list(joint.index)
    pair_samples = np.random.choice(len(pair_choices), p=joint.values, size=n_samples)
    data['RecentConsentCount'] = [pair_choices[i][0] for i in pair_samples]
    data['recentConsent'] = [pair_choices[i][1] for i in pair_samples]

    return pd.DataFrame(data)

# Split into legitimate and attack df.
# Legitimate flows are the ones labelled 'Normal'; everything else is a simulated attack.
# (`df_audit_flows` is the frame built by the feature-engineering cell.)
df_audit_legitimate = df_audit_flows[df_audit_flows['simulatedAttack'] == 'Normal'].copy()
df_audit_attack = df_audit_flows[df_audit_flows['simulatedAttack'] != 'Normal'].copy()
df_audit_attack['Attack'] = 1

# Estimate the distributions on legitimate traffic only and sample the synthetic data set.
probabilities = get_probabilities(df_audit_legitimate)
synthetic_data_training = generate_synthetic_data(1000)
# Every synthetic row is legitimate; the label is needed once attack rows are mixed in.
synthetic_data_training['Attack'] = 0
synthetic_df = synthetic_data_training  # name used previously for the generated frame

# Create split proportions: 70% train, 10% threshold, 10% validation, remaining 10% test
n_synthetic = len(synthetic_data_training)
train_size = int(n_synthetic * 0.7)
threshold_size = int(n_synthetic * 0.1)

# Split the dataset
train_data = synthetic_data_training[:train_size]
threshold_data = synthetic_data_training[train_size:train_size + threshold_size]
validation_data = synthetic_data_training[train_size + threshold_size:train_size + 2 * threshold_size]
test_data = synthetic_data_training[train_size + 2 * threshold_size:]

# NOTE(review): the validation injection used to select a column list named
# `engineered_features_3`, which is not defined anywhere in the visible notebook.
# The columns of the synthetic data (model features + 'Attack') are used instead - confirm.
injected_columns = list(synthetic_data_training.columns)


def _inject_attack(base, attack_name, low, high, columns=None):
    """Insert the rows of one simulated attack into `base` at a random position in [low, high].

    `columns` optionally restricts the attack rows to a subset of columns.
    Returns a new frame with a fresh 0..n-1 index.
    """
    position = random.randint(low, high)
    attack_rows = df_audit_attack[df_audit_attack['simulatedAttack'] == attack_name]
    if columns is not None:
        attack_rows = attack_rows[columns]
    return pd.concat([base.iloc[:position], attack_rows, base.iloc[position:]], ignore_index=True)


# Inject the attack data in the validation and test splits.
# The position windows (1-40, 40-70, 70-100) assume splits of about 100 rows.
validation_data_copy = validation_data.copy()
for attack_name, low, high in [('AttackAdminVictim', 1, 40),
                               ('AttackUserVictims1', 40, 70),
                               ('AttackUserVictims2', 70, 100)]:
    validation_data_copy = _inject_attack(validation_data_copy, attack_name, low, high,
                                          columns=injected_columns)

# The test attacks keep all their columns (including 'simulatedAttack'), as before.
test_data_copy = test_data.copy()
for attack_name, low, high in [('AttackAdminVictim2', 1, 40),
                               ('AttackUserVictims3', 40, 70),
                               ('AttackUserVictims4', 70, 100)]:
    test_data_copy = _inject_attack(test_data_copy, attack_name, low, high)


<b>Add noise to the synthetic data for each split</b>

In [None]:
def add_noise_to_dataframe(df, noise_probability=0.02):
    """Return a noisy copy of a split of synthetic consent flows.

    Three kinds of noise are applied:
    1. rounded Gaussian noise (sigma = 0.5) on the four permission counts, floored at 0;
    2. for a random fraction of rows, a "second consent": the row's 'ConsentCount' is
       incremented and a copy of it, attributed to a different user, is inserted right
       after it, while one random row of that user is removed so the size stays constant;
    3. the user of rows whose standard-normal draw exceeds 2 in magnitude is replaced
       by one of five placeholder accounts.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'ActorUserPrincipalName', 'ConsentCount', 'RecentConsentCount'
        and the four permission-count columns. It is not modified.
    noise_probability : float, default 0.02
        Probability for each row to receive a "second consent".

    Returns
    -------
    pandas.DataFrame
        Noisy frame with as many rows as `df` and a fresh 0..n-1 index.
    """
    upn_noise = ['account1@domain.com','account2@domain.com','account3@domain.com', 'account4@domain.com', 'account5@domain.com']

    permission_columns = [
        "numberAppRoleRiskyPermissionsPerApp",
        "numberAppRoleNonRiskyPermissionsPerApp",
        "numberDelegatedRiskyPermissionsPerApp",
        "numberDelegatedNonRiskyPermissionsPerApp"
    ]

    # Private copy with labels 0..n-1: the caller's frame (often a slice) stays untouched
    # and labels coincide with positions before any row is inserted or removed.
    df = df.reset_index(drop=True).copy()

    for col in permission_columns:
        noise = np.random.normal(loc=0, scale=0.5, size=len(df))
        # Ensure the noisy data has valid values: a count cannot be negative
        df[col] = (df[col] + np.round(noise)).clip(lower=0)

    # Select a few rows at random to add random noise to
    selected_rows = np.random.rand(len(df)) < noise_probability

    next_label = len(df)  # labels for inserted rows, never clashing with existing ones
    for idx in df.index[selected_rows]:
        if idx not in df.index:
            continue  # this row was removed to make room for an earlier duplicate

        current_name = df.at[idx, 'ActorUserPrincipalName']
        possible_names = df.loc[df['ActorUserPrincipalName'] != current_name, 'ActorUserPrincipalName']
        if possible_names.empty:
            continue  # no other user to attribute the second consent to

        df.at[idx, 'ConsentCount'] += 1

        # Copy of the (already incremented) row, attributed to another user.
        new_name = np.random.choice(possible_names.to_numpy())
        new_row = df.loc[[idx]].copy()
        new_row['ActorUserPrincipalName'] = new_name
        new_row.index = [next_label]
        next_label += 1

        # Insert the copy right after the original row; labels are kept so that the
        # remaining selected labels still identify the same rows.
        position = df.index.get_loc(idx)
        df = pd.concat([df.iloc[:position + 1], new_row, df.iloc[position + 1:]])

        # Remove one random row of that user to keep the number of rows constant.
        to_remove = df[df['ActorUserPrincipalName'] == new_name].sample(1).index
        df = df.drop(to_remove)

    df = df.reset_index(drop=True)

    # Two consents on the same day imply at least two recent consents.
    df.loc[(df['ConsentCount'] == 2) & (df['RecentConsentCount'] == 1), 'RecentConsentCount'] = 2

    noise = np.random.normal(loc=0, scale=1, size=len(df))
    replace_mask = np.abs(noise) > 2
    if replace_mask.any():
        df.loc[replace_mask, "ActorUserPrincipalName"] = np.random.choice(upn_noise, size=replace_mask.sum())

    return df

df_training = train_data
df_test = test_data_copy
df_validation = validation_data_copy
df_threshold = threshold_data

# Training and threshold splits contain legitimate traffic only: add noise to every row.
df_training = add_noise_to_dataframe(df_training)
df_threshold = add_noise_to_dataframe(df_threshold)


def _add_noise_to_legitimate_rows(split):
    """Add noise to the legitimate rows (Attack == 0) of `split`; attack rows (Attack == 1) are kept as is.

    The noisy rows get the index labels of the legitimate rows back, so that
    sort_index() puts every attack row at the position it was injected at.
    """
    legitimate = split[split['Attack'] == 0].copy()
    attacks = split[split['Attack'] == 1].copy()

    noisy = add_noise_to_dataframe(legitimate)
    # add_noise_to_dataframe keeps the number of rows, so the labels map one to one.
    noisy.index = legitimate.index

    return pd.concat([noisy, attacks]).sort_index()


# Validation and test splits: the injected attacks must stay untouched.
df_validation = _add_noise_to_legitimate_rows(df_validation)
df_test = _add_noise_to_legitimate_rows(df_test)

<b>Data Transformation</b>

In [None]:
# Feature groups: categorical columns are encoded in later cells, numerical columns are scaled here.
categorical_list = [
    'ActorUserPrincipalName',
    'isRiskyApp',
    'Weekend',
    'recentConsent',
    'onBehalfOfAll',
]
numerical_list = [
    'RecentConsentCount',
    'ConsentCount',
    'numberDelegatedNonRiskyPermissionsPerApp',
    'numberDelegatedRiskyPermissionsPerApp',
    'numberAppRoleNonRiskyPermissionsPerApp',
    'numberAppRoleRiskyPermissionsPerApp',
]

# Min-max scaling to [0, 1]; the value ranges are learned from the training split only.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df_training[numerical_list])

# Apply the training-based scaling to the numerical columns of every split.
df_training[numerical_list] = scaler.transform(df_training[numerical_list])
df_threshold[numerical_list] = scaler.transform(df_threshold[numerical_list])
df_validation[numerical_list] = scaler.transform(df_validation[numerical_list])
df_test[numerical_list] = scaler.transform(df_test[numerical_list])

In [None]:
# Label encode the binary categorical variables (as 0 or 1)
label_encoders = {}

# Fixed codes for boolean-like labels, so the encoding never depends on which value happens
# to appear first in the training data and True/False can never end up with the same code.
# Because 0 == False and 1 == True as dictionary keys, re-running the cell is harmless.
canonical_binary_codes = {False: 0, True: 1, 'False': 0, 'True': 1}

for col in categorical_list:
    if col == "ActorUserPrincipalName":
        continue  # one-hot encoded separately

    # The mapping is derived from the training split only.
    train_labels = [label for label in df_training[col].unique() if pd.notna(label)]
    if all(label in canonical_binary_codes for label in train_labels):
        label_mapping = dict(canonical_binary_codes)
    else:
        # Other labels: deterministic codes in sorted order.
        label_mapping = {label: code for code, label in enumerate(sorted(train_labels, key=str))}

    # Apply the mapping to the datasets (labels unknown to the mapping become NaN)
    df_training[col] = df_training[col].map(label_mapping)
    df_threshold[col] = df_threshold[col].map(label_mapping)
    df_validation[col] = df_validation[col].map(label_mapping)
    df_test[col] = df_test[col].map(label_mapping)

    label_encoders[col] = label_mapping


In [None]:
# The preceding cells build the training split as `df_training`; the cells from here on use `df_train`.
df_train = df_training

# Users known from training, plus an "Unseen" bucket for everybody else.
known_users = sorted(df_train["ActorUserPrincipalName"].astype(str).unique())
user_categories = known_users + ["Unseen"]


def _one_hot_users(split):
    """One-hot encode 'ActorUserPrincipalName' with the same dummy columns for every split.

    Users that do not occur in the training split are mapped to "Unseen". Encoding a
    categorical with fixed categories makes get_dummies emit one column per category,
    whether or not it occurs in `split`, so all splits share the same columns.
    """
    users = split["ActorUserPrincipalName"].astype(str)
    users = users.where(users.isin(known_users), "Unseen")
    encoded = split.assign(
        ActorUserPrincipalName=pd.Categorical(users, categories=user_categories))
    return pd.get_dummies(encoded, columns=["ActorUserPrincipalName"])


# one-hot encode the "ActorUserPrincipalName" feature in each split
df_train = _one_hot_users(df_train)
df_threshold = _one_hot_users(df_threshold)
df_validation = _one_hot_users(df_validation)
df_test = _one_hot_users(df_test)

<b><font size="3">Illicit Consent Grant: Modeling Phase</font></b>

In [None]:
# Feature subsets compared in the modeling phase.
users_only = [column for column in df_train if 'ActorUserPrincipalName_' in column]
# The delegated risky count was missing here (the non-risky column was listed twice).
permissions_only = ['numberDelegatedNonRiskyPermissionsPerApp', 'numberDelegatedRiskyPermissionsPerApp',
                    'numberAppRoleNonRiskyPermissionsPerApp', 'numberAppRoleRiskyPermissionsPerApp']
recent_consent = ['RecentConsentCount', 'ConsentCount', 'recentConsent']

recent_consent_and_permissions = recent_consent + permissions_only
recent_consent_and_permissions_and_users = recent_consent_and_permissions + users_only
permissions_and_users = permissions_only + users_only

# NOTE(review): `all_features` was referenced without being defined. It is assumed to be
# every encoded model input: the subsets above plus the remaining binary features - confirm.
all_features = recent_consent_and_permissions + ['isRiskyApp', 'Weekend', 'onBehalfOfAll'] + users_only

features_dict = {
    "users_only": users_only,
    "all_features": all_features,
    "recent_consent_and_permissions_and_users": recent_consent_and_permissions_and_users,
    "recent_consent_and_permissions": recent_consent_and_permissions,
    "permissions_only": permissions_only,
    "recent_consent": recent_consent,
    "permissions_and_users": permissions_and_users

}

<b>Modeling Autoencoder</b>

In [None]:
'''
Autoencoder Model initialization.
'''

# Fix the seeds of NumPy's global generator, TensorFlow and Python's random module.
np.random.seed(0)
tf.random.set_seed(0)
random.seed(0)

# Function creating the Autoencoder network
def build_autoencoder(drop_r, l2_reg, l_rate, input_dim, *, activation_func="relu"):
    """Build and compile a dense autoencoder trained with an MSE reconstruction loss.

    Architecture: input -> Dense(input_dim, sigmoid) -> [Dropout, Dense(hidden, relu),
    Dropout, Dense(2, activation_func)] -> [Dense(hidden, relu),
    Dense(input_dim, sigmoid)], where ``hidden = int(input_dim * 0.5)``.

    Parameters
    ----------
    drop_r : float
        Dropout rate applied before each encoder layer.
    l2_reg : keras regularizer
        Kernel regularizer used by the hidden encoder/decoder layers and the
        bottleneck (the callers pass ``l2(value)``).
    l_rate : float
        Learning rate of the Adam optimizer.
    input_dim : int
        Number of input features (also the size of the reconstructed output).
    activation_func : str or callable, keyword-only, default "relu"
        Activation of the bottleneck layer. The original body referenced this
        name without defining it, which raises ``NameError`` unless a global of
        that name happens to exist.
        NOTE(review): "relu" mirrors the other hidden layers; confirm it is the
        activation used for the reported experiments.

    Returns
    -------
    tf.keras.Model
        The compiled autoencoder (optimizer: Adam, loss: 'mse').
    """
    # Other hyperparameters about the network structure of the AE (chosen empirically)
    bottleneck_size = 2        # Controls the size of the encoding
    num_hidden_layers = 2      # Controls the number of hidden layers
    ratio = 0.5                # Controls the number of neurons in each layer

    # Input layer followed by a dense layer with sigmoid activation function
    input_layer = layers.Input(shape=(input_dim,))
    x = layers.Dense(input_dim, activation="sigmoid")(input_layer)

    # Width of the hidden layer: shrink the input width by `ratio` once per
    # encoder-side hidden layer. Never let it collapse to 0 units, which Dense rejects.
    hidden_dim = input_dim
    for _ in range(num_hidden_layers // 2):
        hidden_dim = max(1, int(hidden_dim * ratio))

    # Encoding layers
    encoder = tf.keras.Sequential([
        layers.Dropout(drop_r),
        layers.Dense(hidden_dim, activation='relu', kernel_regularizer=l2_reg),
        layers.Dropout(drop_r),
        layers.Dense(bottleneck_size, activation=activation_func, kernel_regularizer=l2_reg),
    ])(x)

    # Decoding layers
    decoder = tf.keras.Sequential([
        layers.Dense(hidden_dim, activation='relu', kernel_regularizer=l2_reg),
        layers.Dense(input_dim, activation="sigmoid"),
    ])(encoder)

    # Create and compile the autoencoder
    autoencoder = tf.keras.Model(inputs=input_layer, outputs=decoder)
    opt = Adam(learning_rate=l_rate)
    autoencoder.compile(optimizer=opt, loss='mse')

    return autoencoder

In [None]:
# Grid search for the autoencoder: for every feature set, thresholding rule and
# hyperparameter configuration, train `n_iterations` models and report the mean
# F1 / precision / recall / FPR on the validation set with t-distribution CIs.

# Hyperparameter grid space
dropouts = [0.3, 0.5]
l2_regs = [1e-6, 5e-6, 1e-5, 5e-5]
learning_rates = [1e-5, 5e-5, 1e-4, 5e-4]

# Full Cartesian product of the grid; each config is a dict keyed by parameter name.
all_configs = list(ParameterGrid({
    'dropout': dropouts,
    'l2_regularization': l2_regs,
    'learning_rate': learning_rates,
}))

# Number of models trained per configuration
n_iterations = 100

# For computing the CI: the degrees of freedom follow from the number of runs.
confidence_level = 0.95
degrees_freedom = n_iterations - 1

# Other hyperparameters chosen empirically
epochs_num = 20
selected_batch_size = 64

# Rules used to turn the threshold-set reconstruction errors into a cut-off.
threshold_rules = ['MAD', 'IQR', '2-sigma', '3-sigma']


def reconstruction_threshold(errors, rule):
    """Return the anomaly cut-off for reconstruction errors under the given rule.

    'MAD'     : median + 3 * 1.4826 * median absolute deviation
    'IQR'     : Q3 + 1.5 * (Q3 - Q1)
    '2-sigma' : mean + 2 * standard deviation
    '3-sigma' : mean + 3 * standard deviation

    Raises ValueError for an unknown rule name.
    """
    if rule == 'MAD':
        median_error = np.median(errors)
        mad = 1.4826 * np.median(np.abs(errors - median_error))
        return median_error + 3 * mad
    if rule == 'IQR':
        q1 = np.percentile(errors, 25)
        q3 = np.percentile(errors, 75)
        return q3 + 1.5 * (q3 - q1)
    if rule == '2-sigma':
        return np.mean(errors) + 2 * np.std(errors)
    if rule == '3-sigma':
        return np.mean(errors) + 3 * np.std(errors)
    raise ValueError(f"Unknown threshold rule: {rule}")


# Sorted results per feature set (the per-feature list used to be overwritten).
autoencoder_search_results = {}

for feature_name, feature in features_dict.items():
    print(feature_name)
    X_train = df_train[feature].copy()
    X_validation = df_validation[feature].copy()
    y_validation = df_validation['Attack']
    X_threshold = df_threshold[feature].copy()

    all_results = []
    input_dim = X_train.shape[1]

    for threshold_name in threshold_rules:
        for config in all_configs:
            dr = config['dropout']
            l_r = config['learning_rate']
            f1_scores, precisions, recalls, fprs = [], [], [], []

            for iteration in range(n_iterations):
                # Build and train the model
                l2_r = l2(config['l2_regularization'])
                model = build_autoencoder(dr, l2_r, l_r, input_dim)
                model.fit(X_train, X_train, shuffle=False, epochs=epochs_num,
                          batch_size=selected_batch_size, verbose=0)

                # Compute the reconstruction error of the threshold set
                X_threshold_predicted = model.predict(X_threshold)
                re_threshold = np.mean(np.power(X_threshold - X_threshold_predicted, 2), axis=1)
                threshold = reconstruction_threshold(re_threshold, threshold_name)

                # Validation
                X_validation_predicted = model.predict(X_validation)
                re_validation = np.mean(np.power(X_validation - X_validation_predicted, 2), axis=1)

                # Assign an anomaly label to each sample in the validation set based on the computed threshold value
                y_predicted_validation = (re_validation > threshold).astype(int)

                # Compute F1, precision, recall and fpr. `labels` keeps the matrix
                # 2x2 even when only one class shows up, so the unpacking is safe.
                tn, fp, fn, tp = confusion_matrix(y_validation, y_predicted_validation, labels=[0, 1]).ravel()
                f1 = f1_score(y_validation, y_predicted_validation)
                precision = precision_score(y_validation, y_predicted_validation, zero_division=1)
                recall = recall_score(y_validation, y_predicted_validation)
                fpr = fp / (tn + fp) if (tn + fp) else 0.0

                # Add the computed metric to their corresponding list
                f1_scores.append(f1)
                precisions.append(precision)
                recalls.append(recall)
                fprs.append(fpr)

                print(f"Iteration {iteration + 1}. F1 score: {f1}")

            # After n_iterations, compute the average and the CI

            # Average for each metric
            avg_f1 = mean(f1_scores)
            avg_precision = mean(precisions)
            avg_recall = mean(recalls)
            avg_fpr = mean(fprs)

            # Confidence intervals from the standard error of the mean
            t_ci_f1 = t.interval(confidence_level, degrees_freedom, avg_f1, sem(f1_scores))
            t_ci_precision = t.interval(confidence_level, degrees_freedom, avg_precision, sem(precisions))
            t_ci_recall = t.interval(confidence_level, degrees_freedom, avg_recall, sem(recalls))
            t_ci_fpr = t.interval(confidence_level, degrees_freedom, avg_fpr, sem(fprs))

            print(f"Average F1 Score: {avg_f1}, Confidence Interval t-Dist: {t_ci_f1}.")
            print(f"Average Precision: {avg_precision}, Confidence Interval t-Dist: {t_ci_precision}.")
            print(f"Average Recall: {avg_recall}, Confidence Interval t-Dist: {t_ci_recall}.")
            print(f"Average FPR: {avg_fpr}, Confidence Interval t-Dist: {t_ci_fpr}.")

            # Create dictionary with the results for each config
            all_results.append({
                'dropout': dr,
                'l2_regularization': config['l2_regularization'],
                'learning_rate': l_r,
                'threshold': threshold_name,
                'avg_f1': avg_f1,
                'avg_precision': avg_precision,
                'avg_recall': avg_recall,
                'avg_fpr': avg_fpr,
                't_ci_f1': t_ci_f1,
                't_ci_precision': t_ci_precision,
                't_ci_recall': t_ci_recall,
                't_ci_fpr': t_ci_fpr,
            })

    # Best configurations first; keep them so the next feature set does not discard them.
    all_results = sorted(all_results, key=lambda x: x['avg_f1'], reverse=True)
    autoencoder_search_results[feature_name] = all_results

<b>Modeling Isolation Forest</b>

In [None]:
# Grid search for the Isolation Forest: for every feature set and hyperparameter
# configuration, fit `n_iterations` forests and report the mean F1 / precision /
# recall / FPR on the validation set with t-distribution confidence intervals.

# IsolationForest is not part of the notebook's visible import cell.
from sklearn.ensemble import IsolationForest

# Hyperparameter grid space
parameter_grid = {
    'n_estimators': [10, 20, 50, 100],
    'max_samples': [16, 32, 64, 128, 256],
    'max_features': [0.1, 0.2, 0.4, 0.6, 0.8, 1.0],
    'contamination': [0.055, 0.06, 0.065]
}
all_configs = list(ParameterGrid(parameter_grid))

# Shared RandomState: every fit draws from it, so repeated runs of one config differ.
rng = np.random.RandomState(0)

# Number of models fitted per configuration
n_iterations = 100

# For computing the CI (defined here so the cell does not rely on another cell)
confidence_level = 0.95
degrees_freedom = n_iterations - 1

# Sorted results per feature set (the per-feature list used to be overwritten).
isolation_forest_search_results = {}

# Iterate through all of the feature sets
for set_name, feature_set in features_dict.items():
    X_train = df_train[feature_set].copy()
    X_validation = df_validation[feature_set].copy()
    y_validation = df_validation['Attack']

    # List containing the results of the experiments for one feature set
    all_results = []

    for config in all_configs:
        f1_scores, precisions, recalls, fpr_scores = [], [], [], []

        for iteration in range(n_iterations):
            model = IsolationForest(**config, bootstrap=True, random_state=rng, n_jobs=-2)
            model.fit(X_train)

            # Isolation Forest anomaly labels
            y_predicted_validation = model.predict(X_validation)

            # Transform the predictions from -1 to 1 and from 1 to 0
            y_predicted_validation = (y_predicted_validation == -1).astype(int)

            # Compute F1, precision, recall and fpr. `labels` keeps the matrix
            # 2x2 even when only one class shows up, so the unpacking is safe.
            f1 = f1_score(y_validation, y_predicted_validation)
            precision = precision_score(y_validation, y_predicted_validation, zero_division=1)
            recall = recall_score(y_validation, y_predicted_validation)
            tn, fp, fn, tp = confusion_matrix(y_validation, y_predicted_validation, labels=[0, 1]).ravel()
            fpr = fp / (tn + fp) if (tn + fp) else 0.0

            f1_scores.append(f1)
            precisions.append(precision)
            recalls.append(recall)
            fpr_scores.append(fpr)

            print(f"Iteration {iteration + 1}. F1 score: {f1}")

        # After n_iterations, compute the average and the CI

        # Average for each metric. The FPR statistics must use `fpr_scores`: the
        # name `fprs` belongs to the autoencoder cell and would hold stale values.
        avg_f1 = mean(f1_scores)
        avg_precision = mean(precisions)
        avg_recall = mean(recalls)
        avg_fpr = mean(fpr_scores)

        # Confidence intervals from the standard error of the mean
        t_ci_f1 = t.interval(confidence_level, degrees_freedom, avg_f1, sem(f1_scores))
        t_ci_precision = t.interval(confidence_level, degrees_freedom, avg_precision, sem(precisions))
        t_ci_recall = t.interval(confidence_level, degrees_freedom, avg_recall, sem(recalls))
        t_ci_fpr = t.interval(confidence_level, degrees_freedom, avg_fpr, sem(fpr_scores))

        print(f"Average F1 Score: {avg_f1}, Confidence Interval t-Dist: {t_ci_f1}.")
        print(f"Average Precision: {avg_precision}, Confidence Interval t-Dist: {t_ci_precision}.")
        print(f"Average Recall: {avg_recall}, Confidence Interval t-Dist: {t_ci_recall}.")
        print(f"Average FPR: {avg_fpr}, Confidence Interval t-Dist: {t_ci_fpr}.")

        # Create dictionary with the results for each config
        all_results.append({
            **config,
            'avg_f1': avg_f1,
            'avg_precision': avg_precision,
            'avg_recall': avg_recall,
            'avg_fpr': avg_fpr,
            't_ci_f1': t_ci_f1,
            't_ci_precision': t_ci_precision,
            't_ci_recall': t_ci_recall,
            't_ci_fpr': t_ci_fpr,
        })

    # Best configurations first; keep them so the next feature set does not discard them.
    all_results = sorted(all_results, key=lambda x: x['avg_f1'], reverse=True)
    isolation_forest_search_results[set_name] = all_results

<b><font size="3">Illicit Consent Grant: Evaluation Phase</font></b>

<b>Evaluation Autoencoder</b>

In [None]:
# Final evaluation of the autoencoder on the held-out test split, using the best
# configuration found during the grid search. `epochs_num` and
# `selected_batch_size` are the values set in the grid-search cell.

# Best Hyperparameter configuration
dr = 0.5
l2_r = l2(5e-6)
l_r = 1e-4

# Best feature set
feature_set = recent_consent_and_permissions_and_users

# Use the same encoded splits as the modeling phase.
X_train = df_train[feature_set].copy()
X_threshold = df_threshold[feature_set].copy()
X_test = df_test[feature_set].copy()
y_test = df_test['Attack']

input_dimension = X_train.shape[1]

# Build and train the model (the bottleneck size is fixed inside build_autoencoder)
model = build_autoencoder(dr, l2_r, l_r, input_dimension)
history = model.fit(X_train, X_train, shuffle=False, epochs=epochs_num, batch_size=selected_batch_size, verbose=0)

# Compute the reconstruction error of the threshold set
X_threshold_predicted = model.predict(X_threshold)
re_threshold = np.mean(np.power(X_threshold - X_threshold_predicted, 2), axis=1)

# Threshold rule: IQR (Q3 + 1.5 * IQR). Alternatives explored in the modeling phase:
#   3-sigma: np.mean(re_threshold) + 3 * np.std(re_threshold)
#   2-sigma: np.mean(re_threshold) + 2 * np.std(re_threshold)
#   MAD:     median + 3 * 1.4826 * median(|re_threshold - median|)
Q1 = np.percentile(re_threshold, 25)
Q3 = np.percentile(re_threshold, 75)
IQR = Q3 - Q1
threshold = Q3 + 1.5 * IQR

# Assign anomaly label for each sample in X_test
X_test_predicted = model.predict(X_test)
re_test = np.mean(np.power(X_test - X_test_predicted, 2), axis=1)

y_test_predicted = (re_test > threshold).astype(int)

# Compute F1, precision, recall and fpr on the test dataset. `labels` keeps the
# confusion matrix 2x2 even when only one class shows up.
f1_test = f1_score(y_test, y_test_predicted)
precision_test = precision_score(y_test, y_test_predicted, zero_division=1)
recall_test = recall_score(y_test, y_test_predicted)
tn_test, fp_test, fn_test, tp_test = confusion_matrix(y_test, y_test_predicted, labels=[0, 1]).ravel()
fpr_test = fp_test / (tn_test + fp_test) if (tn_test + fp_test) else 0.0

print(f"TP: {tp_test}")
print(f"FP: {fp_test}")
print(f"TN: {tn_test}")
print(f"FN: {fn_test}")
print(f"Precision: {precision_test:.4f}")
print(f"Recall: {recall_test:.4f}")
print(f"F1-score: {f1_test:.4f}")
print(f"FPR: {fpr_test:.4f}")

<b>Evaluation Isolation Forest</b>

In [None]:
# Final evaluation of the Isolation Forest on the held-out test split, using the
# best configuration found during the grid search.

# IsolationForest is not part of the notebook's visible import cell.
from sklearn.ensemble import IsolationForest

# Best feature set
feature_set = recent_consent

# Best Hyperparameter configuration
param_grid = {
    'n_estimators': [100],
    'max_samples': [128],
    'max_features': [0.6],
    'contamination': [0.06]
}
rng = np.random.RandomState(0)

# One configuration only
all_configs = list(ParameterGrid(param_grid))

# Use the same encoded splits and label column as the modeling phase.
# NOTE(review): the original read 'ActualAttack' from a differently named frame and
# then scored against an undefined `y_test_2`; 'Attack' on df_test is assumed here
# for consistency with the other cells - confirm.
X_train = df_train[feature_set].copy()
X_test = df_test[feature_set].copy()
y_test = df_test['Attack']

model = IsolationForest(**all_configs[0], bootstrap=True, random_state=rng, n_jobs=-2)
model.fit(X_train)

# Isolation Forest anomaly labels
y_predicted_test = model.predict(X_test)

# Transform the predictions from -1 to 1 and from 1 to 0
y_predicted_test = (y_predicted_test == -1).astype(int)

# Compute F1, precision, recall and fpr on the test dataset. `labels` keeps the
# confusion matrix 2x2 even when only one class shows up.
f1_test = f1_score(y_test, y_predicted_test)
precision_test = precision_score(y_test, y_predicted_test, zero_division=1)
recall_test = recall_score(y_test, y_predicted_test)
tn_test, fp_test, fn_test, tp_test = confusion_matrix(y_test, y_predicted_test, labels=[0, 1]).ravel()
fpr_test = fp_test / (tn_test + fp_test) if (tn_test + fp_test) else 0.0

print(f"TP: {tp_test}")
print(f"FP: {fp_test}")
print(f"TN: {tn_test}")
print(f"FN: {fn_test}")
print(f"Precision: {precision_test:.4f}")
print(f"Recall: {recall_test:.4f}")
print(f"F1-score: {f1_test:.4f}")
print(f"FPR: {fpr_test:.4f}")