In [1]:
import numpy as np
import torch
from deepcase_copy.context_builder import ContextBuilder
from deepcase_copy.preprocessing import Preprocessor

builder = ContextBuilder.load('save/builder.save')
preprocessor = Preprocessor(
    length=10,  # 10 events in context
    timeout=86400,  # Ignore events older than 1 day (60*60*24 = 86400 seconds)
)
context, events, labels, mapping = preprocessor.csv('alerts.csv', verbose=True)

# In case no labels are provided, set labels to -1
# IMPORTANT: If no labels are provided, make sure to manually set the labels
# before calling the interpreter.score_clusters method. Otherwise, this will
# raise an exception, because scores == NO_SCORE cannot be computed.
if labels is None:
    labels = np.full(events.shape[0], -1, dtype=int)

# Cast to cuda if available
if torch.cuda.is_available():
    builder = builder.to('cuda')
    events = events.to('cuda')
    context = context.to('cuda')


# Split into train and test sets (20:80) by time - assuming events are ordered chronologically
events_train  = events [:events.shape[0]//5 ]
events_test   = events [ events.shape[0]//5:]

context_train = context[:events.shape[0]//5 ]
context_test  = context[ events.shape[0]//5:]

labels_train  = labels [:events.shape[0]//5 ]
labels_test   = labels [ events.shape[0]//5:]

Loading: 100%|██████████| 3353/3353 [00:01<00:00, 3235.44it/s]


In [2]:
import torch

def get_unique_indices_per_row(tensor):
    indices_list = []
    row_list = []
    indices_list_set = set()
    
    # Iterate over each row
    for row in range(len(tensor)):
        curr = tuple(tensor[row].tolist())
        
        # Check if the current row already exists in row_list using torch.equal
        if curr in indices_list_set:
            continue
        
        # Append unique row and its index
        row_list.append(curr)
        indices_list.append(row)
        indices_list_set.add(curr)
    
    # Return the indices and unique rows
    return indices_list, row_list
indices, rows = get_unique_indices_per_row(context_test)

In [3]:
context_filtered = torch.tensor(rows)
events_filtered = events_test[indices].clone().detach()
labels_filtered = labels_test[indices].clone().detach()

if torch.cuda.is_available():
    context_filtered = context_filtered.to('cuda')
    events_filtered = events_filtered.to('cuda')
    labels_filtered = labels_filtered.to('cuda')

In [110]:
l = 1000
chosen_index = 0
context_picked = context_filtered[chosen_index:chosen_index+l].detach()
events_picked = events_filtered[chosen_index:chosen_index+l].detach()
labels_picked = labels_filtered[chosen_index:chosen_index+l]

In [111]:
from deepcase_copy.context_builder.loss import LabelSmoothing

max_iterations_global = 100
alpha_chosen = 0.01
epsilon_chosen = 0.5

def max_to_one(tensor):
    max_indices = torch.argmax(tensor, dim=-1, keepdim=True)
    result = torch.zeros_like(tensor)
    result.scatter_(-1, max_indices, 1.0)
    return result

def get_results(results):
    results_picked = torch.topk(results[0][0][0], 3)
    exp = results_picked.values.exp()
    res_indices = results_picked.indices
    s = []
    for j in range(3):
        s.append(f"{format_list([res_indices[j].item()])} {'{:.3f}'.format(exp[j])}")
    return res_indices, ", ".join(s)

def compute_change(trace, original, epsilon=0.1):
    a = torch.clamp(original - epsilon, min=0)
    b = (trace >= a).float() * trace + (trace < a).float() * a
    c = (b > original + epsilon).float() * (original + epsilon) + (b <= original + epsilon).float() * b
    return max_to_one(c)

def bim_attack(context_given, target_given, alpha=0.1, epsilon=0.1, num_iterations=max_iterations_global):
    change = None
    original_context = builder.embedding_one_hot(context_given)
    context_processed = builder.embedding_one_hot(context_given)
    criterion = LabelSmoothing(builder.decoder_event.out.out_features, 0.1)    
    changes = []
    for i in range(num_iterations):
        context_processed.requires_grad_(True)
        output = builder.predict(context_processed)
        indices_of_results, prediction_str = get_results(output)
        changes.append({
            "changed_to": torch.argmax(context_processed, axis=-1).tolist()[0],
            "prediction_str": prediction_str,
        })
        if target_given[0] != indices_of_results[0]:
            break
        loss = criterion(output[0][0], target_given)
        context_processed.retain_grad()
        loss.backward(retain_graph=True)
        grad = context_processed.grad.sign()
        if change is None:
            change = alpha * grad
        else:
            change += alpha * grad
        context_processed = context_processed + change
        context_processed = compute_change(context_processed, original_context, epsilon)
    return changes

def count_changes(changes_needed):
    orig = changes_needed[0]["changed_to"]
    final = changes_needed[-1]["changed_to"]
    changed_entries = 0
    for orig, final in zip(orig, final):
        if orig != final:
            changed_entries += 1
    return changed_entries

def show_changes(changes_needed):
    orig = changes_needed[0]["changed_to"]
    final = changes_needed[-1]["changed_to"]
    changes = []
    same = []
    for orig, final in zip(orig, final):
        if orig == final:
            changes.append("-")
            same.append(final)
        else:
            changes.append(final)
            same.append("XX")
    return format_list(changes), format_list(same)

def format_list(li):
    return f"[{", ".join([f'{num:2}' for num in li])}]"
            
def print_state(changes_needed, current_trace_num, con, e, print_path=True):
    mode_int = 0
    changed_num = len(changes_needed)
    perturbations_num = count_changes(changes_needed)
    result_string = ""
    if changed_num == 1:
        pass
    elif changed_num == max_iterations_global:
        mode_int = 3
    else:
        mode_int = 1
        if perturbations_num <= 3:
            mode_int = 2
            result_string += f"{current_trace_num}: {format_list(con[0].tolist())} == {e.tolist()} Changed {{{changed_num}}}, Perturbations {{{perturbations_num}}}\n"
            if print_path:
                for change in changes_needed:
                    result_string += f"{" "*(len(str(current_trace_num)) + 2)}{format_list(change["changed_to"])} -> {change['prediction_str']}\n"
            else:
                change_last = changes_needed[-1]
                result_string += f"{" "*(len(str(current_trace_num)) + 2)}{format_list(change_last["changed_to"])} -> {change_last['prediction_str']}\n"
            changed_entries, same_entries = show_changes(changes_needed)
            result_string += f"{" "*(len(str(current_trace_num)) - 1)}== {same_entries}\n"
            result_string += f"{" "*(len(str(current_trace_num)) - 1)}-> {changed_entries}\n"
            result_string += "\n" 
    return mode_int, result_string

def process_traces():
    perturbed_collected_main = []
    print_path = False
    states = [0, 0, 0, 0]
    safe_to_file = ""
    for current_trace_num in range(len(context_picked)):
        con, e = context_picked[current_trace_num], events_picked.unsqueeze(1)[current_trace_num]
        con.resize_(1, con.size()[-1])
        changes_needed = bim_attack(context_given=con, target_given=e, alpha=0.01, epsilon=0.5, num_iterations=max_iterations_global)
        mode_int, result_string = print_state(changes_needed, current_trace_num, con, e, print_path=print_path)
        print(result_string, end="")
        safe_to_file += result_string
        if mode_int == 2:
            perturbed_collected_main.append((current_trace_num, changes_needed[0]['changed_to'], changes_needed[-1]['changed_to']))
        states[mode_int] += 1
    print(f"incorrect={states[0]} changed={states[1]} perturbed={states[2]} timeout={states[3]}")
    safe_to_file += f"incorrect={states[0]} changed={states[1]} perturbed={states[2]} timeout={states[3]}"
    with open(f"results/length={l}, alpha={alpha_chosen}, epsilon={epsilon_chosen}, num_iterations={max_iterations_global}, print_path={print_path}.txt", "w") as f:
        f.write(safe_to_file)
    return perturbed_collected_main

perturbed_collected = process_traces()

3: [86, 87, 86, 87, 66, 66, 42, 42, 42, 72] == [72] Changed {52}, Perturbations {2}
   [86, 87, 86, 87, 66, 66,  1, 42, 42,  1] -> [ 1] 0.236, [15] 0.118, [72] 0.027
== [86, 87, 86, 87, 66, 66, XX, 42, 42, XX]
-> [- , - , - , - , - , - ,  1, - , - ,  1]

5: [87, 86, 87, 66, 66, 42, 42, 42, 72, 72] == [72] Changed {52}, Perturbations {2}
   [87, 86, 87, 66, 66, 42,  1, 42, 72,  1] -> [ 1] 0.234, [15] 0.118, [72] 0.037
== [87, 86, 87, 66, 66, 42, XX, 42, 72, XX]
-> [- , - , - , - , - , - ,  1, - , - ,  1]

12: [71, 71, 71, 71, 71, 71, 71, 71, 71, 76] == [79] Changed {52}, Perturbations {2}
    [71, 71, 71, 71, 71, 71,  3, 71, 71,  4] -> [71] 0.213, [64] 0.078, [72] 0.072
 == [71, 71, 71, 71, 71, 71, XX, 71, 71, XX]
 -> [- , - , - , - , - , - ,  3, - , - ,  4]

29: [64, 64, 64, 64, 64, 57, 64, 72, 86, 87] == [86] Changed {52}, Perturbations {3}
    [64, 64, 64, 64, 64,  0,  0,  0, 86, 87] -> [66] 0.375, [86] 0.272, [87] 0.217
 == [64, 64, 64, 64, 64, XX, XX, XX, 86, 87]
 -> [- , - , - , -

In [119]:
import itertools


def process_single(context_chosen):
    if torch.cuda.is_available():
        context_chosen = context_chosen.to('cuda')
    context_chosen.resize_(1, context_chosen.size()[-1])

    context_one_hot = builder.embedding_one_hot(context_chosen)
    output = builder.predict(context_one_hot)
    attentions = [round(x, 5) for x in output[1][0][0].tolist()]
    indices_of_results, prediction_str = get_results(output)
    print(f"{format_list(context_chosen[0].tolist())} -> {prediction_str}")
    for c, a in zip(context_chosen[0], attentions):
        print(f"[{c:2}] {'{:.5f}'.format(a)}", end=" ")
        
    print()
    print()

def get_changes_list(s, f):
    perturbations_made = []
    for i in range(len(s)):
        if s[i] != f[i]:
            perturbations_made.append((i, f[i]))
            
    return perturbations_made

def get_possible_combinations(perturbations_made):
    subsets = []
    for r in range(1, len(perturbations_made) + 1):
        subsets.extend(itertools.combinations(perturbations_made, r))
    result = [list(subset) for subset in subsets]
    return result

def get_minimum_change_for_perturbation(index_in_list):
    i, s, f = perturbed_collected[index_in_list]
    event_target = events_picked[i]
    combination_of_perturbation = get_possible_combinations(get_changes_list(s, f))
    for combination in combination_of_perturbation:
        copy = torch.tensor(s).detach()
        if torch.cuda.is_available():
            copy = copy.to('cuda')
        for index_of_change, value_of_change in combination:
            copy[index_of_change] = value_of_change
        copy.resize_(1, copy.size()[-1])
        output = builder.predict(builder.embedding_one_hot(copy))
        indices_of_results, _ = get_results(output)
        if event_target != indices_of_results[0]:
            return copy
    print("ERROR: Couldn't find a perturbation")
        
def analysis(index_picked):
    print(f"Analyzing {perturbed_collected[index_picked][0]}")
    i, s, f = perturbed_collected[index_picked]
    process_single(torch.tensor(s))
    process_single(get_minimum_change_for_perturbation(index_picked))

for i in range(len(perturbed_collected)):
    analysis(i)

Analyzing 3
[86, 87, 86, 87, 66, 66, 42, 42, 42, 72] -> [72] 0.505, [42] 0.221, [78] 0.023
[86] 0.01905 [87] 0.01721 [86] 0.01846 [87] 0.02446 [66] 0.02594 [66] 0.04448 [42] 0.02189 [42] 0.14865 [42] 0.12885 [72] 0.55101 

[86, 87, 86, 87, 66, 66, 42, 42, 42,  1] -> [ 1] 0.230, [15] 0.117, [72] 0.028
[86] 0.00872 [87] 0.00716 [86] 0.01070 [87] 0.00991 [66] 0.01211 [66] 0.01966 [42] 0.00878 [42] 0.06008 [42] 0.04498 [ 1] 0.81790 

Analyzing 5
[87, 86, 87, 66, 66, 42, 42, 42, 72, 72] -> [72] 0.496, [42] 0.229, [78] 0.023
[87] 0.03021 [86] 0.02620 [87] 0.02293 [66] 0.03826 [66] 0.03166 [42] 0.07705 [42] 0.02581 [42] 0.20654 [72] 0.14183 [72] 0.39951 

[87, 86, 87, 66, 66, 42, 42, 42, 72,  1] -> [ 1] 0.224, [15] 0.117, [72] 0.038
[87] 0.01224 [86] 0.00977 [87] 0.01182 [66] 0.01337 [66] 0.01373 [42] 0.02445 [42] 0.00928 [42] 0.05334 [72] 0.04194 [ 1] 0.81006 

Analyzing 12
[71, 71, 71, 71, 71, 71, 71, 71, 71, 76] -> [79] 0.346, [71] 0.141, [77] 0.108
[71] 0.02067 [71] 0.02257 [71] 0.03009 [

AttributeError: 'NoneType' object has no attribute 'to'