#### Imports

In [1]:
import pandas as pd
from collections import defaultdict
import re

  from pandas.core.computation.check import NUMEXPR_INSTALLED


#### Data

In [2]:
# Column names for the two pipe-separated input files; both are read with
# header=None, so these names are assigned positionally.
int_column_names = ['interval_name', 'interval_begin', 'interval_end', 'keys', 'values']
event_column_names = ['event_id', 'timestamp', 'keys', 'values']

intervals = pd.read_csv('lanl_intervals', sep='|', header=None, names=int_column_names)
events = pd.read_csv('lanl_10k.events', sep='|', header=None, names=event_column_names)

# spec_data maps the text left of ":-" (the rule head) to the list of texts
# found right of ":-" (the rule bodies), in file order.
spec_data = defaultdict(list)
with open('lanl.nfer', 'r') as file:
    lines = file.readlines()

for line in lines:
    line = line.strip()
    if ":-" in line:
        # Split on the first ":-" only: a body that contains ":-" again would
        # otherwise yield more than two pieces and break the unpacking.
        interval_name, values = line.split(":-", 1)
        interval_name = interval_name.strip()
        values = values.strip()
        spec_data[interval_name].append(values)

# Distinct interval names present in the intervals file, in order of first appearance.
unique_intervals = intervals['interval_name'].unique().tolist()


#### Example interval instance

In [7]:
# Positional row index (into `intervals`) of the interval instance to explain.
# Other indices that were tried:
#    3 -> event
#    9 -> event vs event
#   10 -> event vs interval
instance_id = 12     # subrule: event vs interval (special case with two pairs of possibilities)

selected_interval = intervals.iloc[instance_id]

# Pull out the three fields the later cells work with.
interval_name, interval_begin, interval_end = (
    selected_interval['interval_name'],
    selected_interval['interval_begin'],
    selected_interval['interval_end'],
)

print(interval_name,interval_begin,interval_end)

sensor_responded 16630407242086 16630407795002


#### Retrieve associated rule

In [8]:
# Operator words at which a rule body is split into operands; they are also
# recorded as the 'relationship' between two operands by validate_syntax.
keywords = ['during','before']

# Retrieve the rule associated with the selected interval name
# (spec_data holds a list of rule bodies per name; only the first one is used).
rule = spec_data[interval_name][0]

def split_rule_by_keywords(rule, keywords):
    """Split a rule body into operands and keyword tokens, keeping parenthesised groups whole.

    Parameters
    ----------
    rule : str
        Rule text, e.g. ``'(a before b) during c where x = y'``.
    keywords : iterable of str
        Operator words to split on (matched literally).

    Returns
    -------
    list of str
        Operands and keywords in their original order. Everything between an
        opening parenthesis and its matching closing parenthesis is returned
        as ONE element, with the pieces re-joined by single spaces.
    """
    # With no keywords the alternation would be empty and re.split would cut
    # between every character; treat the whole rule as a single operand instead.
    if not keywords:
        return [rule] if rule else []

    # Escape the keywords for use in regex and create a pattern that matches any of them
    pattern = '|'.join(re.escape(keyword) for keyword in keywords)

    # The capturing group keeps the keywords themselves in the split output;
    # the surrounding \s* strips the whitespace around them.
    parts = [part for part in re.split(f'\\s*({pattern})\\s*', rule) if part]

    result = []
    current_part = []   # pieces of the parenthesised group currently being collected
    depth = 0           # number of "(" seen that have not been closed yet

    for part in parts:
        depth += part.count('(') - part.count(')')
        if current_part or depth > 0:
            current_part.append(part)
            if depth <= 0:
                # The group is balanced again: emit it as one element.
                result.append(' '.join(current_part))
                current_part = []
                depth = 0
        else:
            # Balanced piece outside any group (this includes pieces such as
            # "x where (a = b)" that open and close a parenthesis themselves).
            result.append(part)
            depth = 0   # ignore a stray ")" instead of carrying a negative depth

    # An unclosed "(" must not make the collected text disappear.
    if current_part:
        result.append(' '.join(current_part))

    return result

# Split the rule using the function
# (split_rules is the operand/keyword list that the later cell passes to
# process_general_rule).
split_rules  = split_rule_by_keywords(rule, keywords)
print('MAIN RULE: ',split_rules)

MAIN RULE:  ['(req:USREVENT_EVENT-8 before resp:USREVENT_EVENT-10)', 'during', 'c:controller where req.pid = resp.pid & req.pid = c.pid map { pid -> c.pid }']


In [5]:
# Initialize a directed graph
# NOTE(review): `nx` is not imported in any visible cell, and the recorded
# output of this cell is "NameError: name 'nx' is not defined". Presumably
# `import networkx as nx` is missing from the imports cell - confirm and add it.
G = nx.DiGraph()

# Recursive function to process valid_event_data and build the graph
def process_node(node, parent=None):
    """Add `node` and all of its descendants to the module-level graph `G`.

    Parameters
    ----------
    node : dict
        Must contain 'id'. May contain 'relationship' (used as the edge label,
        '' when absent) and 'children' (None/empty, or a list of groups of the
        form ``{'data': ...}``).
    parent : hashable, optional
        Id of the node to connect from; no edge is added when it is None.

    Each group's 'data' may be a list of node dicts or a single node dict
    (process_general_rule returns the latter shape for single-event rules).
    """
    node_id = node['id']
    G.add_node(node_id)  # Add the current node

    # Compare against None so that falsy ids (0, '') still get their edge.
    if parent is not None:
        G.add_edge(parent, node_id, relationship=node.get('relationship', ''))

    # Leaves carry 'children': None, so fall back to an empty list.
    for child_group in node.get('children') or []:
        members = child_group['data']
        if isinstance(members, dict):
            # A bare dict would otherwise be iterated key by key.
            members = [members]
        for child in members:
            process_node(child, node_id)

# Process the main valid_event_data, with "sensor_ok" as the root
# NOTE(review): the root label is hard-coded; the interval selected in the
# visible cells is not 'sensor_ok' - confirm which label is intended.
# NOTE(review): `nx` and `plt` are not imported in any visible cell (the
# recorded run failed with NameError); presumably `import networkx as nx` and
# `import matplotlib.pyplot as plt` belong in the imports cell.
root_node = 'sensor_ok'
G.add_node(root_node)

# process_general_rule returns None or a list of {'data': ...} groups, so the
# result cannot be indexed with ['data'] directly. A single dict is still
# accepted for backward compatibility.
top_level_groups = valid_event_data or []
if isinstance(top_level_groups, dict):
    top_level_groups = [top_level_groups]

for group in top_level_groups:
    members = group['data']
    if isinstance(members, dict):
        # Single-event rules store one node dict instead of a list of nodes.
        members = [members]
    for event in members:
        process_node(event, root_node)

# Create a layout for the tree (fixed seed so the picture is reproducible)
pos = nx.spring_layout(G, k=0.5, seed=42)

# Draw the graph
nx.draw(G, pos, with_labels=True, node_size=2000, node_color='lightblue', font_size=10, font_weight='bold', arrows=True)

# Draw edge labels (relationships like 'before' and 'during')
edge_labels = nx.get_edge_attributes(G, 'relationship')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)

# Show the graph
plt.show()

NameError: name 'nx' is not defined

#### Retrieve interval creation DS

In [9]:
import re

# Check if an element is an event
def is_event(element):
    """Look for an event id of the form ``USREVENT_EVENT-<digits>`` in `element`.

    Returns
    -------
    tuple
        ``(True, event_id)`` for the first match, otherwise ``(False, None)``.
    """
    match = re.search(r'USREVENT_EVENT-\d+', element)
    if match is None:
        return False, None
    return True, match.group(0)


# Check if an element is an interval
def is_interval(element):
    """Look for one of the known interval names (as a whole word) in `element`.

    The candidate names are the distinct, non-missing values of
    ``intervals['interval_name']`` at call time.

    Returns
    -------
    tuple
        ``(True, interval_name)`` for the first match, otherwise ``(False, None)``.
    """
    names = intervals['interval_name'].dropna().unique()
    if len(names) == 0:
        # An empty alternation would match the empty string at the first word
        # boundary and report every element as an interval named ''.
        return False, None

    pattern = r'\b(' + '|'.join(re.escape(name) for name in names) + r')\b'

    # Search for the interval name in the element
    match = re.search(pattern, element)
    if match:
        return True, match.group(1)  # Return flag and interval name
    return False, None

# Collect every timestamp recorded for one event id
def get_event_timestamps(event_id):
    """Return the timestamps of all rows in `events` whose event_id equals `event_id`.

    The id is echoed to stdout on every call. When no row matches, a
    "not found" message is printed and an empty list is returned.
    """
    print(event_id)
    matching_timestamps = events.loc[events['event_id'] == event_id, 'timestamp']
    if matching_timestamps.empty:
        print(f"Event {event_id} not found")
        return []
    # All occurrences, in the order they appear in the events table
    return matching_timestamps.tolist()

# Select the rows of the interval dataframe that belong to one interval name
def get_interval_instances(interval_name):
    """Return the sub-frame of `intervals` whose interval_name equals `interval_name`."""
    name_matches = intervals['interval_name'].eq(interval_name)
    return intervals.loc[name_matches]



def check_split_rule_for_subrule(rule, keywords):
    """Split `rule` at the keywords and report whether it is a compound (sub)rule.

    Returns
    -------
    tuple
        ``(parts, 1)`` when the split produced more than one non-empty piece,
        otherwise ``(parts, None)``. The keywords themselves are kept as
        pieces; whitespace around them is removed.
    """
    # Literal alternation of the keywords; the capture group makes re.split
    # return the keyword that was matched alongside the text around it.
    alternation = '|'.join(map(re.escape, keywords))
    splitter = re.compile(f'\\s*({alternation})\\s*')

    # Drop the empty strings produced at the edges of the split.
    pieces = list(filter(None, splitter.split(rule)))

    flag = 1 if len(pieces) > 1 else None
    return pieces, flag

def _operand_spans(data):
    """Normalise an operand to (is_interval, [(start, end), ...]); None if the type is unsupported."""
    if isinstance(data, list):
        # Events are instantaneous: start and end are the same timestamp.
        return False, [(timestamp, timestamp) for timestamp in data]
    if isinstance(data, pd.DataFrame):
        return True, [(row['interval_begin'], row['interval_end']) for _, row in data.iterrows()]
    return None


def _explain_interval(interval_id, start, end):
    """Recursively explain one interval instance via the first rule recorded for it (None if no rule)."""
    # .get() avoids both the IndexError for ids without a rule and the silent
    # insertion of an empty entry that indexing a defaultdict would cause.
    rules = spec_data.get(interval_id)
    if not rules:
        return None
    return process_general_rule(split_rule_by_keywords(rules[0], keywords), start, end, subruleFlag=False)


def validate_syntax(i1_id, i1_data, i2_data, i2_id, i_start, i_end, syntax,subruleFlag):
    """
    Validate the relationship between i1 and i2 based on the syntax.

    Parameters
    ----------
    i1_id, i2_id : str
        Ids written into the returned nodes; for interval operands the id is
        also the key used to look up the operand's own rule in `spec_data`.
    i1_data, i2_data : list or pandas.DataFrame
        A list of event timestamps, or a DataFrame of interval instances with
        'interval_begin' and 'interval_end' columns.
    i_start, i_end
        Bounds of the interval being explained.
    syntax : str
        'before' or 'during'; any other value yields no instances.
    subruleFlag : bool
        Only affects event-vs-event 'before': when truthy the pair merely has
        to lie inside [i_start, i_end] instead of coinciding with the bounds.

    Matching rules (events are treated as spans with start == end):
      * 'before': i1 ends strictly before i2 starts, i_start equals i1's start
        and i_end equals i2's end (relaxed as described for subruleFlag).
      * 'during': the contained operand lies within the containing interval,
        whose bounds must equal (i_start, i_end). The container is i2, except
        for interval-vs-event where it is i1. Not defined for two events.

    Returns
    -------
    list of dict or None
        One ``{'data': [i1_node, i2_node]}`` per matching pair (i1 outer loop,
        i2 inner loop), or None when nothing matches. Nodes have 'id', 'start',
        'end', 'children'; the i1 node additionally carries 'relationship'.
        Interval nodes get their 'children' from a recursive explanation,
        event nodes have 'children': None.
    """
    left = _operand_spans(i1_data)
    right = _operand_spans(i2_data)
    if left is None or right is None:
        return None

    left_is_interval, left_spans = left
    right_is_interval, right_spans = right
    both_events = not (left_is_interval or right_is_interval)

    # 'during' does not apply to two events; unknown syntaxes match nothing.
    if syntax not in ('before', 'during') or (syntax == 'during' and both_events):
        return None

    valid_instances = []

    for start_1, end_1 in left_spans:
        for start_2, end_2 in right_spans:
            if syntax == 'before':
                if both_events and subruleFlag:
                    # Inside a sub-rule the pair only has to fall within the bounds.
                    matched = start_1 < start_2 and start_1 >= i_start and start_2 <= i_end
                else:
                    matched = end_1 < start_2 and i_start == start_1 and i_end == end_2
            elif left_is_interval and not right_is_interval:
                # Interval vs event: the event (i2) must fall inside the interval (i1).
                matched = start_2 >= start_1 and start_2 <= end_1 and i_start == start_1 and i_end == end_1
            else:
                # Event or interval (i1) inside the interval i2.
                matched = start_1 >= start_2 and end_1 <= end_2 and i_start == start_2 and i_end == end_2

            if not matched:
                continue

            # Children are computed only for matching pairs, i1 first, because
            # the recursive explanation is expensive and prints progress.
            first_node = {
                'id': i1_id,
                'start': start_1,
                'end': end_1,
                'children': _explain_interval(i1_id, start_1, end_1) if left_is_interval else None,
                'relationship': syntax
            }
            second_node = {
                'id': i2_id,
                'start': start_2,
                'end': end_2,
                'children': _explain_interval(i2_id, start_2, end_2) if right_is_interval else None
            }
            valid_instances.append({'data': [first_node, second_node]})

    return valid_instances if valid_instances else None  # Return valid instances or None

# Process the rule with dynamic assignment of i1 (event) and i2 (event or interval set)
def process_general_rule(final_rule, interval_begin=None, interval_end=None,subruleFlag=False):
    """Find the events/intervals that explain one interval instance under `final_rule`.

    Parameters
    ----------
    final_rule : list of str
        Operands and keywords as produced by split_rule_by_keywords.
    interval_begin, interval_end
        Bounds of the interval instance being explained.
    subruleFlag : bool
        Forwarded to validate_syntax; set by the recursive call made for a
        parenthesised sub-rule.

    Each element is classified in this order: compound sub-rule (it still
    contains a keyword), event, interval, otherwise it is taken as the
    relationship keyword. As soon as two operands and a keyword are known the
    pair is validated and the function returns.

    Returns
    -------
    list or None
        * single-event rule: ``[{'data': node}, ...]``, one per timestamp
          inside [interval_begin, interval_end];
        * sub-rule followed by an interval: the sub-rule's matches plus one
          group describing that interval over the same bounds;
        * otherwise the result of validate_syntax;
        * None when an event or interval has no occurrences or nothing validates.
    """
    i1, i2, syntax = None, None, None

    print(f"Processing rule: {final_rule}")
    sub_result = None
    for element in final_rule:
        # ---Processing Sub Rule---
        sub_rule, has_subrule = check_split_rule_for_subrule(element, keywords)
        if has_subrule:
            sub_result = process_general_rule(sub_rule, interval_begin, interval_end, True)
            # The flag is cleared after a sub-rule, as in the original flow.
            subruleFlag = False
            continue

        is_event_result, event_id = is_event(element)

        if is_event_result:
            # Process the event and retrieve all occurrences (timestamps)
            event_timestamps = get_event_timestamps(event_id)

            if not event_timestamps:
                return None  # Event not found

            if len(final_rule) == 1:
                # A rule made of a single event: every occurrence inside the
                # bounds is an explanation on its own.
                return [
                    {'data': {'id': event_id,
                              'start': ts,
                              'end': ts,
                              'children': None}}
                    for ts in event_timestamps
                    if ts >= interval_begin and ts <= interval_end
                ]

            # Assign to i1 or i2 depending on whether i1 is already assigned
            if i1 is None:
                i1 = (event_id, event_timestamps)
            else:
                i2 = (event_id, event_timestamps)
        else:
            is_interval_result, name = is_interval(element)
            if is_interval_result:
                interval_instances = get_interval_instances(name)
                if interval_instances.empty:
                    return None  # No valid interval instances found

                if sub_result is not None:
                    # The sub-rule already matched; attach the interval,
                    # explained by its own rule over the same bounds.
                    # .get() keeps a missing rule from raising or from adding
                    # an empty entry to a defaultdict.
                    rules = spec_data.get(name)
                    children = None
                    if rules:
                        children = process_general_rule(
                            split_rule_by_keywords(rules[0], keywords),
                            interval_begin, interval_end, subruleFlag=False)
                    sub_result.append({
                        'data': [
                            {
                                'id': name,
                                'start': interval_begin,
                                'end': interval_end,
                                'children': children
                            }
                        ]})
                    return sub_result

                # Store the interval NAME, not the raw element text: the id is
                # used downstream as the spec_data key and as the node id, and
                # an element such as 'c:controller where ...' is neither.
                if i1 is None:
                    i1 = (name, interval_instances)
                else:
                    i2 = (name, interval_instances)
            else:
                syntax = element

        # When both i1, i2, and syntax are available, we validate the rule
        if i1 and i2 and syntax:
            i1_id, i1_data = i1
            i2_id, i2_data = i2

            # Handle the validation logic (event vs event, interval vs interval, or mixed)
            valid_instances = validate_syntax(i1_id, i1_data, i2_data, i2_id, interval_begin, interval_end, syntax,subruleFlag)
            if valid_instances:
                return valid_instances  # Return the valid instances with IDs
            print("No valid instances found")
            return None  # Syntax validation failed

    return None

# Explain the selected interval instance and list every supporting combination.
print("INTERVAL NAME:{},INTERVAL BEGIN:{},INTERVAL END:{}".format(interval_name,interval_begin, interval_end))
print("ASSOCIATED RULE:{}".format(split_rules))
print("EVENT AND INTERVAL RESPONSIBLE:")
valid_event_data = process_general_rule(split_rules, interval_begin, interval_end,subruleFlag=False)

# process_general_rule returns None when nothing explains the interval;
# iterating None would raise TypeError.
if valid_event_data:
    for data in valid_event_data:
        print(data)
        print('\n')
else:
    print("No explanation found for this interval instance")


INTERVAL NAME:sensor_responded,INTERVAL BEGIN:16630407242086,INTERVAL END:16630407795002
ASSOCIATED RULE:['(req:USREVENT_EVENT-8 before resp:USREVENT_EVENT-10)', 'during', 'c:controller where req.pid = resp.pid & req.pid = c.pid map { pid -> c.pid }']
EVENT AND INTERVAL RESPONSIBLE:
Processing rule: ['(req:USREVENT_EVENT-8 before resp:USREVENT_EVENT-10)', 'during', 'c:controller where req.pid = resp.pid & req.pid = c.pid map { pid -> c.pid }']
Processing rule: ['(req:USREVENT_EVENT-8', 'before', 'resp:USREVENT_EVENT-10)']
USREVENT_EVENT-8
USREVENT_EVENT-10
Processing rule: ['woke:USREVENT_EVENT-24', 'before', 'sleep:USREVENT_EVENT-25 where woke.pid = sleep.pid map { pid -> woke.pid }']
USREVENT_EVENT-24
USREVENT_EVENT-25
{'data': [{'id': 'USREVENT_EVENT-8', 'start': 16630407244374, 'end': 16630407244374, 'children': None, 'relationship': 'before'}, {'id': 'USREVENT_EVENT-10', 'start': 16630407642906, 'end': 16630407642906, 'children': None}]}


{'data': [{'id': 'USREVENT_EVENT-8', 'sta