In [3]:
import csv
from collections import deque
from datetime import datetime

def parse_time_stamp(dt_str):
    """
    Parse a timestamp string into a float number of seconds since the
    Unix epoch (1970-01-01).

    Accepted layout: "YYYY-MM-DD HH:MM:SS.sss", e.g. "2024-12-01 23:16:50.000".
    The fractional part may have 1-6 digits or be omitted entirely
    ("2024-12-01 23:16:50"), and leading/trailing whitespace is ignored.

    The string carries no timezone, so it is parsed as a naive datetime;
    datetime.timestamp() then interprets it in the machine's local timezone.
    Values are therefore comparable with each other on one machine, but are
    not absolute UTC instants.

    Raises:
        ValueError: if the text does not match the accepted layout.
    """
    # CSV fields sometimes carry stray spaces or a trailing newline.
    text = dt_str.strip()
    # %f requires at least one digit, so pick the format by whether a
    # fractional part is present instead of failing on whole-second stamps.
    if "." in text:
        fmt = "%Y-%m-%d %H:%M:%S.%f"
    else:
        fmt = "%Y-%m-%d %H:%M:%S"
    return datetime.strptime(text, fmt).timestamp()

def load_and_sort_activity_logs(input_csv='activity_logs.csv',
                                output_csv='activity_logs_sorted.csv'):
    """
    Load the activity log, sort it chronologically, and write a sorted copy.

    The input CSV has a header row followed by rows with columns:
      user_id, computer_id, timestamp
    where timestamp is parsed by parse_time_stamp. Empty rows are skipped.

    The sorted rows are written to output_csv under a fresh header, using
    the original timestamp text (not the numeric value).

    Returns:
        A list of (user_id, computer_id, original_ts, numeric_time) tuples
        in ascending numeric_time order. The sort is stable, so rows with
        equal timestamps keep their order from the input file.

    Raises:
        ValueError: if a non-empty row does not have exactly 3 columns or
            its timestamp cannot be parsed; the message names the file and
            line number.
        OSError: if either file cannot be opened.
    """
    rows = []
    with open(input_csv, 'r', newline='') as f_in:
        reader = csv.reader(f_in)
        # Discard the header row; the default makes this a no-op on an
        # empty file instead of raising StopIteration.
        next(reader, None)

        for row in reader:
            if not row:
                continue
            if len(row) != 3:
                raise ValueError(
                    f"{input_csv}, line {reader.line_num}: expected 3 columns "
                    f"(user_id, computer_id, timestamp), got {len(row)}"
                )
            user_id, computer_id, dt_str = row
            try:
                t_val = parse_time_stamp(dt_str)
            except ValueError as err:
                # Re-raise with file/line context; same exception type.
                raise ValueError(
                    f"{input_csv}, line {reader.line_num}: "
                    f"bad timestamp {dt_str!r}"
                ) from err
            rows.append((user_id, computer_id, dt_str, t_val))

    # Order by the numeric timestamp (last tuple element).
    rows.sort(key=lambda r: r[3])

    # Write out a local sorted copy, dropping the numeric column.
    with open(output_csv, 'w', newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["user_id","computer_id","timestamp"])
        writer.writerows(r[:3] for r in rows)

    return rows

def load_and_sort_access_logs(input_csv='access_logs.csv',
                              output_csv='access_logs_sorted.csv'):
    """
    Load the access log, sort it chronologically, and write a sorted copy.

    The input CSV has a header row followed by rows with columns:
      user_id, activity_type, affected_user_id, timestamp
    where timestamp is parsed by parse_time_stamp. Empty rows are skipped.

    The sorted rows are written to output_csv under a fresh header, using
    the original timestamp text (not the numeric value).

    Returns:
        A list of (user_id, activity_type, affected_user_id, original_ts,
        numeric_time) tuples in ascending numeric_time order. The sort is
        stable, so rows with equal timestamps keep their input order.

    Raises:
        ValueError: if a non-empty row does not have exactly 4 columns or
            its timestamp cannot be parsed; the message names the file and
            line number.
        OSError: if either file cannot be opened.
    """
    rows = []
    with open(input_csv, 'r', newline='') as f_in:
        reader = csv.reader(f_in)
        # Discard the header row; the default makes this a no-op on an
        # empty file instead of raising StopIteration.
        next(reader, None)

        for row in reader:
            if not row:
                continue
            if len(row) != 4:
                raise ValueError(
                    f"{input_csv}, line {reader.line_num}: expected 4 columns "
                    f"(user_id, activity_type, affected_user_id, timestamp), "
                    f"got {len(row)}"
                )
            user_id, activity_type, aff_user, dt_str = row
            try:
                t_val = parse_time_stamp(dt_str)
            except ValueError as err:
                # Re-raise with file/line context; same exception type.
                raise ValueError(
                    f"{input_csv}, line {reader.line_num}: "
                    f"bad timestamp {dt_str!r}"
                ) from err
            rows.append((user_id, activity_type, aff_user, dt_str, t_val))

    # Order by the numeric timestamp (last tuple element).
    rows.sort(key=lambda r: r[4])

    # Write out a local sorted copy, dropping the numeric column.
    with open(output_csv, 'w', newline='') as f_out:
        writer = csv.writer(f_out)
        writer.writerow(["user_id","activity_type","affected_user_id","timestamp"])
        writer.writerows(r[:4] for r in rows)

    return rows

def suspicious_bfs(activity_sorted, access_sorted, starting_user='u1'):
    """
    BFS demonstration over the sorted logs, starting from one user.

    Args:
        activity_sorted: iterable of
            (user_id, computer_id, original_ts, numeric_time) tuples.
        access_sorted: iterable of
            (user_id, activity_type, affected_user_id, original_ts,
            numeric_time) tuples.
        starting_user: user id the search starts from. Ids are compared as
            exact strings, so case matters ('u1' is not 'U1').

    Procedure:
      * Each dequeued user is processed once. Every computer that user
        touched (per activity_sorted) and that has not been checked yet is
        marked suspicious.
      * Naive rule: whenever a computer is checked, every user who has at
        least one access entry of a suspicious type is flagged and queued,
        unless that user has already been processed. The flagged users are
        NOT required to have touched that computer.

    Returns:
        (event_log, final_susp_users, final_susp_comps):
          event_log: list of human-readable trace lines. One
              "Found suspicious user" line is emitted per suspicious access
              entry per checked computer, so a user may appear repeatedly.
          final_susp_users: set of user ids, always including
              starting_user.
          final_susp_comps: set of computer ids that were checked.
    """
    suspicious_types = {"UNAUTHORIZED_ACCESS", "MALWARE_ALERT", "DATA_EXFILTRATION"}

    # user -> set of computers that user touched
    user2computers = {}
    for (u, c, _dt_str, _numeric_time) in activity_sorted:
        user2computers.setdefault(u, set()).add(c)

    # user -> suspicious activity types, in log order. Every user gets a key
    # (even with no suspicious entries) so users stay ordered by their first
    # appearance in the access log.
    user2suspicious = {}
    for (u, act_type, _aff_user, _dt_str, _numeric_time) in access_sorted:
        acts = user2suspicious.setdefault(u, [])
        if act_type in suspicious_types:
            acts.append(act_type)

    # The suspicious entries do not depend on the computer being checked, so
    # flatten them once instead of re-scanning the whole log per computer.
    suspicious_hits = []
    for u, acts in user2suspicious.items():
        for act_type in acts:
            suspicious_hits.append((u, act_type))

    queue = deque([starting_user])
    enqueued = {starting_user}  # users ever queued; each is queued only once
    visited_users = set()

    event_log = []
    final_susp_users = {starting_user}
    final_susp_comps = set()

    while queue:
        current_u = queue.popleft()
        visited_users.add(current_u)
        event_log.append(f"Processing user: {current_u}")

        # Sorted so the trace order is reproducible between runs (plain set
        # iteration order of strings varies with hash randomization).
        for c in sorted(user2computers.get(current_u, ())):
            if c in final_susp_comps:
                continue
            final_susp_comps.add(c)
            event_log.append(f"  Checking computer: {c}")

            # naive approach: any user with suspicious activity type => suspicious
            for other_user, act_type in suspicious_hits:
                if other_user in visited_users:
                    continue
                final_susp_users.add(other_user)
                event_log.append(f"    Found suspicious user: {other_user} => {act_type}")
                # Queue the user only the first time; later duplicates would
                # just be popped and discarded.
                if other_user not in enqueued:
                    enqueued.add(other_user)
                    queue.append(other_user)
    return event_log, final_susp_users, final_susp_comps

def main():
    """
    Script entry point: sort both log files, run the BFS starting from
    user 'U1', then print the BFS trace and the resulting suspicious sets.
    """
    # Step 1: load each log, sort it by time, and write the sorted copy.
    activity_sorted = load_and_sort_activity_logs(
        input_csv='activity_logs.csv',
        output_csv='activity_logs_sorted.csv',
    )
    access_sorted = load_and_sort_access_logs(
        input_csv='access_logs.csv',
        output_csv='access_logs_sorted.csv',
    )
    print("Sorted CSVs: activity_logs_sorted.csv, access_logs_sorted.csv created.")

    # Step 2: breadth-first search over the sorted logs.
    bfs_result = suspicious_bfs(activity_sorted, access_sorted, starting_user='U1')
    logs, sus_users, sus_comps = bfs_result

    # Step 3: dump the BFS trace, one entry per line.
    print("\n--- BFS LOGS ---")
    for entry in logs:
        print(entry)

    # Step 4: final summary of what was flagged.
    print("\nSuspicious users:", sus_users)
    print("Suspicious computers:", sus_comps)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


Sorted CSVs: activity_logs_sorted.csv, access_logs_sorted.csv created.

--- BFS LOGS ---
Processing user: U1
  Checking computer: C34
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => UNAUTHORIZED_ACCESS
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => DATA_EXFILTRATION
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => UNAUTHORIZED_ACCESS
    Found suspicious user: C18 => UNAUTHORIZED_ACCESS
    Found suspicious user: C18 => UNAUTHORIZED_ACCESS
    Found suspicious user: C18 => MALWARE_ALERT
    Found suspicious user: C18 => MALWARE_ALERT
  