In [1]:
import numpy as np
import networkx as nx
import csv
import matplotlib.pyplot as plt
import time
import os
from tqdm import tqdm
import datetime
from construct_rule import *

def extract_vertices_from_files(directory_path):
    """Extract vertices from CSV files in the specified directory."""
    print("Extracting vertices from data files:")
    vertices = []

    # Process logon.csv
    with open(os.path.join(directory_path, "logon.csv"), 'r') as file:
        print("...Processing logon.csv...")
        reader = csv.reader(file)
        next(reader)  # Skip header
        for row in tqdm(reader):
            vertex_id = row[0]
            timestamp = time.mktime(time.strptime(row[1], '%m/%d/%Y %H:%M:%S'))
            vertex = {
                'vertex_type': 'logon',
                'vertex_id': vertex_id,
                'user': row[2],
                'pc': row[3],
                'activity': row[4],
                'timestamp': timestamp,
                'host': row[3],
                'time_str': row[1]
            }
            vertices.append(vertex)

    # Process file.csv
    with open(os.path.join(directory_path, "file.csv"), 'r') as file:
        print("...Processing file.csv...")
        reader = csv.reader(file)
        next(reader)
        for row in tqdm(reader):
            vertex_id = row[0]
            timestamp = time.mktime(time.strptime(row[1], '%m/%d/%Y %H:%M:%S'))
            vertex = {
                'vertex_type': 'file',
                'vertex_id': vertex_id,
                'user': row[2],
                'filename': row[4],
                'activity': row[5],
                'timestamp': timestamp,
                'host': row[3],
                'time_str': row[1]
            }
            vertices.append(vertex)

    # Process http.csv
    with open(os.path.join(directory_path, "http.csv"), 'r') as file:
        print("...Processing http.csv...")
        reader = csv.reader(file)
        next(reader)
        for row in tqdm(reader):
            vertex_id = row[0]
            timestamp = time.mktime(time.strptime(row[1], '%m/%d/%Y %H:%M:%S'))
            vertex = {
                'vertex_type': 'http',
                'vertex_id': vertex_id,
                'user': row[2],
                'url': row[4].split(' ')[0],
                'activity': "visit",
                'timestamp': timestamp,
                'host': row[3],
                'content_list': row[4].split(' ')[1:],
                'time_str': row[1]
            }
            vertices.append(vertex)

    # Process device.csv
    with open(os.path.join(directory_path, "device.csv"), 'r') as file:
        print("...Processing device.csv...")
        reader = csv.reader(file)
        next(reader)
        for row in tqdm(reader):
            vertex_id = row[0]
            timestamp = time.mktime(time.strptime(row[1], '%m/%d/%Y %H:%M:%S'))
            vertex = {
                'vertex_type': 'device',
                'vertex_id': vertex_id,
                'user': row[2],
                'host': row[3],
                'activity': row[-1],
                'timestamp': timestamp,
                'file_tree': row[4],
                'time_str': row[1]
            }
            vertices.append(vertex)

    # Sort vertices by user and timestamp
    sorted_vertices = sorted(vertices, key=lambda x: (x['user'], x['timestamp']))
    
    print("Sample of sorted vertices:")
    print(sorted_vertices[:1])

    return sorted_vertices

def calculate_day_difference(timestamp1, timestamp2):
    """Calculate the number of days between two timestamps."""
    delta = datetime.datetime.fromtimestamp(timestamp1) - datetime.datetime.fromtimestamp(timestamp2)
    return delta.days

def get_total_days(sorted_vertices):
    """Determine the total number of days spanned by the dataset."""
    earliest_time = float('inf')
    latest_time = 0
    for vertex in sorted_vertices:
        if vertex['timestamp'] > latest_time:
            latest_time = vertex['timestamp']
        if vertex['timestamp'] < earliest_time:
            earliest_time = vertex['timestamp']

    total_days = calculate_day_difference(latest_time, earliest_time) + 2
    print(f"Dataset spans {total_days} days")
    return total_days

def group_vertices_by_day(sorted_vertices, total_days):
    """Group vertices into daily sequences using MultiGraph."""
    # Approximately 1000 entries span 4 days
    earliest_time = float('inf')
    for vertex in sorted_vertices:
        if vertex['timestamp'] < earliest_time:
            earliest_time = vertex['timestamp']

    daily_graphs = [None] * total_days

    print("...Grouping vertices by day...")
    for vertex in tqdm(sorted_vertices):
        # Calculate the day index (0-based, adjusted by 1)
        day_index = calculate_day_difference(vertex['timestamp'], earliest_time) - 1

        # Initialize MultiGraph for the day if not already created
        if not daily_graphs[day_index]:
            daily_graphs[day_index] = nx.MultiGraph()

        # Add vertex to the graph
        daily_graphs[day_index].add_node(
            vertex['vertex_id'],
            type=vertex['vertex_type'],
            user=vertex['user'],
            obj=vertex.get('filename', vertex.get('url', vertex.get('host'))),
            activity=vertex['activity'],
            timestamp=vertex['timestamp'],
            host=vertex['host']
        )

    return daily_graphs

In [2]:
def build_activity_graph(daily_sequences, total_days):
    """Construct an activity graph based on user behavior across daily sequences."""
    activity_graph = nx.MultiGraph()

    # Rule 1: Connect activities of the same user on the same host within a single day
    activity_graph = rule_1(activity_graph, daily_sequences, total_days)
    
    # Rule 2: Link behavior chains of the same user on the same host across multiple days
    activity_graph = rule_2(activity_graph, daily_sequences, total_days)
    
    # Rule 3: Associate group operation types (e.g., Connect -> Disconnect, File Open -> File Write, web visits)
    # for the same user on the same host across multiple days
    activity_graph = rule_3(activity_graph, daily_sequences, total_days)

    return activity_graph

def build_company_graph():
    """Placeholder for constructing a company graph."""
    pass

def build_object_graph():
    """Placeholder for constructing an object graph."""
    pass

# TODO:
# 1. Implement build_activity_graph
# 2. Implement build_company_graph
# 3. Implement build_object_graph
# 4. Implement graph merging functionality

In [3]:
# start_time = time.time()

data_version = "r_part"
# Extract vertices from CSV files
vertices = extract_vertices_from_files(os.path.join("./our_data/", data_version))
# Calculate the total number of days in the dataset
total_days = get_total_days(vertices)
# Group vertices into daily graphs (each element is a graph containing all log nodes for a specific day)
daily_sequences = group_vertices_by_day(vertices, total_days)

# # Construct activity, company, and object graphs
# activity_graph = build_activity_graph(daily_sequences, total_days)
# company_graph = build_company_graph()
# object_graph = build_object_graph()

# # Save the activity graph as an edge list and pickled format
# nx.write_edgelist(activity_graph, "./our_data/activity_graph_edge")
# nx.write_gpickle(activity_graph, "./our_data/activity_graph.gpickle")

# print("Graph saving completed")
# print(f"Execution time: {time.time() - start_time} seconds")

Extracting vertices from data files:
...Processing logon.csv...


999it [00:00, 48674.66it/s]


...Processing file.csv...


999it [00:00, 34462.11it/s]


...Processing http.csv...


999it [00:00, 17595.30it/s]


...Processing device.csv...


999it [00:00, 48768.14it/s]


Sample of sorted vertices:
[{'vertex_type': 'logon', 'vertex_id': '{Y0A4-H1YL52RQ-3185VBZR}', 'user': 'AAB1302', 'pc': 'PC-5565', 'activity': 'Logon', 'timestamp': 1262419020.0, 'host': 'PC-5565', 'time_str': '01/02/2010 08:57:00'}]
Dataset spans 4 days
...Grouping vertices by day...


100%|██████████| 3996/3996 [00:00<00:00, 181533.45it/s]


In [4]:
# Access the attributes of a specific node in the second day's graph
node_attributes = daily_sequences[1].nodes['{Q2I7-M9EJ92BC-8465IMNM}']
print(node_attributes)

# Optionally, retrieve all node IDs in the second day's graph
# node_ids = list(daily_sequences[1].nodes)

{'type': 'logon', 'user': 'AAG1136', 'obj': 'PC-5456', 'activity': 'Logon', 'timestamp': 1262570670.0, 'host': 'PC-5456'}


In [5]:
# Print the number of daily graphs
print(f"Number of daily graphs: {len(daily_sequences)}")

Number of daily graphs: 4


In [6]:
from itertools import chain

def apply_rule_1(activity_graph, daily_sequences, total_days):
    """Connect nodes for the same user on the same host within the same day."""
    # Initialize a list to store host-to-node mappings for each day
    host_to_nodes = [None] * total_days
    
    for daily_graph in tqdm(daily_sequences):
        if daily_graph:
            # Dictionary to group nodes by host within the current day's graph
            host_groups = {}
            nodes = list(daily_graph.nodes())
            for node_id in nodes:
                host = daily_graph.nodes[node_id]['host']
                if host not in host_groups:
                    host_groups[host] = [node_id]
                else:
                    host_groups[host].append(node_id)
                    # Add an edge between consecutive nodes on the same host
                    daily_graph.add_edge(
                        host_groups[host][-2],
                        host_groups[host][-1],
                        EdgeType=1,
                        weight=1
                    )
            # Store the host-to-node mapping for the current day
            day_index = daily_sequences.index(daily_graph)
            host_to_nodes[day_index] = host_groups
    
    # Return activity_graph unchanged (kept for interface consistency) and host-to-node mappings
    return activity_graph, host_to_nodes

def apply_rule_2(activity_graph, daily_sequences, total_days, host_to_nodes):
    """Link behavior chains for the same user on the same host across multiple days."""
    # Merge all daily graphs into the activity graph
    for daily_graph in daily_sequences:
        if daily_graph:
            activity_graph = nx.compose(activity_graph, daily_graph)
    
    # Connect nodes across different days for the same host
    for day_i in range(total_days):
        for day_j in range(day_i + 1, total_days):
            if not (daily_sequences[day_i] and daily_sequences[day_j]):
                continue
            if host_to_nodes[day_i] and host_to_nodes[day_j]:
                for host in host_to_nodes[day_i]:
                    if host in host_to_nodes[day_j]:
                        # Get first and last nodes for the host on both days
                        start_i = host_to_nodes[day_i][host][0]
                        end_i = host_to_nodes[day_i][host][-1]
                        start_j = host_to_nodes[day_j][host][0]
                        end_j = host_to_nodes[day_j][host][-1]
                        
                        # Calculate edge weight based on node counts
                        count_i = len(host_to_nodes[day_i][host])
                        count_j = len(host_to_nodes[day_j][host])
                        weight = min(count_i, count_j) / max(count_i, count_j)
                        
                        # Add edges between start and end nodes across days
                        activity_graph.add_edge(start_i, start_j, EdgeType=2, weight=weight)
                        activity_graph.add_edge(end_i, end_j, EdgeType=2, weight=weight)
    
    return activity_graph

def apply_rule_3(activity_graph, daily_sequences, total_days, host_to_nodes):
    """Associate group operation types (e.g., File Open -> File Write, Connect -> Disconnect) across days."""
    # Define valid group operation patterns
    operation_patterns = [["File Open", "File Write"], ["Connect", "Disconnect"]]
    valid_operations = list(chain.from_iterable(operation_patterns))

    def identify_pattern_matches(node_ids):
        """Identify nodes matching group operation patterns for the same object."""
        # Map activity types to node IDs, tracking if they match a pattern
        activity_map = {}
        for node_id in node_ids:
            act_type = activity_graph.nodes[node_id]['activity']
            if act_type in valid_operations:
                if act_type not in activity_map:
                    activity_map[act_type] = []
                # Store node ID and match status (0 = unmatched, 1 = matched)
                activity_map[act_type].append([node_id, 0])
        
        # Match nodes within patterns for the same object
        for pattern in operation_patterns:
            if pattern[0] in activity_map and pattern[1] in activity_map:
                for start_node in activity_map[pattern[0]]:
                    obj = activity_graph.nodes[start_node[0]]['obj']
                    for end_node in activity_map[pattern[-1]]:
                        if obj == activity_graph.nodes[end_node[0]]['obj']:
                            start_node[1] = end_node[1] = 1
                            break
        
        # Remove unmatched nodes
        for act_type in list(activity_map.keys()):
            activity_map[act_type] = [node for node in activity_map[act_type] if node[1] == 1]
            if not activity_map[act_type]:
                del activity_map[act_type]
        
        return activity_map

    # Organize host activity and pattern matches by host and day
    host_day_activities = {}
    host_day_patterns = {}
    for day_idx in range(total_days):
        if host_to_nodes[day_idx] is None:
            continue
        for host in host_to_nodes[day_idx]:
            if host not in host_day_activities:
                host_day_activities[host] = []
                host_day_patterns[host] = []
            host_day_activities[host].append(host_to_nodes[day_idx][host])
            host_day_patterns[host].append(identify_pattern_matches(host_to_nodes[day_idx][host]))

    # Connect nodes of the same operation type across different days for the same host
    for host in host_day_patterns:
        for day_i in range(len(host_day_patterns[host])):
            for day_j in range(day_i + 1, len(host_day_patterns[host])):
                for activity in host_day_patterns[host][day_i]:
                    if activity in host_day_patterns[host][day_j]:
                        for node_i in host_day_patterns[host][day_i][activity]:
                            for node_j in host_day_patterns[host][day_j][activity]:
                                # TODO: Define a dynamic weight
                                activity_graph.add_edge(node_i[0], node_j[0], EdgeType=3, weight=0.5)

    return activity_graph

# Initialize the activity graph
activity_graph = nx.MultiGraph()

# Apply Rule 1: Connect nodes for the same user on the same host within the same day
activity_graph, host_to_nodes = apply_rule_1(activity_graph, daily_sequences, total_days)

# Apply Rule 2: Link behavior chains across days for the same host
activity_graph = apply_rule_2(activity_graph, daily_sequences, total_days, host_to_nodes)

# Apply Rule 3: Connect group operation types across days for the same host
activity_graph = apply_rule_3(activity_graph, daily_sequences, total_days, host_to_nodes)

100%|██████████| 4/4 [00:00<00:00, 166.66it/s]


In [7]:
# Access the host-to-node mapping for the last day
last_day_hosts = host_to_nodes[-1]
if last_day_hosts is not None:
    print(f"Host-to-node mapping for the last day: {last_day_hosts}")
else:
    print("No activity recorded for the last day")

Host-to-node mapping for the last day: {'PC-5565': ['{Y0A4-H1YL52RQ-3185VBZR}', '{I9Z7-P8BH93GZ-3728JYER}', '{B5T6-D9IS87XI-7265XQTC}', '{V6Y1-J0YZ81RE-9196GDRV}', '{B1P9-D0MO83HY-2970LHTX}', '{K5W7-E7DB54VH-8075VMGF}', '{X2F0-W2QL07DJ-7820BSWU}', '{U0U0-Z3TJ40RO-4444CPOP}', '{B8V5-U4UM39SP-1559TSIR}', '{E9M5-Q1GF37JY-3959IYQR}', '{B7R6-C1QJ26IS-6557IWTQ}', '{U4K0-D6RZ00SW-8017AQYD}', '{A0C1-X0QW67TA-6190GRLG}', '{T4U5-M7RS65KK-3980KHJS}', '{G1O2-N0UV95ZF-6212XOVU}', '{X6R1-X8IN05VY-2422RTOO}', '{V8X6-K8DL10YQ-2855MASK}'], 'PC-0765': ['{L2L2-S1KZ66VU-4509IIDB}', '{S0C7-P9DK78RA-0121EQKB}', '{Z4I4-I7QO41CC-1361ZGEI}', '{B9D6-X4HI52XA-9124LWMH}', '{E5Y7-I0AO25KK-5206GALB}', '{Q8I7-J2KM90AE-1480AEEL}'], 'PC-2009': ['{K8O6-Q0GC86VF-1536AAUT}', '{G6P7-Q0TQ86MX-1325UUVZ}', '{T1A7-W9IF66JO-7157EDKL}', '{S2L3-Y7RR70FH-8820IDCD}'], 'PC-9155': ['{X8Y2-S6IG70VE-2437JWPD}', '{W3Y9-Z7TA15OZ-6871JPUP}', '{C4E3-Z9QA61JB-8109GRGY}', '{B4X7-N9UD22XQ-5466HGMX}', '{G5T3-Y0XQ76TY-3213OBNM}', '{X0L2-D0HJ23

In [8]:
# Check host-to-node mappings and activity graph connections
# Print node IDs for host PC-9950 on the first day (day 0)
if host_to_nodes[0] is not None and 'PC-9950' in host_to_nodes[0]:
    print(f"Nodes for PC-9950 on day 0: {host_to_nodes[0]['PC-9950']}")
else:
    print("No nodes for PC-9950 on day 0")

# Print node IDs for host PC-9950 on the second day (day 1)
if host_to_nodes[1] is not None and 'PC-9950' in host_to_nodes[1]:
    print(f"Nodes for PC-9950 on day 1: {host_to_nodes[1]['PC-9950']}")
else:
    print("No nodes for PC-9950 on day 1")

# Print neighbors and edge data for the first node of PC-9950 on the second day
if host_to_nodes[1] is not None and 'PC-9950' in host_to_nodes[1] and host_to_nodes[1]['PC-9950']:
    first_node = host_to_nodes[1]['PC-9950'][0]
    print(f"Neighbors of {first_node} in activity_graph: {activity_graph[first_node]}")
else:
    print("No nodes or neighbors for PC-9950 on day 1")

Nodes for PC-9950 on day 0: ['{Y3E5-O5JH43YP-2140MCDI}', '{V8T4-X1BO44XW-6300FEJL}', '{A0K8-Y3OO71TA-8438JNAO}', '{H3C3-O6WG03OY-0619DOQP}', '{Z8L6-O4RO26OO-0236TYAG}', '{M4B7-X8KB96EZ-6502LWAW}', '{B9T8-O8GP76AG-5890LFCB}', '{J1Q6-N9BB85SO-2705GKMO}', '{B7N7-A2HS26CH-2459SERU}', '{Z8K8-N6FD13EU-1547IJKK}', '{G3Z2-X2HZ41PF-4739KFTU}', '{I5H3-H7SO25AV-0123ZWKE}', '{K0X1-U2UU70IA-4970DVGV}', '{Q9I1-K3QL90TL-8552HBKA}', '{K1N2-U2SK21AR-7192IIQF}', '{Q3Y1-Y9SC12MB-2466BTLS}', '{H0I8-N0FY75UO-6936XUNA}', '{P7V9-A3OA41IG-8730MTTL}', '{M8Z2-K4CM26FD-2054FKGT}', '{S3P9-R7WM46BI-2267MZOH}', '{I4E6-K7HN90FQ-5248JXGM}', '{Q1X2-Z0MX16AD-2287SLBL}', '{Z7K0-E2UH16SM-8696WPKA}', '{V2W1-B2BB60VH-8198OJJM}', '{R8K9-C4IU76HA-7025BZNA}', '{E5L6-C0ZI10KF-8661PXMK}', '{R3O7-H4RE54UM-2774SMJG}', '{B1T5-W0XH95QK-9519TESR}', '{E2B3-P5UC96II-7420FAIF}', '{E3L0-L8XF74DW-6334NDPQ}', '{Y4X9-V5BO56MZ-5658SVLF}', '{V0M6-R0EH82CS-1641PLDI}', '{T7E8-L8NU53MH-1998PEIX}', '{W3O1-V3UH44WW-1478ZGBF}', '{Y1I2-T6EA38GF-039

In [9]:
# Check attributes of a specific node in the activity graph
node_id = '{Y3E5-O5JH43YP-2140MCDI}'
if node_id in activity_graph.nodes:
    print(f"Attributes of node {node_id}: {activity_graph.nodes[node_id]}")
else:
    print(f"Node {node_id} not found in activity_graph")

# Print host, activity, and object for nodes associated with PC-9950 on the first day
if host_to_nodes[0] is not None and 'PC-9950' in host_to_nodes[0]:
    print("Nodes for PC-9950 on day 0:")
    for node_id in host_to_nodes[0]['PC-9950']:
        node_attrs = activity_graph.nodes[node_id]
        print(f"  Host: {node_attrs['host']}, Activity: {node_attrs['activity']}, Object: {node_attrs['obj']}")
else:
    print("No nodes for PC-9950 on day 0")

Attributes of node {Y3E5-O5JH43YP-2140MCDI}: {'type': 'logon', 'user': 'WTC0699', 'obj': 'PC-9950', 'activity': 'Logon', 'timestamp': 1262502000.0, 'host': 'PC-9950'}
Nodes for PC-9950 on day 0:
  Host: PC-9950, Activity: Logon, Object: PC-9950
  Host: PC-9950, Activity: Logon, Object: PC-9950
  Host: PC-9950, Activity: Connect, Object: PC-9950
  Host: PC-9950, Activity: File Open, Object: R:\E5S5QBXU.doc
  Host: PC-9950, Activity: File Write, Object: R:\WTC0699\MKE4S35D.pdf
  Host: PC-9950, Activity: File Write, Object: R:\JCE2TLZ7.doc
  Host: PC-9950, Activity: File Write, Object: R:\JCE2TLZ7.doc
  Host: PC-9950, Activity: Disconnect, Object: PC-9950
  Host: PC-9950, Activity: Connect, Object: PC-9950
  Host: PC-9950, Activity: File Open, Object: R:\WTC0699\VMMYIW17.pdf
  Host: PC-9950, Activity: File Open, Object: R:\E5S5QBXU.doc
  Host: PC-9950, Activity: Disconnect, Object: PC-9950
  Host: PC-9950, Activity: Connect, Object: PC-9950
  Host: PC-9950, Activity: File Open, Object: R:

In [10]:
# Check attributes of a specific node in the activity graph
node_id = '{E4U3-S2ED81TV-8881XRGN}'
if node_id in activity_graph.nodes:
    print(f"Attributes of node {node_id}: {activity_graph.nodes[node_id]}")
else:
    print(f"Node {node_id} not found in activity_graph")

Attributes of node {E4U3-S2ED81TV-8881XRGN}: {'type': 'file', 'user': 'WTC0699', 'obj': 'R:\\JCE2TLZ7.doc', 'activity': 'File Write', 'timestamp': 1262592342.0, 'host': 'PC-9950'}


In [11]:
# Print the number of nodes for PC-9950 on the first day (day 0)
if host_to_nodes[0] is not None and 'PC-9950' in host_to_nodes[0]:
    print(f"Number of nodes for PC-9950 on day 0: {len(host_to_nodes[0]['PC-9950'])}")
else:
    print("No nodes for PC-9950 on day 0")

# Print the number of nodes for PC-9950 on the second day (day 1)
if host_to_nodes[1] is not None and 'PC-9950' in host_to_nodes[1]:
    print(f"Number of nodes for PC-9950 on day 1: {len(host_to_nodes[1]['PC-9950'])}")
else:
    print("No nodes for PC-9950 on day 1")

# Collect and print up to 10 host IDs from the first day
host_ids = []
if host_to_nodes[0] is not None:
    host_ids = list(host_to_nodes[0].keys())
    print(f"First 10 hosts on day 0: {host_ids[:10]}")
else:
    print("No hosts on day 0")

Number of nodes for PC-9950 on day 0: 42
Number of nodes for PC-9950 on day 1: 3
First 10 hosts on day 0: ['PC-3971', 'PC-4302', 'PC-0765', 'PC-2009', 'PC-2320', 'PC-9155', 'PC-1262', 'PC-9843', 'PC-1713', 'PC-5507']


In [12]:
# Collect unique activity types for a specific host (PC-9950) across all days
host = 'PC-9950'
activity_set = set()

for day_idx, day_activity in enumerate(host_to_nodes):
    if day_activity is not None and host in day_activity:
        for node_id in day_activity[host]:
            activity_set.add(activity_graph.nodes[node_id]['activity'])

print(f"Unique activities for {host} across all days: {activity_set}")

Unique activities for PC-9950 across all days: {'Logoff', 'File Open', 'Connect', 'Disconnect', 'Logon', 'File Write'}


In [13]:
# Access attributes of the first node for PC-9950 on day 0
host = 'PC-9950'
if host_to_nodes[0] is not None and host in host_to_nodes[0] and host_to_nodes[0][host]:
    first_node_id = host_to_nodes[0][host][0]
    if first_node_id in activity_graph.nodes:
        print(f"Attributes of node {first_node_id}: {activity_graph.nodes[first_node_id]}")
    else:
        print(f"Node {first_node_id} not found in activity_graph")
else:
    print(f"No nodes for {host} on day 0")

# Optionally, print the list of node IDs for PC-9950 on day 0 (uncommented version of the commented line)
# if host_to_nodes[0] is not None and host in host_to_nodes[0]:
#     print(f"Node IDs for {host} on day 0: {host_to_nodes[0][host]}")
# else:
#     print(f"No nodes for {host} on day 0")

Attributes of node {Y3E5-O5JH43YP-2140MCDI}: {'type': 'logon', 'user': 'WTC0699', 'obj': 'PC-9950', 'activity': 'Logon', 'timestamp': 1262502000.0, 'host': 'PC-9950'}


In [14]:
# Initialize a list of sublists
a = [[1, 2], [3, 4]]

# Modify the second element of each sublist to 555
for sublist in a:
    if len(sublist) > 1:  # Ensure the sublist has an index 1
        sublist[1] = 555
    else:
        print(f"Warning: Sublist {sublist} is too short to modify index 1")

print(f"Modified list: {a}")

Modified list: [[1, 555], [3, 555]]


In [15]:
# Initialize a list of integers
a = [1, 2, 3, 4, 5, 6, 7]

# Keep only odd numbers
a = [num for num in a if num % 2 != 0]

print(f"List after removing even numbers: {a}")

List after removing even numbers: [1, 3, 5, 7]
