In [8]:
# Step 1 - load Windows logs
import pandas as pd

df = pd.read_csv('../data/windows_logs.csv')
df.head()


Unnamed: 0,timestamp,event_id,event_type,username,host,logon_type,process_name,privilege_used
0,2025-07-21 11:00:00,4625,Logon Failure,guest,WORKSTATION-01,RemoteDesktop,cmd.exe,
1,2025-07-21 11:00:13,4688,Process Created,admin,WORKSTATION-01,Network,cmd.exe,
2,2025-07-21 11:00:30,4625,Logon Failure,alice,FINANCE-LAPTOP,Interactive,cmd.exe,
3,2025-07-21 11:00:54,4672,Privilege Escalation,guest,SERVER-02,RemoteDesktop,chrome.exe,SeDebugPrivilege
4,2025-07-21 11:01:12,4672,Privilege Escalation,guest,WORKSTATION-01,Network,explorer.exe,SeDebugPrivilege


In [9]:
# Step 2 - prep columns and keep a pristine copy
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_clean = df.copy()  # keep for IOC injections and clean re-runs


In [10]:
# Rule 1 - suspicious parent-child process execution

# if your dataset does not have a parent_process field, simulate one for testing
if 'parent_process' not in df.columns:
    df['parent_process'] = ['explorer.exe'] * len(df)

# lower case for safe matching
df['process_name'] = df['process_name'].astype(str).str.lower()
df['parent_process'] = df['parent_process'].astype(str).str.lower()

suspicious_parents = ['explorer.exe', 'winword.exe', 'outlook.exe']
suspicious_children = ['powershell.exe', 'cmd.exe', 'wscript.exe', 'mshta.exe', 'certutil.exe']

suspicious_exec = df[
    df['parent_process'].isin(suspicious_parents) &
    df['process_name'].isin(suspicious_children)
]

if not suspicious_exec.empty:
    print('⚑ suspicious parent-child process executions detected:')
    display(suspicious_exec[['timestamp', 'parent_process', 'process_name', 'username', 'host']])
else:
    print('✅ no suspicious parent-child executions detected in this dataset.')


⚑ suspicious parent-child process executions detected:


Unnamed: 0,timestamp,parent_process,process_name,username,host
0,2025-07-21 11:00:00,explorer.exe,cmd.exe,guest,WORKSTATION-01
1,2025-07-21 11:00:13,explorer.exe,cmd.exe,admin,WORKSTATION-01
2,2025-07-21 11:00:30,explorer.exe,cmd.exe,alice,FINANCE-LAPTOP
7,2025-07-21 11:03:30,explorer.exe,cmd.exe,guest,FINANCE-LAPTOP
13,2025-07-21 11:02:49,explorer.exe,powershell.exe,charlie,FINANCE-LAPTOP
16,2025-07-21 11:03:28,explorer.exe,cmd.exe,guest,WORKSTATION-01
22,2025-07-21 11:10:16,explorer.exe,cmd.exe,alice,WORKSTATION-01
29,2025-07-21 11:13:03,explorer.exe,powershell.exe,admin,FINANCE-LAPTOP
33,2025-07-21 11:08:15,explorer.exe,powershell.exe,guest,SERVER-02
38,2025-07-21 11:06:20,explorer.exe,powershell.exe,admin,WORKSTATION-01


In [21]:
# Rule 2 - repeated failed logins from the same host (clean run)

from datetime import timedelta

failed_logins = df[df['event_id'] == 4625].copy()
failed_logins['timestamp'] = pd.to_datetime(failed_logins['timestamp'])
failed_logins = failed_logins.sort_values(by='timestamp')

threshold_attempts = 5
time_window = timedelta(minutes=2)
suspicious_windows = []

for host, group in failed_logins.groupby('host'):
    group = group.sort_values('timestamp').reset_index(drop=True)

    for i in range(len(group)):
        window_start = group.loc[i, 'timestamp']
        window_end = window_start + time_window

        window_group = group[(group['timestamp'] >= window_start) &
                             (group['timestamp'] <= window_end)]

        if len(window_group) >= threshold_attempts:
            suspicious_windows.append(window_group)
            break  # one hit per host is enough for this demo

if suspicious_windows:
    result_df = pd.concat(suspicious_windows).drop_duplicates()
    print('⚑ multiple failed login attempts detected from same host:')
    display(result_df[['timestamp', 'event_type', 'username', 'host']])
else:
    print('✅ no suspicious login patterns detected in this dataset.')


✅ no suspicious login patterns detected in this dataset.


In [17]:
# IOC injection for Rule 2 - create a burst of 5 fails from one host inside 2 minutes

from datetime import timedelta

# work on a copy of the clean dataset
df = df_clean.copy()

# choose an anchor time safely inside your data range
anchor = df['timestamp'].min() + timedelta(minutes=90)

ioc_host = 'WORKSTATION-01'   # reuse a real host from your dataset
ioc_user = 'guest'            # any user is fine for this rule

ioc_rows = [
    {'timestamp': (anchor + timedelta(seconds=0)).strftime('%Y-%m-%d %H:%M:%S'),
     'event_id': 4625, 'event_type': 'Logon Failure', 'username': ioc_user,
     'host': ioc_host, 'logon_type': 'Network', 'process_name': 'cmd.exe', 'privilege_used': None},

    {'timestamp': (anchor + timedelta(seconds=20)).strftime('%Y-%m-%d %H:%M:%S'),
     'event_id': 4625, 'event_type': 'Logon Failure', 'username': ioc_user,
     'host': ioc_host, 'logon_type': 'Network', 'process_name': 'cmd.exe', 'privilege_used': None},

    {'timestamp': (anchor + timedelta(seconds=40)).strftime('%Y-%m-%d %H:%M:%S'),
     'event_id': 4625, 'event_type': 'Logon Failure', 'username': ioc_user,
     'host': ioc_host, 'logon_type': 'Network', 'process_name': 'cmd.exe', 'privilege_used': None},

    {'timestamp': (anchor + timedelta(seconds=60)).strftime('%Y-%m-%d %H:%M:%S'),
     'event_id': 4625, 'event_type': 'Logon Failure', 'username': ioc_user,
     'host': ioc_host, 'logon_type': 'Network', 'process_name': 'cmd.exe', 'privilege_used': None},

    {'timestamp': (anchor + timedelta(seconds=80)).strftime('%Y-%m-%d %H:%M:%S'),
     'event_id': 4625, 'event_type': 'Logon Failure', 'username': ioc_user,
     'host': ioc_host, 'logon_type': 'Network', 'process_name': 'cmd.exe', 'privilege_used': None},
]

df_ioc = pd.concat([df, pd.DataFrame(ioc_rows)], ignore_index=True)
df_ioc['timestamp'] = pd.to_datetime(df_ioc['timestamp'])
df_ioc = df_ioc.sort_values('timestamp')

print('IOC injected for Rule 2 - 5 fails within 2 minutes on', ioc_host)
df_ioc.tail(7)


IOC injected for Rule 2 - 5 fails within 2 minutes on WORKSTATION-01


Unnamed: 0,timestamp,event_id,event_type,username,host,logon_type,process_name,privilege_used
89,2025-07-21 11:44:30,9999,Mimikatz Execution,bob,SERVER-02,Network,svchost.exe,SeDebugPrivilege
92,2025-07-21 11:46:00,4625,Logon Failure,bob,HR-PC,Network,rundll32.exe,
100,2025-07-21 12:30:00,4625,Logon Failure,guest,WORKSTATION-01,Network,cmd.exe,
101,2025-07-21 12:30:20,4625,Logon Failure,guest,WORKSTATION-01,Network,cmd.exe,
102,2025-07-21 12:30:40,4625,Logon Failure,guest,WORKSTATION-01,Network,cmd.exe,
103,2025-07-21 12:31:00,4625,Logon Failure,guest,WORKSTATION-01,Network,cmd.exe,
104,2025-07-21 12:31:20,4625,Logon Failure,guest,WORKSTATION-01,Network,cmd.exe,


In [18]:
# Re-run Rule 2 detection on the IOC dataset

failed_logins = df_ioc[df_ioc['event_id'] == 4625].copy()
failed_logins['timestamp'] = pd.to_datetime(failed_logins['timestamp'])
failed_logins = failed_logins.sort_values(by='timestamp')

threshold_attempts = 5
time_window = timedelta(minutes=2)
suspicious_windows = []

for host, group in failed_logins.groupby('host'):
    group = group.sort_values('timestamp').reset_index(drop=True)

    for i in range(len(group)):
        window_start = group.loc[i, 'timestamp']
        window_end = window_start + time_window

        window_group = group[(group['timestamp'] >= window_start) &
                             (group['timestamp'] <= window_end)]

        if len(window_group) >= threshold_attempts:
            suspicious_windows.append(window_group)
            break

if suspicious_windows:
    result_df = pd.concat(suspicious_windows).drop_duplicates()
    print('⚑ multiple failed login attempts detected from same host (IOC run):')
    display(result_df[['timestamp', 'event_type', 'username', 'host']])
else:
    print('✅ no suspicious login patterns detected.')


⚑ multiple failed login attempts detected from same host (IOC run):


Unnamed: 0,timestamp,event_type,username,host
2,2025-07-21 12:30:00,Logon Failure,guest,WORKSTATION-01
3,2025-07-21 12:30:20,Logon Failure,guest,WORKSTATION-01
4,2025-07-21 12:30:40,Logon Failure,guest,WORKSTATION-01
5,2025-07-21 12:31:00,Logon Failure,guest,WORKSTATION-01
6,2025-07-21 12:31:20,Logon Failure,guest,WORKSTATION-01


In [20]:
result_df_ioc_preview = df_ioc[df_ioc['host'] == ioc_host].sort_values('timestamp').tail(10)
display(result_df_ioc_preview[['timestamp','event_id','event_type','username','host']])


Unnamed: 0,timestamp,event_id,event_type,username,host
90,2025-07-21 11:27:00,4688,Process Created,admin,WORKSTATION-01
62,2025-07-21 11:29:58,4624,Logon Success,admin,WORKSTATION-01
98,2025-07-21 11:31:02,4688,Process Created,bob,WORKSTATION-01
82,2025-07-21 11:31:26,4688,Process Created,charlie,WORKSTATION-01
93,2025-07-21 11:37:12,4672,Privilege Escalation,guest,WORKSTATION-01
100,2025-07-21 12:30:00,4625,Logon Failure,guest,WORKSTATION-01
101,2025-07-21 12:30:20,4625,Logon Failure,guest,WORKSTATION-01
102,2025-07-21 12:30:40,4625,Logon Failure,guest,WORKSTATION-01
103,2025-07-21 12:31:00,4625,Logon Failure,guest,WORKSTATION-01
104,2025-07-21 12:31:20,4625,Logon Failure,guest,WORKSTATION-01


In [19]:
# Rule 3 - suspicious privilege escalation (event id 4672)

priv_escalation = df_clean[df_clean['event_id'] == 4672].copy()
priv_escalation['timestamp'] = pd.to_datetime(priv_escalation['timestamp'])

# tune these to your dataset
suspicious_users = ['guest', 'test', 'admin', 'svc_account']
suspicious_hosts = ['FINANCE-LAPTOP', 'DEV-01', 'HR-PC']

suspicious_events = priv_escalation[
    priv_escalation['username'].str.lower().isin([u.lower() for u in suspicious_users]) |
    priv_escalation['host'].isin(suspicious_hosts)
]

if not suspicious_events.empty:
    print('⚑ suspicious privilege escalation events detected:')
    display(suspicious_events[['timestamp', 'event_type', 'username', 'host', 'privilege_used']])
else:
    print('✅ no suspicious privilege escalation detected.')


⚑ suspicious privilege escalation events detected:


Unnamed: 0,timestamp,event_type,username,host,privilege_used
3,2025-07-21 11:00:54,Privilege Escalation,guest,SERVER-02,SeDebugPrivilege
4,2025-07-21 11:01:12,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
9,2025-07-21 11:03:27,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
16,2025-07-21 11:03:28,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
20,2025-07-21 11:07:40,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
28,2025-07-21 11:13:32,Privilege Escalation,guest,HR-PC,SeDebugPrivilege
37,2025-07-21 11:18:30,Privilege Escalation,guest,FINANCE-LAPTOP,SeDebugPrivilege
38,2025-07-21 11:06:20,Privilege Escalation,admin,WORKSTATION-01,SeDebugPrivilege
45,2025-07-21 11:17:15,Privilege Escalation,bob,HR-PC,SeDebugPrivilege
50,2025-07-21 11:21:40,Privilege Escalation,guest,FINANCE-LAPTOP,SeDebugPrivilege
