In [3]:
import pandas as pd

# Read the exported Windows event log (CSV, path relative to this notebook)
# into the working DataFrame used by the detection rules below.
df = pd.read_csv('../data/windows_logs.csv')

# Quick look at the first five rows to confirm the columns loaded as expected
df.head(n=5)


Unnamed: 0,timestamp,event_id,event_type,username,host,logon_type,process_name,privilege_used
0,2025-07-21 11:00:00,4625,Logon Failure,guest,WORKSTATION-01,RemoteDesktop,cmd.exe,
1,2025-07-21 11:00:13,4688,Process Created,admin,WORKSTATION-01,Network,cmd.exe,
2,2025-07-21 11:00:30,4625,Logon Failure,alice,FINANCE-LAPTOP,Interactive,cmd.exe,
3,2025-07-21 11:00:54,4672,Privilege Escalation,guest,SERVER-02,RemoteDesktop,chrome.exe,SeDebugPrivilege
4,2025-07-21 11:01:12,4672,Privilege Escalation,guest,WORKSTATION-01,Network,explorer.exe,SeDebugPrivilege


In [4]:
# Detection-testing placeholder: give every event the same parent process
# ('explorer.exe'). A scalar assignment is broadcast to all rows.
df['parent_process'] = 'explorer.exe'


In [5]:
# Normalise the child and parent process names to lowercase so the
# list-membership checks that follow are not affected by letter case.
df['process_name'], df['parent_process'] = (
    df['process_name'].str.lower(),
    df['parent_process'].str.lower(),
)


In [12]:
# Rule 1 – watch lists for the parent-child process check.
# Parents: processes that launch the child; children: shells / script hosts /
# download-capable tools that are flagged when started by one of those parents.
suspicious_parents = ['explorer.exe', 'winword.exe', 'outlook.exe']
suspicious_children = ['powershell.exe', 'cmd.exe', 'wscript.exe', 'mshta.exe', 'certutil.exe']

# Keep rows where BOTH the child and the parent are on their watch list.
# NOTE(review): this mask is applied to every row of df regardless of
# event_id — confirm whether it should be limited to process-creation events.
suspicious_exec = df.loc[
    df['process_name'].isin(suspicious_children)
    & df['parent_process'].isin(suspicious_parents)
]


In [13]:
# Report the outcome of the parent-child rule: either a clean message or
# the matching rows, limited to the columns needed for triage.
if suspicious_exec.empty:
    print("✅ No suspicious parent-child executions detected in this dataset.")
else:
    print("🚩 Suspicious parent-child process executions detected:")
    display(
        suspicious_exec.loc[
            :, ['timestamp', 'parent_process', 'process_name', 'username', 'host']
        ]
    )


🚩 Suspicious parent-child process executions detected:


Unnamed: 0,timestamp,parent_process,process_name,username,host
0,2025-07-21 11:00:00,explorer.exe,cmd.exe,guest,WORKSTATION-01
1,2025-07-21 11:00:13,explorer.exe,cmd.exe,admin,WORKSTATION-01
2,2025-07-21 11:00:30,explorer.exe,cmd.exe,alice,FINANCE-LAPTOP
7,2025-07-21 11:03:30,explorer.exe,cmd.exe,guest,FINANCE-LAPTOP
13,2025-07-21 11:02:49,explorer.exe,powershell.exe,charlie,FINANCE-LAPTOP
16,2025-07-21 11:03:28,explorer.exe,cmd.exe,guest,WORKSTATION-01
22,2025-07-21 11:10:16,explorer.exe,cmd.exe,alice,WORKSTATION-01
29,2025-07-21 11:13:03,explorer.exe,powershell.exe,admin,FINANCE-LAPTOP
33,2025-07-21 11:08:15,explorer.exe,powershell.exe,guest,SERVER-02
38,2025-07-21 11:06:20,explorer.exe,powershell.exe,admin,WORKSTATION-01


In [3]:
# Select the failed-logon rows (event ID 4625). The explicit copy gives an
# independent frame, so later column edits do not touch df.
failed_logins = df.loc[df['event_id'] == 4625].copy()

# Preview the first five failed logons
failed_logins.head(n=5)


Unnamed: 0,timestamp,event_id,event_type,username,host,logon_type,process_name,privilege_used
0,2025-07-21 11:00:00,4625,Logon Failure,guest,WORKSTATION-01,RemoteDesktop,cmd.exe,
2,2025-07-21 11:00:30,4625,Logon Failure,alice,FINANCE-LAPTOP,Interactive,cmd.exe,
8,2025-07-21 11:01:28,4625,Logon Failure,alice,HR-PC,RemoteDesktop,explorer.exe,
10,2025-07-21 11:02:20,4625,Logon Failure,bob,WORKSTATION-01,Network,mimikatz.exe,
13,2025-07-21 11:02:49,4625,Logon Failure,charlie,FINANCE-LAPTOP,Interactive,powershell.exe,


In [4]:
import pandas as pd
from datetime import timedelta

# Re-read the raw log export.
# NOTE(review): this rebinds df, so any columns added to the previous df
# object (for example a derived parent_process column) are not present on
# the frame loaded here — confirm that is intended.
df = pd.read_csv('../data/windows_logs.csv')

# Failed logons only (Event ID 4625), with the timestamp parsed to datetime
# and the rows ordered chronologically. Each step returns a new frame.
failed_logins = (
    df.loc[df['event_id'] == 4625]
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .sort_values(by='timestamp')
)

# Detection parameters:
#   threshold_attempts – minimum failed logons inside one window to flag a host
#   time_window        – length of the window, measured from a failed logon
#   suspicious_windows – accumulator for the flagged windows (list of frames)
threshold_attempts = 5
time_window = timedelta(minutes=2)
suspicious_windows = list()


In [5]:
# Rule 2 – per host, look for at least `threshold_attempts` failed logons
# inside a window of length `time_window` that starts at a failed logon.

# Start from an empty accumulator so that re-running this cell does not
# append the same windows a second time.
suspicious_windows = []

for host, group in failed_logins.groupby('host'):
    # Chronological order and a 0..n-1 index so group.loc[i, ...] is positional
    group = group.sort_values('timestamp').reset_index(drop=True)

    for i in range(len(group)):
        # The window opens at the i-th failed logon and is closed on both ends
        window_start = group.loc[i, 'timestamp']
        window_end = window_start + time_window

        window_group = group[
            (group['timestamp'] >= window_start) & (group['timestamp'] <= window_end)
        ]

        if len(window_group) >= threshold_attempts:
            suspicious_windows.append(window_group)
            # Only the first qualifying window per host is recorded; later
            # windows on the same host are not examined.
            break



In [6]:
# Report the outcome of the failed-logon rule. An empty accumulator means no
# host reached the threshold; otherwise stack the flagged windows into one
# frame, drop exact duplicate rows, and show the triage columns.
if not suspicious_windows:
    print("✅ No suspicious login patterns detected in this dataset.")
else:
    result_df = pd.concat(suspicious_windows).drop_duplicates()
    print("🚩 Multiple failed login attempts detected from same host:")
    display(result_df.loc[:, ['timestamp', 'event_type', 'username', 'host']])


✅ No suspicious login patterns detected in this dataset.


In [6]:
# Rule 3 – Detect suspicious use of privilege escalation (Event ID 4672):
# flag 4672 events whose account OR host is on a watch list.

# Keep only Event ID 4672 rows; the copy lets the timestamp column be
# replaced below without writing into df.
priv_escalation = df[df['event_id'] == 4672].copy()

# Parse the timestamp strings into datetime values
priv_escalation['timestamp'] = pd.to_datetime(priv_escalation['timestamp'])

# Watch lists (customise per environment). Matching below ignores letter
# case for both usernames and hostnames, so entries may be written in any case.
suspicious_users = ['guest', 'test', 'admin', 'svc_account']
suspicious_hosts = ['FINANCE-LAPTOP', 'DEV-01', 'HR-PC']

# Normalise both the logged value and the watch-list entries before
# comparing: usernames to lowercase, hostnames to uppercase. Previously only
# the logged username was normalised, so a host logged in a different case
# (or a mixed-case watch-list entry) would silently fail to match.
suspicious_events = priv_escalation[
    priv_escalation['username'].str.lower().isin(
        [user.lower() for user in suspicious_users]
    )
    | priv_escalation['host'].str.upper().isin(
        [watch_host.upper() for watch_host in suspicious_hosts]
    )
]

# Display the results
if not suspicious_events.empty:
    print("🚩 Suspicious privilege escalation events detected:")
    display(suspicious_events[['timestamp', 'event_type', 'username', 'host', 'privilege_used']])
else:
    print("✅ No suspicious privilege escalation detected.")


🚩 Suspicious privilege escalation events detected:


Unnamed: 0,timestamp,event_type,username,host,privilege_used
3,2025-07-21 11:00:54,Privilege Escalation,guest,SERVER-02,SeDebugPrivilege
4,2025-07-21 11:01:12,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
9,2025-07-21 11:03:27,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
16,2025-07-21 11:03:28,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
20,2025-07-21 11:07:40,Privilege Escalation,guest,WORKSTATION-01,SeDebugPrivilege
28,2025-07-21 11:13:32,Privilege Escalation,guest,HR-PC,SeDebugPrivilege
37,2025-07-21 11:18:30,Privilege Escalation,guest,FINANCE-LAPTOP,SeDebugPrivilege
38,2025-07-21 11:06:20,Privilege Escalation,admin,WORKSTATION-01,SeDebugPrivilege
45,2025-07-21 11:17:15,Privilege Escalation,bob,HR-PC,SeDebugPrivilege
50,2025-07-21 11:21:40,Privilege Escalation,guest,FINANCE-LAPTOP,SeDebugPrivilege
