In [10]:
# 🔹 STEP 1 — LOAD LOGS (entry point of the SIEM detection notebook)

import pandas as pd

# Read the generated authentication log CSV into the working DataFrame
df = pd.read_csv(filepath_or_buffer="../data/auth_logs.csv")

# Preview the first five rows as a quick visual check
df.head(n=5)


Unnamed: 0,timestamp,username,source_ip,status,location
0,2025-07-21 03:30:00,hr_user,172.16.0.14,SUCCESS,United Kingdom
1,2025-07-21 08:00:10,root,192.168.1.18,FAIL,China
2,2025-07-21 08:00:13,admin,212.45.99.5,FAIL,Russia
3,2025-07-21 08:00:24,charlie,102.133.9.88,FAIL,Brazil
4,2025-07-21 08:00:37,david,192.168.1.10,FAIL,Germany


In [11]:
# Parse the timestamp column into datetimes, then snapshot the frame so
# later cells can restart from data that has no injected rows.
df["timestamp"] = pd.to_datetime(df["timestamp"])
df_clean = df.copy(deep=True)  # independent baseline copy for clean reruns


In [12]:
# RULE 1 — Brute-force login: alert when a single source IP produces
# 5 or more FAIL events inside one 60-second window.

from datetime import timedelta

# Timestamps must be real datetimes before the subtraction further down
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Keep only the failures, ordered per IP and then chronologically
failed_logins = (
    df[df['status'] == "FAIL"]
    .copy()
    .sort_values(by=['source_ip', 'timestamp'])
)

alerts = []

# Walk each IP's failures; every failure in turn opens a candidate window
for ip, ip_df in failed_logins.groupby('source_ip'):
    timestamps = ip_df['timestamp'].tolist()

    for start in timestamps:
        # Failures at or after `start` and no more than 60 seconds later
        window = [t for t in timestamps if 0 <= (t - start).total_seconds() <= 60]

        if len(window) < 5:
            continue

        # `timestamps` is already in time order, so window[-1] is the latest hit
        alerts.append({
            'source_ip': ip,
            'fail_count': len(window),
            'start_time': start,
            'end_time': window[-1]
        })
        break  # report only the first qualifying window for this IP

# Report
if not alerts:
    print("✅ No brute-force behaviour detected.")
else:
    print("🚨 Brute-force login attempts detected:")
    print(*alerts, sep="\n")


✅ No brute-force behaviour detected.


In [13]:
# Rule 1 IOC: five FAIL events from one IP, 12 seconds apart, so the whole
# burst fits inside a single 60-second window.
ioc_rows = [
    [stamp, 'alice', '192.168.1.99', 'FAIL', 'United Kingdom']
    for stamp in (
        '2025-07-21 09:15:00',
        '2025-07-21 09:15:12',
        '2025-07-21 09:15:24',
        '2025-07-21 09:15:36',
        '2025-07-21 09:15:48',
    )
]
df_ioc = pd.DataFrame(
    ioc_rows,
    columns=['timestamp', 'username', 'source_ip', 'status', 'location'],
)
df_ioc['timestamp'] = pd.to_datetime(df_ioc['timestamp'])

# Restart from the clean baseline, append the IOC rows, then order by time
df = pd.concat([df_clean.copy(), df_ioc], ignore_index=True).sort_values(by='timestamp')
print("IOC injected for Rule 1")


IOC injected for Rule 1


In [23]:
# DETECTION LOGIC: Password spraying
# Rule: 5 or more *different* usernames attempted from the same source IP
# within a 60-second window. Every row for the IP is counted, whatever its
# status, because no SUCCESS/FAIL filter is applied here.

from datetime import timedelta

# Reset the flag on every run of this cell. Testing for the name in locals()
# is unreliable in a notebook: once any earlier run has set the flag it stays
# in the kernel, and the "no activity" message could never be printed again.
alert_triggered = False

#  Step 1: Ensure timestamps are in proper datetime format
# Required for the time arithmetic and comparisons below
df['timestamp'] = pd.to_datetime(df['timestamp'])

#  Step 2: Sort the log data first by IP, then by timestamp
# so each IP's activity is examined in chronological order
df = df.sort_values(by=['source_ip', 'timestamp'])

#  Step 3: Examine each unique source IP one at a time
for ip in df['source_ip'].unique():

    # Only the log entries belonging to this IP
    ip_logs = df[df['source_ip'] == ip]

    #  Step 4: Treat every event from this IP as the start of a window
    for row_index, row_data in ip_logs.iterrows():

        # Window opens at the current event...
        timestamp = row_data["timestamp"]

        # ...and closes exactly one minute later (inclusive)
        window_end = timestamp + timedelta(minutes=1)

        #  Step 5: All events from this IP that fall inside the window
        window_logs = ip_logs[
            (ip_logs['timestamp'] >= timestamp) &
            (ip_logs['timestamp'] <= window_end)
        ]

        #  Step 6: Count distinct usernames in the window
        # (.nunique() ignores repeats of the same username)
        unique_usernames = window_logs['username'].nunique()

        #  Step 7: 5+ distinct usernames in one minute → likely password spray
        # Note: there is no early exit, so overlapping windows that each
        # reach the threshold will each print their own alert.
        if unique_usernames >= 5:
            print(f"Password spraying detected from IP {ip} at {timestamp}")

            # Every username seen in the window, repeats included
            print(f"Usernames tried: {window_logs['username'].tolist()}")
            print("-" * 50)  # Divider for readability
            alert_triggered = True


# Confirmation message when this run raised no alert at all
if not alert_triggered:
    print("✅ No password spraying activity detected in this dataset.")


Password spraying detected from IP 10.0.0.44 at 2025-07-21 05:00:00
Usernames tried: ['alpha', 'bravo', 'charlie', 'delta', 'echo']
--------------------------------------------------


In [20]:
# Rule 2 IOC (password spraying): one IP tries five different usernames
from datetime import timedelta

spray_ip = '10.0.0.44'   # internal/VPN-style address
usernames = ['alpha', 'bravo', 'charlie', 'delta', 'echo']  # five distinct accounts

# Anchor the burst 90 minutes after the earliest event in the clean data
anchor = df_clean['timestamp'].min() + timedelta(minutes=90)

# One FAIL per username, 10 seconds apart. Timestamps are written as text
# here and parsed back to datetimes after the concat below.
ioc_rows = [
    {
        'timestamp': (anchor + timedelta(seconds=10 * step)).strftime('%Y-%m-%d %H:%M:%S'),
        'username': name,
        'source_ip': spray_ip,
        'status': 'FAIL',
        'location': 'Brazil',
    }
    for step, name in enumerate(usernames)
]

# Clean baseline + IOC rows, re-parsed and ordered by IP then time
df_ioc = pd.concat([df_clean.copy(), pd.DataFrame(ioc_rows)], ignore_index=True)
df_ioc['timestamp'] = pd.to_datetime(df_ioc['timestamp'])
df_ioc = df_ioc.sort_values(by=['source_ip', 'timestamp'])

print("IOC injected for Rule 2 (password spray) → 5 FAILs, 5 different usernames, 60s window.")
df_ioc.tail(n=7)


IOC injected for Rule 2 (password spray) → 5 FAILs, 5 different usernames, 60s window.


Unnamed: 0,timestamp,username,source_ip,status,location
87,2025-07-21 09:02:45,support,212.45.99.5,FAIL,United Kingdom
141,2025-07-21 09:52:10,charlie,212.45.99.5,FAIL,India
45,2025-07-21 08:30:57,bob,85.111.23.6,SUCCESS,Russia
46,2025-07-21 08:31:37,finance_user,85.111.23.6,FAIL,China
61,2025-07-21 08:46:01,bob,85.111.23.6,SUCCESS,Germany
108,2025-07-21 09:21:11,bob,85.111.23.6,FAIL,China
109,2025-07-21 09:22:41,eve,85.111.23.6,FAIL,Brazil


In [19]:
# Sanity check: show the first 10 events (in time order) recorded for the
# spray IP, so the injected FAIL burst can be confirmed by eye.
spray_ip = '10.0.0.44'
df_ioc.loc[df_ioc['source_ip'] == spray_ip].sort_values(by='timestamp').head(n=10)


Unnamed: 0,timestamp,username,source_ip,status,location
150,2025-07-21 05:00:00,alpha,10.0.0.44,FAIL,Brazil
151,2025-07-21 05:00:10,bravo,10.0.0.44,FAIL,Brazil
152,2025-07-21 05:00:20,charlie,10.0.0.44,FAIL,Brazil
153,2025-07-21 05:00:30,delta,10.0.0.44,FAIL,Brazil
154,2025-07-21 05:00:40,echo,10.0.0.44,FAIL,Brazil
29,2025-07-21 08:16:11,bob,10.0.0.44,FAIL,Brazil
35,2025-07-21 08:22:29,david,10.0.0.44,FAIL,Brazil
38,2025-07-21 08:24:25,charlie,10.0.0.44,SUCCESS,Brazil
56,2025-07-21 08:39:48,bob,10.0.0.44,SUCCESS,India
64,2025-07-21 08:50:00,eve,10.0.0.44,SUCCESS,India


In [22]:
# IMPORTANT: the detection cells read `df`, so rebind it to an independent
# copy of the IOC-augmented dataset before running them.
df = df_ioc.copy(deep=True)


In [16]:
#  DETECTION LOGIC: Success After Multiple Failures from the Same IP
# Rule: a SUCCESS login that was preceded by 3 or more FAILs from the same
# source IP during the previous 10 minutes raises an alert.
# Assumes df['timestamp'] already holds datetimes (this cell does not convert it).

from datetime import timedelta  # Used to calculate time windows (e.g. "last 10 minutes")

# STEP 1: Separate SUCCESS and FAIL login attempts
# All successful login events
success_logs = df[df['status'] == "SUCCESS"]

# All failed login events
fail_logs = df[df['status'] == "FAIL"]

# STEP 2: Sort logs by IP address and timestamp
# so events are compared in chronological order per IP address
success_logs = success_logs.sort_values(by=['source_ip', 'timestamp'])
fail_logs = fail_logs.sort_values(by=['source_ip', 'timestamp'])

# STEP 3: Collect one dictionary per detected event
success_after_fail_alerts = []

# STEP 4: For each IP that had at least one SUCCESS, look backward in time
for ip in success_logs['source_ip'].unique():

    # Successful logins for the current IP
    ip_successes = success_logs[success_logs['source_ip'] == ip]

    # Failed logins for the same IP
    ip_fails = fail_logs[fail_logs['source_ip'] == ip]

    # iterrows() yields (index, row) pairs. The row has to be bound to the
    # same name that is read below: the loop used to bind `sucess_row_data`
    # while the body read `success_row`, which raises NameError on a fresh
    # kernel or silently reuses a stale row left behind by another cell.
    for index_row, success_row in ip_successes.iterrows():
        success_time = success_row['timestamp']     # Timestamp of successful login
        username = success_row['username']          # Username that logged in

        # STEP 5: Look for failed logins within 10 minutes before the success
        window_start = success_time - timedelta(minutes=10)

        # Failures at or after window_start and strictly before the success
        recent_fails = ip_fails[
            (ip_fails['timestamp'] >= window_start) &
            (ip_fails['timestamp'] < success_time)
        ]

        # 3 or more failures in that 10-minute window trigger an alert
        if len(recent_fails) >= 3:
            success_after_fail_alerts.append({
                'source_ip': ip,                           # IP the logins came from
                'username': username,                      # Account that logged in
                'fail_count': len(recent_fails),           # Number of failed attempts
                'fail_window_start': window_start,         # Start of the 10-minute lookback window
                'success_time': success_time               # When login was successful
            })

# STEP 6: Print results in a readable format
if success_after_fail_alerts:
    print(" Success-after-failure alerts:\n")
    for alert in success_after_fail_alerts:
        print(alert)
else:
    print("✅ No success-after-failure activity detected.")


✅ No success-after-failure activity detected.


In [17]:
# Rule 3 IOC (success after failures): three FAILs followed by a SUCCESS,
# all from one IP and one account, inside a 10-minute span.
from datetime import timedelta
import pandas as pd

ip = '192.168.1.5'
user = 'alice'
loc = 'United Kingdom'

# Anchor = 150 minutes after the earliest event in the clean dataset
anchor = df_clean['timestamp'].min() + timedelta(minutes=150)

# Each pair is (minutes relative to the anchor, login outcome)
ioc3_rows = [
    {
        'timestamp': (anchor + timedelta(minutes=offset)).strftime('%Y-%m-%d %H:%M:%S'),
        'username': user,
        'source_ip': ip,
        'status': outcome,
        'location': loc,
    }
    for offset, outcome in [(-9, 'FAIL'), (-7, 'FAIL'), (-3, 'FAIL'), (0, 'SUCCESS')]
]

# Clean baseline + IOC rows, re-parsed and ordered by IP then time
df_ioc3 = pd.concat([df_clean.copy(), pd.DataFrame(ioc3_rows)], ignore_index=True)
df_ioc3['timestamp'] = pd.to_datetime(df_ioc3['timestamp'])
df_ioc3 = df_ioc3.sort_values(by=['source_ip', 'timestamp'])

print('IOC injected for Rule 3')
df_ioc3.tail(n=10)


IOC injected for Rule 3


Unnamed: 0,timestamp,username,source_ip,status,location
143,2025-07-21 09:53:38,eve,192.168.1.9,FAIL,India
2,2025-07-21 08:00:13,admin,212.45.99.5,FAIL,Russia
82,2025-07-21 09:01:22,bob,212.45.99.5,FAIL,United States
87,2025-07-21 09:02:45,support,212.45.99.5,FAIL,United Kingdom
141,2025-07-21 09:52:10,charlie,212.45.99.5,FAIL,India
45,2025-07-21 08:30:57,bob,85.111.23.6,SUCCESS,Russia
46,2025-07-21 08:31:37,finance_user,85.111.23.6,FAIL,China
61,2025-07-21 08:46:01,bob,85.111.23.6,SUCCESS,Germany
108,2025-07-21 09:21:11,bob,85.111.23.6,FAIL,China
109,2025-07-21 09:22:41,eve,85.111.23.6,FAIL,Brazil


In [None]:
# Debug peek: print the first item produced by df.iterrows() and stop.
# Each item is an (index, row Series) pair, which is why the detection
# loops unpack two names per iteration.
# NOTE(review): looks like leftover exploration — consider removing it
# from the finished notebook.
for row in df.iterrows():
    print(row)
    break


In [8]:
# RULE 3 — Success after repeated failures: a SUCCESS from an IP that was
# preceded by 3+ FAILs from that same IP during the previous 10 minutes.

from datetime import timedelta

# Read from the working df (not df_clean) so injected rows are included
df['timestamp'] = pd.to_datetime(df['timestamp'])
df_sorted = df.sort_values(by=['source_ip', 'timestamp'])

alerts_rule3 = []

# Partition the sorted events by outcome once, up front
fails = df_sorted[df_sorted['status'] == 'FAIL']
successes = df_sorted[df_sorted['status'] == 'SUCCESS']

for ip, ip_success in successes.groupby('source_ip'):
    ip_fails = fails[fails['source_ip'] == ip]
    fail_times = ip_fails['timestamp']

    # Check every successful login from this IP in time order
    for _, success_row in ip_success.iterrows():
        success_time = success_row['timestamp']
        username = success_row['username']

        # Lookback window: the 10 minutes leading up to the success
        window_start = success_time - timedelta(minutes=10)

        # Failures inside the window and strictly earlier than the success
        in_lookback = (fail_times >= window_start) & (fail_times < success_time)
        recent_fails = ip_fails[in_lookback]

        if len(recent_fails) < 3:
            continue

        alerts_rule3.append({
            'source_ip': ip,
            'username': username,
            'fail_count': len(recent_fails),
            'fail_window_start': recent_fails['timestamp'].min(),
            'success_time': success_time
        })
        break  # one alert per IP is enough

# Report
if not alerts_rule3:
    print("✅ No success-after-failure activity detected.")
else:
    print("Success-after-failure alerts:\n")
    print(*alerts_rule3, sep="\n")


✅ No success-after-failure activity detected.


In [9]:
# Rule 3 IOC (success after failures): three FAILs and then a SUCCESS for
# the same account, all from one IP already present in the dataset.
from datetime import timedelta
import pandas as pd

ip = '212.45.99.5'     # an IP that already appears in the logs
user = 'charlie'       # the account that eventually succeeds
loc = 'United Kingdom'

# Anchor = 60 minutes after the earliest event in the clean dataset
anchor = df_clean['timestamp'].min() + timedelta(minutes=60)

# Each triple is (minutes, seconds, outcome); minutes and seconds are both
# passed to timedelta and added to the anchor
ioc_rows = [
    {
        'timestamp': (anchor + timedelta(minutes=mins, seconds=secs)).strftime('%Y-%m-%d %H:%M:%S'),
        'username': user,
        'source_ip': ip,
        'status': outcome,
        'location': loc,
    }
    for mins, secs, outcome in [
        (-9, 0, 'FAIL'),
        (-8, 30, 'FAIL'),
        (-3, 15, 'FAIL'),
        (0, 0, 'SUCCESS'),
    ]
]

# Build on a fresh copy of the baseline so the original data is never lost
df_ioc = pd.concat([df_clean.copy(), pd.DataFrame(ioc_rows)], ignore_index=True)
df_ioc['timestamp'] = pd.to_datetime(df_ioc['timestamp'])
df = df_ioc.sort_values(by=['source_ip', 'timestamp'])  # becomes the working df for the rerun

print("IOC injected for Rule 3")
df.tail(n=7)


IOC injected for Rule 3


Unnamed: 0,timestamp,username,source_ip,status,location
87,2025-07-21 09:02:45,support,212.45.99.5,FAIL,United Kingdom
141,2025-07-21 09:52:10,charlie,212.45.99.5,FAIL,India
45,2025-07-21 08:30:57,bob,85.111.23.6,SUCCESS,Russia
46,2025-07-21 08:31:37,finance_user,85.111.23.6,FAIL,China
61,2025-07-21 08:46:01,bob,85.111.23.6,SUCCESS,Germany
108,2025-07-21 09:21:11,bob,85.111.23.6,FAIL,China
109,2025-07-21 09:22:41,eve,85.111.23.6,FAIL,Brazil
