In [1]:
import pandas as pd

# Read the authentication log CSV into `df` and preview its first five rows
df = pd.read_csv(filepath_or_buffer="../data/auth_logs.csv")
df.head(n=5)


Unnamed: 0,timestamp,username,source_ip,status,location
0,2025-07-21 10:00:00,alice,192.168.1.5,FAIL,United Kingdom
1,2025-07-21 10:00:10,alice,192.168.1.5,FAIL,United Kingdom
2,2025-07-21 10:00:20,alice,192.168.1.5,FAIL,United Kingdom
3,2025-07-21 10:00:30,alice,192.168.1.5,FAIL,United Kingdom
4,2025-07-21 10:00:40,alice,192.168.1.5,FAIL,United Kingdom


In [8]:
# 🔹 Step 1 of the SIEM detection notebook: ingest the authentication logs

import pandas as pd

# Read the generated auth-log CSV into the working DataFrame `df`
df = pd.read_csv(filepath_or_buffer="../data/auth_logs.csv")

# Preview the leading five rows as a quick sanity check
df.head(n=5)


Unnamed: 0,timestamp,username,source_ip,status,location
0,2025-07-21 03:30:00,hr_user,172.16.0.14,SUCCESS,United Kingdom
1,2025-07-21 08:00:10,root,192.168.1.18,FAIL,China
2,2025-07-21 08:00:13,admin,212.45.99.5,FAIL,Russia
3,2025-07-21 08:00:24,charlie,102.133.9.88,FAIL,Brazil
4,2025-07-21 08:00:37,david,192.168.1.10,FAIL,Germany


In [9]:
# 🔸 DETECTION LOGIC: Brute-force login alert (5+ FAILs from same IP in 60 seconds)

from bisect import bisect_left, bisect_right
from datetime import timedelta

# Tunable detection thresholds (named here instead of being buried in the loop)
FAIL_THRESHOLD = 5                      # minimum FAIL events inside one window to alert
WINDOW_LENGTH = timedelta(seconds=60)   # window span, measured from the window's first FAIL


def find_brute_force_alerts(failed, threshold=FAIL_THRESHOLD, window=WINDOW_LENGTH):
    """Return at most one brute-force alert per source IP.

    For every distinct ``source_ip`` in ``failed``, find the earliest timestamp
    ``start`` for which at least ``threshold`` rows have a timestamp in the
    closed interval ``[start, start + window]``.

    Parameters
    ----------
    failed : pandas.DataFrame
        Failed-login rows with a ``source_ip`` column and a datetime
        ``timestamp`` column.
    threshold : int
        Minimum number of rows inside the window needed to raise an alert.
    window : datetime.timedelta
        Length of the detection window; both endpoints are inclusive.

    Returns
    -------
    list of dict
        One dict per flagged IP with keys ``source_ip``, ``fail_count``,
        ``start_time`` and ``end_time`` (timestamp of the last counted row).
        IPs appear in the order they are first seen in ``failed``.
    """
    found = []
    for ip, ip_rows in failed.groupby('source_ip', sort=False):
        # Missing timestamps can never fall inside a window, so drop them; sort
        # locally so the binary searches below do not depend on the caller's order.
        times = ip_rows['timestamp'].dropna().sort_values().tolist()

        for start in times:
            lo = bisect_left(times, start)            # first row sharing `start`
            hi = bisect_right(times, start + window)  # one past the last row in the window

            if hi - lo >= threshold:
                found.append({
                    'source_ip': ip,
                    'fail_count': hi - lo,
                    'start_time': start,
                    'end_time': times[hi - 1]
                })
                break  # only flag first brute-force window per IP
    return found


# Convert timestamp to proper datetime format (updates `df` in place, as before)
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Keep only failed login attempts, ordered by source IP and then by time
failed_logins = (
    df[df['status'] == "FAIL"]
    .sort_values(by=['source_ip', 'timestamp'])
)

# Detection logic
alerts = find_brute_force_alerts(failed_logins)

# Show results
if alerts:
    print("🚨 Brute-force login attempts detected:")
    for alert in alerts:
        print(alert)
else:
    print("✅ No brute-force behaviour detected.")


✅ No brute-force behaviour detected.


In [10]:
# Inspect every logged event (any status) whose source IP is 192.168.1.5
df.loc[df['source_ip'].eq('192.168.1.5')]


Unnamed: 0,timestamp,username,source_ip,status,location
32,2025-07-21 08:20:39,alice,192.168.1.5,FAIL,United Kingdom
48,2025-07-21 08:32:26,alice,192.168.1.5,SUCCESS,United Kingdom
69,2025-07-21 08:54:50,alice,192.168.1.5,FAIL,United Kingdom
70,2025-07-21 08:55:00,alice,192.168.1.5,FAIL,United Kingdom
102,2025-07-21 09:12:52,charlie,192.168.1.5,FAIL,Germany
104,2025-07-21 09:16:23,alice,192.168.1.5,FAIL,United Kingdom
127,2025-07-21 09:39:09,alice,192.168.1.5,FAIL,United Kingdom
139,2025-07-21 09:51:43,david,192.168.1.5,FAIL,United Kingdom
145,2025-07-21 09:55:07,admin,192.168.1.5,FAIL,United Kingdom
