In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Sanity check: reaching this line means every import above succeeded.
print("Environment ready")


Environment ready


In [2]:
# Synthetic authentication log. Each record is
# [user_id, timestamp, ip, success], where success is 1 for a successful
# login and 0 for a failed one.
data = (
    # u001: five failed logins, one per minute, 08:01 through 08:05.
    [["u001", f"2025-01-01 08:0{minute}:00", "192.168.1.10", 0] for minute in range(1, 6)]
    # u002: one failure followed by a success a minute later.
    + [
        ["u002", "2025-01-01 09:10:00", "10.0.0.5", 0],
        ["u002", "2025-01-01 09:11:00", "10.0.0.5", 1],
    ]
    # u999: five failed logins, one per minute, 01:00 through 01:04.
    + [["u999", f"2025-01-01 01:0{minute}:00", "203.0.113.9", 0] for minute in range(5)]
)

# Build the frame and convert the timestamp strings to datetimes in one chain.
df = pd.DataFrame(
    data,
    columns=["user_id", "timestamp", "ip", "success"],
).assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))

df


Unnamed: 0,user_id,timestamp,ip,success
0,u001,2025-01-01 08:01:00,192.168.1.10,0
1,u001,2025-01-01 08:02:00,192.168.1.10,0
2,u001,2025-01-01 08:03:00,192.168.1.10,0
3,u001,2025-01-01 08:04:00,192.168.1.10,0
4,u001,2025-01-01 08:05:00,192.168.1.10,0
5,u002,2025-01-01 09:10:00,10.0.0.5,0
6,u002,2025-01-01 09:11:00,10.0.0.5,1
7,u999,2025-01-01 01:00:00,203.0.113.9,0
8,u999,2025-01-01 01:01:00,203.0.113.9,0
9,u999,2025-01-01 01:02:00,203.0.113.9,0


In [3]:
# Keep only unsuccessful logins (success == 0), ordered chronologically.
failed = df.loc[df["success"].eq(0)].sort_values(by="timestamp")

failed


Unnamed: 0,user_id,timestamp,ip,success
7,u999,2025-01-01 01:00:00,203.0.113.9,0
8,u999,2025-01-01 01:01:00,203.0.113.9,0
9,u999,2025-01-01 01:02:00,203.0.113.9,0
10,u999,2025-01-01 01:03:00,203.0.113.9,0
11,u999,2025-01-01 01:04:00,203.0.113.9,0
0,u001,2025-01-01 08:01:00,192.168.1.10,0
1,u001,2025-01-01 08:02:00,192.168.1.10,0
2,u001,2025-01-01 08:03:00,192.168.1.10,0
3,u001,2025-01-01 08:04:00,192.168.1.10,0
4,u001,2025-01-01 08:05:00,192.168.1.10,0


In [4]:
# Detection rule: flag a (user_id, ip) pair when it contains a burst of at
# least MIN_BURST_ATTEMPTS failed logins in which every consecutive gap is at
# most MAX_GAP_MINUTES minutes.
MAX_GAP_MINUTES = 5
MIN_BURST_ATTEMPTS = 5
ALERT_COLUMNS = [
    "user_id",
    "ip",
    "failed_attempts",
    "first_attempt",
    "last_attempt",
]

alerts = []

for (user, ip), group in failed.groupby(["user_id", "ip"]):
    group = group.sort_values("timestamp")
    gap_minutes = group["timestamp"].diff().dt.total_seconds() / 60

    # A burst starts at the first attempt (its gap is NaN, so the comparison
    # is False) and restarts whenever the gap exceeds the limit. The running
    # sum of those start flags labels each attempt with its burst number.
    starts_burst = ~(gap_minutes <= MAX_GAP_MINUTES)
    burst_sizes = starts_burst.cumsum().value_counts()

    # The short gaps must be consecutive. Counting them across the whole group
    # would also flag isolated pairs of failures scattered hours apart.
    if burst_sizes.max() >= MIN_BURST_ATTEMPTS:
        alerts.append({
            "user_id": user,
            "ip": ip,
            # The summary fields cover every failed attempt for the pair,
            # not only the attempts inside the qualifying burst.
            "failed_attempts": len(group),
            "first_attempt": group["timestamp"].min(),
            "last_attempt": group["timestamp"].max()
        })

# Explicit columns keep the schema intact when nothing is flagged, so later
# cells can still reference "failed_attempts" on an empty result.
alerts_df = pd.DataFrame(alerts, columns=ALERT_COLUMNS)
alerts_df


Unnamed: 0,user_id,ip,failed_attempts,first_attempt,last_attempt
0,u001,192.168.1.10,5,2025-01-01 08:01:00,2025-01-01 08:05:00
1,u999,203.0.113.9,5,2025-01-01 01:00:00,2025-01-01 01:04:00


In [5]:
# Simple severity score: 10 points for every failed attempt in the alert.
alerts_df["risk_score"] = alerts_df["failed_attempts"].mul(10)

alerts_df


Unnamed: 0,user_id,ip,failed_attempts,first_attempt,last_attempt,risk_score
0,u001,192.168.1.10,5,2025-01-01 08:01:00,2025-01-01 08:05:00,50
1,u999,203.0.113.9,5,2025-01-01 01:00:00,2025-01-01 01:04:00,50


In [6]:
# Rank alerts from highest to lowest risk. The default quicksort is not a
# stable sort; mergesort is, so alerts with equal scores keep their existing
# relative order and the ranking is reproducible.
alerts_df.sort_values(by="risk_score", ascending=False, kind="mergesort")


Unnamed: 0,user_id,ip,failed_attempts,first_attempt,last_attempt,risk_score
0,u001,192.168.1.10,5,2025-01-01 08:01:00,2025-01-01 08:05:00,50
1,u999,203.0.113.9,5,2025-01-01 01:00:00,2025-01-01 01:04:00,50


## Risk Scoring

Each detected alert is assigned a simple risk score: 10 points for every
failed login attempt recorded for the flagged user/IP pair.

This scoring mechanism allows security analysts to prioritize investigations
by severity rather than treating all alerts equally.

In real-world systems, additional factors such as IP reputation, geographic
location, and historical user behavior would further refine this score.


## Conclusion

This project demonstrates a rule-based approach to detecting brute-force
login attempts using authentication logs.

By combining time-window analysis with threshold-based alerting and simple
risk scoring, the system identifies high-confidence attack patterns while
remaining interpretable and lightweight.

Such logic is commonly deployed in Security Information and Event Management
(SIEM) systems as an initial defense layer, often complemented by statistical
or machine learning-based anomaly detection methods.
