In [3]:
import csv
from collections import Counter, defaultdict


# Path of the access log that main() parses.
LOG_FILE = "sample.log"
# Path of the CSV report written by save_to_csv().
CSV_FILE = "log_analysis_results.csv"
# An IP is reported as suspicious when its number of 401 responses
# is strictly greater than this value.
FAILED_LOGIN_THRESHOLD = 10


def parse_log_file(file_path):
    """Parse a whitespace-delimited access log into a list of entry dicts.

    Each line is split on whitespace. Lines with fewer than nine fields are
    skipped. For the remaining lines, field 0 is taken as the client IP,
    field 6 as the requested endpoint and field 8 as the status code.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of dicts with the string keys ``"ip"``, ``"endpoint"`` and
        ``"status"`` (all values are strings), in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    log_entries = []
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding, and replace undecodable bytes instead of
    # aborting the whole run on one malformed request line.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        for line in file:
            parts = line.split()
            if len(parts) < 9:  # Too short to contain IP, endpoint and status
                continue
            log_entries.append(
                {"ip": parts[0], "endpoint": parts[6], "status": parts[8]}
            )
    return log_entries


def count_requests_per_ip(log_entries):
    """Tally how many entries were recorded for each IP address.

    Args:
        log_entries: Iterable of dicts that each carry an ``"ip"`` key.

    Returns:
        A ``Counter`` mapping IP address to its number of entries.
    """
    tally = Counter()
    for record in log_entries:
        tally[record["ip"]] += 1
    return tally


def find_most_frequent_endpoint(log_entries):
    """Return the endpoint that appears most often together with its count.

    Args:
        log_entries: Iterable of dicts that each carry an ``"endpoint"`` key.

    Returns:
        An ``(endpoint, count)`` tuple, or ``("None", 0)`` when there are no
        entries.
    """
    hits = Counter()
    for record in log_entries:
        hits[record["endpoint"]] += 1

    top = hits.most_common(1)
    if not top:
        return ("None", 0)
    return top[0]


def detect_suspicious_activity(log_entries, threshold=None):
    """Find IPs whose number of failed logins exceeds a threshold.

    An entry counts as a failed login when its ``"status"`` is the string
    ``"401"`` (HTTP Unauthorized).

    Args:
        log_entries: Iterable of dicts carrying ``"ip"`` and ``"status"`` keys.
        threshold: An IP is reported only when its failed-login count is
            strictly greater than this value. Defaults to the module-level
            ``FAILED_LOGIN_THRESHOLD`` when omitted.

    Returns:
        A dict mapping each flagged IP to its failed-login count, in order of
        first failed login.
    """
    if threshold is None:
        # Resolved at call time so changes to the module constant are honoured.
        threshold = FAILED_LOGIN_THRESHOLD

    failed_logins = Counter(
        entry["ip"] for entry in log_entries if entry["status"] == "401"
    )
    return {ip: count for ip, count in failed_logins.items() if count > threshold}


def save_to_csv(ip_counts, most_frequent_endpoint, suspicious_activity, file_path=None):
    """Write the three analysis results to a single CSV file.

    The file holds three sections separated by an empty row: requests per
    IP, the most accessed endpoint, and suspicious activity. Each section
    starts with a title row followed by a column-header row.

    Args:
        ip_counts: Mapping of IP address to request count.
        most_frequent_endpoint: Sequence whose first two items are the
            endpoint and its access count.
        suspicious_activity: Mapping of IP address to failed-login count.
        file_path: Destination path. Defaults to the module-level
            ``CSV_FILE`` when omitted.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    if file_path is None:
        file_path = CSV_FILE

    # newline="" lets the csv module control line endings; the explicit
    # encoding keeps the output independent of the platform default.
    with open(file_path, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)

        # Section 1: requests per IP
        writer.writerow(["Requests per IP"])
        writer.writerow(["IP Address", "Request Count"])
        writer.writerows([ip, count] for ip, count in ip_counts.items())

        writer.writerow([])  # Blank separator row

        # Section 2: most accessed endpoint
        writer.writerow(["Most Accessed Endpoint"])
        writer.writerow(["Endpoint", "Access Count"])
        writer.writerow([most_frequent_endpoint[0], most_frequent_endpoint[1]])

        writer.writerow([])  # Blank separator row

        # Section 3: suspicious activity
        writer.writerow(["Suspicious Activity"])
        writer.writerow(["IP Address", "Failed Login Count"])
        writer.writerows([ip, count] for ip, count in suspicious_activity.items())


def main():
    """Run the full analysis: parse the log, print a report, save the CSV.

    Reads ``LOG_FILE``, computes the per-IP request counts, the most
    frequently accessed endpoint and the suspicious IPs, prints each result
    to stdout, then writes all of them via ``save_to_csv``.
    """
    entries = parse_log_file(LOG_FILE)

    # Compute every result up front, before any output is produced.
    per_ip = count_requests_per_ip(entries)
    top_endpoint = find_most_frequent_endpoint(entries)
    flagged = detect_suspicious_activity(entries)

    # Busiest IPs first; the sort is stable, so ties keep their original order.
    print("\nRequests per IP:")
    ranked = sorted(per_ip.items(), key=lambda pair: pair[1], reverse=True)
    for address, total in ranked:
        print(f"{address:20} {total}")

    print("\nMost Frequently Accessed Endpoint:")
    endpoint, hits = top_endpoint[0], top_endpoint[1]
    print(f"{endpoint} (Accessed {hits} times)")

    print("\nSuspicious Activity Detected:")
    for address, failures in flagged.items():
        print(f"{address:20} {failures}")

    # Persist the same results to disk.
    save_to_csv(per_ip, top_endpoint, flagged)
    print(f"\nResults saved to {CSV_FILE}")


# Run the analysis only when executed directly (as a script or a notebook
# cell), so importing this module has no side effects.
if __name__ == "__main__":
    main()



Requests per IP:
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:

Results saved to log_analysis_results.csv
