In [1]:
import re
import csv
from collections import Counter

# Function to parse the log file
def parse_log_file(file_path, encoding='utf-8'):
    """Read a log file and return its lines.

    Args:
        file_path: Path of the log file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale.

    Returns:
        list[str]: One entry per line of the file, as produced by
        ``readlines()`` (line endings are kept).

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    # errors='replace': request lines carry client-supplied bytes, so a single
    # undecodable byte must not abort the whole analysis. Such bytes become
    # U+FFFD instead of raising UnicodeDecodeError.
    with open(file_path, 'r', encoding=encoding, errors='replace') as file:
        return file.readlines()

# Function to count requests per IP address
def count_requests_per_ip(log_data):
    """Count how many log lines each client address produced.

    The address is the first run of non-whitespace characters at the very
    start of a line; it is not validated as an IP address.

    Args:
        log_data: Iterable of log lines (strings).

    Returns:
        list[tuple[str, int]]: ``(address, request_count)`` pairs sorted from
        most to fewest requests; addresses with equal counts keep the order in
        which they were first seen.
    """
    ip_counts = Counter()
    for line in log_data:
        # Blank lines (e.g. a trailing newline at end of file) and lines that
        # begin with whitespace have no leading token; skip them rather than
        # failing with IndexError.
        match = re.match(r'\S+', line)
        if match:
            ip_counts[match.group()] += 1
    return ip_counts.most_common()

# Function to find the most accessed endpoint
def most_accessed_endpoint(log_data):
    """Find the endpoint that appears in the most requests.

    The endpoint is the token following the HTTP method in the first quoted
    request of a line, e.g. ``/login`` in ``"POST /login HTTP/1.1"``.

    Args:
        log_data: Iterable of log lines (strings).

    Returns:
        tuple[str, int]: ``(endpoint, access_count)`` for the most requested
        endpoint, or ``('', 0)`` when no line contains a request.
    """
    # Accept any upper-case method, not only GET/POST: PUT, DELETE, HEAD, ...
    # are accesses too, and previously a single such line raised IndexError.
    request_pattern = re.compile(r'"[A-Z]+ (\S+)')
    endpoint_counts = Counter(
        match.group(1)
        for match in map(request_pattern.search, log_data)
        if match  # skip blank or malformed lines with no request field
    )
    most_accessed = endpoint_counts.most_common(1)
    return most_accessed[0] if most_accessed else ('', 0)

# Function to detect suspicious activity (failed login attempts)
def detect_suspicious_activity(log_data, threshold=10):
    failed_logins = [re.findall(r'(\S+) - - \[.*\] "POST /login', line)[0] for line in log_data if '401' in line]
    failed_counts = Counter(failed_logins)
    suspicious_ips = [(ip, count) for ip, count in failed_counts.items() if count > threshold]
    return suspicious_ips

# Function to save results to CSV
def save_results_to_csv(ip_results, endpoint_results, suspicious_results,
                        output_path='log_analysis_results.csv'):
    """Write the three analysis results to a single CSV file.

    The file holds three sections, each introduced by its own header row and
    separated from the previous one by a row of two empty cells:
    per-address request counts, the most accessed endpoint, and the addresses
    flagged for failed logins.

    Args:
        ip_results: Iterable of ``(address, request_count)`` rows.
        endpoint_results: Sequence whose first two items are the endpoint and
            its access count.
        suspicious_results: Iterable of ``(address, failed_attempts)`` rows.
        output_path: Destination file; overwritten if it already exists.
            Defaults to ``log_analysis_results.csv`` in the working directory.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    # newline='' lets the csv module control line endings; UTF-8 is fixed so
    # endpoints with non-ASCII characters can be written on every platform.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Section 1: requests per address.
        writer.writerow(['IP Address', 'Request Count'])
        writer.writerows(ip_results)

        # Section 2: most accessed endpoint.
        writer.writerow(['', ''])
        writer.writerow(['Most Accessed Endpoint', 'Access Count'])
        writer.writerow([endpoint_results[0], endpoint_results[1]])

        # Section 3: suspicious activity.
        writer.writerow(['', ''])
        writer.writerow(['Suspicious Activity IP Address', 'Failed Login Attempts'])
        writer.writerows(suspicious_results)


In [2]:
if __name__ == "__main__":
    # Read the log once; every analysis below works on the same lines.
    log_data = parse_log_file('sample.log')

    # Run the three analyses.
    ip_results = count_requests_per_ip(log_data)
    endpoint_results = most_accessed_endpoint(log_data)
    suspicious_results = detect_suspicious_activity(log_data)

    # Assemble the report line by line, then emit it in one go.
    report = ["IP Address Requests:"]
    report.extend(f"{ip}: {count}" for ip, count in ip_results)
    report.append(f"\nMost Accessed Endpoint: {endpoint_results[0]} (Accessed {endpoint_results[1]} times)")
    report.append("\nSuspicious Activity:")
    report.extend(f"{ip}: {count} failed login attempts" for ip, count in suspicious_results)
    print("\n".join(report))

    # Persist the same results to CSV.
    save_results_to_csv(ip_results, endpoint_results, suspicious_results)


IP Address Requests:
203.0.113.5: 8
198.51.100.23: 8
192.168.1.1: 7
10.0.0.2: 6
192.168.1.100: 5

Most Accessed Endpoint: /login (Accessed 13 times)

Suspicious Activity:
