In [3]:
# Import required modules
import csv
from collections import defaultdict

# File paths: the access log that main() parses and the CSV report it writes.
log_file_path = 'sample.log'
output_csv_path = 'log_analysis_results.csv'

# Default threshold for suspicious activity detection.
# main() flags an IP only when its failed-login count is strictly greater
# than this value (the comparison is `>`, not `>=`).
FAILED_LOGIN_THRESHOLD = 10

def parse_log_file(file_path):
    """Parse the log file and tally requests and failed logins.

    Every line is split on whitespace; lines with fewer than nine fields
    are skipped. Field 0 is used as the client IP address, field 6 as the
    requested endpoint and field 8 as the HTTP status code (presumably an
    Apache/Nginx-style access log -- confirm against the real input).

    A line counts as a failed login when its status code is ``'401'`` or
    when the text ``'Invalid credentials'`` appears anywhere in the line.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_requests, endpoint_requests, failed_logins)`` of
        ``defaultdict(int)`` objects mapping, respectively, IP address to
        request count, endpoint to request count, and IP address to
        failed-login count.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    ip_requests = defaultdict(int)
    endpoint_requests = defaultdict(int)
    failed_logins = defaultdict(int)

    # Log lines carry client-controlled text (paths, user agents), so stray
    # non-UTF-8 bytes are expected. Decode as UTF-8 and substitute U+FFFD for
    # undecodable bytes instead of letting one bad byte abort the analysis.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as log_file:
        for line in log_file:
            parts = line.split()
            if len(parts) < 9:
                # Too short to contain IP, endpoint and status code.
                continue

            ip_address, endpoint, status_code = parts[0], parts[6], parts[8]

            ip_requests[ip_address] += 1
            endpoint_requests[endpoint] += 1

            # Failed login: HTTP 401 or the explicit failure message.
            if status_code == '401' or 'Invalid credentials' in line:
                failed_logins[ip_address] += 1

    return ip_requests, endpoint_requests, failed_logins

def write_results_to_csv(ip_requests, most_accessed_endpoint, suspicious_activities, output_path):
    """Save the analysis report to ``output_path`` as a three-section CSV.

    Sections are written in order -- requests per IP, most accessed
    endpoint, suspicious activity -- each with a title row and a column
    header row, and separated from the previous section by an empty row.

    Args:
        ip_requests: Iterable of ``(ip, request_count)`` pairs, written in
            the order given.
        most_accessed_endpoint: Indexable whose items 0 and 1 are the
            endpoint and its access count.
        suspicious_activities: Mapping of IP address to failed-login count.
        output_path: Destination file; it is opened in ``'w'`` mode.
    """
    with open(output_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file)

        # Section 1: request totals, in the caller's order.
        writer.writerows([
            ['Requests per IP'],
            ['IP Address', 'Request Count'],
        ])
        writer.writerows([address, total] for address, total in ip_requests)

        # Section 2: the single busiest endpoint.
        writer.writerows([
            [],
            ['Most Accessed Endpoint'],
            ['Endpoint', 'Access Count'],
        ])
        top_endpoint, top_hits = most_accessed_endpoint[0], most_accessed_endpoint[1]
        writer.writerow([top_endpoint, top_hits])

        # Section 3: IPs flagged for failed logins.
        writer.writerows([
            [],
            ['Suspicious Activity Detected'],
            ['IP Address', 'Failed Login Count'],
        ])
        writer.writerows(
            [address, failures] for address, failures in suspicious_activities.items()
        )

def main():
    """Analyse the log, print a report to the terminal and write the CSV.

    Reads ``log_file_path`` via ``parse_log_file``, then:

    * sorts the per-IP request counts in descending order,
    * picks the endpoint with the highest request count,
    * keeps the IPs whose failed-login count is strictly greater than
      ``FAILED_LOGIN_THRESHOLD``,

    prints all three sections and hands the same data to
    ``write_results_to_csv`` with ``output_csv_path`` as the destination.
    """
    # Parse the log file
    ip_requests, endpoint_requests, failed_logins = parse_log_file(log_file_path)

    # Count requests per IP and sort in descending order
    sorted_ip_requests = sorted(ip_requests.items(), key=lambda item: item[1], reverse=True)

    # Find the most accessed endpoint. An empty log (or one with no
    # parseable lines) yields no endpoints, and max() on an empty sequence
    # raises ValueError, so fall back to a placeholder pair that the
    # printing and CSV code below can still index.
    most_accessed_endpoint = max(
        endpoint_requests.items(),
        key=lambda item: item[1],
        default=('N/A', 0),
    )

    # Detect suspicious activities (strictly above the threshold)
    suspicious_activities = {
        ip: count for ip, count in failed_logins.items() if count > FAILED_LOGIN_THRESHOLD
    }

    # Print results to terminal
    print("Requests per IP:")
    for ip, count in sorted_ip_requests:
        print(f"{ip:20} {count}")
    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")
    print("\nSuspicious Activity Detected:")
    for ip, count in suspicious_activities.items():
        print(f"{ip:20} {count}")

    # Write results to a CSV file
    write_results_to_csv(sorted_ip_requests, most_accessed_endpoint, suspicious_activities, output_csv_path)

# Run the analysis only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()


Requests per IP:
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
