In [8]:
import csv
from collections import defaultdict

# Constants
# An IP address is reported as suspicious only when its failed-login count is
# strictly greater than this value.
FAILED_LOGIN_THRESHOLD = 10

# Function to parse the log file
def parse_log(file_path):
    """Tally requests per IP, hits per endpoint, and failed logins per IP.

    Every line is split on whitespace and read positionally: field 0 is
    taken as the client IP, field 6 as the requested endpoint, and field 8
    as the HTTP status code. (This presumably matches an Apache/NCSA-style
    access log -- confirm against the actual log source.)

    Lines with fewer than nine whitespace-separated fields (blank lines,
    truncated records) are skipped entirely so that one malformed line
    neither aborts the run nor gets partially counted.

    Args:
        file_path: Path to the log file to read.

    Returns:
        A tuple ``(ip_count, endpoint_count, failed_logins)`` of
        ``defaultdict(int)`` objects keyed by IP address, endpoint, and
        IP address respectively.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Highest index read below is 8 (status code), so nine fields are required.
    min_fields = 9

    ip_count = defaultdict(int)
    endpoint_count = defaultdict(int)
    failed_logins = defaultdict(int)

    # errors='replace' keeps a stray non-UTF-8 byte from aborting the whole parse.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            parts = line.split()
            if len(parts) < min_fields:
                continue

            ip_address = parts[0]
            endpoint = parts[6]
            status_code = parts[8]

            ip_count[ip_address] += 1
            endpoint_count[endpoint] += 1

            # A failed login is either a 401 response or a line carrying the
            # "Invalid credentials" message.
            if status_code == '401' or 'Invalid credentials' in line:
                failed_logins[ip_address] += 1

    return ip_count, endpoint_count, failed_logins

# Function to output the results to the terminal and save to CSV
def output_results(ip_count, endpoint_count, failed_logins,
                   output_path='log_analysis_results.csv', threshold=None):
    """Print the analysis to the terminal and write the same data to a CSV file.

    Three sections are emitted, in this order, each preceded by its own
    header row in the CSV:

    1. Requests per IP address, sorted by request count descending.
    2. The single most frequently accessed endpoint (omitted when
       ``endpoint_count`` is empty).
    3. IP addresses whose failed-login count is strictly greater than
       ``threshold``.

    Args:
        ip_count: Mapping of IP address -> request count.
        endpoint_count: Mapping of endpoint -> access count. May be empty.
        failed_logins: Mapping of IP address -> failed login count.
        output_path: Destination CSV path; the file is overwritten.
        threshold: Failed-login count an IP must exceed to be reported.
            Defaults to the module-level ``FAILED_LOGIN_THRESHOLD``.

    Raises:
        OSError: If the CSV file cannot be opened for writing.
    """
    if threshold is None:
        threshold = FAILED_LOGIN_THRESHOLD

    ranked_ips = sorted(ip_count.items(), key=lambda x: x[1], reverse=True)
    # max() raises ValueError on an empty mapping (e.g. an empty log file).
    top_endpoint = (
        max(endpoint_count, key=endpoint_count.get) if endpoint_count else None
    )
    flagged = [
        (ip, count) for ip, count in failed_logins.items() if count > threshold
    ]

    # Open the file once instead of re-opening it in append mode per section.
    with open(output_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)

        # Requests per IP address
        print("\nIP Address           Request Count")
        writer.writerow(['IP Address', 'Request Count'])
        for ip, count in ranked_ips:
            print(f"{ip:<20}{count}")
            writer.writerow([ip, count])

        # Most frequently accessed endpoint
        print("\nMost Frequently Accessed Endpoint:")
        writer.writerow(['Endpoint', 'Access Count'])
        if top_endpoint is None:
            print("No endpoints recorded")
        else:
            hits = endpoint_count[top_endpoint]
            print(f"{top_endpoint:<30}(Accessed {hits} times)")
            writer.writerow([top_endpoint, hits])

        # Suspicious activity (failed login attempts above the threshold)
        print("\nSuspicious Activity Detected:")
        writer.writerow(['IP Address', 'Failed Login Count'])
        for ip, count in flagged:
            print(f"{ip:<20}{count}")
            writer.writerow([ip, count])
        if not flagged:
            print("No suspicious activity detected")

# Main function to run the script
def main():
    """Parse 'sample.log' and report the resulting tallies."""
    source_log = 'sample.log'  # Log file analysed by this script
    tallies = parse_log(source_log)
    # parse_log returns (ip_count, endpoint_count, failed_logins), which is
    # exactly the positional order output_results expects.
    output_results(*tallies)

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5

Most Frequently Accessed Endpoint:
/login                        (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected
