In [6]:
# Location of the log file to analyse; the raw-string prefix keeps the
# backslashes in the Windows path literal.
log_file_path = r'D:\log_analysis.py\sample.log'


In [7]:
import re
from collections import Counter
import csv

# Path to the log file to analyse (update it to the actual location).
# The raw-string prefix keeps the backslashes in the Windows path literal.
log_file_path = r'D:\log_analysis.py\sample.log'

# Patterns are compiled once here instead of being re-specified for every line.
_IP_PATTERN = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
_ENDPOINT_PATTERN = re.compile(r'"[A-Z]+\s(.*?)\sHTTP')
# Accept any protocol version (HTTP/1.0, HTTP/1.1, HTTP/2, ...), not only 1.1,
# so requests made over other versions are not silently dropped.
_STATUS_PATTERN = re.compile(r'HTTP/\d+(?:\.\d+)?" (\d+)')


def parse_log_file(log_file_path):
    """Count requests per IP, hits per endpoint and failed logins per IP.

    A line is counted only when all three of the following are found in it:
    a dotted-quad IP address, a request target between an upper-case method
    and ``HTTP``, and a numeric status following the closing quote of the
    request. Lines missing any of them are skipped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_counter, endpoint_counter, failed_logins)`` of
        ``collections.Counter`` objects. ``failed_logins`` counts, per IP,
        the lines whose status is ``401``. If the file does not exist an
        error message is printed and three empty counters are returned.
    """
    ip_counter = Counter()
    endpoint_counter = Counter()
    failed_logins = Counter()

    try:
        # errors='replace' keeps a single undecodable byte from aborting the
        # whole analysis; the fields extracted below are plain ASCII.
        with open(log_file_path, 'r', encoding='utf-8', errors='replace') as file:
            for line in file:
                ip_match = _IP_PATTERN.search(line)
                endpoint_match = _ENDPOINT_PATTERN.search(line)
                status_match = _STATUS_PATTERN.search(line)

                # Skip lines that do not look like a complete access-log entry.
                if not (ip_match and endpoint_match and status_match):
                    continue

                ip = ip_match.group()
                ip_counter[ip] += 1
                endpoint_counter[endpoint_match.group(1)] += 1
                if status_match.group(1) == '401':  # Track failed logins
                    failed_logins[ip] += 1

    except FileNotFoundError:
        print(f"Error: File at {log_file_path} not found.")
        return Counter(), Counter(), Counter()

    return ip_counter, endpoint_counter, failed_logins

def print_and_save_results(ip_counter, endpoint_counter, failed_logins, *,
                           threshold=10,
                           output_path='log_analysis_results.csv'):
    """Print the analysis to the console and write the same data to a CSV file.

    Args:
        ip_counter: Mapping of IP address to request count.
        endpoint_counter: ``collections.Counter`` of endpoint to access count;
            only its single most common entry is reported.
        failed_logins: Mapping of IP address to failed-login count.
        threshold: An IP is reported as suspicious only when its failed-login
            count is strictly greater than this value. Defaults to 10.
        output_path: Path of the CSV file to (over)write. Defaults to
            ``'log_analysis_results.csv'`` in the current directory.

    The CSV holds three sections in order: requests per IP, the most accessed
    endpoint (omitted when ``endpoint_counter`` is empty), and the suspicious
    IPs. The header row of the last section is written even when no IP
    exceeds the threshold.
    """
    # Most frequently accessed endpoint, or None when nothing was counted.
    most_accessed = endpoint_counter.most_common(1)[0] if endpoint_counter else None

    # Filter once so the console report and the CSV cannot disagree.
    suspicious = [
        (ip, count) for ip, count in failed_logins.items() if count > threshold
    ]

    print("Requests per IP Address:")
    for ip, count in ip_counter.items():
        print(f"{ip}: {count}")

    if most_accessed:
        print(f"\nMost Accessed Endpoint:\n{most_accessed[0]} (Accessed {most_accessed[1]} times)")

    print("\nSuspicious Activity Detected:")
    for ip, count in suspicious:
        print(f"{ip}: {count} failed login attempts")

    # Explicit UTF-8 so a non-ASCII endpoint cannot fail on a legacy code page.
    with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(['IP Address', 'Request Count'])
        writer.writerows(ip_counter.items())

        if most_accessed:
            writer.writerow(['Endpoint', 'Access Count'])
            writer.writerow([most_accessed[0], most_accessed[1]])

        writer.writerow(['IP Address', 'Failed Login Count'])
        writer.writerows(suspicious)

# Parse the configured log file, then report the three counters on the
# console and in the CSV file.
ip_counter, endpoint_counter, failed_logins = parse_log_file(log_file_path)
print_and_save_results(
    ip_counter=ip_counter,
    endpoint_counter=endpoint_counter,
    failed_logins=failed_logins,
)


Requests per IP Address:
192.168.1.1: 7
203.0.113.5: 8
10.0.0.2: 6
198.51.100.23: 8
192.168.1.100: 5

Most Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
