Import Libraries

In [3]:
import re
import csv
from collections import Counter, defaultdict
from IPython.display import display, Markdown


Define Parsing Function

In [4]:
def parse_log_file(file_path, failed_login_codes=('401',)):
    """Parse an access log and tally requests, endpoints and failed logins.

    Each line is searched for an IPv4-looking address followed by a quoted
    request (``"METHOD endpoint HTTP..."``) and a numeric status code. Lines
    that do not match the pattern are silently skipped.

    Args:
        file_path: Path of the log file to read.
        failed_login_codes: Status codes that count as a failed login. Items
            may be strings or ints; a single bare string such as ``'401'`` is
            accepted as one code. Defaults to ``('401',)``.

    Returns:
        A tuple ``(ip_requests, endpoints, failed_logins)`` where
        ``ip_requests`` is a Counter of matched lines per IP, ``endpoints`` is
        a Counter of matched lines per endpoint, and ``failed_logins`` is a
        ``defaultdict(int)`` of failed-login lines per IP.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    ip_requests = Counter()
    endpoints = Counter()
    failed_logins = defaultdict(int)

    # Compiled once up front instead of handing the raw pattern to re.search
    # for every line.
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) .* "(?P<method>[A-Z]+) (?P<endpoint>.+?) HTTP.*" (?P<status>\d+)'
    )

    # Normalise to a set of strings: the captured status is always a string,
    # and a bare string argument must not be iterated character by character.
    if isinstance(failed_login_codes, (str, int)):
        failed_login_codes = (failed_login_codes,)
    failed_codes = {str(code) for code in failed_login_codes}

    # Explicit UTF-8 with errors='replace': a stray undecodable byte in one
    # line must not abort the whole parse with UnicodeDecodeError, and the
    # result must not depend on the platform's default encoding.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            match = log_pattern.search(line)
            if match is None:
                continue

            ip = match.group('ip')

            # Count requests per IP
            ip_requests[ip] += 1

            # Count endpoint access
            endpoints[match.group('endpoint')] += 1

            # Detect failed login attempts
            if match.group('status') in failed_codes:
                failed_logins[ip] += 1

    return ip_requests, endpoints, failed_logins


Define CSV Export Function

In [6]:
def save_to_csv(ip_requests, most_accessed_endpoint, failed_logins, output_file):
    """Write the analysis results to a CSV file as three titled sections.

    The sections are "Requests per IP", "Most Accessed Endpoint" and
    "Suspicious Activity", each with a title row and a header row, separated
    by an empty row. Per-IP rows are written in descending order of count;
    ties keep the mapping's own iteration order.

    Args:
        ip_requests: Mapping of IP address to request count.
        most_accessed_endpoint: Two-item sequence ``(endpoint, access_count)``.
        failed_logins: Mapping of IP address to failed-login count.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    def by_count_desc(counts):
        # sorted() is stable, so equal counts keep their original order.
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)

    # Explicit UTF-8 so endpoints containing non-ASCII characters are written
    # the same way on every platform; newline='' is what the csv module
    # expects so it can control line endings itself.
    with open(output_file, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)

        # Write requests per IP
        writer.writerow(['Requests per IP'])
        writer.writerow(['IP Address', 'Request Count'])
        writer.writerows([ip, count] for ip, count in by_count_desc(ip_requests))

        writer.writerow([])  # Blank line for separation

        # Write most accessed endpoint
        writer.writerow(['Most Accessed Endpoint'])
        writer.writerow(['Endpoint', 'Access Count'])
        writer.writerow([most_accessed_endpoint[0], most_accessed_endpoint[1]])

        writer.writerow([])  # Blank line for separation

        # Write suspicious activity
        writer.writerow(['Suspicious Activity'])
        writer.writerow(['IP Address', 'Failed Login Count'])
        writer.writerows([ip, count] for ip, count in by_count_desc(failed_logins))


Define Display Function

In [7]:
def display_results(ip_requests, most_accessed_endpoint, suspicious_activity):
    """Render the analysis results in the notebook output.

    Shows three sections, each under a Markdown heading: request counts per
    IP, the most frequently accessed endpoint, and the suspicious IPs with
    their failed-login counts. Per-IP rows are listed in descending order of
    count; ties keep the mapping's own iteration order.

    Args:
        ip_requests: Mapping of IP address to request count (a Counter or a
            plain dict).
        most_accessed_endpoint: Two-item sequence ``(endpoint, access_count)``.
        suspicious_activity: Mapping of IP address to failed-login count.
    """
    def by_count_desc(counts):
        # Same ordering Counter.most_common() gives, but also works for a
        # plain dict, which has no most_common().
        return sorted(counts.items(), key=lambda item: item[1], reverse=True)

    # Display IP Requests
    display(Markdown("### IP Address Request Counts"))
    for ip, count in by_count_desc(ip_requests):
        print(f"{ip:20} {count}")

    # Display Most Accessed Endpoint
    display(Markdown("### Most Frequently Accessed Endpoint"))
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    # Display Suspicious Activity
    display(Markdown("### Suspicious Activity Detected"))
    if not suspicious_activity:
        # Without this line an empty result leaves a bare heading, which
        # reads as if the output had been cut off.
        print("No suspicious activity detected.")
    for ip, count in by_count_desc(suspicious_activity):
        print(f"{ip:20} {count}")


Run Analysis

In [8]:
# User input for file path; strip the stray whitespace that tends to come
# along with a pasted path.
log_file_path = input("Enter the path to the log file: ").strip()
output_file_path = "log_analysis_results.csv"
failed_login_threshold = 10  # an IP is flagged only when its count exceeds this

# Parse the log file
ip_requests, endpoints, failed_logins = parse_log_file(log_file_path)

# Identify the most accessed endpoint. most_common(1) returns an empty list
# when no line matched, so fall back to a placeholder rather than letting
# the [0] lookup raise IndexError.
if endpoints:
    most_accessed_endpoint = endpoints.most_common(1)[0]
else:
    most_accessed_endpoint = ("N/A", 0)
    display(Markdown(f"**Warning:** no parsable log entries found in `{log_file_path}`."))

# Filter suspicious activity
suspicious_activity = {ip: count for ip, count in failed_logins.items() if count > failed_login_threshold}

# Display results
display_results(ip_requests, most_accessed_endpoint, suspicious_activity)

# Save results to CSV
save_to_csv(ip_requests, most_accessed_endpoint, suspicious_activity, output_file_path)
display(Markdown(f"**Results saved to `{output_file_path}`**"))


### IP Address Request Counts

203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5


### Most Frequently Accessed Endpoint

/login (Accessed 13 times)


### Suspicious Activity Detected

**Results saved to `log_analysis_results.csv`**