In [24]:
import re
from collections import Counter
import csv


In [25]:
# Number of failed logins (HTTP 401 responses) an IP must exceed to be
# reported as suspicious. find_suspicious_ips compares with a strict `>`,
# so an IP with exactly this many failures is not flagged.
FAILED_LOGIN_THRESHOLD = 3

# Function to open and read the log file content
def read_log_file(file_path):
    """Return every line of the log file at ``file_path`` as a list of strings.

    Lines keep their trailing newline, exactly as ``readlines()`` yields them.

    The file is decoded as UTF-8 explicitly rather than with the platform
    default, so results do not vary between machines. Bytes that are not
    valid UTF-8 are replaced with U+FFFD instead of raising
    ``UnicodeDecodeError``: one malformed request path or user agent should
    not abort the whole analysis.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(file_path, 'r', encoding='utf-8', errors='replace') as log_file:
        return log_file.readlines()

In [26]:
# Count how many requests each client IP made
def tally_requests_by_ip(log_lines):
    """Count log lines per leading IPv4-style address.

    Only lines that begin with a dotted-quad (four dot-separated digit runs)
    are counted; anything else is ignored.

    Returns:
        A list of ``(ip, count)`` tuples ordered from most to fewest
        requests, as produced by ``Counter.most_common()``.
    """
    leading_ip = re.compile(r'^(\d+\.\d+\.\d+\.\d+)')
    hits = (leading_ip.match(entry) for entry in log_lines)
    per_ip = Counter(hit.group(1) for hit in hits if hit)
    return per_ip.most_common()

In [27]:
# Find the request path that appears most often in the log
def get_most_requested_endpoint(log_lines):
    """Return the most frequently requested endpoint and its hit count.

    The endpoint is the token between the upper-case HTTP method and the
    ``HTTP`` protocol marker inside the quoted request string. Lines with no
    such request string are skipped.

    Returns:
        ``(endpoint, count)`` for the top endpoint, or ``(None, 0)`` when no
        line contains a recognisable request.
    """
    request_target = re.compile(r'\"[A-Z]+\s([^\s]+)\sHTTP')
    found = (request_target.search(entry) for entry in log_lines)
    hits_per_endpoint = Counter(hit.group(1) for hit in found if hit)

    # Guard clause: nothing matched, so there is no "top" endpoint to report.
    if not hits_per_endpoint:
        return (None, 0)

    (winner,) = hits_per_endpoint.most_common(1)
    return winner

In [28]:
# Function to find IPs with suspicious failed login attempts
def find_suspicious_ips(log_lines, threshold=None):
    """Return IPs whose failed-login (HTTP 401) count exceeds a threshold.

    A line counts as a failed login only when ``401`` is the status field,
    i.e. the token immediately after the first double-quoted section (the
    request string). Matching a bare ``401`` anywhere in the line would also
    count successful requests whose path, timestamp or response size happens
    to contain those digits.

    Args:
        log_lines: iterable of raw log lines.
        threshold: an IP is reported when its failure count is strictly
            greater than this value. ``None`` (the default) uses the
            module-level ``FAILED_LOGIN_THRESHOLD``.

    Returns:
        A list of ``(ip, failed_count)`` tuples sorted by count, highest
        first; IPs with equal counts keep first-seen order.
    """
    if threshold is None:
        # Resolved at call time so later changes to the constant are honoured.
        threshold = FAILED_LOGIN_THRESHOLD

    # IP at line start, then everything up to the first quote, the quoted
    # request itself, and finally 401 as a whole token in the status position.
    failed_login_pattern = re.compile(r'^(\d+\.\d+\.\d+\.\d+)[^"]*"[^"]*"\s+401\b')
    failed_login_counter = Counter()

    for line in log_lines:
        match = failed_login_pattern.match(line)
        if match:
            failed_login_counter[match.group(1)] += 1

    suspicious_ips = [(ip, count) for ip, count in failed_login_counter.items() if count > threshold]
    return sorted(suspicious_ips, key=lambda x: x[1], reverse=True)


In [29]:
# Dump the three report sections into a single CSV file
def write_to_csv(requests, most_accessed_endpoint, suspicious_ips, output_filename):
    """Write the analysis results to ``output_filename`` as CSV.

    The file holds three sections, each made of a one-cell title row, a
    header row and the data rows, with an empty row between sections:

    1. ``requests``: rows of (IP, request count).
    2. ``most_accessed_endpoint``: one (endpoint, access count) row.
    3. ``suspicious_ips``: rows of (IP, failed login count).

    An existing file at ``output_filename`` is overwritten.
    """
    report_sections = (
        ('Requests per IP', ['IP Address', 'Request Count'], requests),
        ('Most Accessed Endpoint', ['Endpoint', 'Access Count'], [most_accessed_endpoint]),
        ('Suspicious Activity', ['IP Address', 'Failed Login Count'], suspicious_ips),
    )

    with open(output_filename, 'w', newline='') as csvfile:
        out = csv.writer(csvfile)
        for position, (title, header, data_rows) in enumerate(report_sections):
            if position:
                # Blank separator row before every section except the first.
                out.writerow([])
            out.writerow([title])
            out.writerow(header)
            out.writerows(data_rows)


In [8]:
# Main function to coordinate log processing and output results
def process_log_data(log_file='sample.log', output_filename='log_analysis_results.csv'):
    """Analyse a web access log, print a summary and save it as CSV.

    Reads ``log_file``, then reports three things to stdout and to
    ``output_filename``: request counts per IP, the most requested
    endpoint, and IPs flagged by ``find_suspicious_ips``.

    Args:
        log_file: path of the access log to analyse. Defaults to
            ``'sample.log'``, the path that was previously hard-coded.
        output_filename: path of the CSV report to write. Defaults to
            ``'log_analysis_results.csv'``, the path that was previously
            hard-coded.
    """
    log_lines = read_log_file(log_file)

    # Tally IP addresses and their request counts
    ip_requests = tally_requests_by_ip(log_lines)
    print("Requests per IP Address:")
    for ip, count in ip_requests:
        print(f"{ip:<20} {count}")
    print()

    # Identify the most frequently accessed endpoint
    most_accessed_endpoint, access_count = get_most_requested_endpoint(log_lines)
    print("Most Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint} (Accessed {access_count} times)")
    print()

    # Detect suspicious activity based on failed login attempts
    suspicious_ips = find_suspicious_ips(log_lines)
    print("Suspicious Activity Detected:")
    if suspicious_ips:
        for ip, count in suspicious_ips:
            print(f"{ip:<20} {count}")
    else:
        print("No suspicious activity detected.")
    print()

    # Save the results into a CSV file; the message names the file actually
    # written so it stays accurate when a custom output path is passed.
    write_to_csv(ip_requests, (most_accessed_endpoint, access_count), suspicious_ips, output_filename)
    print(f"Results saved to {output_filename}")


In [30]:
# Entry point: run the full analysis with its default input and output paths.
# The guard limits this to execution as the main module, so importing the
# code elsewhere does not trigger the analysis.
if __name__ == "__main__":
    process_log_data()

Requests per IP Address:
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
203.0.113.5          8
192.168.1.100        5

Results saved to log_analysis_results.csv
