Import libraries

In [7]:
import re # Used for regular expressions to parse and extract information from the log lines
import csv # Used for writing the output into a CSV file
from collections import defaultdict, Counter #From collections, these provide efficient ways to count and organize data

In [8]:
# Configurable threshold for failed login attempts.
# detect_suspicious_activity reports an IP only when its failed-attempt count
# is strictly greater than this value (a count equal to it is not reported).
FAILED_LOGIN_THRESHOLD = 10

Read the log file as a list of lines

In [9]:
def parse_log_file(file_path, encoding="utf-8"):
    """Read a log file and return its lines.

    Args:
        file_path: Path of the log file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        list[str]: Every line of the file, in file order. Each line keeps its
        trailing newline, exactly as ``readlines()`` returns it.

    Raises:
        OSError: If the file cannot be opened (for example, it does not exist).
    """
    # errors='replace': request lines can carry arbitrary client-supplied
    # bytes; one undecodable byte should become U+FFFD rather than abort the
    # whole analysis with UnicodeDecodeError.
    with open(file_path, 'r', encoding=encoding, errors='replace') as log_file:
        return log_file.readlines()

Extract the IP address from each log line and count requests per IP

In [10]:
def count_requests_per_ip(log_lines):
    """Tally how many log lines start with each leading token (the client IP).

    Args:
        log_lines: Iterable of raw log line strings.

    Returns:
        collections.Counter: Maps the first whitespace-delimited token of each
        line to the number of lines that begin with it. Lines that start with
        whitespace (or are empty) are ignored.
    """
    leading_token = re.compile(r'^(\S+)')
    # Lazily match every line, then keep only the lines that matched.
    hits = (leading_token.match(entry) for entry in log_lines)
    return Counter(hit.group(1) for hit in hits if hit)

Find the most frequently accessed endpoint

In [11]:
#Extracts the HTTP method and endpoint (e.g., /home) from the log line using a regex. Counts how many times each endpoint is accessed. Finds the most common endpoint
def find_most_accessed_endpoint(log_lines):
    """Return the endpoint that appears most often in the request lines.

    The endpoint is the token that follows the HTTP method inside the quoted
    request (e.g. ``/home`` in ``"GET /home HTTP/1.1"``).

    Args:
        log_lines: Iterable of raw log line strings.

    Returns:
        tuple: ``(endpoint, count)`` for the most frequent endpoint, or
        ``("None", 0)`` when no line contains a recognizable request.
    """
    # All common HTTP methods are accepted so PATCH/HEAD/OPTIONS traffic is not
    # silently dropped from the tally. The endpoint excludes '"' so a request
    # without a protocol suffix ("GET /home") does not capture the closing quote.
    request_pattern = re.compile(
        r'"(?:GET|POST|PUT|DELETE|PATCH|HEAD|OPTIONS) ([^\s"]+)'
    )
    endpoint_counts = Counter()
    for line in log_lines:
        match = request_pattern.search(line)
        if match:
            endpoint_counts[match.group(1)] += 1
    most_accessed = endpoint_counts.most_common(1)
    return most_accessed[0] if most_accessed else ("None", 0)

Detect suspicious activity (repeated failed login attempts per IP)

In [12]:
def detect_suspicious_activity(log_lines, threshold=None):
    """Find IPs whose number of failed login attempts exceeds a threshold.

    A line counts as a failed attempt when its HTTP status field is 401 or
    when it contains the text "Invalid credentials".

    Args:
        log_lines: Iterable of raw log line strings.
        threshold: Number of failed attempts an IP must exceed to be reported.
            Defaults to the module-level ``FAILED_LOGIN_THRESHOLD``.

    Returns:
        dict: Maps each offending IP (the line's leading token) to its failed
        attempt count. Only IPs with a count strictly greater than the
        threshold are included.
    """
    if threshold is None:
        threshold = FAILED_LOGIN_THRESHOLD

    # The status code is the 3-digit field right after the closing quote of
    # the request. Matching it positionally avoids treating a "401" that is
    # merely part of a path, a response size, or a timestamp as a failure.
    status_pattern = re.compile(r'"\s+(\d{3})(?!\S)')
    ip_pattern = re.compile(r'^(\S+)')

    failed_attempts = Counter()
    for line in log_lines:
        status = status_pattern.search(line)
        is_unauthorized = status is not None and status.group(1) == '401'
        if is_unauthorized or "Invalid credentials" in line:
            ip_match = ip_pattern.match(line)
            if ip_match:
                failed_attempts[ip_match.group(1)] += 1
    return {ip: count for ip, count in failed_attempts.items() if count > threshold}

Save the analysis results to a CSV file

In [13]:
def save_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips, output_file):
    """Write the three analysis sections to one CSV file.

    Sections are written in order (requests per IP, most accessed endpoint,
    suspicious activity), each with its own header row and separated by an
    empty row.

    Args:
        ip_requests: Mapping of IP address to request count.
        most_accessed_endpoint: Indexable pair of (endpoint, access count).
        suspicious_ips: Mapping of IP address to failed login count.
        output_file: Path of the CSV file to create or overwrite.
    """
    with open(output_file, 'w', newline='') as out_handle:
        report = csv.writer(out_handle)

        # Section 1: requests per IP
        report.writerow(["IP Address", "Request Count"])
        report.writerows([addr, hits] for addr, hits in ip_requests.items())

        # Section 2: most accessed endpoint (preceded by a separator row)
        report.writerow([])
        report.writerow(["Most Accessed Endpoint", "Access Count"])
        endpoint_row = [most_accessed_endpoint[0], most_accessed_endpoint[1]]
        report.writerow(endpoint_row)

        # Section 3: suspicious activity (preceded by a separator row)
        report.writerow([])
        report.writerow(["IP Address", "Failed Login Count"])
        report.writerows([addr, failures] for addr, failures in suspicious_ips.items())

Main Function

In [14]:
#Main Function
def main(log_file_path="sample_log.txt", output_csv_file="log_analysis_results.csv"):
    """Run the full log analysis: read, analyze, print, and export to CSV.

    Args:
        log_file_path: Path of the log file to analyze. Defaults to
            "sample_log.txt" so calling ``main()`` with no arguments behaves
            as before.
        output_csv_file: Path of the CSV report to write. Defaults to
            "log_analysis_results.csv".
    """
    # Parse log file
    log_lines = parse_log_file(log_file_path)

    # Perform analysis
    ip_requests = count_requests_per_ip(log_lines)
    most_accessed_endpoint = find_most_accessed_endpoint(log_lines)
    suspicious_ips = detect_suspicious_activity(log_lines)

    # Print results
    print("Requests per IP Address:")
    for ip, count in ip_requests.items():
        print(f"{ip} - {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    # The header is printed even when no IP exceeded the threshold; the
    # section is then simply empty.
    print("\nSuspicious Activity Detected:")
    for ip, count in suspicious_ips.items():
        print(f"{ip} - {count} failed login attempts")

    # Save results to CSV
    save_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips, output_csv_file)
    print(f"\nResults saved to {output_csv_file}")

# Run the analysis only when this code is executed directly (its module name
# is "__main__"), not when it is imported from another module.
if __name__ == "__main__":
    main()


Requests per IP Address:
192.168.1.1 - 7
203.0.113.5 - 8
10.0.0.2 - 6
198.51.100.23 - 8
192.168.1.100 - 5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:

Results saved to log_analysis_results.csv
