In [6]:
import re
import csv
from collections import defaultdict

# Configurable threshold for suspicious activity detection: an IP is reported
# only when its failed-login count is strictly greater than this value.
FAILED_LOGIN_THRESHOLD = 10

# Parses the log file and extracts data on IP requests, endpoints, and failed logins.
def parse_log_file(file_path):
    """Scan an access log and tally requests per IP, hits per endpoint and failed logins per IP.

    The first whitespace-delimited token of each line is taken as the client
    IP. A request is recognised by a quoted ``"METHOD path HTTP/x[.y]"``
    section followed by a numeric status code; a status of ``401`` counts as
    a failed login for that line's IP.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_requests, endpoint_counts, failed_logins)`` of
        ``defaultdict(int)`` objects mapping IP -> request count,
        endpoint -> hit count and IP -> number of 401 responses.

    Raises:
        OSError: If the file cannot be opened.
    """
    ip_requests = defaultdict(int)
    endpoint_counts = defaultdict(int)
    failed_logins = defaultdict(int)

    # Compiled once instead of being re-specified for every line.
    ip_pattern = re.compile(r'(\S+)')
    # Accept any HTTP version (1.0, 1.1, 2, 2.0, ...) rather than only the
    # literal "HTTP/1.1", and do not let an unescaped dot match any character.
    request_pattern = re.compile(r'"[A-Z]+\s(\S+)\sHTTP/\d+(?:\.\d+)?"\s(\d+)')

    # Logs can contain arbitrary client-supplied bytes; replace undecodable
    # ones instead of aborting the whole analysis.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            # Reset per line so a failed login is never attributed to the IP
            # seen on an earlier line (or to an unbound name on the first one).
            ip = None
            ip_match = ip_pattern.match(line)
            if ip_match:
                ip = ip_match.group(1)
                ip_requests[ip] += 1

            endpoint_match = request_pattern.search(line)
            if endpoint_match is None:
                continue

            endpoint, status = endpoint_match.groups()
            endpoint_counts[endpoint] += 1

            if status == '401' and ip is not None:
                failed_logins[ip] += 1

    return ip_requests, endpoint_counts, failed_logins

# Analyzes the log file data to derive insights
def analyze_logs(file_path):
    """Parse a log file and summarise traffic, the busiest endpoint and suspicious IPs.

    Args:
        file_path: Path of the log file, passed straight to ``parse_log_file``.

    Returns:
        A tuple ``(sorted_ips, most_accessed, suspicious_ips)`` where

        * ``sorted_ips`` is a list of ``(ip, request_count)`` pairs ordered by
          descending request count,
        * ``most_accessed`` is the ``(endpoint, count)`` pair with the highest
          count, or ``(None, 0)`` when no endpoint was found in the log,
        * ``suspicious_ips`` is a list of ``(ip, failed_count)`` pairs whose
          failed-login count is strictly greater than
          ``FAILED_LOGIN_THRESHOLD``.
    """
    ip_requests, endpoint_counts, failed_logins = parse_log_file(file_path)

    sorted_ips = sorted(ip_requests.items(), key=lambda x: x[1], reverse=True)
    # An empty log (or one without recognisable request lines) yields no
    # endpoints; fall back to a placeholder instead of letting max() raise
    # ValueError on an empty sequence.
    most_accessed = max(endpoint_counts.items(), key=lambda x: x[1], default=(None, 0))
    suspicious_ips = [(ip, count) for ip, count in failed_logins.items() if count > FAILED_LOGIN_THRESHOLD]

    return sorted_ips, most_accessed, suspicious_ips

#  Saves the analyzed data to a CSV file
def save_to_csv(ip_data, endpoint_data, suspicious_data, output_file):
    """Write the three report sections to ``output_file`` as CSV.

    The file holds, in order: the per-IP request counts, the most accessed
    endpoint, and the suspicious-activity list. Each section is introduced by
    its header row(s) and sections are separated by an empty row.

    Args:
        ip_data: Iterable of ``(ip, request_count)`` rows.
        endpoint_data: Single ``(endpoint, access_count)`` row.
        suspicious_data: Iterable of ``(ip, failed_login_count)`` rows.
        output_file: Destination path; an existing file is overwritten.
    """
    separator = []

    with open(output_file, 'w', newline='') as handle:
        # Section 1: requests per IP address
        rows = [["IP Address", "Request Count"]]
        rows.extend(ip_data)

        # Section 2: the single most accessed endpoint
        rows += [
            separator,
            ["Most Accessed Endpoint"],
            ["Endpoint", "Access Count"],
            endpoint_data,
        ]

        # Section 3: IPs flagged for failed logins
        rows += [
            separator,
            ["Suspicious Activity"],
            ["IP Address", "Failed Login Attempts"],
        ]
        rows.extend(suspicious_data)

        csv.writer(handle).writerows(rows)

# Function to display results in the terminal
def display_results(ip_data, endpoint_data, suspicious_data):
    """Print the analysis report to standard output.

    Args:
        ip_data: Iterable of ``(ip, request_count)`` pairs.
        endpoint_data: Indexable pair ``(endpoint, access_count)``.
        suspicious_data: Iterable of ``(ip, failed_login_count)`` pairs.
    """
    def emit_table(title, column_header, rows):
        # Title line, column header line, then one tab-separated line per row.
        print(title)
        print(column_header)
        for address, total in rows:
            print(f"{address}\t\t{total}")

    emit_table("\nRequests Per IP Address:", "IP Address\t\tRequest Count", ip_data)

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{endpoint_data[0]} (Accessed {endpoint_data[1]} times)")

    emit_table(
        "\nSuspicious Activity Detected:",
        "IP Address\t\tFailed Login Attempts",
        suspicious_data,
    )

# Main function
def main():
    """Analyze ``sample.log``, print the report and save it as CSV."""
    source_path = "sample.log"  # Log file path
    report_path = "log_analysis_results.csv"

    # analyze_logs returns (ip_data, endpoint_data, suspicious_data), which is
    # exactly the leading argument order of both consumers below.
    results = analyze_logs(source_path)

    display_results(*results)
    save_to_csv(*results, report_path)

# Run the analysis only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()



Requests Per IP Address:
IP Address		Request Count
203.0.113.5		8
198.51.100.23		8
192.168.1.1		7
10.0.0.2		6
192.168.1.100		5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address		Failed Login Attempts
