In [3]:
import csv
# An IP is reported as suspicious once its failed-login count exceeds this value.
Failed_login_threshold = 10  # the task description specifies 10 as the default

# Module-level tallies; all start out empty.
ip_requests = dict()  # number of requests seen per IP address
endpoint_requests = dict()  # number of requests seen per endpoint
failed_logins = dict()  # number of failed login attempts per IP address

# Where the log is read from and where the CSV report is written.
log_file = r"C:\Users\Admin\Desktop\sample.log"
output_csv = r"C:\Users\Admin\Desktop\DataAnalysis\log_analysis_results.csv"

def parse_log_file(log_file):
    """
    Read an access log and tally requests per IP, per endpoint, and failed logins.

    Every line is split on whitespace. Lines with fewer than 9 fields are
    skipped as malformed. The 1st field is taken as the IP address, the 7th
    as the endpoint and the 9th as the status code. A request counts as a
    failed login when its status code is "401" or the line contains the text
    "Invalid credentials".

    The tallies are collected in dictionaries created fresh on every call, so
    calling the function more than once never double-counts and the result
    does not depend on any module-level state.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A tuple ``(ip_requests, endpoint_requests, failed_logins)`` of dicts
        mapping IP -> request count, endpoint -> request count and
        IP -> failed login count.

    Raises:
        OSError: If the file cannot be opened.
    """
    ip_counts = {}
    endpoint_counts = {}
    failed_login_counts = {}

    # errors="replace": one undecodable byte in the log should not abort the
    # whole analysis; the fields used here are still split out normally.
    with open(log_file, "r", errors="replace") as file:
        for line in file:
            parts = line.split()
            if len(parts) < 9:
                continue  # not enough fields to hold a status code

            ip_address = parts[0]
            endpoint = parts[6]
            status_code = parts[8]

            ip_counts[ip_address] = ip_counts.get(ip_address, 0) + 1
            endpoint_counts[endpoint] = endpoint_counts.get(endpoint, 0) + 1

            if status_code == "401" or "Invalid credentials" in line:
                failed_login_counts[ip_address] = (
                    failed_login_counts.get(ip_address, 0) + 1
                )

    return ip_counts, endpoint_counts, failed_login_counts

def saving_to_csv(ip_requests, most_accessed_endpoint, suspicious_activity, output_file):
    """
    Write the analysis results to a CSV file in three sections.

    The sections are separated by an empty row:
      1. Requests per IP, sorted by request count in descending order.
      2. The most frequently accessed endpoint and its access count.
      3. The suspicious IPs with their failed login attempt counts.

    Args:
        ip_requests: Dict mapping IP address -> request count.
        most_accessed_endpoint: Pair whose first item is the endpoint and
            whose second item is its access count.
        suspicious_activity: Iterable of (IP address, failed login count) pairs.
        output_file: Path of the CSV file to create or overwrite.

    Raises:
        OSError: If the output file cannot be opened for writing.
    """
    # newline="" lets the csv module control line endings itself.
    with open(output_file, "w", newline="") as csvfile:
        writer = csv.writer(csvfile)

        # Section 1: requests per IP, busiest first
        writer.writerow(["IP Address", "Request Count"])
        for ip, count in sorted(ip_requests.items(), key=lambda x: x[1], reverse=True):
            writer.writerow([ip, count])
        writer.writerow([])  # blank separator row

        # Section 2: most accessed endpoint
        writer.writerow(["Most Frequently Accessed Endpoint"])
        writer.writerow(["Endpoint", "Access Count"])
        writer.writerow([most_accessed_endpoint[0], most_accessed_endpoint[1]])
        writer.writerow([])  # blank separator row

        # Section 3: suspicious activity. Each row must come from this loop's
        # own pair, not from variables left over from the section 1 loop.
        writer.writerow(["IP Address", "Failed Login Attempts"])
        for suspicious_ip, failed_count in suspicious_activity:
            writer.writerow([suspicious_ip, failed_count])

def main_file():
    """
    Run the whole analysis: parse the log, derive the results, save them to CSV.

    Reads the log at the module-level ``log_file`` path, determines the most
    accessed endpoint and the IPs whose failed login count exceeds
    ``Failed_login_threshold``, writes everything to ``output_csv`` and prints
    a confirmation message.
    """
    # Parse the log file
    ip_requests, endpoint_requests, failed_logins = parse_log_file(log_file)

    # Most accessed endpoint as an (endpoint, count) pair. max() keeps the
    # first endpoint that reaches the highest count; the default covers a log
    # with no valid lines, where max() would otherwise raise ValueError.
    most_accessed_endpoint = max(
        endpoint_requests.items(), key=lambda item: item[1], default=("", 0)
    )

    # Suspicious activity: IPs with strictly more failed logins than the
    # threshold (the list is empty when no IP exceeds it).
    suspicious_activity = [
        (i, c) for i, c in failed_logins.items() if c > Failed_login_threshold
    ]

    # Save results to CSV
    saving_to_csv(ip_requests, most_accessed_endpoint, suspicious_activity, output_csv)
    print(f"\nResults saved to {output_csv}")

# Run the full analysis as soon as this code is executed.
main_file()



Results saved to C:\Users\Admin\Desktop\DataAnalysis\log_analysis_results.csv
