In [1]:
import re
import csv  
from collections import Counter 

In [2]:
def read_log_file(file_path):
    """Read a log file and return its lines.

    Args:
        file_path: Path of the log file to read.

    Returns:
        list[str]: Every line of the file in order, each still carrying its
        line terminator (as produced by ``readlines``).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Decode explicitly as UTF-8 instead of the platform default, and replace
    # undecodable bytes with U+FFFD so a single malformed request line cannot
    # abort the whole analysis with UnicodeDecodeError.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        return file.readlines()

def count_requests_per_ip(log_lines):
    """Count log lines per IPv4-looking address.

    For each line, the first run of four dot-separated digit groups is taken
    as the address (octets are not range-checked). Lines without such a run
    are ignored.

    Args:
        log_lines: Iterable of log line strings.

    Returns:
        collections.Counter: Mapping of address string to the number of lines
        on which it was the first address found, in first-seen order.
    """
    ip_pattern = re.compile(r"(\d+\.\d+\.\d+\.\d+)")
    ip_counts = Counter()
    for line in log_lines:
        # Search once per line and reuse the match for both the filter and
        # the extracted value.
        match = ip_pattern.search(line)
        if match:
            ip_counts[match.group()] += 1
    return ip_counts

def find_most_accessed_endpoint(log_lines):
    """Return the most frequently requested path and its hit count.

    A request is recognised as a double quote, an upper-case method, a path
    starting with "/", and then "HTTP/". The path is taken verbatim, so any
    query string is part of it. Lines without such a request are ignored.

    Args:
        log_lines: Iterable of log line strings.

    Returns:
        tuple: ``(endpoint, count)`` for the most common path. On a tie the
        path seen first wins. If no line contains a request, ``(None, 0)``
        is returned instead of raising ``IndexError``.
    """
    endpoint_pattern = re.compile(r"\"[A-Z]+\s(\/\S*)\sHTTP\/")
    endpoint_counts = Counter()
    for line in log_lines:
        # Search once per line and reuse the match.
        match = endpoint_pattern.search(line)
        if match:
            endpoint_counts[match.group(1)] += 1

    # An empty log (or one with no parsable request) has no "most common"
    # entry; most_common(1) would be [] and indexing it would raise.
    if not endpoint_counts:
        return (None, 0)
    return endpoint_counts.most_common(1)[0]

def detect_suspicious_activity(log_lines, threshold):
    """Find addresses with more than ``threshold`` failed login attempts.

    A failed login is a line that contains an IPv4-looking address followed
    later by a quoted request starting with ``POST /login`` whose closing
    quote is immediately followed by the status code 401.

    Args:
        log_lines: Iterable of log line strings.
        threshold: Addresses are reported only when their failed-login count
            is strictly greater than this value.

    Returns:
        dict: Mapping of address string to failed-login count for every
        address above the threshold (empty if none).
    """
    # The status must be the field right after the quoted request. Matching
    # "401" anywhere later in the line would also count, for example, a
    # successful login whose response size is 4012 bytes.
    suspicious_pattern = re.compile(
        r'(\d+\.\d+\.\d+\.\d+).+"POST\s/login[^"]*"\s+401\b'
    )
    failed_logins = Counter()
    for line in log_lines:
        # Search once per line and reuse the match.
        match = suspicious_pattern.search(line)
        if match:
            failed_logins[match.group(1)] += 1
    return {ip: count for ip, count in failed_logins.items() if count > threshold}


In [3]:
def save_to_csv(ip_counts, most_accessed_endpoint, suspicious_activity, output_file):
    """Write the three analysis results to one CSV file.

    The file holds three tables, each with its own header row and separated
    from the next by an empty row: requests per IP, the single most accessed
    endpoint, and failed-login counts per IP.

    Args:
        ip_counts: Mapping of IP address to request count.
        most_accessed_endpoint: Indexable pair of (endpoint, access count).
        suspicious_activity: Mapping of IP address to failed-login count.
        output_file: Path of the CSV file to create or overwrite.
    """
    # newline="" lets the csv writer control line endings itself.
    with open(output_file, "w", newline="") as handle:
        out = csv.writer(handle)

        # Table 1: requests per IP address.
        out.writerow(["IP Address", "Request Count"])
        out.writerows([address, total] for address, total in ip_counts.items())

        # Table 2: the most accessed endpoint (a single data row).
        out.writerow([])
        out.writerow(["Endpoint", "Access Count"])
        endpoint, hits = most_accessed_endpoint[0], most_accessed_endpoint[1]
        out.writerow([endpoint, hits])

        # Table 3: IP addresses flagged for failed logins.
        out.writerow([])
        out.writerow(["IP Address", "Failed Login Count"])
        out.writerows(
            [address, failures] for address, failures in suspicious_activity.items()
        )


In [7]:
# Configuration: the log to analyse and the failed-login cutoff handed to
# detect_suspicious_activity (which reports counts strictly greater than it).
log_file = "sample.log"  
failed_login_threshold = 10 

# Read the log once; all three analyses below work on the same list of lines.
log_lines = read_log_file(log_file)

# Run the analyses: per-IP request counts, the (endpoint, count) pair for the
# most requested path, and the IPs whose failed logins exceed the threshold.
ip_counts = count_requests_per_ip(log_lines)
most_accessed_endpoint = find_most_accessed_endpoint(log_lines)
suspicious_activity = detect_suspicious_activity(log_lines, failed_login_threshold)

# Report 1: one row per IP, with the address padded to 20 columns.
print("Requests per IP Address:")
for ip, count in ip_counts.items():
    print(f"{ip:20} {count}")

# Report 2: the single most requested endpoint and its hit count.
print("\nMost Frequently Accessed Endpoint:")
print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

# Report 3: flagged IPs; only the heading is printed when none were flagged.
print("\nSuspicious Activity Detected:")
for ip, count in suspicious_activity.items():
    print(f"{ip:20} {count}")

# Save results to CSV
# NOTE(review): the output path appears both in the call and in the message
# below; keep the two literals in sync if the path changes.
save_to_csv(ip_counts, most_accessed_endpoint, suspicious_activity, "log_analysis_results.csv")
print("\nResults saved to 'log_analysis_results.csv'")


Requests per IP Address:
192.168.1.1          2
203.0.113.5          2
10.0.0.2             1
198.51.100.23        1

Most Frequently Accessed Endpoint:
/login (Accessed 2 times)

Suspicious Activity Detected:

Results saved to 'log_analysis_results.csv'
