<a href="https://colab.research.google.com/github/AbhilashSatheesh/Log-Analysis-Script/blob/main/log_analysis_script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import re
import csv
from collections import defaultdict

# File paths
# Input access log; main() passes it to parse_log_file().
log_file = "sample.log"
# Destination of the CSV report opened for writing by save_results_to_csv().
output_csv = "log_analysis_results.csv"

# Threshold for detecting suspicious activity
# detect_suspicious_activity() flags an IP when its failed-login count is
# greater than or equal to this value.
FAILED_LOGIN_THRESHOLD = 7

# Parse the log file and extract information
def parse_log_file(file_path):
    """Scan an access log and tally requests, endpoints and failed logins.

    Each line is inspected independently:

    * a leading dotted-quad IPv4 address increments that IP's request count;
    * a quoted request of the form ``"METHOD /path HTTP..."`` increments the
      count for ``/path`` (any upper-case HTTP method is accepted);
    * the line counts as a failed login when the status code following the
      quoted request is ``401`` or the text ``Invalid credentials`` appears
      anywhere in the line. Failed logins are attributed to the line's IP,
      so lines without a leading IP are not counted.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_requests, endpoint_requests, failed_logins)`` of
        ``defaultdict(int)`` objects mapping IP -> request count,
        endpoint -> request count and IP -> failed-login count.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Compile once; the patterns are applied to every line of the file.
    ip_pattern = re.compile(r'^(\d+\.\d+\.\d+\.\d+)')
    endpoint_pattern = re.compile(r'"[A-Z]+ (.*?) HTTP')
    # Status code is the three-digit field right after the closing quote of
    # the request. Matching it explicitly avoids treating a "401" that occurs
    # in a byte count, timestamp or path as a failed login.
    status_pattern = re.compile(r'HTTP/[\d.]+"\s+(\d{3})\b')

    ip_requests = defaultdict(int)
    endpoint_requests = defaultdict(int)
    failed_logins = defaultdict(int)

    # errors="replace" keeps a single undecodable byte from aborting the run.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        for line in file:
            # Extract IP address
            ip_match = ip_pattern.match(line)
            ip_address = ip_match.group(1) if ip_match else None
            if ip_address is not None:
                ip_requests[ip_address] += 1

            # Extract endpoint
            endpoint_match = endpoint_pattern.search(line)
            if endpoint_match:
                endpoint_requests[endpoint_match.group(1)] += 1

            # Detect failed login attempts
            status_match = status_pattern.search(line)
            unauthorized = (
                status_match is not None and status_match.group(1) == "401"
            )
            if (unauthorized or "Invalid credentials" in line) and ip_address is not None:
                failed_logins[ip_address] += 1

    return ip_requests, endpoint_requests, failed_logins

# Rank IP addresses by request volume and print the ranking
def count_requests_per_ip(ip_requests):
    """Print a table of request counts per IP, busiest first.

    Args:
        ip_requests: Mapping of IP address -> number of requests.

    Returns:
        A list of ``(ip, count)`` pairs ordered by descending count; pairs
        with equal counts keep the mapping's iteration order.
    """
    ranking = list(ip_requests.items())
    # Stable in-place sort: ties stay in their original relative order.
    ranking.sort(key=lambda entry: entry[1], reverse=True)

    print("IP Address           Request Count")
    for address, hits in ranking:
        print(f"{address:<20} {hits}")

    return ranking

# Identify and display the most frequently accessed endpoint
def most_frequent_endpoint(endpoint_requests):
    """Print and return the endpoint with the highest request count.

    Args:
        endpoint_requests: Mapping of endpoint -> number of requests.

    Returns:
        A tuple ``(endpoint, count)``. When several endpoints share the
        highest count, the first one in the mapping's iteration order wins.
        When the mapping is empty, ``(None, 0)`` is returned so callers still
        receive a two-element row.
    """
    print("\nMost Frequently Accessed Endpoint:")

    # max() raises ValueError on an empty iterable; a log without any
    # recognisable request line must not crash the whole analysis.
    if not endpoint_requests:
        print("No endpoint requests found")
        return None, 0

    endpoint, count = max(endpoint_requests.items(), key=lambda x: x[1])
    print(f"{endpoint} (Accessed {count} times)")
    return endpoint, count

# Detect and display suspicious IPs with failed login attempts
def detect_suspicious_activity(failed_logins, threshold=None):
    """Print and return the IPs whose failed-login count reaches a threshold.

    Args:
        failed_logins: Mapping of IP address -> number of failed logins.
        threshold: Minimum failed-login count (inclusive) for an IP to be
            flagged. Defaults to the module-level ``FAILED_LOGIN_THRESHOLD``.

    Returns:
        A list of ``(ip, count)`` pairs for the flagged IPs, in the mapping's
        iteration order.
    """
    if threshold is None:
        # Looked up at call time so a changed module constant is honoured.
        threshold = FAILED_LOGIN_THRESHOLD

    print("\nSuspicious Activity Detected:")
    print("IP Address           Failed Login Attempts")
    flagged_ips = [
        (ip, count) for ip, count in failed_logins.items() if count >= threshold
    ]
    for ip, count in flagged_ips:
        print(f"{ip:<20} {count}")
    return flagged_ips

# Save the results to a CSV file
def save_results_to_csv(ip_requests, endpoint_info, flagged_ips, output_path=None):
    """Write the three analysis sections to a single CSV file.

    The file contains, in order, a "Requests per IP" section, a
    "Most Accessed Endpoint" section and a "Suspicious Activity" section.
    Each section has a title row and a header row, and consecutive sections
    are separated by an empty row.

    Args:
        ip_requests: Iterable of ``(ip, count)`` rows.
        endpoint_info: A single ``(endpoint, count)`` row.
        flagged_ips: Iterable of ``(ip, failed_login_count)`` rows.
        output_path: Destination file. Defaults to the module-level
            ``output_csv`` path.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    if output_path is None:
        # Looked up at call time so a changed module constant is honoured.
        output_path = output_csv

    # newline='' lets the csv module control line endings; explicit UTF-8
    # keeps the report independent of the platform's default encoding.
    with open(output_path, "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)

        # Requests per IP
        writer.writerow(["Requests per IP"])
        writer.writerow(["IP Address", "Request Count"])
        writer.writerows(ip_requests)
        writer.writerow([])

        # Most Accessed Endpoint
        writer.writerow(["Most Accessed Endpoint"])
        writer.writerow(["Endpoint", "Access Count"])
        writer.writerow(endpoint_info)
        writer.writerow([])

        # Suspicious Activity
        writer.writerow(["Suspicious Activity"])
        writer.writerow(["IP Address", "Failed Login Count"])
        writer.writerows(flagged_ips)


def main():
    """Run the full analysis: parse the log, print each report, save the CSV."""
    requests_by_ip, requests_by_endpoint, failures_by_ip = parse_log_file(log_file)

    print("\n--- Log Analysis Results ---")

    # Each reporting step prints its own section and hands back the rows
    # that go into the CSV; the call order fixes the order of the output.
    ranked_ips = count_requests_per_ip(requests_by_ip)
    top_endpoint = most_frequent_endpoint(requests_by_endpoint)
    suspects = detect_suspicious_activity(failures_by_ip)

    save_results_to_csv(ranked_ips, top_endpoint, suspects)

    print(f"\nResults saved to {output_csv}")


# Run the analysis only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()



--- Log Analysis Results ---
IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8

Results saved to log_analysis_results.csv
