<a href="https://colab.research.google.com/github/L-SanthoshKumar/log-analysis-script/blob/main/log_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import re
import csv
from collections import defaultdict, Counter

# Input and output locations used by main().
# log_file_path is the access log handed to parse_log_file(); the "/content/"
# prefix presumably refers to the Colab runtime's working directory -- adjust
# it when running elsewhere.
log_file_path = "/content/sample.log"
# output_csv_path is the report written by save_to_csv(); being a relative
# path, it resolves against the current working directory.
output_csv_path = "log_analysis_results.csv"

# Patterns used by parse_log_file, compiled once instead of on every log line.
# Leading dotted-quad IPv4 address at the very start of the line.
_IP_RE = re.compile(r"^(\d{1,3}(?:\.\d{1,3}){3})")
# Opening quote of the request field, an upper-case method token, then the path.
_ENDPOINT_RE = re.compile(r"\"[A-Z]+ ([^\s]+)")
# Three-digit status code that directly follows a closing quote.
_STATUS_RE = re.compile(r"\"\s+(\d{3})(?=\s|$)")


# Function to parse the log file and extract necessary information
def parse_log_file(file_path):
    """Scan an access log and tally requests, endpoints and failed logins.

    Each line is inspected independently:

    * a leading IPv4 address increments that address's request count;
    * a quoted request (``"METHOD /path ...``) increments the path's count,
      whatever the HTTP method is;
    * a line counts as a failed login for its address when the status field
      after the quoted request is exactly ``401`` or when the line contains
      the text ``Invalid credentials``.

    The status is read from its own field rather than by searching the whole
    line for "401", so response sizes such as ``1401`` or paths such as
    ``/items/401`` are not mistaken for authentication failures.

    Args:
        file_path: Path of the log file to read.

    Returns:
        A tuple ``(ip_requests, endpoint_requests, failed_logins)`` of
        ``defaultdict(int)`` objects keyed by IP address, endpoint path and
        IP address respectively.

    Raises:
        OSError: If the file cannot be opened (for example, it is missing).
    """
    ip_requests = defaultdict(int)
    endpoint_requests = defaultdict(int)
    failed_logins = defaultdict(int)

    # errors="replace" keeps a stray undecodable byte from aborting the run;
    # the fields parsed here are plain ASCII, so the counts are unaffected.
    with open(file_path, "r", encoding="utf-8", errors="replace") as log_file:
        for line in log_file:
            # Extract IP Address
            ip_match = _IP_RE.match(line)
            ip_address = ip_match.group(1) if ip_match else None
            if ip_address is not None:
                ip_requests[ip_address] += 1

            # Extract endpoint
            endpoint_match = _ENDPOINT_RE.search(line)
            if endpoint_match:
                endpoint_requests[endpoint_match.group(1)] += 1

            # A failure can only be attributed when the line has an address.
            if ip_address is None:
                continue

            # Check for failed login attempts (HTTP status code 401)
            status_match = _STATUS_RE.search(line)
            unauthorized = status_match is not None and status_match.group(1) == "401"
            if unauthorized or "Invalid credentials" in line:
                failed_logins[ip_address] += 1

    return ip_requests, endpoint_requests, failed_logins

# Function to calculate results
def analyze_logs(ip_requests, endpoint_requests, failed_logins, brute_force_threshold=10):
    """Summarise the parsed counters into the three report sections.

    Args:
        ip_requests: Mapping of IP address to request count.
        endpoint_requests: Mapping of endpoint path to access count.
        failed_logins: Mapping of IP address to failed-login count.
        brute_force_threshold: An address is flagged only when its failed-login
            count is strictly greater than this value.

    Returns:
        A tuple ``(sorted_ip_requests, most_accessed_endpoint,
        suspicious_activity)`` where

        * ``sorted_ip_requests`` is a list of ``(ip, count)`` pairs ordered by
          descending count (ties keep their original mapping order);
        * ``most_accessed_endpoint`` is the ``(endpoint, count)`` pair with the
          highest count, or ``(None, 0)`` when no endpoint was recorded;
        * ``suspicious_activity`` is a dict of the addresses that exceeded the
          threshold, mapped to their failed-login counts.
    """
    # Count requests per IP address
    sorted_ip_requests = sorted(ip_requests.items(), key=lambda item: item[1], reverse=True)

    # Find the most frequently accessed endpoint. The default keeps the
    # two-element shape callers index into and avoids the ValueError that
    # max() raises on an empty mapping (e.g. a log with no request lines).
    most_accessed_endpoint = max(
        endpoint_requests.items(), key=lambda item: item[1], default=(None, 0)
    )

    # Detect suspicious activity
    suspicious_activity = {
        ip: count for ip, count in failed_logins.items() if count > brute_force_threshold
    }

    return sorted_ip_requests, most_accessed_endpoint, suspicious_activity

# Function to save results to a CSV file
def save_to_csv(ip_requests, most_accessed_endpoint, suspicious_activity, output_file):
    """Write the three report sections to ``output_file`` as CSV.

    The file holds, in order: the per-IP request counts, the single most
    accessed endpoint, and the addresses flagged as suspicious. Each section
    starts with a title row and a column-header row, and sections are
    separated by one empty row. An existing file is overwritten.

    Args:
        ip_requests: Iterable of ``(ip, request_count)`` rows.
        most_accessed_endpoint: Sequence whose first two items are the endpoint
            and its access count.
        suspicious_activity: Mapping of IP address to failed-login count.
        output_file: Destination path of the CSV file.
    """
    with open(output_file, "w", newline="") as destination:
        top_endpoint_row = [most_accessed_endpoint[0], most_accessed_endpoint[1]]
        report_rows = [
            # Section 1: requests per IP
            ["Requests per IP"],
            ["IP Address", "Request Count"],
            *ip_requests,
            [],
            # Section 2: most accessed endpoint
            ["Most Accessed Endpoint"],
            ["Endpoint", "Access Count"],
            top_endpoint_row,
            [],
            # Section 3: suspicious activity
            ["Suspicious Activity Detected"],
            ["IP Address", "Failed Login Count"],
            *suspicious_activity.items(),
        ]
        csv.writer(destination).writerows(report_rows)

# Entry point: parse the log, analyze it, print the report and export it
def main():
    """Run the whole pipeline on the module-level paths.

    Reads ``log_file_path``, prints the per-IP request counts, the most
    frequently accessed endpoint and the flagged addresses, then writes the
    same data to ``output_csv_path``.
    """

    def show_counts(pairs):
        # One "<label padded to 20 columns> <count>" line per pair.
        for label, total in pairs:
            print(f"{label:<20} {total}")

    # Parse, then analyze with the default brute-force threshold.
    per_ip, per_endpoint, per_ip_failures = parse_log_file(log_file_path)
    ranked_ips, top_endpoint, flagged_ips = analyze_logs(
        per_ip, per_endpoint, per_ip_failures
    )

    # Console report
    print("Requests per IP:")
    show_counts(ranked_ips)

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{top_endpoint[0]} (Accessed {top_endpoint[1]} times)")

    print("\nSuspicious Activity Detected:")
    show_counts(flagged_ips.items())

    # CSV export
    save_to_csv(ranked_ips, top_endpoint, flagged_ips, output_csv_path)
    print(f"\nResults saved to {output_csv_path}")


if __name__ == "__main__":
    main()


Requests per IP:
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:

Results saved to log_analysis_results.csv
