In [2]:
import re
import csv
from collections import defaultdict, Counter


In [5]:
# Path to the log file
log_file_path = 'sample.log'
output_file = 'log_analysis_results.csv'

In [8]:
def parse_log_file(file_path):
    # Regular expression to match IPv4 addresses
    ip_regex = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'

    # Dictionary to store IP addresses
    ip_counter = Counter()

    # Open and read the log file
    with open(file_path, 'r') as file:
        for line in file:
            # Find all IP addresses in the line
            ips = re.findall(ip_regex, line)
            # Increment the count for each found IP
            ip_counter.update(ips)

    # Sort the results in descending order of request count
    sorted_ips = sorted(ip_counter.items(), key=lambda x: x[1], reverse=True)
    
    # Display the results
    print(f"{'IP Address':<20}{'Request Count'}")
    for ip, count in sorted_ips:
        print(f"{ip:<20}{count}")

# Call the function
parse_log_file(log_file_path)


IP Address          Request Count
192.168.1.100       12
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6


In [None]:
def find_most_frequent_endpoint(log_file_path):
    """
    Log File through Extracts endpoints and identifies the most frequently accessed one.
    """
    # Regular expression for matching endpoints (e.g., URLs or paths)
    endpoint_pattern = r'\"[A-Z]+\s(\/[^\s]*)\s'
    endpoint_counts = defaultdict(int)

    try:
        with open(log_file_path, 'r') as file:
            for line in file:
                # Extract endpoint from the current line
                match = re.search(endpoint_pattern, line)
                if match:
                    endpoint = match.group(1)
                    endpoint_counts[endpoint] += 1
    except FileNotFoundError:
        print(f"The file {log_file_path} does not exist.")
        return None, 0
    except Exception as e:
        print(f"An error occurred: {e}")
        return None, 0

    # Find the most frequently accessed endpoint
    most_frequent_endpoint = max(endpoint_counts.items(), key=lambda item: item[1], default=(None, 0))
    return most_frequent_endpoint

# Example usage
endpoint, count = find_most_frequent_endpoint(log_file_path)

if endpoint:
    print(f"Most Frequently Accessed Endpoint:\n{endpoint} (Accessed {count} times)")
else:
    print("No endpoints found in the log file.")


Most Frequently Accessed Endpoint:
/login (Accessed 20 times)


In [None]:
def detect_brute_force_attempts(log_file_path, threshold=10):
    """
    Identifies potential brute force login attempts by recognizing IP addresses with a high number of failed login attempts.
    """
    # Patterns for matching failed login attempts and extracting IP addresses
    failed_login_pattern = r"(401|Invalid credentials)"
    ip_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'

    failed_attempts = defaultdict(int)

    try:
        with open(log_file_path, 'r') as file:
            for line in file:
                # Check if the line indicates a failed login attempt
                if re.search(failed_login_pattern, line, re.IGNORECASE):
                    # Extract the IP address from the line
                    match = re.search(ip_pattern, line)
                    if match:
                        ip = match.group(0)
                        failed_attempts[ip] += 1
    except FileNotFoundError:
        print(f"The file {log_file_path} does not exist.")
        return {}
    except Exception as e:
        print(f"An error occurred: {e}")
        return {}

    # Filter IPs exceeding the threshold
    flagged_ips = {ip: count for ip, count in failed_attempts.items() if count > threshold}
    return flagged_ips

# Example usage
threshold = 10  # Configurable threshold
suspicious_ips = detect_brute_force_attempts(log_file_path, threshold)

if suspicious_ips:
    print("Suspicious Activity Detected:")
    print(f"{'IP Address':<20} {'Failed Login Attempts'}")
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20} {count}")
else:
    print("No suspicious activity detected.")


Suspicious Activity Detected:
IP Address           Failed Login Attempts
192.168.1.100        12


In [None]:
def count_requests_by_ip(log_file_path):
    """Made by Each IP Address thorugh Counts the number of requests."""
    ip_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'
    ip_request_count = defaultdict(int)

    try:
        with open(log_file_path, 'r') as file:
            for line in file:
                ips = re.findall(ip_pattern, line)
                for ip in ips:
                    ip_request_count[ip] += 1
    except FileNotFoundError:
        print(f"The file {log_file_path} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    return dict(ip_request_count)

def find_most_frequent_endpoint(log_file_path):
    """Finds the most frequently accessed endpoint."""
    endpoint_pattern = r'\"[A-Z]+\s(\/[^\s]*)\s'
    endpoint_counts = defaultdict(int)

    try:
        with open(log_file_path, 'r') as file:
            for line in file:
                match = re.search(endpoint_pattern, line)
                if match:
                    endpoint = match.group(1)
                    endpoint_counts[endpoint] += 1
    except FileNotFoundError:
        print(f"The file {log_file_path} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    most_frequent_endpoint = max(endpoint_counts.items(), key=lambda item: item[1], default=(None, 0))
    return most_frequent_endpoint

def detect_brute_force_attempts(log_file_path, threshold=10):
    """Detects potential brute force login attempts."""
    failed_login_pattern = r"(401|Invalid credentials)"
    ip_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'

    failed_attempts = defaultdict(int)

    try:
        with open(log_file_path, 'r') as file:
            for line in file:
                if re.search(failed_login_pattern, line, re.IGNORECASE):
                    match = re.search(ip_pattern, line)
                    if match:
                        ip = match.group(0)
                        failed_attempts[ip] += 1
    except FileNotFoundError:
        print(f"The file {log_file_path} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    flagged_ips = {ip: count for ip, count in failed_attempts.items() if count > threshold}
    return flagged_ips

def save_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips, output_file):
    """Saves the analysis results to a CSV file."""
    try:
        with open(output_file, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # Write Requests per IP
            writer.writerow(["Requests per IP"])
            writer.writerow(["IP Address", "Request Count"])
            for ip, count in ip_requests.items():
                writer.writerow([ip, count])

            # Write Most Accessed Endpoint
            writer.writerow([])
            writer.writerow(["Most Accessed Endpoint"])
            writer.writerow(["Endpoint", "Access Count"])
            writer.writerow([most_accessed_endpoint[0], most_accessed_endpoint[1]])

            # Write Suspicious Activity
            writer.writerow([])
            writer.writerow(["Suspicious Activity"])
            writer.writerow(["IP Address", "Failed Login Count"])
            for ip, count in suspicious_ips.items():
                writer.writerow([ip, count])
    except Exception as e:
        print(f"Error saving to CSV: {e}")

def main():
    # Perform analysis
    ip_requests = count_requests_by_ip(log_file_path)
    most_accessed_endpoint = find_most_frequent_endpoint(log_file_path)
    suspicious_ips = detect_brute_force_attempts(log_file_path)

    # Display results in terminal
    print("Requests per IP:")
    print(f"{'IP Address':<20} {'Request Count'}")
    for ip, count in ip_requests.items():
        print(f"{ip:<20} {count}")

    print("\nMost Accessed Endpoint:")
    print(f"{most_accessed_endpoint[0]} (Accessed {most_accessed_endpoint[1]} times)")

    print("\nSuspicious Activity Detected:")
    if suspicious_ips:
        print(f"{'IP Address':<20} {'Failed Login Attempts'}")
        for ip, count in suspicious_ips.items():
            print(f"{ip:<20} {count}")
    else:
        print("No suspicious activity detected.")

    # Save results to CSV
    save_to_csv(ip_requests, most_accessed_endpoint, suspicious_ips, output_file)
    print(f"\nResults saved to {output_file}")

if __name__ == "__main__":
    main()


Requests per IP:
IP Address           Request Count
192.168.1.1          7
203.0.113.5          8
10.0.0.2             6
198.51.100.23        8
192.168.1.100        12

Most Accessed Endpoint:
/login (Accessed 20 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
192.168.1.100        12

Results saved to log_analysis_results.csv
