# **VRV Security’s Python Intern Assignment**

Submitted by: Ambekar Tejas


# **Sample input file**

Download the sample input file: [sample.log](https://drive.google.com/file/d/1LUoGl_iAR4cPQXgv4ZCivNFcQzucLWti/view?usp=sharing)

In [2]:
import re
import csv
from collections import defaultdict

# Configuration for failed login detection.
# An IP is flagged only when its failed-login count is strictly greater than
# this value (detect_suspicious_activity compares with `>`).
FAILED_LOGIN_THRESHOLD = 10

# Function to parse the log file and extract necessary information
def parse_log(file_path):
    """
    Parses the log file to extract:
    - IP addresses and their request counts
    - Endpoints and their access counts
    - Failed login attempts (for detecting suspicious activity)

    A line counts as a failed login when its HTTP status code is 401 or when
    it contains the text "Invalid credentials". The status code is only
    looked for directly after a closing double quote (the end of the quoted
    request), so a "401" that appears elsewhere in the line - for example as
    the response size - is not counted.

    Args:
        file_path: Path of the log file to read.

    Returns:
        - ip_request_count: Dictionary of IP addresses with their request counts
        - endpoint_access_count: Dictionary of endpoints with their access counts
        - failed_login_attempts: Dictionary of IP addresses with their failed login counts
    """

    # Initialize dictionaries to hold the counts
    ip_request_count = defaultdict(int)
    endpoint_access_count = defaultdict(int)
    failed_login_attempts = defaultdict(int)

    # Regular expressions to match log data (compiled once, outside the loop)
    ip_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')  # IP address pattern (IPv4)
    endpoint_pattern = re.compile(r'"(?:GET|POST|PUT|DELETE) (\S+)')  # Path after the HTTP method
    # Status 401 must directly follow a closing quote; a bare '401' would also
    # match response sizes or other numbers that merely contain those digits.
    failed_login_pattern = re.compile(r'"\s+401\b|Invalid credentials')

    # Open and read the log file line by line
    with open(file_path, 'r') as file:
        for log in file:
            # Extract IP address (first IPv4-looking token on the line)
            ip_match = ip_pattern.search(log)
            ip = ip_match.group(1) if ip_match else None
            if ip is not None:
                ip_request_count[ip] += 1  # Count requests by IP

            # Extract endpoint
            endpoint_match = endpoint_pattern.search(log)
            if endpoint_match:
                endpoint_access_count[endpoint_match.group(1)] += 1  # Count accesses to the endpoint

            # A failed login is only attributed when the line also has an IP
            if ip is not None and failed_login_pattern.search(log):
                failed_login_attempts[ip] += 1

    return ip_request_count, endpoint_access_count, failed_login_attempts


# Function to detect suspicious activity (IP addresses with too many failed logins)
def detect_suspicious_activity(failed_login_attempts, threshold=None):
    """
    Detects suspicious activity by identifying IP addresses
    that have failed login attempts exceeding the threshold.

    Args:
        failed_login_attempts: Dictionary of IP addresses with their failed login counts.
        threshold: Optional limit to compare against. When omitted (None),
            the module-level FAILED_LOGIN_THRESHOLD is used.

    Returns:
        Dictionary of the IP addresses whose count is strictly greater than
        the threshold, mapped to that count.
    """
    # Compare against None explicitly so that a threshold of 0 is honoured
    if threshold is None:
        threshold = FAILED_LOGIN_THRESHOLD
    return {ip: count for ip, count in failed_login_attempts.items() if count > threshold}


# Function to display results in terminal
def display_results(ip_request_count, endpoint_access_count, suspicious_activity):
    """
    Displays the following information in the terminal:
    - Requests per IP Address (highest request count first)
    - Most Frequently Accessed Endpoint
    - Suspicious Activity (the IP addresses and counts passed in)

    Args:
        ip_request_count: Dictionary of IP addresses with their request counts.
        endpoint_access_count: Dictionary of endpoints with their access counts.
            May be empty, in which case only the section header is printed.
        suspicious_activity: Dictionary of flagged IP addresses with their
            failed login counts.
    """

    print("Requests per IP Address:")
    print(f"{'IP Address':<20} {'Request Count'}")
    for ip, count in sorted(ip_request_count.items(), key=lambda x: x[1], reverse=True):
        print(f"{ip:<20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{'Endpoint':<30} {'Access Count'}")
    # max() raises ValueError on an empty dictionary, which happens when no
    # log line contained a recognised request; skip the row in that case.
    if endpoint_access_count:
        most_accessed_endpoint = max(endpoint_access_count, key=endpoint_access_count.get)
        print(f"{most_accessed_endpoint:<30} {endpoint_access_count[most_accessed_endpoint]}")

    print("\nSuspicious Activity Detected:")
    print(f"{'IP Address':<20} {'Failed Login Attempts'}")
    for ip, count in suspicious_activity.items():
        print(f"{ip:<20} {count}")


# Function to save results to CSV
def save_to_csv(ip_request_count, endpoint_access_count, suspicious_activity,
                output_path='log_analysis_results_fin.csv'):
    """
    Saves the analysis results into a CSV file, formatted with:
    - Requests per IP Address (highest request count first)
    - The most accessed endpoint
    - Suspicious IP addresses with failed login counts

    The three sections are separated by a blank row.

    Args:
        ip_request_count: Dictionary of IP addresses with their request counts.
        endpoint_access_count: Dictionary of endpoints with their access counts.
            May be empty, in which case only the section header is written.
        suspicious_activity: Dictionary of flagged IP addresses with their
            failed login counts.
        output_path: Path of the CSV file to write. Defaults to
            'log_analysis_results_fin.csv' in the current working directory.
    """

    # Work out the top endpoint before opening the file. max() raises
    # ValueError on an empty dictionary, so that case is handled explicitly
    # instead of failing halfway through and leaving a truncated CSV behind.
    most_accessed_endpoint = None
    if endpoint_access_count:
        most_accessed_endpoint = max(endpoint_access_count, key=endpoint_access_count.get)

    # newline='' lets the csv module control line endings itself
    with open(output_path, 'w', newline='') as file:
        writer = csv.writer(file)

        # Writing Requests per IP
        writer.writerow(['IP Address', 'Request Count'])
        for ip, count in sorted(ip_request_count.items(), key=lambda x: x[1], reverse=True):
            writer.writerow([ip, count])

        # Writing Most Accessed Endpoint (Only the most accessed endpoint)
        writer.writerow([])  # Blank line for separation
        writer.writerow(['Endpoint', 'Access Count'])
        if most_accessed_endpoint is not None:
            writer.writerow([most_accessed_endpoint, endpoint_access_count[most_accessed_endpoint]])

        # Writing Suspicious Activity
        writer.writerow([])  # Blank line for separation
        writer.writerow(['IP Address', 'Failed Login Count'])
        for ip, count in suspicious_activity.items():
            writer.writerow([ip, count])


# Main function to perform log analysis
def main(file_path='/content/sample.log'):
    """
    Main function to:
    1. Parse the log file.
    2. Detect suspicious activity.
    3. Display results on the terminal.
    4. Save the results to a CSV file.

    Args:
        file_path: Path to the log file to analyse. The default is the
            location originally hard-coded here; pass a different path when
            the file lives elsewhere.
    """

    # Parsing the log file
    ip_request_count, endpoint_access_count, failed_login_attempts = parse_log(file_path)

    # Detect suspicious activity (failed login attempts)
    suspicious_activity = detect_suspicious_activity(failed_login_attempts)

    # Display results in the terminal
    display_results(ip_request_count, endpoint_access_count, suspicious_activity)

    # Save results to CSV
    save_to_csv(ip_request_count, endpoint_access_count, suspicious_activity)


# Run the analysis only when this file is executed directly, not when imported
if __name__ == '__main__':
    main()


Requests per IP Address:
IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
Endpoint                       Access Count
/login                         13

Suspicious Activity Detected:
IP Address           Failed Login Attempts
