# LogInsight

### Mount Google Drive (if required)

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

Mounted at /content/drive


### Importing necessary libraries:

In [4]:
import re
import csv
from collections import Counter
from collections import defaultdict

### Define the log file path

In [5]:
# Log file read by each analysis function below. The path is under /content/drive,
# the Drive mount point used in the mount cell, so Drive presumably has to be
# mounted there for this file to be found.
log_file_path = "/content/drive/MyDrive/VRV Assessment/sample.log"

### 1. Count Requests per IP Address:

In [30]:
def count_requests_per_ip(log_file_path):
    """Tally log lines per IP address and print the tally as a table.

    Each line contributes at most one hit: only the first dotted-quad found
    on the line is counted, and lines containing none are skipped.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A list of (ip, count) tuples ordered by count, highest first.
        An empty list is returned (after printing an error message) when the
        file is missing or any other exception is raised while processing.
    """
    ip_regex = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
    hits_by_ip = Counter()

    try:
        with open(log_file_path, 'r') as handle:
            for record in handle:
                first_hit = ip_regex.search(record)
                if first_hit is None:
                    continue
                hits_by_ip[first_hit.group(1)] += 1

        # The sort is stable, so addresses with equal counts stay in the
        # order in which they were first seen in the file.
        ranked = list(hits_by_ip.items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        # Fixed-width two-column table: header first, then one row per address.
        print(f"{'IP Address':<20}{'Request Count':<15}")
        for ip, count in ranked:
            print(f"{ip:<20}{count:<15}")
    except FileNotFoundError:
        print("Error: Log file not found.")
        return []
    except Exception as e:
        print(f"Error processing log file for IP requests: {e}")
        return []

    return ranked

Call the function to display the results in descending order of request counts

In [31]:
# Prints the per-IP table and keeps the sorted (ip, count) list, which is
# passed to save_to_csv in the export cell.
ip_counts = count_requests_per_ip(log_file_path)

IP Address          Request Count  
203.0.113.5         8              
198.51.100.23       8              
192.168.1.1         7              
10.0.0.2            6              
192.168.1.100       5              


### 2. Identify the Most Frequently Accessed Endpoint:

In [28]:
def identify_most_frequent_endpoint(log_file_path):
    """Find the endpoint requested most often in the log file and print it.

    An endpoint is the first whitespace-delimited token that follows a quoted
    GET, POST, PUT or DELETE method. Only the first such request on each line
    is counted.

    Args:
        log_file_path: Path of the log file to read.

    Returns:
        A tuple (endpoint, access_count) for the most frequent endpoint.
        (None, 0) is returned when the file contains no matching request,
        when the file is missing, or when any other exception is raised;
        a message describing the situation is printed in each case.
    """
    # Regular expression pattern to capture the path that follows the HTTP method
    endpoint_pattern = r'"(?:GET|POST|PUT|DELETE) (\S+)'
    endpoint_counts = Counter()

    try:
        # Open the log file for reading and iterate through each line
        with open(log_file_path, 'r') as log_file:
            for line in log_file:
                match = re.search(endpoint_pattern, line)
                if match:
                    endpoint_counts[match.group(1)] += 1

        # An empty counter would make most_common(1)[0] raise IndexError, which
        # the generic handler below would report as a cryptic processing error.
        if not endpoint_counts:
            print("No endpoints found in the log file.")
            return None, 0

        # Identify the most frequently accessed endpoint
        most_frequent_endpoint, access_count = endpoint_counts.most_common(1)[0]

        # Print the results
        print(f"Most Frequently Accessed Endpoint:\n {most_frequent_endpoint} (Accessed {access_count} times)")
        return most_frequent_endpoint, access_count

    # Handle the case where the log file is missing or any other unexpected errors
    except FileNotFoundError:
        print("Error: Log file not found.")
        return None, 0
    except Exception as e:
        print(f"Error processing log file for endpoints: {e}")
        return None, 0

Call the function to display the most frequently accessed endpoint

In [29]:
# Prints the top endpoint; both returned values are reused by save_to_csv
# in the export cell.
most_frequent_endpoint, access_count = identify_most_frequent_endpoint(log_file_path)

Most Frequently Accessed Endpoint:
 /login (Accessed 13 times)


### 3. Detect Suspicious Activity:

In [10]:
def detect_suspicious_activity(log_file_path, threshold=10):
    """Report IP addresses with many failed (HTTP 401) POST requests.

    A line counts as a failed login when it has the shape
    `<ip> - - [<timestamp>] "POST ..." 401 <size>`. Anything after the
    response size (for example a quoted failure message) is optional.

    Args:
        log_file_path: Path of the log file to read.
        threshold: Minimum number of failed attempts for an IP address to be
            reported. The comparison is inclusive (count >= threshold).

    Returns:
        A dict mapping each flagged IP address to its failed-attempt count.
        An empty dict is returned (after printing an error message) when the
        file is missing or any other exception is raised while processing.
    """
    # The bracket and quote contents use negated character classes so a match
    # cannot run past the timestamp or the request string into later fields.
    # No trailing quoted message is required, so bare 401 lines are counted too.
    failed_login_pattern = r'(\d+\.\d+\.\d+\.\d+) - - \[[^\]]*\] "POST [^"]*" 401 \d+'
    failed_logins = defaultdict(int)

    try:
        # Open the log file for reading and iterate through each line
        with open(log_file_path, 'r') as log_file:
            for line in log_file:
                match = re.search(failed_login_pattern, line)
                if match:
                    ip_address = match.group(1)
                    failed_logins[ip_address] += 1

        # Keep only IP addresses whose failed attempts are at or above the threshold
        suspicious_ips = {ip: count for ip, count in failed_logins.items() if count >= threshold}
        # Print the results
        if suspicious_ips:
            print("Suspicious Activity Detected:")
            print("IP Address           Failed Login Attempts")
            for ip, count in suspicious_ips.items():
                print(f"{ip:<20} {count}")
        else:
            print("No suspicious activity detected.")

        return suspicious_ips
    # Handle the case where the log file is missing or any other unexpected errors
    except FileNotFoundError:
        print("Error: Log file not found.")
        return {}
    except Exception as e:
        print(f"Error processing log file for suspicious activity: {e}")
        return {}

Call the function to display suspicious activity

In [11]:
# threshold=5 overrides the function's default of 10; the returned dict is
# passed to save_to_csv in the export cell.
suspicious_ips = detect_suspicious_activity(log_file_path, threshold=5)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8
192.168.1.100        5


### 4. Output Results:

In [14]:
def save_to_csv(ip_counts, most_frequent_endpoint, access_count, suspicious_ips, filename="log_analysis_results.csv"):
    """Write the three analysis results to one CSV file as stacked tables.

    The file holds, in order, a "Requests per IP" table, a "Most Accessed
    Endpoint" table and a "Suspicious Activity" table. Each table has a title
    row and a header row, and consecutive tables are separated by an empty row.

    Args:
        ip_counts: Iterable of (ip, count) pairs.
        most_frequent_endpoint: Endpoint written to the second table.
        access_count: Access count written next to that endpoint.
        suspicious_ips: Mapping of IP address to failed login count.
        filename: Destination path; an existing file is overwritten.

    Returns:
        None. A confirmation is printed on success; on failure an error
        message is printed instead of raising.
    """
    try:
        # newline='' leaves line-ending handling to the csv writer.
        with open(filename, mode='w', newline='') as out:
            table = csv.writer(out)

            # Table 1: requests per IP address
            table.writerows([["Requests per IP"], ["IP Address", "Request Count"]])
            table.writerows([ip, count] for ip, count in ip_counts)
            table.writerow([])

            # Table 2: most accessed endpoint
            table.writerows([
                ["Most Accessed Endpoint"],
                ["Endpoint", "Access Count"],
                [most_frequent_endpoint, access_count],
            ])
            table.writerow([])

            # Table 3: suspicious activity
            table.writerows([["Suspicious Activity"], ["IP Address", "Failed Login Count"]])
            table.writerows([ip, count] for ip, count in suspicious_ips.items())

        print(f"The log analysis results are saved in '{filename}'.")
    except PermissionError:
        print(f"Error: Permission denied while writing to '{filename}'.")
    except Exception as e:
        print(f"Error saving results to CSV: {e}")

Call the function to save the results to a CSV file with different tables for each topic

In [15]:
if __name__ == "__main__":
    # save_to_csv prints its own messages for write failures, so this guard
    # only reports errors raised while evaluating the call itself, such as a
    # result variable that is undefined because its cell was not run.
    try:
        save_to_csv(ip_counts, most_frequent_endpoint, access_count, suspicious_ips)
    except Exception as e:
        print(f"Unexpected error: {e}")

The log analysis results are saved in 'log_analysis_results.csv'.
