In [1]:
import re

# Function to read the log file
def read_log_file(file_path):
    try:
        with open(file_path, 'r') as file:
            logs = file.readlines()
        return logs
    except FileNotFoundError:
        print(f"Error: The file at {file_path} was not found.")
        return []
    except Exception as e:
        print(f"Error reading file: {e}")
        return []

# Path to the sample log file (Update the path if needed)
log_file = '/Users/noorshaik/Documents/sample.log'
logs = read_log_file(log_file)

# Check if logs were read
if logs:
    print("Log file successfully read.")
else:
    print("Log file not found or could not be read.")


Log file successfully read.


In [2]:
from collections import Counter

# Function to count requests per IP address
def count_requests_per_ip(logs):
    ip_pattern = r'^(\d+\.\d+\.\d+\.\d+)'  # Regex to match IP addresses
    ip_counts = Counter(re.match(ip_pattern, log).group(1) for log in logs if re.match(ip_pattern, log))
    return ip_counts

# Get IP counts
ip_counts = count_requests_per_ip(logs)

# Display the results
print("\nIP Address           Request Count")
for ip, count in ip_counts.most_common():
    print(f"{ip:20}{count}")



IP Address           Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5


In [3]:
# Function to identify the most frequently accessed endpoint
def most_frequent_endpoint(logs):
    endpoint_pattern = r'\"[A-Z]+\s(/[\w./-]*)\s'
    endpoints = [re.search(endpoint_pattern, log).group(1) for log in logs if re.search(endpoint_pattern, log)]
    endpoint_counts = Counter(endpoints)
    most_accessed = endpoint_counts.most_common(1)[0]
    return most_accessed

# Get the most accessed endpoint
most_accessed_endpoint, access_count = most_frequent_endpoint(logs)

# Display the result
print(f"\nMost Frequently Accessed Endpoint: {most_accessed_endpoint} (Accessed {access_count} times)")



Most Frequently Accessed Endpoint: /login (Accessed 13 times)


In [4]:
# Function to detect suspicious activity (failed login attempts)
def detect_suspicious_activity(logs, threshold=10):
    suspicious_pattern = r'\"POST /login.*\" 401'
    suspicious_ips = Counter(re.match(r'^(\d+\.\d+\.\d+\.\d+)', log).group(1)
                             for log in logs if re.search(suspicious_pattern, log))
    flagged_ips = {ip: count for ip, count in suspicious_ips.items() if count > threshold}
    return flagged_ips

# Get suspicious activity (failed login attempts)
suspicious_ips = detect_suspicious_activity(logs)

# Display the result
print("\nSuspicious Activity Detected:")
print("IP Address           Failed Login Attempts")
for ip, count in suspicious_ips.items():
    print(f"{ip:20}{count}")



Suspicious Activity Detected:
IP Address           Failed Login Attempts


In [5]:
import csv

# Function to save results to CSV
def save_to_csv(ip_counts, most_accessed, suspicious_ips, file_name='log_analysis_results.csv'):
    try:
        with open(file_name, 'w', newline='') as csvfile:
            writer = csv.writer(csvfile)

            # Write Requests per IP
            writer.writerow(['Requests per IP'])
            writer.writerow(['IP Address', 'Request Count'])
            writer.writerows(ip_counts.items())

            # Write Most Accessed Endpoint
            writer.writerow([])
            writer.writerow(['Most Accessed Endpoint'])
            writer.writerow(['Endpoint', 'Access Count'])
            writer.writerow([most_accessed[0], most_accessed[1]])

            # Write Suspicious Activity
            writer.writerow([])
            writer.writerow(['Suspicious Activity'])
            writer.writerow(['IP Address', 'Failed Login Count'])
            writer.writerows(suspicious_ips.items())

        print(f"\nResults saved to {file_name}")
    except Exception as e:
        print(f"Error saving results: {e}")

# Save the results to a CSV file
csv_file_path = '/Users/noorshaik/Documents/final_log_analysis_results.csv'
save_to_csv(ip_counts, 
            (most_accessed_endpoint, access_count), 
            suspicious_ips, 
            file_name=csv_file_path)



Results saved to /Users/noorshaik/Documents/final_log_analysis_results.csv
