In [26]:
# Function to count requests per IP address
def count_requests_per_ip(log_file):
    # Dictionary-like structure for counting requests by IP address
    request_counts = {}

    # Open the log file and process each line
    with open(log_file, 'r') as file:
        for line in file:
            # Skip empty lines or lines that don't contain enough parts
            parts = line.split()
            if len(parts) < 1:  # Ensure the line has at least one part
                continue

            # Extract the IP address (assuming it's the first part)
            ip_address = parts[0]

            # Increment the request count for this IP
            if ip_address in request_counts:
                request_counts[ip_address] += 1
            else:
                request_counts[ip_address] = 1

    # Sort the results by request count in descending order
    sorted_requests = sorted(request_counts.items(), key=lambda x: x[1], reverse=True)

    # Display the results
    print("IP Address           Request Count")
    for ip_address, count in sorted_requests:
        print(f"{ip_address:<20} {count}")

# Define the path to the log file
log_file_path = "/content/sample.log"

# Call the function with the specified log file
count_requests_per_ip(log_file_path)


IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5
.LOG                 1


In [27]:
# Function to identify the most frequently accessed endpoint
def most_frequent_endpoint(log_file):
    # Dictionary-like structure to count accesses per endpoint
    endpoint_counts = {}

    # Open the log file and process each line
    with open(log_file, 'r') as file:
        for line in file:
            # Example log: "192.168.1.1 - - [timestamp] \"GET /home HTTP/1.1\" 200"
            parts = line.split()
            if len(parts) < 7:  # Ensure the line has enough parts
                continue

            # Extract the endpoint (assuming it's the 7th element in quotes)
            request_part = parts[5]  # e.g., "\"GET"
            endpoint = parts[6]      # e.g., "/home"

            # Increment the count for this endpoint
            if endpoint in endpoint_counts:
                endpoint_counts[endpoint] += 1
            else:
                endpoint_counts[endpoint] = 1

    # Find the endpoint with the highest count
    most_frequent = max(endpoint_counts.items(), key=lambda x: x[1])

    # Display the result
    print("Most Frequently Accessed Endpoint:")
    print(f"{most_frequent[0]} (Accessed {most_frequent[1]} times)")

# Define the path to the log file
log_file_path = "/content/sample.log"

# Call the function with the specified log file
most_frequent_endpoint(log_file_path)


Most Frequently Accessed Endpoint:
/login (Accessed 13 times)


In [48]:
# Function to detect suspicious activity
def detect_suspicious_activity(log_file, threshold=10):
    failed_attempts = {}
    with open(log_file, 'r') as file:
        for line in file:
            if "401" in line or "Invalid credentials" in line:
                parts = line.split()
                if len(parts) < 1:
                    continue
                ip_address = parts[0]
                if ip_address in failed_attempts:
                    failed_attempts[ip_address] += 1
                else:
                    failed_attempts[ip_address] = 1
    print("\nSuspicious Activity Detected:")
    print("IP Address           Failed Login Attempts")
    for ip_address, count in failed_attempts.items():
        if count > threshold:
            print(f"{ip_address:<20} {count}")
    return failed_attempts

log_file_path = "/content/sample.log"
detect_suspicious_activity(log_file_path, threshold=1)


Suspicious Activity Detected:
IP Address           Failed Login Attempts
203.0.113.5          8
192.168.1.100        5


{'203.0.113.5': 8, '192.168.1.100': 5}

In [49]:
# Main script
log_file_path = "/content/sample.log"
output_file_path = "log_analysis_results.csv"

# Calculate results
requests_per_ip = count_requests_per_ip(log_file_path)
most_accessed = most_frequent_endpoint(log_file_path)
suspicious_activity = detect_suspicious_activity(log_file_path)  # This now returns a dictionary

# Display results
print("Requests per IP:")
for ip, count in requests_per_ip.items():
    print(f"{ip:<20} {count}")

print("\nMost Accessed Endpoint:")
print(f"{most_accessed[0]} (Accessed {most_accessed[1]} times)")

# Handle suspicious activity results
if suspicious_activity:  # Now this will work correctly
    print("\nSuspicious Activity Detected:")
    print("IP Address Failed Login Count")
    for ip, count in suspicious_activity.items():
        print(f"{ip:<20} {count}")
else:
    print("\nNo Suspicious Activity Detected.")

# Save results to CSV
save_to_csv(requests_per_ip, most_accessed, suspicious_activity, output_file_path)
print(f"\nResults saved to {output_file_path}")


Suspicious Activity Detected:
IP Address           Failed Login Attempts
Requests per IP:
.LOG                 1
192.168.1.1          7
203.0.113.5          8
10.0.0.2             6
198.51.100.23        8
192.168.1.100        5

Most Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address Failed Login Count
203.0.113.5          8
192.168.1.100        5

Results saved to log_analysis_results.csv
