In [1]:
#1 # - Count Requests per IP Address:
   # - Parse the provided log file to extract all IP addresses.
   # - Calculate the number of requests made by each IP address.
   # - Sort and display the results in descending order of request counts.
   # - Example output:
#    IP Address           Request Count
#192.168.1.1          234
#203.0.113.5          187
#10.0.0.2             92

In [17]:
#2. -Identify the Most Frequently Accessed Endpoint**:
#   - Extract the endpoints (e.g., URLs or resource paths) from the log file.
#  - Identify the endpoint accessed the highest number of times.
# - Provide the endpoint name and its access count.
    #- Example output:
        # Most Frequently Accessed Endpoint:
        #/home (Accessed 403 times)

In [9]:
#3. Detect Suspicious Activity:
#    - Identify potential brute force login attempts by:
#        - Searching for log entries with failed login attempts (e.g., HTTP status code `401` or a specific failure message like "Invalid credentials").
#        - Flagging IP addresses with failed login attempts exceeding a configurable threshold (default: 10 attempts).
#    - Display the flagged IP addresses and their failed login counts.
#    - Example output:
       # Suspicious Activity Detected:
#IP Address           Failed Login Attempts
#192.168.1.100        56
#203.0.113.34         12

In [7]:
import csv
from collections import Counter

# To open the log file
with open("sample.log", "r") as log_file:
    log_data = log_file.read()

log_lines = log_data.splitlines()

#  To Extract data from log entries and Collect all IP addresses
ip_addresses = [line.split()[0] for line in log_lines]

# To Collect all accessed URLs
accessed_urls = [line.split()[6] for line in log_lines]

# To Find failed login attempts 
failed_login_lines = [line for line in log_lines if '401' in line or 'Invalid credentials'.lower() in line.lower()]
failed_login_ips = [line.split()[0] for line in failed_login_lines]

# To Analyze data and Count requests by IP
ip_requests = Counter(ip_addresses)

# Here to Count URL accesses
url_accesses = Counter(accessed_urls)

# Here to Count failed login attempts by IP
failed_logins = Counter(failed_login_ips)

# To Detect suspicious IPs 
threshold = 10
suspicious_ips = {ip: count for ip, count in failed_logins.items() if count > threshold}

# To Display results and Requests per IP
print("\nRequests per IP Address:")
print("{:<20}{}".format("IP Address", "Request Count"))
for ip, count in ip_requests.items():
    print("{:<20}{}".format(ip, count))

# To check Most accessed endpoint
most_common_url = url_accesses.most_common(1)[0]
print("\nMost Accessed Endpoint:")
print("{} (Accessed {} times)".format(most_common_url[0], most_common_url[1]))

# See if any Suspicious activity is detected
print("\nSuspicious Activity Detected:")
if suspicious_ips:
    print("{:<20}{}".format("IP Address", "Failed Login Attempts"))
    for ip, count in suspicious_ips.items():
        print("{:<20}{}".format(ip, count))
else:
    print("No suspicious activity detected.")

# To Save results to a CSV file
with open("log_analysis_results.csv", "w", newline="") as file:
    writer = csv.writer(file)

    # Requests per IP result of 1st question
    writer.writerow(["Requests per IP"])
    writer.writerow(["IP Address", "Request Count"])
    for ip, count in ip_requests.items():
        writer.writerow([ip, count])

    writer.writerow([])  # Blank row for separation

    # Most Accessed Endpoint result for 2nd question
    writer.writerow(["Most Accessed Endpoint"])
    writer.writerow(["Endpoint", "Access Count"])
    writer.writerow([most_common_url[0], most_common_url[1]])

    writer.writerow([])  # Blank row for separation

    # Suspicious Activity result of 3rd question
    writer.writerow(["Suspicious Activity"])
    writer.writerow(["IP Address", "Failed Login Attempts"])
    if suspicious_ips:
        for ip, count in suspicious_ips.items():
            writer.writerow([ip, count])
    else:
        writer.writerow(["No suspicious activity detected."])

print("\nResults saved to 'log_analysis_results.csv'") #all output saved in csv file



Requests per IP Address:
IP Address          Request Count
192.168.1.1         7
203.0.113.5         8
10.0.0.2            6
198.51.100.23       8
192.168.1.100       5

Most Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to 'log_analysis_results.csv'
