In [1]:
import re
import csv
from collections import Counter
import os

 Check the files in the current directory

In [2]:
# Confirm the uploaded log is present before trying to open it.
print("Files in current directory:", os.listdir('.'))

Files in current directory: ['.config', 'sample.log', 'sample_data']


Open and read the content of the uploaded log file

In [3]:
# Read the whole log into memory as a list of lines (trailing newlines are kept).
with open('sample.log', 'r') as log_file:
    log_lines = list(log_file)

In [5]:
# Echo the full list of raw log lines as this cell's output.
log_lines

['192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512\n',
 '203.0.113.5 - - [03/Dec/2024:10:12:35 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"\n',
 '10.0.0.2 - - [03/Dec/2024:10:12:36 +0000] "GET /about HTTP/1.1" 200 256\n',
 '192.168.1.1 - - [03/Dec/2024:10:12:37 +0000] "GET /contact HTTP/1.1" 200 312\n',
 '198.51.100.23 - - [03/Dec/2024:10:12:38 +0000] "POST /register HTTP/1.1" 200 128\n',
 '203.0.113.5 - - [03/Dec/2024:10:12:39 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"\n',
 '192.168.1.100 - - [03/Dec/2024:10:12:40 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"\n',
 '10.0.0.2 - - [03/Dec/2024:10:12:41 +0000] "GET /dashboard HTTP/1.1" 200 1024\n',
 '198.51.100.23 - - [03/Dec/2024:10:12:42 +0000] "GET /about HTTP/1.1" 200 256\n',
 '192.168.1.1 - - [03/Dec/2024:10:12:43 +0000] "GET /dashboard HTTP/1.1" 200 1024\n',
 '203.0.113.5 - - [03/Dec/2024:10:12:44 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"\n',
 '

 Display the first 5 lines to check the content

In [6]:
# Sanity check: the first five raw lines, then the overall line count.
print(f"Preview of log file: {log_lines[:5]}")
print(f"Total number of lines: {len(log_lines)}")

Preview of log file: ['192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512\n', '203.0.113.5 - - [03/Dec/2024:10:12:35 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"\n', '10.0.0.2 - - [03/Dec/2024:10:12:36 +0000] "GET /about HTTP/1.1" 200 256\n', '192.168.1.1 - - [03/Dec/2024:10:12:37 +0000] "GET /contact HTTP/1.1" 200 312\n', '198.51.100.23 - - [03/Dec/2024:10:12:38 +0000] "POST /register HTTP/1.1" 200 128\n']
Total number of lines: 34


Threshold for suspicious activity

In [7]:
# An IP is reported as suspicious only when its failed-login count is
# strictly greater than this value (the detection helper compares with `>`).
THRESHOLD = 1

Helper Functions

In [8]:
def count_requests_per_ip(lines):
    """Count the number of requests per IP address.

    Each log line contributes at most one request, attributed to the first
    IPv4-looking token found on that line. IP-like text that appears later in
    the same line (for example inside the request path or a quoted message)
    is ignored, so it cannot inflate the totals.

    Args:
        lines: Iterable of log-line strings.

    Returns:
        A list of ``(ip, request_count)`` tuples ordered from most to fewest
        requests, as produced by ``Counter.most_common()``. Empty input
        yields an empty list.
    """
    ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
    # search() stops at the first hit, so only one IP is taken per line.
    first_ip_matches = (ip_pattern.search(line) for line in lines)
    ip_counts = Counter(match.group() for match in first_ip_matches if match)
    return ip_counts.most_common()

In [9]:
def most_accessed_endpoint(lines):
    """Return the most frequently requested path together with its hit count.

    A path is the run of non-whitespace text starting with ``/`` that follows
    a double quote, an upper-case method token and one whitespace character.

    Args:
        lines: Iterable of log-line strings; they are joined with newlines
            and scanned as a single text.

    Returns:
        An ``(endpoint, count)`` tuple for the top endpoint, or
        ``("None", 0)`` when no request path is found.
    """
    request_path = re.compile(r'\"[A-Z]+\s(\/[^\s]*)')
    log_text = '\n'.join(lines)
    hits_by_path = Counter(found.group(1) for found in request_path.finditer(log_text))
    if not hits_by_path:
        return ("None", 0)
    (top_entry,) = hits_by_path.most_common(1)
    return top_entry

In [10]:
def detect_suspicious_activity(lines, threshold):
    """Detect suspicious activity based on failed login attempts.

    A line counts as a failed login when its HTTP status field is 401 or when
    it contains the text "Invalid credentials". The status is read from the
    field that directly follows the closing quote of the request string, so a
    "401" that merely appears inside a byte count, timestamp, path or address
    is not mistaken for a failure. The failure is attributed to the first
    IPv4-looking token on the line.

    Args:
        lines: Iterable of log-line strings.
        threshold: An IP is reported only when its failed-attempt count is
            strictly greater than this value.

    Returns:
        A list of ``(ip, failed_count)`` tuples for every IP above the
        threshold, in the order each IP's first failure was seen.

    Side effects:
        Prints one line per failed attempt showing the IP's running count.
    """
    ip_pattern = re.compile(r'\b(?:\d{1,3}\.){3}\d{1,3}\b')
    # Closing quote of the request, whitespace, then the status 401 with no
    # further digit after it (so "4012" or "1401" do not qualify).
    status_401_pattern = re.compile(r'"\s+401(?!\d)')
    failed_logins = Counter()
    for line in lines:
        if status_401_pattern.search(line) or "Invalid credentials" in line:
            ip = ip_pattern.search(line)
            if ip:
                failed_logins[ip.group()] += 1
                print(f"Detected Suspicious Activity: {ip.group()} | Count: {failed_logins[ip.group()]}")
    return [(ip, count) for ip, count in failed_logins.items() if count > threshold]

Analysis

In [11]:
# Tally requests by IP and print one "ip: count" line per address.
requests_per_ip = count_requests_per_ip(lines=log_lines)
print("Requests per IP:")
for ip, count in requests_per_ip:
    print(ip, count, sep=": ")

Requests per IP:
203.0.113.5: 8
198.51.100.23: 8
192.168.1.1: 7
10.0.0.2: 6
192.168.1.100: 5


Most accessed endpoint

In [12]:
# Unpack the (endpoint, hit count) pair returned for the top endpoint.
(most_accessed, access_count) = most_accessed_endpoint(lines=log_lines)
print("\nMost Accessed Endpoint:")
print(most_accessed, "accessed", access_count, "times")



Most Accessed Endpoint:
/login accessed 13 times


Suspicious activity

In [13]:
# Collect the IPs whose failed-login count exceeds THRESHOLD and list them.
suspicious_activity = detect_suspicious_activity(log_lines, threshold=THRESHOLD)
print("\nSuspicious Activity:")
for ip, count in suspicious_activity:
    print(f"{ip}:", count, "failed login attempts")

Detected Suspicious Activity: 203.0.113.5 | Count: 1
Detected Suspicious Activity: 203.0.113.5 | Count: 2
Detected Suspicious Activity: 192.168.1.100 | Count: 1
Detected Suspicious Activity: 203.0.113.5 | Count: 3
Detected Suspicious Activity: 203.0.113.5 | Count: 4
Detected Suspicious Activity: 192.168.1.100 | Count: 2
Detected Suspicious Activity: 203.0.113.5 | Count: 5
Detected Suspicious Activity: 203.0.113.5 | Count: 6
Detected Suspicious Activity: 192.168.1.100 | Count: 3
Detected Suspicious Activity: 192.168.1.100 | Count: 4
Detected Suspicious Activity: 203.0.113.5 | Count: 7
Detected Suspicious Activity: 203.0.113.5 | Count: 8
Detected Suspicious Activity: 192.168.1.100 | Count: 5

Suspicious Activity:
203.0.113.5: 8 failed login attempts
192.168.1.100: 5 failed login attempts


 Save Results to CSV

In [16]:
# Write the result sections one after another into a single CSV file.
csv_filename = "log_analysis_results.csv"
with open(csv_filename, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Section 1: request totals per IP (header row followed by the data rows).
    csv_writer.writerows([["IP Address", "Request Count"], *requests_per_ip])

    # Section 2: the single most accessed endpoint.
    csv_writer.writerows([
        ["Endpoint", "Access Count"],
        [most_accessed, access_count],
    ])

    # Section 3: flagged IPs; written only when at least one was found.
    if not suspicious_activity:
        print("No suspicious activity detected.")
    else:
        csv_writer.writerows([["IP Address", "Failed Login Count"], *suspicious_activity])

print(f"\nResults saved to {csv_filename}")


Results saved to log_analysis_results.csv
