**Importing Necessary Libraries**

In [1]:
import re
import csv
import pandas as pd
import numpy as np
from collections import defaultdict

**Creating Sample File**

In [2]:
# Sample access-log entries used as the input for the analysis below.
# Each line holds: client IP, two "-" placeholder fields, a bracketed timestamp,
# the quoted request line (method, path, protocol), the HTTP status code and a
# trailing number (presumably the response size in bytes). Entries with status
# 401 additionally carry a quoted "Invalid credentials" message.
log_content = """\
192.168.1.1 - - [03/Dec/2024:10:12:34 +0000] "GET /home HTTP/1.1" 200 512
203.0.113.5 - - [03/Dec/2024:10:12:35 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:36 +0000] "GET /about HTTP/1.1" 200 256
192.168.1.1 - - [03/Dec/2024:10:12:37 +0000] "GET /contact HTTP/1.1" 200 312
198.51.100.23 - - [03/Dec/2024:10:12:38 +0000] "POST /register HTTP/1.1" 200 128
203.0.113.5 - - [03/Dec/2024:10:12:39 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.100 - - [03/Dec/2024:10:12:40 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:41 +0000] "GET /dashboard HTTP/1.1" 200 1024
198.51.100.23 - - [03/Dec/2024:10:12:42 +0000] "GET /about HTTP/1.1" 200 256
192.168.1.1 - - [03/Dec/2024:10:12:43 +0000] "GET /dashboard HTTP/1.1" 200 1024
203.0.113.5 - - [03/Dec/2024:10:12:44 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
203.0.113.5 - - [03/Dec/2024:10:12:45 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.100 - - [03/Dec/2024:10:12:46 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:47 +0000] "GET /profile HTTP/1.1" 200 768
192.168.1.1 - - [03/Dec/2024:10:12:48 +0000] "GET /home HTTP/1.1" 200 512
198.51.100.23 - - [03/Dec/2024:10:12:49 +0000] "POST /feedback HTTP/1.1" 200 128
203.0.113.5 - - [03/Dec/2024:10:12:50 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.1 - - [03/Dec/2024:10:12:51 +0000] "GET /home HTTP/1.1" 200 512
198.51.100.23 - - [03/Dec/2024:10:12:52 +0000] "GET /about HTTP/1.1" 200 256
203.0.113.5 - - [03/Dec/2024:10:12:53 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.100 - - [03/Dec/2024:10:12:54 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:55 +0000] "GET /contact HTTP/1.1" 200 512
198.51.100.23 - - [03/Dec/2024:10:12:56 +0000] "GET /home HTTP/1.1" 200 512
192.168.1.100 - - [03/Dec/2024:10:12:57 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
203.0.113.5 - - [03/Dec/2024:10:12:58 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:12:59 +0000] "GET /dashboard HTTP/1.1" 200 1024
192.168.1.1 - - [03/Dec/2024:10:13:00 +0000] "GET /about HTTP/1.1" 200 256
198.51.100.23 - - [03/Dec/2024:10:13:01 +0000] "POST /register HTTP/1.1" 200 128
203.0.113.5 - - [03/Dec/2024:10:13:02 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
192.168.1.100 - - [03/Dec/2024:10:13:03 +0000] "POST /login HTTP/1.1" 401 128 "Invalid credentials"
10.0.0.2 - - [03/Dec/2024:10:13:04 +0000] "GET /profile HTTP/1.1" 200 768
198.51.100.23 - - [03/Dec/2024:10:13:05 +0000] "GET /about HTTP/1.1" 200 256
192.168.1.1 - - [03/Dec/2024:10:13:06 +0000] "GET /home HTTP/1.1" 200 512
198.51.100.23 - - [03/Dec/2024:10:13:07 +0000] "POST /feedback HTTP/1.1" 200 128
"""

In [3]:
# Save the sample data to a .log file so it can be parsed like a real access log.
# The encoding is given explicitly so the bytes written to disk do not depend on
# the platform's locale default.
with open("sample.log", "w", encoding="utf-8") as file:
    file.write(log_content)

**Parsing the log file to extract data**

In [4]:
def parse_log(file_path):
    """Parse an access log file into a list of request records.

    Each line is expected to look like::

        <ipv4> <ident> <user> [<timestamp>] "<method> <path> <protocol>" <status> ...

    Args:
        file_path: Path of the log file to read.

    Returns:
        A list of dicts, one per line that matched, in file order. Each dict
        has the keys ``'ip'`` (str), ``'endpoint'`` (str, the request path
        starting with ``/``) and ``'status'`` (int). Lines that do not match
        the expected layout are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The ident and user fields are matched with \S+ rather than a literal "-"
    # so that entries carrying an authenticated user name are not silently
    # dropped from the analysis.
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) \S+ \S+ \[.*?\] '
        r'".*? (?P<endpoint>/[^\s]*) .*?" (?P<status>\d+)'
    )
    log_data = []
    # errors='replace' keeps one undecodable byte in a request line from
    # aborting the whole parse with UnicodeDecodeError.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            match = log_pattern.search(line)
            if match:
                log_data.append({
                    'ip': match.group('ip'),
                    'endpoint': match.group('endpoint'),
                    'status': int(match.group('status'))
                })
    return log_data

**Requests per IP**

In [5]:
def count_requests_per_ip(log_data):
    """Tally how many requests each client IP made, busiest first.

    Args:
        log_data: Iterable of records, each providing an ``'ip'`` key.

    Returns:
        A list of ``(ip, request_count)`` tuples sorted by count in descending
        order. IPs with equal counts keep the order in which they first
        appeared in ``log_data``.
    """
    tally = {}
    for record in log_data:
        address = record['ip']
        tally[address] = tally.get(address, 0) + 1

    ranked = list(tally.items())
    # list.sort is stable, so ties stay in first-seen order.
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked

In [6]:
# Load the log data: parse the sample file written earlier into a list of
# {'ip', 'endpoint', 'status'} records that the analysis cells below consume.
log_file = 'sample.log'
log_data = parse_log(log_file)

In [7]:
# Count requests per IP and print them as a two-column table, busiest IP first.
# Each IP is left-aligned in a 20-character column so the counts line up.
ip_counts = count_requests_per_ip(log_data)
print("IP Address          Request Count")
for ip, count in ip_counts:
    print(f"{ip:<20}{count}")

IP Address          Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5


**Most Accessed Endpoint**

In [8]:
def most_frequent_endpoint(log_data):
    """Return the endpoint that was requested most often.

    Args:
        log_data: Iterable of records, each providing an ``'endpoint'`` key.

    Returns:
        An ``(endpoint, access_count)`` tuple. When several endpoints share the
        highest count, the one that appeared first in ``log_data`` is returned.

    Raises:
        ValueError: If ``log_data`` contains no records, since there is no
            endpoint to report.
    """
    endpoint_counts = defaultdict(int)
    for entry in log_data:
        endpoint_counts[entry['endpoint']] += 1

    # max() on an empty sequence raises a ValueError with a message that does
    # not mention the log at all; raise the same exception type with a message
    # that points at the real cause.
    if not endpoint_counts:
        raise ValueError(
            "no log entries to analyse: cannot determine the most accessed endpoint"
        )

    return max(endpoint_counts.items(), key=lambda x: x[1])

In [9]:
# Identify the most frequently accessed endpoint and print it.
# most_accessed is an (endpoint, access_count) tuple.
most_accessed = most_frequent_endpoint(log_data)
print("Most Frequently Accessed Endpoint:")
print(f"{most_accessed[0]} (Accessed {most_accessed[1]} times)")

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)


**Suspicious Activity**

In [10]:
def detect_suspicious_activity(log_data, threshold=10):
    """Find IPs whose number of 401 responses exceeds ``threshold``.

    Args:
        log_data: Iterable of records providing a ``'status'`` key and, for
            records with status 401, an ``'ip'`` key.
        threshold: An IP is reported only when its count of 401 responses is
            strictly greater than this value.

    Returns:
        A dict mapping each flagged IP to its number of 401 responses, ordered
        by the IP's first 401 response in ``log_data``.
    """
    failures = {}
    for record in log_data:
        if record['status'] != 401:
            continue
        source = record['ip']
        failures[source] = failures.get(source, 0) + 1

    flagged = {}
    for source, total in failures.items():
        if total > threshold:
            flagged[source] = total
    return flagged

In [11]:
# Detect suspicious activity: an IP is flagged only when its number of 401
# responses is strictly greater than failed_login_threshold.
failed_login_threshold = 10
suspicious_ips = detect_suspicious_activity(log_data, failed_login_threshold)

In [12]:
# Print the flagged IPs with their failed-login counts, or "None" when nothing
# was flagged.
# NOTE(review): the header pads "IP Address" to 18 columns while the rows pad
# the IP to 20, so the count column sits two characters right of its heading.
print("Suspicious Activity Detected:\nIP Address        Failed Login Attempts")
if not suspicious_ips:
    print("None")
else:
    for ip, count in suspicious_ips.items():
        print(f"{ip:<20}{count}")

Suspicious Activity Detected:
IP Address        Failed Login Attempts
None


**Saving the result in the desired format**

In [13]:
def save_results_to_csv(ip_counts, most_accessed, suspicious_activities, file_path):
    """Write the three analysis results to one CSV file, section by section.

    Every section consists of a banner row, a column-header row, the data rows
    and a terminating empty row. Sections are written in this order: requests
    per IP, most frequently accessed endpoint, suspicious activity.

    Args:
        ip_counts: Iterable of ``(ip, request_count)`` rows.
        most_accessed: A single ``(endpoint, access_count)`` row.
        suspicious_activities: Mapping of IP to failed-login count; its items
            become the rows of the last section.
        file_path: Destination path; an existing file is overwritten.
    """
    with open(file_path, 'w', newline='') as out:
        sheet = csv.writer(out)

        def write_section(banner, columns, rows):
            # Banner, column headers, data rows, then a blank separator row.
            sheet.writerow([banner])
            sheet.writerow(columns)
            sheet.writerows(rows)
            sheet.writerow([])

        write_section(
            "********** Requests per IP **********",
            ["IP Address", "Request Count"],
            ip_counts,
        )
        write_section(
            "********** Most Frequently Accessed Endpoint **********",
            ["Endpoint", "Access Count"],
            [most_accessed],
        )
        write_section(
            "********** Suspicious Activity **********",
            ["IP Address", "Failed Login Count"],
            suspicious_activities.items(),
        )


# Write the per-IP counts, the most accessed endpoint and the flagged IPs to a
# single CSV report in the working directory.
save_results_to_csv(ip_counts, most_accessed, suspicious_ips, "log_analysis_results.csv")