# Log Analysis 


## Import Necessary Libraries

In [30]:
import re
from collections import defaultdict, Counter
import pandas as pd


## Load the Log File

In [31]:
# Path of the log file that every later cell analyses.
log_file_path = "sample.log"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale default encoding. Bytes that are not valid UTF-8 are replaced with
# U+FFFD instead of aborting the whole analysis with UnicodeDecodeError.
with open(log_file_path, 'r', encoding='utf-8', errors='replace') as file:
    logs = file.readlines()  # one string per log line, trailing newline kept


## Step 1: Count Requests per IP Address

In [32]:
# Tally how many log lines each dotted-quad address appears on. Only the first
# dotted-quad found on a line is counted; lines without one are skipped.
ipv4_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')
ip_hits = (ipv4_pattern.search(entry) for entry in logs)
ip_counts = Counter(hit.group(1) for hit in ip_hits if hit)

# Display results: most_common() with no argument returns every (ip, count)
# pair ordered by count, highest first.
sorted_ip_counts = ip_counts.most_common()
print("IP Address           Request Count")
for address, total in sorted_ip_counts:
    print(f"{address:<20}{total}")


IP Address           Request Count
203.0.113.5         8
198.51.100.23       8
192.168.1.1         7
10.0.0.2            6
192.168.1.100       5


## Step 2: Identify the Most Frequently Accessed Endpoint

In [33]:
# Count requests per endpoint. The endpoint is the first whitespace-delimited
# token after the upper-case method name that follows a double quote; lines
# without such a request string are skipped.
request_hits = (re.search(r'\"[A-Z]+\s+(\S+)', entry) for entry in logs)
endpoint_counts = Counter(hit.group(1) for hit in request_hits if hit)

# (endpoint, count) pair with the highest count. Indexing [0] raises IndexError
# when no line contained a request string.
most_accessed = endpoint_counts.most_common(1)[0]
top_endpoint, top_hits = most_accessed
print(f"Most Frequently Accessed Endpoint: {top_endpoint} (Accessed {top_hits} times)")


Most Frequently Accessed Endpoint: /login (Accessed 13 times)


## Step 3: Detect Suspicious Activity

In [34]:
def _is_failed_login(line):
    """Return True when a log line records a failed login attempt.

    A line counts as failed when its HTTP status is 401 or when it contains
    the text "Invalid credentials". The status is read from the three-digit
    field that follows the quoted request string; testing for the bare
    substring "401" would also match response sizes such as 4012 or paths
    such as /item/401. Lines in which no quoted request followed by a status
    can be found keep the plain substring test, so they are not silently
    dropped.
    """
    # Assumes the status code directly follows the closing quote of the
    # request, e.g. `"POST /login HTTP/1.1" 401 128` -- confirm against the
    # actual log format.
    status_match = re.search(r'\"[A-Z]+\s[^\"]*\"\s+(\d{3})\b', line)
    if status_match:
        unauthorized = status_match.group(1) == "401"
    else:
        unauthorized = "401" in line
    return unauthorized or "Invalid credentials" in line


# Failed login attempts per IP (first dotted-quad found on the line).
failed_attempts = Counter()

for line in logs:
    if _is_failed_login(line):
        match = re.search(r'(\d+\.\d+\.\d+\.\d+)', line)
        if match:
            failed_attempts[match.group(1)] += 1

# Filter IPs with failed attempts exceeding a threshold (strictly greater than)
threshold = 10
suspicious_ips = {ip: count for ip, count in failed_attempts.items() if count > threshold}

print("Suspicious Activity Detected:")
print("IP Address           Failed Login Attempts")
for ip, count in suspicious_ips.items():
    print(f"{ip:<20}{count}")


Suspicious Activity Detected:
IP Address           Failed Login Attempts


## Step 4: Save Results to CSV

In [37]:
# Create one DataFrame per result table
ip_df = pd.DataFrame(sorted_ip_counts, columns=["IP Address", "Request Count"])
endpoint_df = pd.DataFrame([most_accessed], columns=["Endpoint", "Access Count"])
suspicious_df = pd.DataFrame(list(suspicious_ips.items()), columns=["IP Address", "Failed Login Count"])

# The three tables are unrelated, so they are written one after another as
# titled sections rather than concatenated column-wise. Column-wise
# concatenation produced two columns named "IP Address", paired rows that have
# nothing to do with each other, and padded the shorter tables with NaN, which
# turns their integer counts into floats in the CSV.
sections = [
    ("Requests per IP", ip_df),
    ("Most Accessed Endpoint", endpoint_df),
    ("Suspicious Activity", suspicious_df),
]

# Save all sections to a single CSV file
output_file = "log_analysis_results.csv"
# newline="" is what pandas asks for when to_csv is given an open file handle.
with open(output_file, "w", newline="", encoding="utf-8") as csv_file:
    for position, (title, frame) in enumerate(sections):
        if position:
            csv_file.write("\n")  # blank line between sections
        csv_file.write(f"{title}\n")
        frame.to_csv(csv_file, index=False)

print(f"Results saved to {output_file}")



Results saved to log_analysis_results.csv
