### Import necessary modules

In [10]:
import re  # For regular expression matching
import pandas as pd
import csv


### Parsing the Log File


In [19]:
# Path to the log file to analyse; change it to run the analysis on a different log.
log_file = "sample.log"
logs = []  # One dict per parsed line with keys ip, method, path, status (all strings)
unparsed_line_count = 0  # Non-blank lines that did not match log_pattern and were skipped

# Regular expression for one access-log line of the form
#   <ip> - - [<timestamp>] "<method> <path> <protocol>" <status> ...
# The bracketed timestamp is matched with [^\]]* instead of a greedy .* so it
# cannot run past its own closing "]" into a request line that happens to
# contain '] "', which would produce a wrong method/path for that entry.
log_pattern = r'(?P<ip>\S+) - - \[[^\]]*\] "(?P<method>\S+) (?P<path>\S+) \S+" (?P<status>\d+) .*'
log_regex = re.compile(log_pattern)  # Compile once rather than per line

# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# default encoding; undecodable bytes are replaced instead of aborting the run.
with open(log_file, "r", encoding="utf-8", errors="replace") as file:
    for line in file:
        match = log_regex.match(line)  # Anchored at the start of the line
        if match:
            logs.append(match.groupdict())  # Store parsed fields as a dictionary
        elif line.strip():
            unparsed_line_count += 1  # Track skipped lines instead of dropping them silently


### Analyzing the Log File

In [16]:
# An IP is reported as suspicious when it has MORE than this many 401 responses
FAILED_LOGIN_THRESHOLD = 10

# Build a DataFrame from the parsed log entries. The columns are named
# explicitly so the frame keeps its schema even when `logs` is empty; without
# this, an empty list yields a column-less frame and df['ip'] raises KeyError.
df = pd.DataFrame(logs, columns=["ip", "method", "path", "status"])

# Requests per IP address, sorted by count (descending)
ip_counts = df['ip'].value_counts()

# Most accessed endpoint: a Series holding at most one (path -> count) entry;
# it is empty when there are no parsed requests.
endpoint_counts = df['path'].value_counts().head(1)

# Suspicious activity (failed login attempts): number of 401 responses per IP.
# The comparison is against the string '401' because the status column holds
# the text captured by the regex, not integers.
failed_login_counts = df.loc[df['status'] == '401', 'ip'].value_counts()
suspicious_activity = failed_login_counts[failed_login_counts > FAILED_LOGIN_THRESHOLD]

df.head()


Unnamed: 0,ip,method,path,status
0,192.168.1.1,GET,/home,200
1,203.0.113.5,POST,/login,401
2,10.0.0.2,GET,/about,200
3,192.168.1.1,GET,/contact,200
4,198.51.100.23,POST,/register,200


### Saving Results to a CSV File

In [17]:
output_file = "log_analysis_results.csv"  # Output file path

# Write the three result sections into one CSV file, separated by blank rows.
# newline='' leaves line-ending handling to the csv module; the explicit UTF-8
# encoding keeps the output independent of the platform's default encoding.
with open(output_file, "w", newline='', encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)

    # Section 1: requests per IP
    writer.writerow(["Requests per IP"])
    writer.writerow(["IP Address", "Request Count"])
    writer.writerows(ip_counts.items())

    # Section 2: most accessed endpoint
    writer.writerow([])
    writer.writerow(["Most Accessed Endpoint"])
    writer.writerow(["Endpoint", "Access Count"])
    # endpoint_counts is empty when no request was parsed; indexing [0] would
    # then raise IndexError and leave a half-written file, so skip the data row.
    if not endpoint_counts.empty:
        writer.writerow([endpoint_counts.index[0], endpoint_counts.iloc[0]])

    # Section 3: IPs whose failed-login count exceeded the threshold
    writer.writerow([])
    writer.writerow(["Suspicious Activity"])
    writer.writerow(["IP Address", "Failed Login Count"])
    writer.writerows(suspicious_activity.items())


### Printing Results to the Console

In [18]:
# Console report mirroring the three sections written to the CSV file.

# Requests per IP: one left-aligned row per address
print("\nRequests per IP:")
print(f"{'IP Address':<20} {'Request Count'}")
for ip, count in ip_counts.items():
    print(f"{ip:<20} {count}")

# Most frequently accessed endpoint
print("\nMost Frequently Accessed Endpoint:")
# endpoint_counts is empty when no request was parsed; guard the [0] lookups
# so the report finishes instead of stopping with an IndexError.
if endpoint_counts.empty:
    print("No requests were parsed.")
else:
    print(f"{endpoint_counts.index[0]} (Accessed {endpoint_counts.iloc[0]} times)")

# Suspicious activity: IPs whose failed-login count exceeded the threshold
# (the table body is empty when no IP qualified)
print("\nSuspicious Activity Detected:")
print(f"{'IP Address':<20} {'Failed Login Attempts'}")
for ip, count in suspicious_activity.items():
    print(f"{ip:<20} {count}")



Requests per IP:
IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
IP Address           Failed Login Attempts
