<a href="https://colab.research.google.com/github/TamannaSheikh15/log-analysis/blob/main/log_analysis_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import re
import csv
from collections import defaultdict
from google.colab import files

In [19]:
# Access log to analyse; a relative path, so it is resolved against the
# notebook's current working directory when opened.
log_file = "sample.log"

In [29]:
# Tallies filled in while the log is scanned. Each is an independent
# defaultdict(int), so a key that has not been seen yet reads as 0.
ip_requests, endpoint_requests, failed_logins = (
    defaultdict(int) for _ in range(3)
)

In [30]:
# Regular expressions to extract data

# First dotted-quad found anywhere in the line (group 1 = the address text).
ip_pattern = re.compile(r'(\d+\.\d+\.\d+\.\d+)')

# Request path inside the quoted request line, e.g. "GET /login HTTP/1.1".
# Group 1 is the path only: it stops at whitespace, '?', '#' or the closing
# quote, so "/", "/index.html" and "/search?q=x" all match, and the query
# string is dropped so requests to the same endpoint are counted together.
endpoint_pattern = re.compile(r'"[A-Z]+\s(/[^\s?#"]*)')

# Three-digit status code that directly follows a closing double quote
# (group 1). Anchoring on the quote avoids picking up other 3-digit tokens,
# and the `$` alternative accepts a status that is the last thing on the line.
status_code_pattern = re.compile(r'"\s(\d{3})(?:\s|$)')

# Literal message text that also marks a line as a failed login.
failed_login_message = "Invalid credentials"

In [31]:
#  Process the log file
# Start from empty tallies so re-running this cell does not double-count
# lines that were already processed in an earlier run.
for _tally in (ip_requests, endpoint_requests, failed_logins):
    _tally.clear()

try:
    # errors='replace' keeps a single undecodable byte in the log from
    # aborting the whole analysis.
    with open(log_file, 'r', encoding='utf-8', errors='replace') as file:
        for line in file:
            # Extract IP address (None when the line has no dotted-quad)
            ip_match = ip_pattern.search(line)
            ip = ip_match.group(1) if ip_match else None
            if ip is not None:
                ip_requests[ip] += 1

            # Extract endpoint
            endpoint_match = endpoint_pattern.search(line)
            if endpoint_match:
                endpoint_requests[endpoint_match.group(1)] += 1

            # Detect failed login attempts. Compare the parsed status code
            # rather than testing `'401' in line`, which would also fire on
            # "401" inside a byte count, timestamp or IP address.
            status_match = status_code_pattern.search(line)
            is_unauthorized = (
                status_match is not None and status_match.group(1) == '401'
            )
            if ip is not None and (is_unauthorized or failed_login_message in line):
                failed_logins[ip] += 1
except FileNotFoundError:
    print(f"Error: {log_file} not found. Please upload the file correctly.")



In [32]:
#  Analyze results
# Most accessed endpoint as an (endpoint, count) pair; on a tie, max() keeps
# the first one encountered. The default avoids a ValueError when no endpoint
# was parsed at all (log file missing or empty), so later cells can still run.
most_accessed_endpoint = max(
    endpoint_requests.items(),
    key=lambda item: item[1],
    default=('N/A', 0),
)

In [33]:
#  Save results to a CSV file
# The report is three tables in one file, separated by blank rows:
# requests per IP, the single most accessed endpoint, failed logins per IP.
csv_file = 'log_analysis_results.csv'
with open(csv_file, 'w', newline='') as out:
    report_rows = []

    # Table 1: IP request counts
    report_rows.append(['IP Address', 'Request Count'])
    report_rows.extend([address, hits] for address, hits in ip_requests.items())

    # Table 2: the most accessed endpoint
    report_rows.append([])
    report_rows.append(['Most Accessed Endpoint', 'Access Count'])
    top_endpoint, top_hits = most_accessed_endpoint[0], most_accessed_endpoint[1]
    report_rows.append([top_endpoint, top_hits])

    # Table 3: failed login attempts
    report_rows.append([])
    report_rows.append(['IP Address', 'Failed Login Attempts'])
    report_rows.extend([address, fails] for address, fails in failed_logins.items())

    writer = csv.writer(out)
    for row in report_rows:
        writer.writerow(row)

print(f"Analysis complete. Results saved to {csv_file}")

Analysis complete. Results saved to log_analysis_results.csv


In [34]:
# Download the results
# Hands the CSV path written by the previous cell to `files.download`, where
# `files` is the module imported from `google.colab` at the top of the notebook.
# NOTE(review): presumably this only works inside a Colab runtime — confirm
# before running the notebook elsewhere.
files.download(csv_file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>