<a href="https://colab.research.google.com/github/Abi01Er/Abi01Er/blob/main/IP_Address.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import re
import csv
from collections import defaultdict

# Function to parse the log file and return log entries
def parse_log_file(file_path):
    """Read the log file at ``file_path`` and return its lines as a list.

    Each element is one line of the file exactly as read in text mode,
    including its trailing newline when the file has one.
    """
    with open(file_path, 'r') as handle:
        # Iterating a text file yields the same lines as readlines().
        return list(handle)

# Function to count requests per IP address
def count_requests_by_ip(logs):
    """Tally how many log lines begin with each dotted-quad address.

    Lines that do not start with four dot-separated digit groups are
    ignored. The result is a plain dict ordered by descending count;
    addresses with equal counts keep the order in which they first
    appeared in ``logs``.
    """
    leading_ip = re.compile(r'^(\d+\.\d+\.\d+\.\d+)')
    tally = defaultdict(int)
    for line in logs:
        found = leading_ip.match(line)
        if found is None:
            continue
        tally[found.group(1)] += 1

    # sorted() is stable, so ties stay in first-seen order even with reverse=True.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ranked)

# Function to find the most frequently accessed endpoint
def most_frequent_endpoint(logs):
    """Return the most requested endpoint as an ``(endpoint, count)`` tuple.

    An endpoint is the path that follows an upper-case HTTP method inside
    the quoted request line (e.g. ``"GET /home HTTP/1.1"`` -> ``/home``).
    When several endpoints share the highest count, the one seen first in
    ``logs`` is returned.

    If ``logs`` is empty or no line contains a recognisable request line,
    ``(None, 0)`` is returned instead of letting ``max()`` raise
    ``ValueError`` on an empty sequence.
    """
    endpoint_pattern = re.compile(r'"[A-Z]+\s(/[^ ]*)\s')
    endpoint_counts = defaultdict(int)
    for log in logs:
        match = endpoint_pattern.search(log)
        if match:
            endpoint_counts[match.group(1)] += 1

    # Nothing matched: keep the tuple shape callers index into.
    if not endpoint_counts:
        return (None, 0)

    return max(endpoint_counts.items(), key=lambda item: item[1])

# Function to detect suspicious activity
def detect_suspicious_activity(logs, threshold=10):
    """Find addresses with more than ``threshold`` failed POST requests.

    A line counts as a failed attempt when it starts with a dotted-quad
    address and contains a quoted ``POST ... HTTP/1.x`` request line that
    is immediately followed by status ``401``. Returns a dict mapping each
    offending address to its failure count; addresses whose count is equal
    to ``threshold`` are not included.
    """
    failed_post = re.compile(r'^(\d+\.\d+\.\d+\.\d+).+"POST\s.+HTTP/1\.\d"\s401')
    failures = defaultdict(int)
    for line in logs:
        hit = failed_post.search(line)
        if hit is not None:
            failures[hit.group(1)] += 1

    flagged = {}
    for address, attempts in failures.items():
        # Strictly greater than: hitting the threshold exactly is tolerated.
        if attempts > threshold:
            flagged[address] = attempts
    return flagged

# Function to save results to a CSV file
def save_to_csv(ip_counts, most_accessed, suspicious_ips, output_file="log_analysis_results.csv"):
    """Write the three analysis results to ``output_file`` as one CSV.

    The file holds three sections separated by an empty row: the request
    count per address, the most accessed endpoint (first two items of
    ``most_accessed``), and the failed-login count per flagged address.
    Any existing file at ``output_file`` is overwritten.
    """
    with open(output_file, 'w', newline='') as handle:
        out = csv.writer(handle)

        # Section 1: requests per IP
        out.writerow(["IP Address", "Request Count"])
        out.writerows(ip_counts.items())

        # Section 2: most accessed endpoint, preceded by a separator row
        out.writerows((
            [],
            ["Most Frequently Accessed Endpoint"],
            ["Endpoint", "Access Count"],
        ))
        out.writerow([most_accessed[0], most_accessed[1]])

        # Section 3: suspicious activity, preceded by a separator row
        out.writerows((
            [],
            ["Suspicious Activity Detected"],
            ["IP Address", "Failed Login Attempts"],
        ))
        out.writerows(suspicious_ips.items())

# Main function
def main(log_file="sample.log", output_file="log_analysis_results.csv"):
    """Analyse a log file, print a report, and save the results as CSV.

    Args:
        log_file: Path of the log file to read. Defaults to ``sample.log``.
        output_file: Path of the CSV file to write. Defaults to
            ``log_analysis_results.csv``.

    The report lists the request count per address, the most frequently
    accessed endpoint, and any addresses flagged by
    ``detect_suspicious_activity`` using its default threshold.
    """
    logs = parse_log_file(log_file)

    # Analysis
    ip_counts = count_requests_by_ip(logs)
    most_accessed = most_frequent_endpoint(logs)
    suspicious_ips = detect_suspicious_activity(logs)

    # Display results
    print("IP Address           Request Count")
    for ip, count in ip_counts.items():
        print(f"{ip:<20} {count}")

    print("\nMost Frequently Accessed Endpoint:")
    print(f"{most_accessed[0]} (Accessed {most_accessed[1]} times)")

    print("\nSuspicious Activity Detected:")
    if suspicious_ips:
        print("IP Address           Failed Login Attempts")
        for ip, count in suspicious_ips.items():
            print(f"{ip:<20} {count}")
    else:
        print("No suspicious activity detected.")

    # Save results to CSV; the message reports the path actually written.
    save_to_csv(ip_counts, most_accessed, suspicious_ips, output_file)
    print(f"\nResults saved to {output_file}")

# Run the analysis only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


IP Address           Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5

Most Frequently Accessed Endpoint:
/login (Accessed 13 times)

Suspicious Activity Detected:
No suspicious activity detected.

Results saved to log_analysis_results.csv
