# Code to analyze network logs

In [20]:
import re
import csv
import pandas as pd
from collections import Counter


def count_requests_per_ip(file_name):
    """Count how many requests each client IP made in an access log.

    Every line is expected to look like::

        IP - - [timestamp] "METHOD /endpoint HTTP/x.y" status size ...

    Lines that do not have this shape are skipped.

    Args:
        file_name: Path of the access log to read.

    Returns:
        A ``Counter`` mapping each IP address string to the number of
        matching log lines that start with it.
    """
    # Compiled once, outside the loop. Any upper-case HTTP method and any
    # protocol version are accepted (not just GET/POST over HTTP/1.1), and
    # the size field may be "-", so that such requests are not silently
    # left out of the totals.
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[(?P<timestamp>.*?)\] '
        r'"(?P<method>[A-Z]+) (?P<endpoint>.*?) HTTP/\d+(?:\.\d+)?" '
        r'(?P<status>\d+) (?P<size>\d+|-)'
    )
    ip_requests = Counter()

    # errors="replace": a stray undecodable byte in one log line must not
    # abort the whole analysis; only the (ASCII) IP field is used here.
    with open(file_name, "r", errors="replace") as file:
        for line in file:
            match = log_pattern.match(line)
            if match:
                ip_requests[match.group("ip")] += 1

    return ip_requests



def find_most_accessed_endpoint(file_name):
    """Rank the endpoints requested in an access log by hit count.

    Every line is expected to look like::

        IP - - [timestamp] "METHOD /endpoint HTTP/x.y" status size ...

    Lines that do not have this shape are skipped.

    Args:
        file_name: Path of the access log to read.

    Returns:
        A list of ``(endpoint, count)`` tuples for *all* endpoints seen,
        ordered from most to least accessed (so element 0 is the most
        accessed endpoint), or ``None`` when no line matched.
    """
    # Compiled once, outside the loop. Any upper-case HTTP method and any
    # protocol version are accepted (not just GET/POST over HTTP/1.1), and
    # the size field may be "-", so that such requests still count as hits.
    log_pattern = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[(?P<timestamp>.*?)\] '
        r'"(?P<method>[A-Z]+) (?P<endpoint>.*?) HTTP/\d+(?:\.\d+)?" '
        r'(?P<status>\d+) (?P<size>\d+|-)'
    )
    endpoint_hits = Counter()

    # errors="replace": a stray undecodable byte in one log line must not
    # abort the whole analysis.
    with open(file_name, "r", errors="replace") as file:
        for line in file:
            match = log_pattern.match(line)
            if match:
                endpoint_hits[match.group("endpoint")] += 1

    # most_common() on an empty Counter is [], which callers expect to
    # receive as None.
    return endpoint_hits.most_common() or None
    
    
    
def detect_suspicious_activity(log_file, threshold):
    """Find IPs with more failed login attempts than ``threshold``.

    A failed attempt is a log line containing a ``POST /login`` request over
    HTTP/1.1 answered with status 401 and the text "Invalid credentials".

    Args:
        log_file: Path of the access log to scan.
        threshold: An IP is reported only when its failed-attempt count is
            strictly greater than this value.

    Returns:
        A dict mapping each offending IP address to its failed-attempt count.
    """
    failures_by_ip = Counter()
    failed_login = re.compile(
        r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[.*?\] "POST /login HTTP/1.1" 401 .*?"Invalid credentials"'
    )

    with open(log_file, 'r') as handle:
        for entry in handle:
            found = failed_login.search(entry)
            if found is None:
                continue
            failures_by_ip[found.group('ip')] += 1

    # Keep only the addresses whose tally is strictly above the threshold.
    flagged = {}
    for address, total in failures_by_ip.items():
        if total > threshold:
            flagged[address] = total
    return flagged



def save_to_csv_suspicious_activity(suspicious_activity, filename='suspicious_activity.csv'):
    """Write the flagged IPs and their failed-login counts to a CSV file.

    Args:
        suspicious_activity: Mapping of IP address to failed-login count.
        filename: Destination path; the file is overwritten if it exists.
    """
    header = ['IP Address', 'Failed Login Count']
    with open(filename, mode='w', newline='') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        # One row per flagged address, in the mapping's iteration order.
        sheet.writerows([address, total] for address, total in suspicious_activity.items())
    print(f"Suspicious activity saved to '{filename}'.")
    
    
    
def save_results_to_csv(ip_requests, most_accessed_endpoint):
    """Write the per-IP request counts and the endpoint ranking to CSV files.

    Two files are (over)written in the current working directory:
    ``requests_per_ip.csv`` and ``most_accessed_endpoint.csv``.

    Args:
        ip_requests: Mapping of IP address to request count.
        most_accessed_endpoint: Iterable of ``(endpoint, count)`` pairs, or
            ``None`` when no endpoint was found; ``None`` produces a file
            containing only the header row.
    """
    # Save requests per IP to a CSV file
    with open('requests_per_ip.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['IP Address', 'Request Count'])
        for ip, count in ip_requests.items():
            writer.writerow([ip, count])
    print("Requests per IP saved to 'requests_per_ip.csv'.")

    # Save most accessed endpoints to a CSV file
    with open('most_accessed_endpoint.csv', mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Endpoint', 'Access Count'])
        # find_most_accessed_endpoint returns None for a log with no matching
        # lines; iterating None would raise TypeError, so treat it as empty.
        for endpoint, count in most_accessed_endpoint or []:
            writer.writerow([endpoint, count])
    print("All accessed endpoints saved to 'most_accessed_endpoint.csv'.")

# Print a human-readable summary of the analysis
def display_results(ip_requests, most_accessed_endpoint, suspicious_activity):
    """Print request counts, the top endpoint and flagged IPs to stdout.

    Args:
        ip_requests: ``Counter`` of IP address to request count; listed from
            most to least active.
        most_accessed_endpoint: Sequence of ``(endpoint, count)`` entries
            whose first element is reported as the top endpoint; a falsy
            value prints a "no endpoints" notice instead.
        suspicious_activity: Mapping of IP address to failed-login count; a
            falsy value prints a "no suspicious activity" notice instead.
    """
    # Per-IP request table, busiest address first.
    print("IP Address           Request Count")
    for address, hits in ip_requests.most_common():
        print(f"{address:20} {hits}")

    # Top endpoint, if any were found.
    if not most_accessed_endpoint:
        print("\nNo endpoints were found in the log file.")
    else:
        top_entry = most_accessed_endpoint[0]
        print("\nMost Frequently Accessed Endpoint:")
        print(f"{top_entry[0]} (Accessed {top_entry[1]} times)")

    # Flagged addresses.
    print("\nSuspicious Activity Detected:")
    if not suspicious_activity:
        print("No suspicious activity detected.")
    else:
        print("IP Address           Failed Login Attempts")
        for address, failures in suspicious_activity.items():
            print(f"{address:20} {failures}")

# Entry point: analyse the log, persist the results, then print them
def run_log_analysis(file_name, threshold=5):
    """Run every analysis step on one access log and report the results.

    The log is analysed for requests per IP, endpoint popularity and failed
    logins; the results are written to CSV files and printed to stdout.

    Args:
        file_name: Path of the access log to analyse.
        threshold: Failed-login count an IP must exceed to be flagged.
    """
    # Analysis passes over the log file.
    requests_by_ip = count_requests_per_ip(file_name)
    endpoint_ranking = find_most_accessed_endpoint(file_name)
    flagged_ips = detect_suspicious_activity(file_name, threshold)

    # Persist first, then show the summary.
    save_results_to_csv(requests_by_ip, endpoint_ranking)
    save_to_csv_suspicious_activity(flagged_ips)
    display_results(requests_by_ip, endpoint_ranking, flagged_ips)

# Configure and launch the analysis
LOG_FILE = "access_log.txt"  # Path of the access log; replace with your own
FAILURE_THRESHOLD = 7  # Passed to run_log_analysis as its threshold
run_log_analysis(LOG_FILE, threshold=FAILURE_THRESHOLD)


Requests per IP saved to 'requests_per_ip.csv'.
All accessed endpoints saved to 'most_accessed_endpoint.csv'.


TypeError: open() missing 1 required positional argument: 'file'

# Analysis of the log results

In [11]:
# Load the per-IP request counts from requests_per_ip.csv
df1= pd.read_csv("requests_per_ip.csv")

In [12]:
df1

Unnamed: 0,IP Address,Request Count
0,192.168.1.1,7
1,203.0.113.5,8
2,10.0.0.2,6
3,198.51.100.23,8
4,192.168.1.100,5


In [13]:
# Load the endpoint access counts from most_accessed_endpoint.csv
df2 = pd.read_csv("most_accessed_endpoint.csv")

In [14]:
df2

Unnamed: 0,Endpoint,Access Count
0,/login,13
1,/home,5
2,/about,5
3,/dashboard,3
4,/contact,2
5,/register,2
6,/profile,2
7,/feedback,2


In [15]:
# Load the flagged IPs and failed-login counts from suspicious_activity.csv
df3 = pd.read_csv("suspicious_activity.csv")

In [16]:
df3

Unnamed: 0,IP Address,Failed Login Count
0,203.0.113.5,8
