<a href="https://colab.research.google.com/github/Shashikant311/Log-Analysis/blob/main/sample_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#import libraries
import re
import csv

In [None]:
from collections import Counter

In [None]:
#read log file

In [None]:
# Path of the access log to analyse.
file = "sample.log"
# Use a separate name for the handle so `file` still refers to the path after
# this cell has run, and decode explicitly so the result does not depend on the
# platform's default encoding; undecodable bytes are replaced instead of
# aborting the whole analysis.
with open(file, 'r', encoding='utf-8', errors='replace') as log_file:
    lines = log_file.readlines()

## (1) Count requests per IP Address

In [None]:
#extract IP addresses using regex

In [None]:
# Matches a dotted-quad shaped token: four groups of 1-3 digits separated by dots.
# It does not check that each group is <= 255, so a token such as 999.1.1.1 also matches.
ip_pattern = r'\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b'

In [None]:
# Run the regex once per line and keep the first IP-shaped token of every line
# that contains one; lines without a match are skipped.
ip_addresses = [
    ip_match.group()
    for ip_match in (re.search(ip_pattern, entry) for entry in lines)
    if ip_match
]

In [None]:
#count the requests per IP address

In [None]:
# Tally how many times each extracted IP address occurs.
ip_count = Counter()
ip_count.update(ip_addresses)

In [None]:
#sorting and printing results

In [None]:
# List of (ip, count) pairs ordered from the highest count to the lowest;
# ties keep the order in which the IPs were first counted.
sorted_counts = ip_count.most_common()

In [None]:
# Build the header once so the separator can match its width. The single space
# between the two columns mirrors the row format below, which keeps each count
# aligned under the "Request Count" heading.
header = f"{'IP Address':<20} {'Request Count'}"
print(header)
print("=" * len(header))
for ip, count in sorted_counts:
    print(f"{ip:<20} {count}")

IP Address          Request Count
203.0.113.5          8
198.51.100.23        8
192.168.1.1          7
10.0.0.2             6
192.168.1.100        5


## (2) Identify the most frequently accessed endpoint

In [None]:
# Extract the requested endpoint from each log line using a regex.
# The capture group is the path that follows the quoted HTTP method; it ends at
# the first character that is not a word character, '-', '/' or '.'.
endpoint_pattern = r'\"[A-Z]+\s(/[\w\-/.]*)'
# Search each line once and reuse the match instead of running the regex twice.
end_points = [
    endpoint_match.group(1)
    for endpoint_match in (re.search(endpoint_pattern, entry) for entry in lines)
    if endpoint_match
]

In [None]:
# Tally how many times each endpoint was requested.
end_points_count = Counter()
end_points_count.update(end_points)

In [None]:
# Find the most frequently accessed endpoint as an (endpoint, count) tuple.
# most_common(1) returns an empty list when no request line matched, so fall
# back to a placeholder instead of raising IndexError on an empty or
# unparseable log.
top_endpoints = end_points_count.most_common(1)
most_accessed = top_endpoints[0] if top_endpoints else ("N/A", 0)

In [None]:
# Report the top endpoint and its hit count, one per line.
print(
    f"Most frequently accessed endpoints: {most_accessed[0]}",
    f"Accessed Count: {most_accessed[1]}",
    sep="\n",
)

Most frequently accessed endpoints: /login
Accessed Count: 13


## (3) Detect suspicious activity

In [None]:
# Number of failed login attempts an IP must exceed (strictly greater than)
# before it is reported as suspicious.
threshold = 10

In [None]:
#log entries with HTTP status code 401 or message "Invalid credentials"

In [None]:
# Flags a log line as a failed login when it contains the standalone token 401
# or the text "Invalid credentials".
# NOTE(review): \b401\b matches 401 anywhere in the line (for example a
# byte-size field), not only in the HTTP status position - confirm against the
# actual log format.
failed_login_pattern = r'\b401\b|Invalid credentials'

In [None]:
# Collect the source IP of every log line flagged as a failed login.
# The shared ip_pattern is reused rather than repeating the regex literal, and
# flagged lines that contain no IP-shaped token are skipped instead of raising
# AttributeError on a None match.
failed_ip = [
    ip_match.group()
    for ip_match in (
        re.search(ip_pattern, entry)
        for entry in lines
        if re.search(failed_login_pattern, entry)
    )
    if ip_match
]

In [None]:
#count failed login attempts per IP

In [None]:
# Tally failed login attempts per IP address.
failed_counts = Counter()
failed_counts.update(failed_ip)

In [None]:
#ip exceeding threshold

In [None]:
# Keep only the IPs whose failed-login count is strictly above the threshold.
suspicious_ip = dict(
    (ip, count) for ip, count in failed_counts.items() if count > threshold
)

In [None]:
#print results

In [None]:
# Handle the "nothing found" case first, otherwise print a two-column table
# with one row per flagged IP.
if not suspicious_ip:
    print(f"No IPs exceeded the threshold of {threshold} failed login attempts.")
else:
    print(f"Suspicious IPs with more than {threshold} failed login attempts.")
    print(f"{'IP Address':<20} {'Failed Attempts'}")
    print("=" * 40)
    for ip, count in suspicious_ip.items():
        print(f"{ip:<20} {count}")

No IPs exceeded the threshold of 10 failed login attempts.


## (4) Save results to a CSV file

In [None]:
# Write the three report sections to one CSV file, separated by blank rows.
# newline='' lets the csv module control line endings; the explicit encoding
# keeps the output independent of the platform default.
with open("log_analysis_results.csv", 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)

    # Section 1: requests per IP, highest count first to match the printed table.
    writer.writerow(["Requests Per IP"])
    writer.writerow(["IP Address", "Request Count"])
    writer.writerows(ip_count.most_common())

    writer.writerow([])

    # Section 2: the single most requested endpoint as an (endpoint, count) row.
    writer.writerow(["Most Accessed Endpoint"])
    writer.writerow(["Endpoint", "Access Count"])
    writer.writerow(most_accessed)

    writer.writerow([])

    # Section 3: IPs above the failed-login threshold; only the headers are
    # written when no IP qualified.
    writer.writerow(["Suspicious Activity"])
    writer.writerow(["IP Address", "Failed Login Count"])
    writer.writerows(suspicious_ip.items())
print("\nResults saved to 'log_analysis_results.csv'")


Results saved to 'log_analysis_results.csv'
