In [1]:
log_data = """127.0.0.1 - - [10/Oct/2024:13:55:36] "GET /home HTTP/1.1" 200
192.168.1.10 - - [10/Oct/2024:13:56:01] "POST /login HTTP/1.1" 401
192.168.1.10 - - [10/Oct/2024:13:56:10] "POST /login HTTP/1.1" 401
10.0.0.5 - - [10/Oct/2024:13:57:22] "GET /dashboard HTTP/1.1" 200
10.0.0.5 - - [10/Oct/2024:13:57:45] "GET /admin HTTP/1.1" 403
"""

with open("access.log", "w") as f:
    f.write(log_data)

print("access.log created")


access.log created


In [2]:
import pandas as pd
import re

with open("access.log", "r") as f:
    logs = f.readlines()

pattern = re.compile(
    r'(\d+\.\d+\.\d+\.\d+).+"(GET|POST)\s(.*?)\sHTTP.*"\s(\d{3})'
)

data = []
for line in logs:
    match = pattern.search(line)
    if match:
        data.append(match.groups())

df = pd.DataFrame(
    data,
    columns=["IP", "Method", "Endpoint", "Status"]
)

df


Unnamed: 0,IP,Method,Endpoint,Status
0,127.0.0.1,GET,/home,200
1,192.168.1.10,POST,/login,401
2,192.168.1.10,POST,/login,401
3,10.0.0.5,GET,/dashboard,200
4,10.0.0.5,GET,/admin,403


In [3]:
kpis = {
    "Total Requests": len(df),
    "Unique IPs": df["IP"].nunique(),
    "GET Requests": (df["Method"] == "GET").sum(),
    "POST Requests": (df["Method"] == "POST").sum(),
    "Error Requests (4xx/5xx)": (df["Status"].astype(int) >= 400).sum()
}

kpis


{'Total Requests': 5,
 'Unique IPs': 3,
 'GET Requests': np.int64(3),
 'POST Requests': np.int64(2),
 'Error Requests (4xx/5xx)': np.int64(3)}

In [4]:
with open("day2_log_kpi_report.txt", "w") as f:
    for k, v in kpis.items():
        f.write(f"{k}: {v}\n")

print("day2_log_kpi_report.txt created")


day2_log_kpi_report.txt created
