In [1]:
pip install faker

Note: you may need to restart the kernel to use updated packages.


In [2]:
# Importing required libraries
import pandas as pd
import random
from faker import Faker
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Seed both random sources so that a fresh "Restart & Run All" regenerates the
# same synthetic values (timestamps still depend on the current clock).
SEED = 42
random.seed(SEED)   # drives random.choice / random.randint used for the log fields
Faker.seed(SEED)    # drives any text produced through the Faker instance
fake = Faker()

In [4]:
# Configuration: volume of synthetic data and where the exports are written
num_logs = 10_000
output_csv = "b2b_logs.csv"
output_json = "b2b_logs.json"

# Pools of categorical values; one entry is drawn per log record
companies = [
    "Tcs", "Infosys", "Wipro", "IBM",
    "Accenture", "Cognizant", "HCL", "Tech Mahindra",
]
applications = [
    "ERP", "CRM", "HRMS", "Ticketing",
    "API-Gateway", "Inventory", "Finance",
]
endpoints = [
    "/api/login",
    "/api/logout",
    "/api/invoice",
    "/api/orders",
    "/api/tickets",
    "/api/employees",
    "/api/payrolls",
    "/api/attendence",
]

# 200 is listed four times so a uniform draw from this list favours success
status_codes = [200] * 4 + [400, 401, 403, 404, 500, 502, 503]

# Canned message for each HTTP status in the pool above
messages = {
    200: "Request Successfull",
    400: "Bad request - Invalid Input",
    401: "Unauthorized access attempt",
    403: "Forbidden resource access",
    404: "Resource not found",
    500: "Internal server error",
    502: "Bad gateway",
    503: "Service unavailable",
}

# Timestamp of the first log record: ten days before the moment this cell runs
start_time = datetime.now() - timedelta(days=10)

In [5]:
# Build one synthetic log record per minute, starting at start_time.
# Records are collected as plain dicts and turned into a DataFrame in one shot later.
logs = []
for i in range(num_logs):
    # Consecutive records are spaced exactly 60 seconds apart
    ts = start_time + timedelta(seconds=i*60)
    status = random.choice(status_codes)
    # Any 4xx/5xx response is logged as ERROR; successful ones get a random non-error level
    log_level = "ERROR" if status >= 400 else random.choice(["INFO", "DEBUG", "WARN"])

    log_entry = {
         "timestamp": ts.strftime("%Y-%m-%d %H:%M:%S"),
         "company": random.choice(companies),
         "application": random.choice(applications),
         "log_level": log_level,
         "endpoint": random.choice(endpoints),
         "status_code": status,
         "response_time_ms": random.randint(50,3000),
         # dict.get(key, default) evaluates the default eagerly, so the previous form
         # generated a Faker sentence on every iteration only to discard it.
         # Fall back to Faker lazily, only for a status without a canned message.
         "message": messages[status] if status in messages else fake.sentence(nb_words = 6),
         "user_id": f"emp_{random.randint(1000,9999)}"
    }
    logs.append(log_entry)

In [6]:
# Materialise all generated records at once; each dict key becomes a column
df = pd.DataFrame(data=logs)
df

Unnamed: 0,timestamp,company,application,log_level,endpoint,status_code,response_time_ms,message,user_id
0,2025-08-17 23:08:15,Tcs,CRM,ERROR,/api/payrolls,401,1160,Unauthorized access attempt,emp_9396
1,2025-08-17 23:09:15,Accenture,Finance,ERROR,/api/orders,500,1854,Internal server error,emp_2708
2,2025-08-17 23:10:15,Wipro,HRMS,DEBUG,/api/attendence,200,1507,Request Successfull,emp_4117
3,2025-08-17 23:11:15,Infosys,HRMS,ERROR,/api/login,400,1996,Bad request - Invalid Input,emp_3616
4,2025-08-17 23:12:15,IBM,ERP,ERROR,/api/orders,502,1004,Bad gateway,emp_4342
...,...,...,...,...,...,...,...,...,...
9995,2025-08-24 21:43:15,Accenture,Inventory,DEBUG,/api/orders,200,1579,Request Successfull,emp_2498
9996,2025-08-24 21:44:15,Infosys,API-Gateway,INFO,/api/logout,200,588,Request Successfull,emp_7090
9997,2025-08-24 21:45:15,Wipro,HRMS,ERROR,/api/employees,500,1962,Internal server error,emp_9900
9998,2025-08-24 21:46:15,Tcs,Ticketing,ERROR,/api/login,502,1733,Bad gateway,emp_5000


In [7]:
# Print a summary of the frame: column names, non-null counts, dtypes and memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 9 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   timestamp         10000 non-null  object
 1   company           10000 non-null  object
 2   application       10000 non-null  object
 3   log_level         10000 non-null  object
 4   endpoint          10000 non-null  object
 5   status_code       10000 non-null  int64 
 6   response_time_ms  10000 non-null  int64 
 7   message           10000 non-null  object
 8   user_id           10000 non-null  object
dtypes: int64(2), object(7)
memory usage: 703.3+ KB


In [8]:
# Dropping duplicates
# Rebind instead of mutating in place so the cell is idempotent and chainable,
# and reset the index so any dropped row does not leave gaps in the row labels.
df = df.drop_duplicates().reset_index(drop=True)

In [9]:
# Checking missing values: number of null entries in each column
df.isna().sum()

timestamp           0
company             0
application         0
log_level           0
endpoint            0
status_code         0
response_time_ms    0
message             0
user_id             0
dtype: int64

In [10]:
# Standardize datatype
# Parse with the exact pattern the timestamps were formatted with, so pandas
# does not have to infer the format and any malformed value raises immediately.
df['timestamp'] = pd.to_datetime(df['timestamp'], format="%Y-%m-%d %H:%M:%S")

In [11]:
# Persist the cleaned logs in both formats, without the positional row index.
df.to_csv(output_csv, index=False)
# to_json rejects index=False with the default orient ("columns") and raises
# ValueError. orient="records" writes one JSON object per row and carries no
# index at all; date_format="iso" keeps timestamps human-readable instead of
# the default epoch milliseconds.
df.to_json(output_json, orient="records", date_format="iso")