In [1]:
%pip install pyarrow

import pandas as pd
import random
from datetime import datetime, timedelta

# Generate simulated log data
log_levels = ['INFO', 'ERROR', 'WARNING']
messages = [
    'User logged in',
    'File not found',
    'System overload',
    'Database connection failed',
    'Process completed'
]

def generate_logs(num_entries=50):
    logs = []
    current_time = datetime.now()
    for _ in range(num_entries):
        log_time = current_time - timedelta(minutes=random.randint(0, 60))
        log_level = random.choice(log_levels)
        message = random.choice(messages)
        logs.append([log_time.strftime('%Y-%m-%d %H:%M:%S'), log_level, message])
    return logs

# Create DataFrame of logs
log_data = generate_logs()
log_df = pd.DataFrame(log_data, columns=['Timestamp', 'LogLevel', 'Message'])

# Function to filter logs by log level
def filter_logs_by_level(log_df, log_level):
    return log_df[log_df['LogLevel'] == log_level]

# Filter ERROR logs
error_logs = filter_logs_by_level(log_df, 'ERROR')

# Correlate ERROR and WARNING events within a 10-minute window
def correlate_events(log_df, event1, event2, time_window=10):
    correlation_list = []
    for i in range(len(log_df) - 1):
        if log_df.iloc[i]['LogLevel'] == event1:
            for j in range(i + 1, len(log_df)):
                time_diff = pd.to_datetime(log_df.iloc[j]['Timestamp']) - pd.to_datetime(log_df.iloc[i]['Timestamp'])
                if log_df.iloc[j]['LogLevel'] == event2 and time_diff <= timedelta(minutes=time_window):
                    correlation_list.append((log_df.iloc[i], log_df.iloc[j]))
                    break
    return correlation_list

# Correlate ERROR and WARNING events
correlated_events = correlate_events(log_df, 'ERROR', 'WARNING')

# Convert correlation list to DataFrame for visualization
correlated_df = pd.DataFrame([{
    'Event 1 Time': event[0]['Timestamp'],
    'Event 1': event[0]['LogLevel'] + " - " + event[0]['Message'],
    'Event 2 Time': event[1]['Timestamp'],
    'Event 2': event[1]['LogLevel'] + " - " + event[1]['Message'],
} for event in correlated_events])

# Display the correlated events
correlated_df





Unnamed: 0,Event 1 Time,Event 1,Event 2 Time,Event 2
0,2024-10-03 08:29:11,ERROR - Process completed,2024-10-03 08:07:11,WARNING - System overload
1,2024-10-03 08:03:11,ERROR - User logged in,2024-10-03 08:07:11,WARNING - System overload
2,2024-10-03 08:02:11,ERROR - User logged in,2024-10-03 08:07:11,WARNING - System overload
3,2024-10-03 08:38:11,ERROR - Process completed,2024-10-03 08:40:11,WARNING - Database connection failed
4,2024-10-03 08:07:11,ERROR - File not found,2024-10-03 07:46:11,WARNING - File not found
5,2024-10-03 08:25:11,ERROR - System overload,2024-10-03 08:30:11,WARNING - File not found
6,2024-10-03 08:19:11,ERROR - File not found,2024-10-03 08:28:11,WARNING - File not found
7,2024-10-03 08:35:11,ERROR - Process completed,2024-10-03 08:28:11,WARNING - File not found
8,2024-10-03 08:11:11,ERROR - Database connection failed,2024-10-03 07:46:11,WARNING - File not found
9,2024-10-03 07:45:11,ERROR - User logged in,2024-10-03 07:46:11,WARNING - File not found
