In [9]:
import re
from collections import defaultdict
from functools import reduce
from datetime import datetime, timedelta  


# Example log file (you can replace this with your actual log file)
log_file_path = "C:\\Users\\Rrish\\Downloads\\log.txt"  # Replace with the actual path to your file
try:
    with open(log_file_path, 'r') as file:
        file_content = file.read()
        # Parse log entries into tuples (timestamp, log_level, message)
        log_entries = [line.strip().split(" - ") for line in file_content.splitlines()]
        parsed_log = [(entry[0], entry[1], entry[2]) for entry in log_entries if len(entry) == 3]

except FileNotFoundError:
    print(f"File '{log_file_path}' not found.")

# Extract timestamps
timestamps = [entry[0] for entry in parsed_log]

# Extract log levels
log_levels = [entry[1] for entry in parsed_log]

# Extract message
log_messages = [entry[2] for entry in parsed_log]

# Calculate log level counts
log_level_counts = {
    "INFO": log_levels.count("INFO"),
    "ERROR": log_levels.count("ERROR"),
    "WARNING": log_levels.count("WARNING"),
    "DEBUG": log_levels.count("DEBUG"),
}

# Extract usernames
username_pattern = r"User logged in: (\w+)|User logged out: (\w+)"
usernames = []
for message in log_messages:
    match = re.search(username_pattern, message)
    if match:
        username = match.group(1) or match.group(2)
        usernames.append(username)

# Count user logins
user_logins = {}
for username in set(usernames):
    user_logins[username] = usernames.count(username)

# Extract file names
file_pattern = r"File uploaded: (\w+\.\w+)"
file_names = []
for message in log_messages:
    match = re.search(file_pattern, message)
    if match:
        file_name = match.group(1)
        file_names.append(file_name)

# Extract failed file uploads
failed_upload_pattern = r"File not found: (\w+\.\w+)"
failed_uploads = [
    match.group(1)
    for message in log_messages
    for match in [re.search(failed_upload_pattern, message)]
    if match
]

# Extract user reminders and count total failures
reminders = map(lambda line: f"Reminder to user: Please upload the file again. Log entry: {line.strip()}",
                failed_uploads)
total_failures = reduce(lambda count, _: count + 1, failed_uploads, 0)

# Extract low disk space warnings
low_disk_space_pattern = r"Disk space (low|critically low)"
low_disk_space_warnings = list(filter(lambda message: re.search(low_disk_space_pattern, message), log_messages))


# Extract invalid authentication token errors
invalid_auth_token_pattern = r"Invalid authentication token"
invalid_auth_token_errors = [
    message
    for message in log_messages
    if re.search(invalid_auth_token_pattern, message)
]

# Extract internal server errors
internal_server_error_pattern = r"Internal server error: database connection failed"
internal_server_errors = list(filter(lambda message: re.search(internal_server_error_pattern, message), log_messages))

# Extract connection reset by peer errors
connection_reset_pattern = r"Connection reset by peer"
connection_reset_errors = list(filter(lambda message: re.search(connection_reset_pattern, message), log_messages))

# Calculate error handling time (time between error and debug)
error_debug_times = []
error_timestamps = [timestamps[i] for i, level in enumerate(log_levels) if level == "ERROR"]
debug_timestamps = [timestamps[i] for i, level in enumerate(log_levels) if level == "DEBUG"]

for error_time in error_timestamps:
    try:
        debug_time = next(time for time in debug_timestamps if time > error_time)
        error_debug_times.append((error_time, debug_time))
    except StopIteration:
        pass


# Convert timestamps to datetime objects for calculation
error_debug_times_datetime = [(datetime.strptime(error_time, "%Y-%m-%d %H:%M:%S"), datetime.strptime(debug_time, "%Y-%m-%d %H:%M:%S")) for error_time, debug_time in error_debug_times]



# Print statistics
print("Log Level Counts:")
for level, count in log_level_counts.items():
    print(f"{level}: {count}")

# Print reminders and total failures
#for reminder in reminders:
#    print(reminder)
print(f"Total number of files failed to upload: {total_failures}")

# Example to organize data by specific user - login time, low disk space warning, authentication error.
user_data = defaultdict(lambda: {"logins": [], "disk_space_warnings": [], "invalid_auth_tokens": [], "failed_uploads": []})
for username, timestamp, message in zip(usernames, timestamps, log_messages):
    user_data[username]["logins"].append(timestamp)

    low_disk_space_pattern = r"Disk space (low|critically low)"
    if re.search(low_disk_space_pattern, message):
        user_data[username]["disk_space_warnings"].append(message)

    invalid_auth_token_pattern = r"Invalid authentication token"
    if re.search(invalid_auth_token_pattern, message):
        user_data[username]["invalid_auth_tokens"].append(message)

    failed_upload_match = re.search(failed_upload_pattern, message)
    if failed_upload_match:
        user_data[username]["failed_uploads"].append(failed_upload_match.group(1))

# Internal Server Errors 
user_internal_server_errors = defaultdict(int)
for username, message in zip(usernames, log_messages):
  if re.search(internal_server_error_pattern, message):
    user_internal_server_errors[username] += 1
      
# Connection Reset by Peer Counts with Username Association
user_connection_reset_counts = defaultdict(int)
for username, message in zip(usernames, log_messages):
  if re.search(connection_reset_pattern, message):
    user_connection_reset_counts[username] += 1


# Example to produce output from data organised for specific user
print("\nUser Data:")
for username, user_info in user_data.items():
    print(f"\nUsername: {username}")
    print("Login timestamps:")
    print("\n".join(user_info["logins"]))
    print("Login frequency: ",user_logins.get(username))
    if user_info["disk_space_warnings"]:
        print(f"  Disk space warnings: {','.join(user_info['disk_space_warnings'])}")
        print(f"  Please increase storage space for this user.")
    if user_info["invalid_auth_tokens"]:
        print(f"  Invalid authentication token errors: {','.join(user_info['invalid_auth_tokens'])}")
        print(f"  This user may have experienced difficulties logging in. This could indicate a security breach.")
    if user_info['failed_uploads']:
        print(f"  Failed Uploads: {user_info['failed_uploads']}")
        for file in user_info['failed_uploads']:
            print(f"  Reminder to user: Please upload the file '{file}' again.")
    if user_internal_server_errors[username]:
        print(f"  Internal Server Errors: {user_internal_server_errors[username]}")
        print("  Database connection failed: database service is down")
    if user_connection_reset_counts[username]:
        print(f"  Connection Reset by Peer: {user_connection_reset_counts[username]}")
        print("  Potential network connectivity issues or server instability")


# Usernames with Internal Server Errors (calculated once)
users_with_internal_errors = [username for username, message in zip(usernames, log_messages) if re.search(internal_server_error_pattern, message)]

# List of Usernames with Internal Server Errors 
if users_with_internal_errors:
  print("\nUsernames with Internal Server Errors:")
  for username in users_with_internal_errors:
    print(username)

# Usernames with Connection Reset (calculated once)
users_with_connection_reset = [username for username, message in zip(usernames, log_messages) if re.search(connection_reset_pattern, message)]

# Print Usernames with Errors (if any)
if users_with_connection_reset:
  print("\nUsernames with Connection Reset by Peer:")
  for username in users_with_connection_reset:
    print(username)



      
# Calculate total time difference and average error-debug time (handle potential division by zero)
total_time_delta = timedelta(seconds=0)  # Initialize total time difference as 0
if error_debug_times_datetime:
  for error_time, debug_time in error_debug_times_datetime:
    time_delta = debug_time - error_time
    total_time_delta += time_delta
  average_error_debug_time = total_time_delta / len(error_debug_times_datetime)
  print(f"\nAverage Error-Debug Time: {average_error_debug_time}")



Log Level Counts:
INFO: 500
ERROR: 214
DEBUG: 143
Total number of files failed to upload: 71

User Data:

Username: username123
Login timestamps:
2024-02-29 08:15:23
2024-02-29 08:16:45
Login frequency:  2

Username: user456
Login timestamps:
2024-02-29 08:17:30
2024-02-29 08:18:02
Login frequency:  2
  Please increase storage space for this user.

Username: user789
Login timestamps:
2024-02-29 08:19:55
2024-02-29 08:20:10
Login frequency:  2

Username: user101
Login timestamps:
2024-02-29 08:21:30
2024-02-29 08:22:15
2024-02-29 11:13:15
2024-02-29 11:13:30
Login frequency:  4
  Internal Server Errors: 1
  Database connection failed: database service is down

Username: user202
Login timestamps:
2024-02-29 08:23:05
2024-02-29 08:24:20
2024-02-29 11:14:45
2024-02-29 11:15:30
Login frequency:  4
  Invalid authentication token errors: Invalid authentication token
  This user may have experienced difficulties logging in. This could indicate a security breach.
  Failed Uploads: ['missing_fil