# Reading the real-world log file

In [6]:
# Load every line of the Cisco log file into a list (line endings are kept)
log_file_path = 'cisco_log.txt'

with open(log_file_path) as file:
    logs = list(file)

# Function to parse a single log entry
def parse_log(log):
    """Split one raw log line into timestamp, message id and message text.

    Args:
        log: A single line of the log file, as a string.

    Returns:
        A dict with the keys ``timestamp``, ``message_id`` and ``message``
        when the line matches the expected layout, otherwise ``None``.
    """
    entry_regex = re.compile(
        r'^(?P<timestamp>[A-Za-z]+\s+\d+\s+\d+\s+\d+:\d+:\d+(:\d+)?\s*(?:\w{3})?):\s+%(?P<message_id>[^:]+):\s+(?P<message>.+)$'
    )
    found = entry_regex.match(log)
    # groupdict() only exposes the named groups, so the optional unnamed
    # sub-second group never shows up in the result.
    return None if found is None else found.groupdict()

# Run the parser over every line, discarding the lines it could not match
parsed_logs = [entry for entry in map(parse_log, logs) if entry is not None]

# Tabulate the parsed entries and preview the first rows
df_logs = pd.DataFrame(parsed_logs)
df_logs.head()


Unnamed: 0,timestamp,message_id,message
0,Apr 15 2013 09:36:50,ASA-4-106023,Deny tcp src dmz:10.1.2.30/63016 dst outside:1...
1,Apr 15 2014 09:34:34 EDT,ASA-session-5-106100,access-list acl_in permitted tcp inside/10.1.2...
2,Apr 29 2013 12:59:50,ASA-6-305011,Built dynamic TCP translation from outside:10....
3,Apr 29 2013 12:59:50,ASA-6-302013,Built outbound TCP connection 89743274 for out...
4,Apr 29 2013 12:59:50,ASA-6-305011,Built dynamic UDP translation from outside:10....


In [8]:
# Helper and row-level function that rewrite known ASA messages as plain English
def _token_after(text, marker):
    """Return the space-delimited token that directly follows ``marker`` in ``text``.

    Raises:
        IndexError: if ``marker`` does not occur in ``text``.
        AttributeError: if ``text`` is not a string (e.g. a missing value).
    """
    return text.split(marker)[1].split(' ')[0]


def enhance_log_message(row):
    """Rewrite a parsed log row's message as a short plain-English sentence.

    Message ids starting with ``ASA-4-106023``, ``ASA-session-5-106100``,
    ``ASA-6-305011`` and ``ASA-6-302013`` are recognised. The protocol
    (106023, 106100), the ACL action (106100) and the connection direction
    (302013) are read from the message itself instead of being assumed, so
    e.g. a UDP deny or an inbound connection is reported as such.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``'message_id'`` and ``'message'``.

    Returns:
        The rewritten sentence for a recognised message id, the original
        message for any other id, or ``"Error parsing message: <message>"``
        when a recognised message lacks an expected field or a value is not
        a string.
    """
    message_id = row['message_id']
    message = row['message']

    try:
        if message_id.startswith("ASA-4-106023"):
            # The protocol is whatever sits between "Deny " and " src ".
            protocol = message.split(' src ')[0].split('Deny ')[1].upper()
            src = _token_after(message, 'src ')
            dst = _token_after(message, 'dst ')
            access_group = _token_after(message, 'by access-group ')
            return f"Deny {protocol} connection from source {src} to destination {dst} by access group {access_group}"
        elif message_id.startswith("ASA-session-5-106100"):
            # The three tokens after "access-list " are: ACL name, action, protocol.
            # Indexing (not unpacking) keeps a short message on the IndexError path.
            fields = message.split('access-list ')[1].split(' ')
            acl = fields[0]
            action = fields[1]
            protocol = fields[2].upper()
            # Only inside -> outside flows are handled; anything else is
            # reported through the error path below.
            inside = _token_after(message, 'inside/')
            outside = _token_after(message, '-> outside/')
            return f"Access-list {acl} {action} {protocol} connection from inside {inside} to outside {outside}"
        elif message_id.startswith("ASA-6-305011"):
            dynamic_type = _token_after(message, 'Built dynamic ')
            outside = _token_after(message, 'from outside:')
            return f"Built dynamic {dynamic_type} translation from outside {outside}"
        elif message_id.startswith("ASA-6-302013"):
            # The token after "Built " is the connection direction.
            direction = _token_after(message, 'Built ')
            outside = _token_after(message, 'for outside:')
            return f"Built {direction} TCP connection from outside {outside}"
        else:
            return message  # Default to the original message if not matched
    except (IndexError, AttributeError):
        # IndexError: an expected marker/field is absent from the message.
        # AttributeError: message_id or message is not a string.
        return f"Error parsing message: {message}"

# Build the plain-English text for every row, then attach it as a new column
enhanced_messages = df_logs.apply(enhance_log_message, axis=1)
df_logs['enhanced_message'] = enhanced_messages

# Preview the new column next to the identifying columns
enhanced_preview_columns = ['timestamp', 'message_id', 'enhanced_message']
df_logs[enhanced_preview_columns].head()


Unnamed: 0,timestamp,message_id,enhanced_message
0,Apr 15 2013 09:36:50,ASA-4-106023,Deny TCP connection from source dmz:10.1.2.30/...
1,Apr 15 2014 09:34:34 EDT,ASA-session-5-106100,Access-list acl_in permitted TCP connection fr...
2,Apr 29 2013 12:59:50,ASA-6-305011,Built dynamic TCP translation from outside 10....
3,Apr 29 2013 12:59:50,ASA-6-302013,Built outbound TCP connection from outside 192...
4,Apr 29 2013 12:59:50,ASA-6-305011,Built dynamic UDP translation from outside 10....


In [9]:
# Further enhancing the log messages for better readability
def enhance_log_message_v2(row):
    """Prefix a row's enhanced message with a category label chosen by message id.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            ``'message_id'`` and ``'enhanced_message'``.

    Returns:
        The enhanced message preceded by the label of the first matching
        message-id prefix, or by ``"General Log: "`` when none matches.
    """
    log_id = row['message_id']
    text = row['enhanced_message']

    # Checked in order; the first prefix that matches decides the label.
    labelled_prefixes = (
        ("ASA-4-106023", "Security Alert: "),
        ("ASA-session-5-106100", "Session Info: "),
        ("ASA-6-305011", "Translation Event: "),
        ("ASA-6-302013", "Connection Event: "),
    )
    for id_prefix, label in labelled_prefixes:
        if log_id.startswith(id_prefix):
            return f"{label}{text}"
    return f"General Log: {text}"

# Compute the labelled text for every row, then attach it as a new column
readable_messages = df_logs.apply(enhance_log_message_v2, axis=1)
df_logs['readable_message'] = readable_messages

# Preview the new column next to the identifying columns
readable_preview_columns = ['timestamp', 'message_id', 'readable_message']
df_logs[readable_preview_columns].head()


Unnamed: 0,timestamp,message_id,readable_message
0,Apr 15 2013 09:36:50,ASA-4-106023,Security Alert: Deny TCP connection from sourc...
1,Apr 15 2014 09:34:34 EDT,ASA-session-5-106100,Session Info: Access-list acl_in permitted TCP...
2,Apr 29 2013 12:59:50,ASA-6-305011,Translation Event: Built dynamic TCP translati...
3,Apr 29 2013 12:59:50,ASA-6-302013,Connection Event: Built outbound TCP connectio...
4,Apr 29 2013 12:59:50,ASA-6-305011,Translation Event: Built dynamic UDP translati...


In [10]:
# Destination file and the columns that make up the export
output_csv_path = 'enhanced_cisco_logs.csv'
export_columns = ['timestamp', 'message_id', 'readable_message']

# Write only the selected columns, leaving out the DataFrame index
df_logs[export_columns].to_csv(output_csv_path, index=False)

print(f"Enhanced logs have been exported to: {output_csv_path}")


Enhanced logs have been exported to: enhanced_cisco_logs.csv
