In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras import regularizers

In [None]:
def generate_log_dataset(num_samples=1000, seq_len=20, seed=None):
    """Generate a synthetic binary "log" dataset for anomaly detection.

    Every row is a sequence of ``seq_len`` values drawn from {0, 1}.
    Normal rows draw 0/1 with weights 0.8/0.2; anomalous rows draw 0/1 with
    weights 0.5/0.5. The ``num_samples`` normal rows come first, followed by
    ``num_samples // 5`` anomalous rows (rows are not shuffled).

    Parameters
    ----------
    num_samples : int, default 1000
        Number of normal rows to generate.
    seq_len : int, default 20
        Number of binary features per row.
    seed : int or None, default None
        When given, rows are drawn from a private ``random.Random(seed)`` so
        the result is reproducible and the global ``random`` state is left
        untouched. When ``None``, the module-level ``random`` generator is
        used, exactly as before.

    Returns
    -------
    tuple of (pandas.DataFrame, numpy.ndarray)
        The feature frame of shape ``(n_rows, seq_len)`` and the label vector
        of length ``n_rows`` (0 = normal, 1 = anomaly).
    """
    # The `random` module and a `random.Random` instance both expose `.choices`.
    rng = random if seed is None else random.Random(seed)

    def _draw_rows(count, weights):
        # One row per iteration; each row is `seq_len` weighted 0/1 draws.
        return [rng.choices([0, 1], weights=weights, k=seq_len) for _ in range(count)]

    # Normal log pattern: lower entropy
    normal_logs = _draw_rows(num_samples, [0.8, 0.2])

    # Anomalous logs: higher entropy / strange activity
    anomaly_logs = _draw_rows(num_samples // 5, [0.5, 0.5])

    rows = normal_logs + anomaly_logs
    # Explicit dtype and shape keep the result (n_rows, seq_len) integers even
    # when no rows were generated (a bare np.array([]) would be 1-D float64).
    X = np.array(rows, dtype=int).reshape(len(rows), seq_len)
    y = np.array([0] * len(normal_logs) + [1] * len(anomaly_logs), dtype=int)  # 0 = normal, 1 = anomaly

    return pd.DataFrame(X), y

In [None]:
def build_autoencoder(input_dim):
    """Build and compile a small dense autoencoder.

    Architecture: ``input_dim -> 16 -> 8 -> 16 -> input_dim``. The hidden
    layers use ReLU; the first hidden layer also carries an L1 activity
    regularizer (1e-5). The output layer uses a sigmoid, so reconstructions
    lie in (0, 1) -- inputs are presumably expected to be scaled to that range.

    Parameters
    ----------
    input_dim : int
        Number of features in each input vector.

    Returns
    -------
    Model
        A Keras model compiled with the Adam optimizer and MSE loss.
    """
    inputs = Input(shape=(input_dim,))

    # Encoder: compress down to an 8-unit bottleneck.
    sparse_penalty = regularizers.l1(1e-5)
    hidden = Dense(16, activation="relu", activity_regularizer=sparse_penalty)(inputs)
    bottleneck = Dense(8, activation="relu")(hidden)

    # Decoder: mirror the encoder back up to the input width.
    expanded = Dense(16, activation="relu")(bottleneck)
    reconstruction = Dense(input_dim, activation="sigmoid")(expanded)

    model = Model(inputs=inputs, outputs=reconstruction)
    model.compile(optimizer="adam", loss="mse")
    return model

In [None]:
def train_anomaly_detector(X, y):
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

    # Train only on normal logs
    X_train_normal = X_train[y_train == 0]
    autoencoder = build_autoencoder(X.shape[1])
    autoencoder.fit(X_train_normal, X_train_normal, epochs=20, batch_size=32, validation_split=0.1, verbose=1)

    # Predict on test set
    X_test_pred = autoencoder.predict(X_test)
    mse = np.mean(np.power(X_test - X_test_pred, 2), axis=1)

    threshold = np.percentile(mse, 95)
    print(f"Anomaly Threshold: {threshold:.4f}")

    y_pred = (mse > threshold).astype(int)