In [None]:
import json
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix

def load_data(path):
    """Read a JSON log file into a syscall-count matrix plus the raw labels.

    Parameters
    ----------
    path : str
        Location of a JSON file containing a list of records. Each record is
        indexed as ``record["kernel"]["syscall_counts"]`` and
        ``record["label"]``.

    Returns
    -------
    tuple
        ``(frame, labels)`` where ``frame`` is a DataFrame with one row per
        record and one column per syscall name (syscalls missing from a
        record are filled with 0), and ``labels`` is the list of
        ``record["label"]`` values in file order.
    """
    with open(path, "r") as handle:
        records = json.load(handle)

    # One dict of syscall counts per record; pandas aligns the keys into columns.
    count_rows = (record["kernel"]["syscall_counts"] for record in records)
    frame = pd.DataFrame(count_rows).fillna(0)
    return frame, [record["label"] for record in records]

# Input JSON files (hard-coded absolute paths).
train = "/content/training_data_kernel_activity.json"
validate = "/content/normal_validation.json"
test = "/content/all_attacks.json"

# Labels of the training file are not needed: the model is fitted unsupervised.
X_train, _ = load_data(train)
X_validate, y_validate = load_data(validate)
X_test, y_test = load_data(test)

# Each file may expose a different set of syscall names; align all three
# frames on the union of columns so the model sees a consistent feature space.
all_columns = sorted(set(X_train.columns) | set(X_validate.columns) | set(X_test.columns))
X_train = X_train.reindex(columns=all_columns, fill_value=0)
X_validate = X_validate.reindex(columns=all_columns, fill_value=0)
X_test = X_test.reindex(columns=all_columns, fill_value=0)

model = IsolationForest(n_estimators=200, contamination=0.05, random_state=42)
model.fit(X_train)

# Ground truth is assigned per file rather than read from the stored labels:
# every validation row counts as normal (0), every test row as attack (1).
y_val_true = [0] * len(y_validate)  # normal validation
y_test_true = [1] * len(y_test)     # attack test

# IsolationForest.predict returns -1 for anomalies and 1 for inliers;
# map that to 1 = attack, 0 = normal.
pred_val = [1 if p == -1 else 0 for p in model.predict(X_validate)]
pred_test = [1 if p == -1 else 0 for p in model.predict(X_test)]

# Combine both sets
y_true = y_val_true + y_test_true
y_pred = pred_val + pred_test

# Passing labels=[0, 1] keeps the report and the 2x2 matrix well-formed even
# when one class never occurs in y_true/y_pred (otherwise target_names would
# not match and the four-way unpacking of cm.ravel() below would fail).
print(classification_report(y_true, y_pred, labels=[0, 1], target_names=["Normal", "Attack"]))

# Confusion matrix
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("Model Performance (based on anomaly prediction vs actual attack):")
print(f"True Positives (correctly identified attacks): {tp}")
print(f"True Negatives (correctly identified normal): {tn}")
print(f"False Positives (normal incorrectly flagged as attack): {fp}")
print(f"False Negatives (attacks missed, flagged as normal): {fn}")

              precision    recall  f1-score   support

      Normal       0.84      0.91      0.88      4372
      Attack       0.00      0.00      0.00       746

    accuracy                           0.78      5118
   macro avg       0.42      0.46      0.44      5118
weighted avg       0.72      0.78      0.75      5118

Model Performance (based on anomaly prediction vs actual attack):
True Positives (correctly identified attacks): 1
True Negatives (correctly identified normal): 3994
False Positives (normal incorrectly flagged as attack): 378
False Negatives (attacks missed, flagged as normal): 745


Advanced implementation of Isolation Forest with feature engineering and scaling

In [None]:
import json
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Paths to the input JSON files (hard-coded absolute locations).
# `train` is the file the model is fitted on; `validate` and `test` are only
# scored and evaluated further down in this cell.
train = "/content/training_data_kernel_activity.json"
validate = "/content/normal_validation.json"
test = "/content/all_attacks.json"

def optimized_feature_extraction(logs):
    """Turn raw syscall-count logs into eight hand-crafted features per log.

    Parameters
    ----------
    logs : iterable of dict
        Each entry is indexed as ``entry["kernel"]["syscall_counts"]``, a
        mapping of syscall name to count.

    Returns
    -------
    pandas.DataFrame
        One row per log with the columns ``weighted_activity``,
        ``priv_esc_ratio``, ``suspicious_ratio``, ``process_mgmt_ratio``,
        ``has_reboot``, ``has_unknown265``, ``unique_syscalls`` and
        ``total_activity``.
    """
    # Per-syscall multipliers; any syscall not listed here counts with weight 1.0.
    weight_table = {
        # Critical (5x weight)
        'reboot': 5.0, 'kexec_load': 5.0, 'syscall_265': 5.0,
        # High risk (3x weight)
        'capset': 3.0, 'setuid': 3.0, 'setgid': 3.0,
        'syscall_252': 3.0, 'syscall_258': 3.0,
        # Medium risk (2x weight)
        'execve': 2.0, 'clone': 2.0, 'ptrace': 2.0,
        # Low risk (0.5x weight - suppress noise)
        'getuid': 0.5, 'read': 0.5, 'write': 0.5
    }
    column_names = [
        'weighted_activity',
        'priv_esc_ratio',
        'suspicious_ratio',
        'process_mgmt_ratio',
        'has_reboot',
        'has_unknown265',
        'unique_syscalls',
        'total_activity'
    ]

    def _describe(counts):
        """Build the feature row for one syscall-count mapping."""
        grand_total = sum(counts.values())
        # Ratios divide by at least 1 so an empty log yields 0 instead of an error.
        denominator = max(1, grand_total)

        weighted_total = sum(
            occurrences * weight_table.get(name, 1.0)
            for name, occurrences in counts.items()
        )
        priv_esc_hits = sum(counts.get(name, 0) for name in ['capset', 'setuid', 'setgid'])
        suspicious_hits = sum(counts.get(name, 0) for name in ['reboot', 'kexec_load', 'syscall_265'])
        process_hits = sum(counts.get(name, 0) for name in ['execve', 'clone', 'fork'])

        return [
            np.log1p(weighted_total),        # total weighted activity, log-compressed
            priv_esc_hits / denominator,     # share of privilege-changing calls
            suspicious_hits / denominator,   # share of reboot/kexec/syscall_265 calls
            process_hits / denominator,      # share of execve/clone/fork calls
            int('reboot' in counts),         # presence flag (key present in the mapping)
            int('syscall_265' in counts),    # presence flag (key present in the mapping)
            len(counts),                     # number of distinct syscall names
            grand_total,                     # raw number of calls
        ]

    rows = [_describe(entry["kernel"]["syscall_counts"]) for entry in logs]
    return pd.DataFrame(rows, columns=column_names)

def load_data_and_extract_features(path):
    """Read a JSON log file and return its feature frame and binary labels.

    Parameters
    ----------
    path : str
        Location of a JSON file holding a list of log records.

    Returns
    -------
    tuple
        ``(features, labels)`` — ``features`` is the DataFrame produced by
        ``optimized_feature_extraction``; ``labels`` is a list with 1 for
        records whose ``"label"`` equals ``"attack"`` and 0 for all others.
    """
    with open(path, "r") as handle:
        records = json.load(handle)

    feature_frame = optimized_feature_extraction(records)
    binary_labels = [int(record["label"] == "attack") for record in records]
    return feature_frame, binary_labels

# Load and process all datasets
X_train, y_train = load_data_and_extract_features(train)
X_validate, y_validate = load_data_and_extract_features(validate)
X_test, y_test = load_data_and_extract_features(test)

# Create model pipeline: standardise the features, then fit the forest.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('iso_forest', IsolationForest(
        n_estimators=500,
        max_samples=256,
        contamination=0.1,
        random_state=42,
        verbose=1
    ))
])

# Train only on training data (labels are not used by the fit)
model.fit(X_train)

# Make predictions
pred_val = model.predict(X_validate)
pred_test = model.predict(X_test)

# Convert predictions: -1=anomaly(attack), 1=normal
pred_val_binary = [1 if p == -1 else 0 for p in pred_val]
pred_test_binary = [1 if p == -1 else 0 for p in pred_test]

# Combine validation and test results
y_true_combined = y_validate + y_test
y_pred_combined = pred_val_binary + pred_test_binary

# Generate evaluation report.
# labels=[0, 1] keeps the report and the 2x2 matrix well-formed even when one
# class is absent from the data or the predictions; without it the
# target_names would not match and the four-way unpacking below would fail.
print(classification_report(
    y_true_combined,
    y_pred_combined,
    labels=[0, 1],
    target_names=["Normal", "Attack"]
))

# Confusion matrix
cm = confusion_matrix(y_true_combined, y_pred_combined, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("Model Performance:")
print(f"True Positives (correctly identified attacks): {tp}")
print(f"True Negatives (correctly identified normal): {tn}")
print(f"False Positives (normal flagged as attack): {fp}")
print(f"False Negatives (attacks missed): {fn}")

[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    0.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


              precision    recall  f1-score   support

      Normal       0.91      0.78      0.84      4372
      Attack       0.31      0.57      0.40       746

    accuracy                           0.75      5118
   macro avg       0.61      0.68      0.62      5118
weighted avg       0.83      0.75      0.78      5118

Model Performance:
True Positives (correctly identified attacks): 426
True Negatives (correctly identified normal): 3430
False Positives (normal flagged as attack): 942
False Negatives (attacks missed): 320


[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 500 out of 500 | elapsed:    0.1s finished


Advanced feature engineering

In [None]:
import json
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np
from collections import Counter

# Paths to the input JSON files (hard-coded absolute locations).
# `train` is the file the model is fitted on; `validate` and `test` are scored
# and used for threshold selection and evaluation further down in this cell.
train = "/content/training_data_kernel_activity.json"
validate = "/content/normal_validation.json"
test = "/content/all_attacks.json"

def enhanced_feature_extraction(logs):
    """Extract twenty attack-oriented features from raw syscall-count logs.

    Parameters
    ----------
    logs : iterable of dict
        Each entry is indexed as ``entry["kernel"]["syscall_counts"]``, a
        mapping of syscall name to count.

    Returns
    -------
    pandas.DataFrame
        One row per log. Columns: ``weighted_activity``, ``critical_ratio``,
        ``unknown_ratio``, ``privilege_ratio``, ``memory_ratio``,
        ``process_ratio``, ``red_flags``, ``rare_syscalls``,
        ``unique_syscalls``, ``max_syscall_ratio``, ``variance``, ``entropy``,
        ``total_activity``, ``has_reboot``, ``has_kexec``, ``has_unknown265``,
        ``has_ptrace``, ``has_pivot_root``, ``critical_diversity`` and
        ``suspicious_combo``.
    """

    # Per-syscall multipliers; syscalls not listed here count with weight 1.0.
    syscall_weights = {
        # Critical system modification (8x-10x weight)
        'reboot': 10.0, 'kexec_load': 10.0, 'pivot_root': 10.0, 'mount': 8.0,
        'umount2': 8.0, 'quotactl': 8.0, 'swapon': 8.0,

        # Privilege escalation (7x-8x weight)
        'capset': 8.0, 'setuid': 8.0, 'setgid': 8.0, 'setfsuid': 7.0,
        'setgroups': 7.0, 'setregid': 7.0, 'setdomainname': 7.0,

        # Unknown/suspicious syscalls (7x-9x weight)
        'syscall_265': 9.0, 'syscall_252': 8.0, 'syscall_258': 8.0,
        'syscall_254': 7.0, 'syscall_255': 7.0, 'syscall_256': 7.0,
        'syscall_259': 7.0, 'syscall_266': 7.0, 'syscall_268': 7.0,
        'syscall_269': 7.0, 'syscall_270': 7.0, 'syscall_272': 7.0,

        # Process manipulation (6x-8x weight)
        'execve': 6.0, 'clone': 6.0, 'ptrace': 8.0, 'unshare': 7.0,

        # Memory manipulation (4x-6x weight)
        'mlock': 5.0, 'mlockall': 5.0, 'munlock': 4.0, 'munlockall': 4.0,
        'mprotect': 5.0, 'mremap': 5.0, 'remap_file_pages': 6.0,

        # IPC and advanced features (4x-6x weight)
        'keyctl': 6.0, 'msgctl': 4.0, 'msgget': 4.0, 'semctl': 4.0,
        'shmctl': 4.0, 'nfsservctl': 6.0,

        # Scheduler manipulation (4x weight)
        'sched_setscheduler': 4.0, 'sched_setparam': 4.0, 'ioprio_set': 4.0,

        # Common benign syscalls (reduce noise)
        'getuid': 0.3, 'getgid': 0.3, 'getpid': 0.3, 'getppid': 0.3,
        'read': 0.4, 'write': 0.4, 'close': 0.3, 'brk': 0.3,
        'geteuid': 0.3, 'getegid': 0.3
    }

    # Loop-invariant lookup tables, built once instead of once per syscall key.
    privilege_calls = frozenset(
        ['capset', 'setuid', 'setgid', 'setfsuid', 'setgroups', 'setregid'])
    memory_calls = frozenset(
        ['mlock', 'mlockall', 'mprotect', 'mremap', 'remap_file_pages'])
    process_calls = frozenset(['execve', 'clone', 'ptrace', 'unshare'])
    red_flag_calls = ('reboot', 'kexec_load', 'syscall_265', 'pivot_root', 'ptrace')
    rare_calls = frozenset(
        ['kexec_load', 'pivot_root', 'quotactl', 'nfsservctl',
         'keyctl', 'unshare', 'remap_file_pages'] +
        [f'syscall_{i}' for i in range(252, 340)])

    features = []
    for log in logs:
        syscalls = log["kernel"]["syscall_counts"]
        # Clamp to 1 so every ratio below is defined for an empty log.
        total = max(1, sum(syscalls.values()))

        # 1. Weighted counts and per-category tallies
        weighted_counts = []
        critical_count = 0
        unknown_syscall_count = 0
        privilege_ops = 0
        memory_ops = 0
        process_ops = 0

        for sc, cnt in syscalls.items():
            weight = syscall_weights.get(sc, 1.0)
            weighted_counts.append(cnt * weight)

            # "Critical" is defined purely by weight: anything weighted 8.0 or more.
            if weight >= 8.0:
                critical_count += cnt
            if sc.startswith('syscall_'):
                unknown_syscall_count += cnt
            if sc in privilege_calls:
                privilege_ops += cnt
            if sc in memory_calls:
                memory_ops += cnt
            if sc in process_calls:
                process_ops += cnt

        # 2. Statistical features
        syscall_counts = list(syscalls.values())
        max_syscall_count = max(syscall_counts) if syscall_counts else 0
        variance = np.var(syscall_counts) if len(syscall_counts) > 1 else 0

        # 3. Ratio features (normalized by total)
        critical_ratio = critical_count / total
        unknown_ratio = unknown_syscall_count / total
        privilege_ratio = privilege_ops / total
        memory_ratio = memory_ops / total
        process_ratio = process_ops / total

        # 4. Red flag indicators: number of distinct dangerous syscalls present
        red_flags = sum(1 for dangerous_sc in red_flag_calls if dangerous_sc in syscalls)

        # 5. Activity patterns
        unique_syscalls = len(syscalls)
        # Shannon entropy (base 2) of the per-syscall count distribution.
        entropy = -sum((c/total) * np.log2(c/total) for c in syscall_counts if c > 0)

        # 6. Frequency analysis: number of distinct rare syscalls present
        rare_syscalls = sum(1 for sc in syscalls if sc in rare_calls)

        features.append([
            np.log1p(sum(weighted_counts)),    # Total weighted activity (log transformed)
            critical_ratio,                    # Critical operations ratio
            unknown_ratio,                     # Unknown syscalls ratio
            privilege_ratio,                   # Privilege escalation ratio
            memory_ratio,                      # Memory manipulation ratio
            process_ratio,                     # Process manipulation ratio
            red_flags,                         # Number of red flag syscalls
            rare_syscalls,                     # Count of rare syscalls
            unique_syscalls,                   # Diversity of syscalls
            max_syscall_count / total,         # Most frequent syscall ratio
            np.log1p(variance),                # Syscall count variance (log)
            entropy,                           # Syscall distribution entropy
            np.log1p(total),                   # Total activity (log)
            int('reboot' in syscalls),         # Specific red flags
            int('kexec_load' in syscalls),
            int('syscall_265' in syscalls),
            int('ptrace' in syscalls),
            int('pivot_root' in syscalls),
            # Combination features
            critical_ratio * unique_syscalls,  # Critical diversity
            unknown_ratio * red_flags,         # Suspicious combination
        ])

    return pd.DataFrame(features, columns=[
        'weighted_activity', 'critical_ratio', 'unknown_ratio', 'privilege_ratio',
        'memory_ratio', 'process_ratio', 'red_flags', 'rare_syscalls',
        'unique_syscalls', 'max_syscall_ratio', 'variance', 'entropy',
        'total_activity', 'has_reboot', 'has_kexec', 'has_unknown265',
        'has_ptrace', 'has_pivot_root', 'critical_diversity', 'suspicious_combo'
    ])

def load_data_and_extract_features(path):
    """Read a JSON log file and return its feature frame and binary labels.

    Parameters
    ----------
    path : str
        Location of a JSON file holding a list of log records.

    Returns
    -------
    tuple
        ``(features, labels)`` — ``features`` is the DataFrame produced by
        ``enhanced_feature_extraction``; ``labels`` is a list with 1 for
        records whose ``"label"`` equals ``"attack"`` and 0 for all others.
    """
    with open(path, "r") as handle:
        records = json.load(handle)

    feature_frame = enhanced_feature_extraction(records)
    binary_labels = [int(record["label"] == "attack") for record in records]
    return feature_frame, binary_labels

def find_optimal_threshold(y_true, anomaly_scores, priority='recall'):
    """Pick the score threshold that best separates attacks from normal samples.

    A sample is predicted as an attack when its score is ``<= threshold``.
    Candidate thresholds are the 1st..99th percentiles of ``anomaly_scores``.

    Parameters
    ----------
    y_true : array-like of int
        Ground-truth labels, 1 for attack and 0 for normal. Lists are accepted.
    anomaly_scores : array-like of float
        One score per sample; lower means more anomalous. Lists are accepted.
    priority : str, default 'recall'
        ``'recall'`` ranks thresholds by ``recall - 0.1 * fp / n_samples``;
        any other value ranks them by F1.

    Returns
    -------
    tuple
        ``(best_threshold, results)``. ``results`` holds one
        ``(threshold, recall, precision, score, fn, fp)`` tuple per candidate.
        ``best_threshold`` is the first candidate with the highest score, or
        ``None`` (with empty ``results``) when there are no scores or no
        attack samples to measure recall against.
    """
    # Coerce up front: with plain lists `y_true == 0` is a scalar False and
    # `anomaly_scores <= threshold` raises, so the element-wise logic breaks.
    y_true = np.asarray(y_true)
    anomaly_scores = np.asarray(anomaly_scores, dtype=float)

    results = []
    attack_mask = y_true == 1
    normal_mask = y_true == 0
    n_attacks = np.sum(attack_mask)

    # Without scores or without any attack there is nothing to optimise.
    if anomaly_scores.size == 0 or n_attacks == 0:
        return None, results

    thresholds = np.percentile(anomaly_scores, range(1, 100))

    best_threshold = None
    # Start below every possible score so a threshold is always selected, even
    # when the false-positive penalty drives all scores to zero or below.
    best_score = -np.inf

    for threshold in thresholds:
        flagged = anomaly_scores <= threshold

        fp = np.sum(normal_mask & flagged)
        fn = np.sum(attack_mask & ~flagged)
        tp = np.sum(attack_mask & flagged)

        recall = tp / n_attacks
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0

        if priority == 'recall':
            # Prioritize recall but penalize too many false positives
            score = recall - 0.1 * (fp / max(1, len(y_true)))
        else:
            score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

        results.append((threshold, recall, precision, score, fn, fp))

        # Strict comparison keeps the lowest threshold among equally good ones.
        if score > best_score:
            best_score = score
            best_threshold = threshold

    return best_threshold, results

# Load and process all datasets
print("Loading and processing datasets...")
X_train, y_train = load_data_and_extract_features(train)
X_validate, y_validate = load_data_and_extract_features(validate)
X_test, y_test = load_data_and_extract_features(test)

print(f"Training set: {len(X_train)} samples ({sum(y_train)} attacks)")
print(f"Validation set: {len(X_validate)} samples ({sum(y_validate)} attacks)")
print(f"Test set: {len(X_test)} samples ({sum(y_test)} attacks)")

# Create enhanced model pipeline
model = Pipeline([
    ('scaler', StandardScaler()),
    ('iso_forest', IsolationForest(
        n_estimators=1000,           # More trees for better stability
        max_samples='auto',          # Let scikit-learn choose the subsample size
        contamination=0.05,          # Lower contamination rate
        max_features=0.8,            # Use most features
        bootstrap=False,             # Don't bootstrap for consistency
        random_state=42,
        verbose=1,
        n_jobs=-1                    # Use all cores
    ))
])

# Train model
print("Training model...")
model.fit(X_train)

# Get anomaly scores instead of just predictions (lower = more anomalous)
print("Getting anomaly scores...")
val_scores = model.decision_function(X_validate)
test_scores = model.decision_function(X_test)

# Combine validation and test data for threshold optimization.
# NOTE: the threshold is selected on exactly the samples that are reported on
# below, so the printed metrics are optimistic for unseen data.
combined_scores = np.concatenate([val_scores, test_scores])
combined_labels = np.array(y_validate + y_test)

# Find optimal threshold to minimize false negatives
print("Finding optimal threshold...")
optimal_threshold, threshold_results = find_optimal_threshold(
    combined_labels, combined_scores, priority='recall'
)

# find_optimal_threshold can return None; fail with a clear message instead of
# a formatting TypeError on the next line.
if optimal_threshold is None:
    raise ValueError(
        "find_optimal_threshold returned no threshold; "
        "check that the evaluation data contains attack samples"
    )

print(f"Optimal threshold: {optimal_threshold:.4f}")

# Make predictions with optimal threshold (score <= threshold means attack)
pred_val_binary = (val_scores <= optimal_threshold).astype(int)
pred_test_binary = (test_scores <= optimal_threshold).astype(int)
pred_combined = (combined_scores <= optimal_threshold).astype(int)

# Generate evaluation report
print("\n" + "="*50)
print("FINAL MODEL PERFORMANCE")
print("="*50)
# labels=[0, 1] keeps the report and the 2x2 matrix well-formed even when one
# class is absent from the labels or the predictions.
print(classification_report(
    combined_labels,
    pred_combined,
    labels=[0, 1],
    target_names=["Normal", "Attack"]
))

# Detailed confusion matrix
cm = confusion_matrix(combined_labels, pred_combined, labels=[0, 1])
tn, fp, fn, tp = cm.ravel()

print("\nConfusion Matrix:")
print(f"True Positives (attacks correctly identified): {tp}")
print(f"True Negatives (normal correctly identified): {tn}")
print(f"False Positives (normal flagged as attack): {fp}")
print(f"False Negatives (attacks missed): {fn}")

# Additional metrics
total_attacks = tp + fn
total_normal = tn + fp
attack_detection_rate = tp / total_attacks if total_attacks > 0 else 0
false_positive_rate = fp / total_normal if total_normal > 0 else 0

print("\nKey Metrics:")
print(f"Attack Detection Rate (Recall): {attack_detection_rate:.3f} ({tp}/{total_attacks})")
print(f"False Positive Rate: {false_positive_rate:.3f} ({fp}/{total_normal})")
print(f"Missed Attacks: {fn}")

# Show top threshold results
print("\nTop 5 threshold options (sorted by recall):")
print("Threshold | Recall | Precision | FN | FP")
print("-" * 45)
sorted_results = sorted(threshold_results, key=lambda x: x[1], reverse=True)[:5]
# Row-local names so the confusion-matrix fn/fp computed above are not overwritten.
for thresh, recall, precision, _score, row_fn, row_fp in sorted_results:
    print(f"{thresh:8.4f} | {recall:6.3f} | {precision:9.3f} | {row_fn:2d} | {row_fp:2d}")

# Feature names, listed for reference only (no importances are computed here)
print("\nFeature names for reference:")
for i, col in enumerate(X_train.columns):
    print(f"{i:2d}: {col}")

print(f"\nConsider using threshold: {optimal_threshold:.4f} for maximum recall")
# A sample is flagged when score <= threshold, so a HIGHER threshold flags more.
print("You can raise the threshold to catch more attacks (reduce FN) at the cost of more false positives.")

Loading and processing datasets...
Training set: 833 samples (0 attacks)
Validation set: 4372 samples (0 attacks)
Test set: 746 samples (746 attacks)
Training model...


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:    2.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:    0.2s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s


Getting anomaly scores...


[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:    0.4s finished


Finding optimal threshold...
Optimal threshold: 0.1737

FINAL MODEL PERFORMANCE
              precision    recall  f1-score   support

      Normal       1.00      0.03      0.06      4372
      Attack       0.15      1.00      0.26       746

    accuracy                           0.17      5118
   macro avg       0.57      0.52      0.16      5118
weighted avg       0.88      0.17      0.09      5118


Confusion Matrix:
True Positives (attacks correctly identified): 746
True Negatives (normal correctly identified): 140
False Positives (normal flagged as attack): 4232
False Negatives (attacks missed): 0

Key Metrics:
Attack Detection Rate (Recall): 1.000 (746/746)
False Positive Rate: 0.968 (4232/4372)
Missed Attacks: 0

Top 5 threshold options (sorted by recall):
Threshold | Recall | Precision | FN | FP
---------------------------------------------
  0.1737 |  1.000 |     0.150 |  0 | 4232
  0.1745 |  1.000 |     0.149 |  0 | 4269
  0.1767 |  1.000 |     0.147 |  0 | 4326
  0.1693 | 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 1000 out of 1000 | elapsed:    0.2s finished


In [None]:
import json
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import numpy as np
from collections import Counter

# Paths to the input JSON files (hard-coded absolute locations).
train = "/content/training_data_kernel_activity.json"
validate = "/content/normal_validation.json"
test = "/content/all_attacks.json"

def smart_feature_extraction(logs):
    """Extract twenty frequency-aware features from raw syscall-count logs.

    Parameters
    ----------
    logs : iterable of dict
        Each entry is indexed as ``entry["kernel"]["syscall_counts"]``, a
        mapping of syscall name to count.

    Returns
    -------
    pandas.DataFrame
        One row per log. Columns: ``weighted_activity``,
        ``priv_esc_intensity``, ``system_mod_intensity``,
        ``unknown_intensity``, ``injection_intensity``, ``red_flag_score``,
        ``unique_syscalls``, ``max_concentration``,
        ``diversity_intensity_ratio``, ``non_benign_ratio``, ``burst_score``,
        ``combined_risk``, ``stealth_indicator``, ``total_activity``,
        ``priv_system_combo``, ``unknown_injection_combo``,
        ``multiple_red_flags``, ``focused_attack_pattern``,
        ``burst_detected`` and ``normalized_red_flags``.
    """

    # Per-syscall multipliers; syscalls not listed here count with weight 1.0.
    syscall_weights = {
        # Ultra-critical system modification - 7x-8x weight
        'reboot': 8.0, 'kexec_load': 8.0, 'pivot_root': 7.0,

        # High privilege operations - 6x-7x weight
        'capset': 6.0, 'setuid': 6.0, 'setgid': 6.0, 'ptrace': 7.0,

        # Unknown syscalls - 4x-5x weight
        'syscall_265': 5.0, 'syscall_252': 5.0, 'syscall_258': 5.0,
        'syscall_254': 4.0, 'syscall_256': 4.0, 'syscall_259': 4.0,

        # Process/memory manipulation - 4x-5x weight
        'execve': 4.0, 'clone': 4.0, 'mprotect': 4.0, 'mremap': 4.0,
        'unshare': 5.0, 'mount': 5.0,

        # Moderate risk - 3x weight
        'keyctl': 3.0, 'quotactl': 3.0, 'swapon': 3.0,

        # Noise reduction - very common benign calls
        'getuid': 0.2, 'getgid': 0.2, 'getpid': 0.2, 'getppid': 0.2,
        'read': 0.3, 'write': 0.3, 'close': 0.2, 'brk': 0.2,
        'geteuid': 0.2, 'getegid': 0.2, 'getdents64': 0.3
    }

    # Attack pattern signatures and other loop-invariant name lists, built once.
    privilege_escalation_calls = ['capset', 'setuid', 'setgid', 'setfsuid', 'setgroups']
    system_modification_calls = ['reboot', 'kexec_load', 'pivot_root', 'mount', 'umount2']
    unknown_dangerous_calls = [f'syscall_{i}' for i in [252, 254, 255, 256, 258, 259, 265, 266]]
    process_injection_calls = ['ptrace', 'clone', 'unshare']
    critical_syscalls = ['reboot', 'kexec_load', 'syscall_265', 'ptrace', 'pivot_root']
    common_benign_calls = ['getuid', 'getgid', 'getpid', 'read', 'write', 'close']

    features = []
    for log in logs:
        syscalls = log["kernel"]["syscall_counts"]
        # Clamp to 1 so every ratio below is defined for an empty log.
        total = max(1, sum(syscalls.values()))

        # 1. Weighted activity (log-transformed when stored, to damp outliers)
        weighted_sum = sum(cnt * syscall_weights.get(sc, 1.0) for sc, cnt in syscalls.items())

        # 2. Pattern-based features (frequency matters, not just presence)
        priv_esc_intensity = sum(syscalls.get(sc, 0) for sc in privilege_escalation_calls) / total
        system_mod_intensity = sum(syscalls.get(sc, 0) for sc in system_modification_calls) / total
        unknown_intensity = sum(syscalls.get(sc, 0) for sc in unknown_dangerous_calls) / total
        injection_intensity = sum(syscalls.get(sc, 0) for sc in process_injection_calls) / total

        # 3. Red flag score. A critical syscall scores 2 * count when it occurs
        #    more than once, or when at least two different critical syscalls
        #    are present in this log; a lone single occurrence scores only 0.5.
        present_critical = [sc for sc in critical_syscalls if sc in syscalls]
        several_critical_present = len(present_critical) > 1
        red_flag_score = 0
        for critical_sc in present_critical:
            count = syscalls[critical_sc]
            if count > 1 or (count >= 1 and several_critical_present):
                red_flag_score += count * 2
            elif count == 1:
                red_flag_score += 0.5  # Minor flag for single occurrence

        # 4. Behavioral anomaly indicators
        syscall_counts = list(syscalls.values())
        unique_syscalls = len(syscalls)

        # Concentration: share of all calls taken by the most frequent syscall
        max_concentration = max(syscall_counts) / total if syscall_counts else 0

        # Diversity vs intensity trade-off
        diversity_intensity_ratio = unique_syscalls / np.log1p(total)

        # 5. Noise filtering features
        common_benign_ratio = sum(syscalls.get(sc, 0) for sc in common_benign_calls) / total

        # 6. Burst detection: largest z-score among the per-syscall counts
        burst_score = 0
        if syscall_counts:
            mean_count = np.mean(syscall_counts)
            std_count = np.std(syscall_counts)
            if std_count > 0:
                burst_score = max((c - mean_count) / std_count for c in syscall_counts)

        # 7. Composite risk indicators
        # (privilege + system modification) scaled by unknown-syscall density
        combined_risk = (priv_esc_intensity + system_mod_intensity) * unknown_intensity

        # Stealth indicator: high activity with low diversity (potential focused attack)
        stealth_indicator = np.log1p(total) / max(1, unique_syscalls)

        features.append([
            np.log1p(weighted_sum),           # Weighted activity (log-normalized)
            priv_esc_intensity,               # Privilege escalation density
            system_mod_intensity,             # System modification density
            unknown_intensity,                # Unknown syscall density
            injection_intensity,              # Process injection density
            red_flag_score,                   # Smart red flag score
            unique_syscalls,                  # Syscall diversity
            max_concentration,                # Activity concentration
            diversity_intensity_ratio,        # Diversity/intensity balance
            1 - common_benign_ratio,          # Non-benign activity ratio
            burst_score,                      # Burst activity score
            combined_risk,                    # Multi-pattern risk
            stealth_indicator,                # Stealth attack indicator
            np.log1p(total),                  # Total activity (log)
            # Specific dangerous combinations
            int(priv_esc_intensity > 0.01 and system_mod_intensity > 0.01),  # Priv+System
            int(unknown_intensity > 0.02 and injection_intensity > 0.01),    # Unknown+Injection
            int(red_flag_score > 2),          # Multiple critical flags
            # Activity patterns
            int(total > 1000 and unique_syscalls < 20),  # High activity, low diversity
            int(burst_score > 3),             # Significant burst detected
            min(5.0, red_flag_score / max(1, total/100))  # Red flag intensity normalized
        ])

    return pd.DataFrame(features, columns=[
        'weighted_activity', 'priv_esc_intensity', 'system_mod_intensity',
        'unknown_intensity', 'injection_intensity', 'red_flag_score',
        'unique_syscalls', 'max_concentration', 'diversity_intensity_ratio',
        'non_benign_ratio', 'burst_score', 'combined_risk', 'stealth_indicator',
        'total_activity', 'priv_system_combo', 'unknown_injection_combo',
        'multiple_red_flags', 'focused_attack_pattern', 'burst_detected',
        'normalized_red_flags'
    ])

def load_data_and_extract_features(path):
    """Read a JSON log file and return its feature frame and binary labels.

    Parameters
    ----------
    path : str
        Location of a JSON file holding a list of log records.

    Returns
    -------
    tuple
        ``(features, labels)`` — ``features`` is the DataFrame produced by
        ``smart_feature_extraction``; ``labels`` is a list with 1 for records
        whose ``"label"`` equals ``"attack"`` and 0 for all others.
    """
    with open(path, "r") as handle:
        records = json.load(handle)

    feature_frame = smart_feature_extraction(records)
    binary_labels = [int(record["label"] == "attack") for record in records]
    return feature_frame, binary_labels

def _flagged_precision_recall(y_true, anomaly_scores, threshold):
    """Precision and recall of the rule "score <= threshold means attack"."""
    flagged = anomaly_scores <= threshold
    hits = np.sum((y_true == 1) & flagged)
    # max(1, ...) keeps both ratios defined when nothing is flagged / no attacks exist.
    precision = hits / max(1, np.sum(flagged))
    recall = hits / max(1, np.sum(y_true == 1))
    return precision, recall


def find_three_tier_thresholds(y_true, anomaly_scores):
    """Find score thresholds for ATTACK/SUSPICIOUS/SAFE classification.

    Lower scores are treated as more anomalous: a sample counts as flagged at
    a given threshold when its score is ``<= threshold``.

    Parameters
    ----------
    y_true : array-like of int
        Ground-truth labels, 1 for attack and 0 for normal. Lists are accepted.
    anomaly_scores : array-like of float
        One score per sample. Lists are accepted.

    Returns
    -------
    tuple
        ``(attack_threshold, safe_threshold)``.

        * ``attack_threshold`` — the first of the 5th..45th percentiles (step
          5) reaching recall >= 0.98 with precision >= 0.1, preferring higher
          recall; failing that, the first of the 1st..29th percentiles with
          recall >= 0.999; failing that, the 10th percentile.
        * ``safe_threshold`` — among the 60th..90th percentiles (step 5), the
          one with the highest precision subject to precision >= 0.3 and
          recall >= 0.8; failing that, the 80th percentile.

    Side effects
    ------------
    Prints the minimum and maximum score.
    """
    # Coerce up front so plain Python lists work with the element-wise logic.
    y_true = np.asarray(y_true)
    anomaly_scores = np.asarray(anomaly_scores, dtype=float)

    # Sort scores to understand distribution
    sorted_scores = np.sort(anomaly_scores)

    print(f"Score range: {sorted_scores.min():.3f} to {sorted_scores.max():.3f}")

    # Find threshold that catches most attacks (high recall threshold)
    attack_threshold_candidates = np.percentile(sorted_scores, range(5, 50, 5))

    best_attack_threshold = None
    best_recall = 0

    for threshold in attack_threshold_candidates:
        precision, recall = _flagged_precision_recall(y_true, anomaly_scores, threshold)

        # We want high recall (>= 0.98) with reasonable precision (>= 0.1)
        if recall >= 0.98 and precision >= 0.1:
            if recall > best_recall:
                best_recall = recall
                best_attack_threshold = threshold

    # If no good threshold found, use one that gives (near-)perfect recall
    if best_attack_threshold is None:
        for threshold in np.percentile(sorted_scores, range(1, 30)):
            _, recall = _flagged_precision_recall(y_true, anomaly_scores, threshold)
            if recall >= 0.999:
                best_attack_threshold = threshold
                break

    # Final fallback for attack threshold
    if best_attack_threshold is None:
        best_attack_threshold = np.percentile(sorted_scores, 10)

    # Find safe threshold (high precision threshold)
    safe_threshold_candidates = np.percentile(sorted_scores, range(60, 95, 5))

    best_safe_threshold = None
    best_precision = 0

    for threshold in safe_threshold_candidates:
        precision, recall = _flagged_precision_recall(y_true, anomaly_scores, threshold)

        # We want the best precision (at least 0.3) while keeping recall >= 0.8
        if precision >= 0.3 and recall >= 0.8:
            if precision > best_precision:
                best_precision = precision
                best_safe_threshold = threshold

    # Default safe threshold if none found
    if best_safe_threshold is None:
        best_safe_threshold = np.percentile(sorted_scores, 80)

    return best_attack_threshold, best_safe_threshold

def classify_three_tier(scores, attack_threshold, safe_threshold):
    """Map each anomaly score to one of 'ATTACK', 'SUSPICIOUS' or 'SAFE'.

    A score at or below ``attack_threshold`` becomes 'ATTACK'; otherwise a
    score at or below ``safe_threshold`` becomes 'SUSPICIOUS'; anything else
    becomes 'SAFE'. The attack check is evaluated first, so it wins whenever
    both comparisons would hold.

    Args:
        scores: Iterable of numeric anomaly scores.
        attack_threshold: Upper bound (inclusive) of the ATTACK tier.
        safe_threshold: Upper bound (inclusive) of the SUSPICIOUS tier.

    Returns:
        A list of tier names, one per input score, in input order.
    """
    return [
        'ATTACK' if value <= attack_threshold
        else 'SUSPICIOUS' if value <= safe_threshold
        else 'SAFE'
        for value in scores
    ]

def evaluate_three_tier(y_true, classifications):
    """Print a three-tier classification report and return its key metrics.

    Args:
        y_true: Sequence of ground-truth labels. Entries equal to 1 are
            counted as attacks and entries equal to 0 as normal; any other
            value is ignored by every count.
        classifications: Sequence of 'ATTACK' / 'SUSPICIOUS' / 'SAFE' strings
            aligned element-for-element with ``y_true``.

    Returns:
        dict with:
            'attack_detection_rate': share of attacks flagged ATTACK or SUSPICIOUS,
            'critical_miss_rate': share of attacks flagged SAFE,
            'false_attack_rate': share of normals flagged ATTACK,
            'efficiency': share of all samples that are attacks flagged ATTACK
                or normals flagged SAFE,
            'missed_attacks': number of attacks flagged SAFE,
            'false_attacks': number of normals flagged ATTACK.
        Every rate is 0 when its denominator is 0.
    """

    def _rate(count, total):
        # Single guarded division used for every ratio, so an empty class
        # (or an entirely empty input) reports 0 instead of dividing by zero.
        return count / total if total > 0 else 0

    # Convert to arrays so the comparisons below are element-wise
    y_true = np.array(y_true)
    classifications = np.array(classifications)

    is_attack = y_true == 1
    is_normal = y_true == 0

    # Count outcomes
    total_attacks = np.sum(is_attack)
    total_normal = np.sum(is_normal)
    total_samples = total_attacks + total_normal

    # Attack detection
    attacks_flagged_as_attack = np.sum(is_attack & (classifications == 'ATTACK'))
    attacks_flagged_as_suspicious = np.sum(is_attack & (classifications == 'SUSPICIOUS'))
    attacks_flagged_as_safe = np.sum(is_attack & (classifications == 'SAFE'))

    # Normal classification
    normal_flagged_as_attack = np.sum(is_normal & (classifications == 'ATTACK'))
    normal_flagged_as_suspicious = np.sum(is_normal & (classifications == 'SUSPICIOUS'))
    normal_flagged_as_safe = np.sum(is_normal & (classifications == 'SAFE'))

    # Calculate metrics
    attack_detection_rate = _rate(attacks_flagged_as_attack, total_attacks)
    suspicious_detection_rate = _rate(attacks_flagged_as_suspicious, total_attacks)
    missed_attack_rate = _rate(attacks_flagged_as_safe, total_attacks)

    false_attack_rate = _rate(normal_flagged_as_attack, total_normal)
    normal_suspicious_rate = _rate(normal_flagged_as_suspicious, total_normal)
    correct_safe_rate = _rate(normal_flagged_as_safe, total_normal)

    # Derived figures are computed once and reused for both the printout and
    # the returned dict. Efficiency previously divided without a guard.
    combined_detection_rate = attack_detection_rate + suspicious_detection_rate
    efficiency = _rate(attacks_flagged_as_attack + normal_flagged_as_safe, total_samples)

    print("="*70)
    print("THREE-TIER CLASSIFICATION RESULTS")
    print("="*70)
    print(f"ATTACK LOGS ({total_attacks} total):")
    print(f"  ✓ Flagged as ATTACK:     {attacks_flagged_as_attack:4d} ({attack_detection_rate:.1%})")
    print(f"  ⚠ Flagged as SUSPICIOUS: {attacks_flagged_as_suspicious:4d} ({suspicious_detection_rate:.1%})")
    print(f"  ✗ Missed (flagged SAFE): {attacks_flagged_as_safe:4d} ({missed_attack_rate:.1%}) ← CRITICAL")

    print(f"\nNORMAL LOGS ({total_normal} total):")
    print(f"  ✗ False ATTACK flags:    {normal_flagged_as_attack:4d} ({false_attack_rate:.1%}) ← BAD")
    print(f"  ⚠ SUSPICIOUS flags:      {normal_flagged_as_suspicious:4d} ({normal_suspicious_rate:.1%}) ← OK")
    print(f"  ✓ Correctly SAFE:        {normal_flagged_as_safe:4d} ({correct_safe_rate:.1%})")

    print(f"\nKEY PERFORMANCE INDICATORS:")
    print(f"  🎯 Attack Detection (ATTACK+SUSPICIOUS): {combined_detection_rate:.1%}")
    print(f"  🚨 Critical Misses (attacks as SAFE):    {missed_attack_rate:.1%}")
    print(f"  ❌ False Attack Rate:                     {false_attack_rate:.1%}")
    print(f"  📊 Efficiency (correct classifications):  {efficiency:.1%}")

    # Return key metrics
    return {
        'attack_detection_rate': combined_detection_rate,
        'critical_miss_rate': missed_attack_rate,
        'false_attack_rate': false_attack_rate,
        'efficiency': efficiency,
        'missed_attacks': attacks_flagged_as_safe,
        'false_attacks': normal_flagged_as_attack
    }

# End-to-end run: load the three datasets, fit a scaled IsolationForest on the
# training set, score validation and test, derive the two tier thresholds and
# print the three-tier evaluation.

# Load and process all datasets
print("Loading and processing datasets...")
X_train, y_train = load_data_and_extract_features(train)
X_validate, y_validate = load_data_and_extract_features(validate)
X_test, y_test = load_data_and_extract_features(test)

# sum(y_*) is reported as the attack count — assumes labels are 0/1 ints; TODO confirm
print(f"Training set: {len(X_train)} samples ({sum(y_train)} attacks)")
print(f"Validation set: {len(X_validate)} samples ({sum(y_validate)} attacks)")
print(f"Test set: {len(X_test)} samples ({sum(y_test)} attacks)")

# Create optimized model
print("\nTraining optimized model...")
# The scaler is fitted on the training features only; the same fitted scaler is
# reused below for the validation and test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Use more conservative contamination and more trees
iso_forest = IsolationForest(
    n_estimators=1500,
    max_samples=0.8,
    contamination=0.02,  # Very low contamination
    max_features=0.9,
    bootstrap=True,
    random_state=42,  # fixed seed so repeated runs build the same forest
    verbose=1,  # presumably the source of the "[Parallel(...)]" progress lines in the cell output
    n_jobs=-1
)

iso_forest.fit(X_train_scaled)

# Get anomaly scores
print("Computing anomaly scores...")
X_validate_scaled = scaler.transform(X_validate)
X_test_scaled = scaler.transform(X_test)

# Downstream code treats lower scores as more anomalous: classify_three_tier
# labels score <= attack_threshold as ATTACK.
val_scores = iso_forest.decision_function(X_validate_scaled)
test_scores = iso_forest.decision_function(X_test_scaled)

# Combine for threshold finding
combined_scores = np.concatenate([val_scores, test_scores])
# `+` must concatenate here, so this assumes both label containers are plain
# Python lists (NumPy arrays would add element-wise) — TODO confirm.
combined_labels = np.array(y_validate + y_test)

# Find optimal thresholds
# NOTE(review): the thresholds are selected using combined_labels/combined_scores
# and the evaluation below runs on those same arrays, so the printed metrics
# are measured on the data the thresholds were tuned on.
print("Finding optimal three-tier thresholds...")
attack_threshold, safe_threshold = find_three_tier_thresholds(combined_labels, combined_scores)

print(f"Attack threshold: {attack_threshold:.4f}")
print(f"Safe threshold: {safe_threshold:.4f}")

# Classify all samples
combined_classifications = classify_three_tier(combined_scores, attack_threshold, safe_threshold)

# Evaluate
metrics = evaluate_three_tier(combined_labels, combined_classifications)

# Final report: feature list, threshold-tuning hints and the chosen thresholds.

# NOTE: no importance ranking is computed here; this lists the first ten
# feature columns in their existing column order.
print("\nFEATURE IMPORTANCE INSIGHTS:")
feature_names = X_train.columns.tolist()
print("Top features to monitor:")
for i, name in enumerate(feature_names[:10]):
    print(f"  {i+1:2d}. {name}")

print("\nRECOMMENDATIONS:")
# classify_three_tier labels score <= attack_threshold as ATTACK and
# score > safe_threshold as SAFE. Therefore:
#   * attacks missed as SAFE can only be reduced by RAISING safe_threshold;
#     moving attack_threshold does not change which samples are SAFE.
#   * normals flagged as ATTACK are reduced by LOWERING attack_threshold.
# The step is 10% of the threshold's magnitude; using abs() keeps the move in
# the stated direction whether the threshold is negative or positive.
threshold_nudge = 0.1
if metrics['critical_miss_rate'] > 0.02:
    suggested_safe_threshold = safe_threshold + threshold_nudge * abs(safe_threshold)
    print(f"⚠️  Consider raising safe threshold to: {suggested_safe_threshold:.4f}")
if metrics['false_attack_rate'] > 0.1:
    suggested_attack_threshold = attack_threshold - threshold_nudge * abs(attack_threshold)
    print(f"⚠️  Consider lowering attack threshold to: {suggested_attack_threshold:.4f}")

print("\nThreshold Configuration:")
print(f"  attack_threshold = {attack_threshold:.6f}")
print(f"  safe_threshold = {safe_threshold:.6f}")

Loading and processing datasets...
Training set: 833 samples (0 attacks)
Validation set: 4372 samples (0 attacks)
Test set: 746 samples (746 attacks)

Training optimized model...


[Parallel(n_jobs=2)]: Using backend ThreadingBackend with 2 concurrent workers.
[Parallel(n_jobs=2)]: Done   2 out of   2 | elapsed:    3.1s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 1500 out of 1500 | elapsed:    0.3s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s


Computing anomaly scores...


[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1500 out of 1500 | elapsed:    0.6s finished
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.1s


Finding optimal three-tier thresholds...
Score range: -0.172 to 0.226
Attack threshold: -0.0017
Safe threshold: 0.1803
THREE-TIER CLASSIFICATION RESULTS
ATTACK LOGS (746 total):
  ✓ Flagged as ATTACK:      197 (26.4%)
  ⚠ Flagged as SUSPICIOUS:  506 (67.8%)
  ✗ Missed (flagged SAFE):   43 (5.8%) ← CRITICAL

NORMAL LOGS (4372 total):
  ✗ False ATTACK flags:     316 (7.2%) ← BAD
  ⚠ SUSPICIOUS flags:      3088 (70.6%) ← OK
  ✓ Correctly SAFE:         968 (22.1%)

KEY PERFORMANCE INDICATORS:
  🎯 Attack Detection (ATTACK+SUSPICIOUS): 94.2%
  🚨 Critical Misses (attacks as SAFE):    5.8%
  ❌ False Attack Rate:                     7.2%
  📊 Efficiency (correct classifications):  22.8%

FEATURE IMPORTANCE INSIGHTS:
Top features to monitor:
   1. weighted_activity
   2. priv_esc_intensity
   3. system_mod_intensity
   4. unknown_intensity
   5. injection_intensity
   6. red_flag_score
   7. unique_syscalls
   8. max_concentration
   9. diversity_intensity_ratio
  10. non_benign_ratio

RECOMMENDA

[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 1500 out of 1500 | elapsed:    0.3s finished
