In [27]:
import river
import pandas as pd

In [28]:
data_path = "../data/resource_events2.csv"
df = pd.read_csv(data_path)

print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())

Dataset shape: (5024, 25)
Columns: ['pid', 'comm', 'uid', 'gid', 'ppid', 'user_pid', 'user_ppid', 'cgroup_id', 'cgroup_name', 'user', 'cpu_ns', 'user_faults', 'kernel_faults', 'vm_mmap_bytes', 'vm_munmap_bytes', 'vm_brk_grow_bytes', 'vm_brk_shrink_bytes', 'bytes_written', 'bytes_read', 'isActive', 'wall_time_dt', 'wall_time_ms', 'container_id', 'container_image', 'container_labels_json']


In [29]:
feature_cols = [
    "cpu_ns",
    "user_faults",
    "kernel_faults",
    "vm_mmap_bytes",
    "vm_munmap_bytes",
    "vm_brk_grow_bytes",
    "vm_brk_shrink_bytes",
    "bytes_written",
    "bytes_read"
]

context_cols = [
    "pid",
    "comm",
    "user",
    "container_id",
    "container_image",
    "wall_time_ms"
]

In [30]:
X = df[feature_cols].copy()
context = df[context_cols].copy()

print("\nFeature matrix sample:")
display(X.head())

print("\nContext sample:")
display(context.head())


Feature matrix sample:


Unnamed: 0,cpu_ns,user_faults,kernel_faults,vm_mmap_bytes,vm_munmap_bytes,vm_brk_grow_bytes,vm_brk_shrink_bytes,bytes_written,bytes_read
0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0
2,562689,2,0,0,0,0,0,0,0
3,275860759,2757,5,0,0,0,0,304006,173388
4,0,0,0,0,0,0,0,0,0



Context sample:


Unnamed: 0,pid,comm,user,container_id,container_image,wall_time_ms
0,307,weston,aleyi,,,1757522094277
1,680,systemd-logind,root,,,1757522094277
2,2444,buildkitd,root,,,1757522094277
3,3798,ThreadPool,systemd-resolve,f111e1925e8b07a6b917d9c963da0ce52f4d23bf654dc5...,clickhouse/clickhouse-server:latest,1757522094277
4,2461,Relay(1457),root,,,1757522094277


In [31]:
X = X.fillna(0)

In [32]:
records = []
for i, row in X.iterrows():
    record = row.to_dict()
    # Keep context attached
    records.append((record, context.iloc[i].to_dict()))

print("\nExample record with context:")
print(records[0])


Example record with context:
({'cpu_ns': 0, 'user_faults': 0, 'kernel_faults': 0, 'vm_mmap_bytes': 0, 'vm_munmap_bytes': 0, 'vm_brk_grow_bytes': 0, 'vm_brk_shrink_bytes': 0, 'bytes_written': 0, 'bytes_read': 0}, {'pid': 307, 'comm': 'weston', 'user': 'aleyi', 'container_id': nan, 'container_image': nan, 'wall_time_ms': 1757522094277})


In [33]:
from river import compose, preprocessing, anomaly
import numpy as np

In [44]:
from collections import defaultdict
import math
def build_hst_model():
    return compose.Pipeline(
    preprocessing.MinMaxScaler(),
    anomaly.HalfSpaceTrees(
        n_trees=50,
        height=8,
        window_size=250,
        )
    )

first_nonzero = {}
seen_counts = defaultdict(int)
WARMUP_EVENTS = 200
models = defaultdict(build_hst_model)
scores = []

for features, meta in records:
    container = meta.get("container_image")
    if not container or (isinstance(container, float) and math.isnan(container)):
        continue

    model = models[container]
    seen_counts[container] += 1
    idx = seen_counts[container]
    
    score = model.score_one(features)
    model.learn_one(features) 
    if score == 0:
        continue
    if container not in first_nonzero:
        first_nonzero[container] = idx
        print(f"‚ö° First non-zero anomaly score for {container} "
              f"at event #{idx} (score={score:.4f})")
    
    record_out = {**meta, **features, "anomaly_score": score}
    scores.append(record_out)

container_names = list(models.keys())
print(f"‚úÖ Trained {len(models)} per-container models:")
for name in container_names:
    print(f"   - {name}")


‚úÖ Trained 4 per-container models:
   - clickhouse/clickhouse-server:latest
   - apache/kafka:latest
   - grafana/grafana:latest
   - provectuslabs/kafka-ui:latest


In [45]:
for container, count in seen_counts.items():
    if count < WARMUP_EVENTS:
        print(f"‚ö†Ô∏è Container {container} has only {count} events (< {WARMUP_EVENTS}), skipping metrics")


‚ö†Ô∏è Container clickhouse/clickhouse-server:latest has only 89 events (< 200), skipping metrics
‚ö†Ô∏è Container apache/kafka:latest has only 89 events (< 200), skipping metrics
‚ö†Ô∏è Container grafana/grafana:latest has only 175 events (< 200), skipping metrics
‚ö†Ô∏è Container provectuslabs/kafka-ui:latest has only 89 events (< 200), skipping metrics


In [46]:
if scores:
    results = pd.DataFrame(scores)
    for container, df_group in results.groupby("container_image"):
        if df_group.empty:
            continue
        mean_score = df_group["anomaly_score"].mean()
        std_score = df_group["anomaly_score"].std()
        min_score = df_group["anomaly_score"].min()
        max_score = df_group["anomaly_score"].max()
        q95 = np.percentile(df_group["anomaly_score"], 95)
        q99 = np.percentile(df_group["anomaly_score"], 99)
        rate_q95 = (df_group["anomaly_score"] > q95).mean()
        rate_q99 = (df_group["anomaly_score"] > q99).mean()
    
        print(f"\nüìä Metrics for container: {container}")
        print(f"Mean: {mean_score:.4f}, Std: {std_score:.4f}")
        print(f"Min: {min_score:.4f}, Max: {max_score:.4f}")
        print(f"95th percentile: {q95:.4f} ‚Üí anomaly rate ~ {rate_q95*100:.2f}%")
        print(f"99th percentile: {q99:.4f} ‚Üí anomaly rate ~ {rate_q99*100:.2f}%")
else:
    print("‚ö†Ô∏è No containers reached warm-up threshold, no metrics available yet.")

‚ö†Ô∏è No containers reached warm-up threshold, no metrics available yet.


In [42]:
display(results)

Unnamed: 0,pid,comm,user,container_id,container_image,wall_time_ms,cpu_ns,user_faults,kernel_faults,vm_mmap_bytes,vm_munmap_bytes,vm_brk_grow_bytes,vm_brk_shrink_bytes,bytes_written,bytes_read,anomaly_score
0,3936,grafana,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757521555605,14190424,2,0,4096,4096,0,0,360,6300,0
1,6662,altinity-clickh,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757521555605,146132,0,0,0,0,0,0,8,8,0
2,3936,grafana,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757521565598,18106526,1,0,4096,4096,0,0,424,424,0
3,6662,altinity-clickh,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757521565598,0,0,0,0,0,0,0,0,0,0
4,3936,grafana,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757521575537,20540297,1,0,4096,4096,0,0,512,512,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
161,3798,ThreadPool,systemd-resolve,f111e1925e8b07a6b917d9c963da0ce52f4d23bf654dc5...,clickhouse/clickhouse-server:latest,1757522074363,241193049,22979,5,0,0,0,0,1248136,181580,0
162,3797,ExpirationReape,aleyi,8f1c9b9e79a5fc17b1d8041c747462a33ed682a85abf04...,apache/kafka:latest,1757522074363,102319621,6,0,0,0,0,0,11946,10851,0
163,3936,grafana,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757522074363,16044085,0,0,0,0,0,0,400,400,0
164,6662,altinity-clickh,,919236c9febc1b0ca2a6baf24fe5ab4b2fa35e0b7f838c...,grafana/grafana:latest,1757522074363,0,0,0,0,0,0,0,0,0,0
