In [1]:
from minio_service import MinioService

In [2]:

# configure and instantiate
minio_svc = MinioService()

# list and fetch raw JSON blobs
objects = minio_svc.list_all_objects()
print(f"Found {len(objects)} objects in bucket.")

# pick one and load it
sample_obj = objects[0].object_name
raw = minio_svc.get_object_data(sample_obj)
print(raw[:2], "…")


INFO:root:Found 28668 objects in bucket 'warehouse'.


Found 28668 objects in bucket.
{" …


In [3]:
# cell 2
from data_parser import DataParser

parsed = DataParser.parse_minio_data(raw)
print(f"Parsed {len(parsed)} records; sample:", parsed[0])


Parsed 1000 records; sample: {'name': 'node_systemd_unit_state', 'value': '1', 'labels': {'instance': '10.71.0.59:9102', '__name__': 'node_systemd_unit_state', 'os_type': 'linux', 'vm': 'Ubuntu', 'name': 'initrd-switch-root.service', 'state': 'inactive', 'job': 'node', 'type': 'oneshot'}, 'timestamp': datetime.datetime(2025, 4, 24, 14, 17, 18, tzinfo=<UTC>)}


In [4]:
# Add after parsing data
print("Value types in parsed data:")
value_types = {}
for record in parsed:
    value_type = type(record.get('value')).__name__
    value_types[value_type] = value_types.get(value_type, 0) + 1
print(value_types)

# Check for string 'NaN'
string_nan_count = sum(1 for record in parsed if record.get('value') == 'NaN')
print(f"Records with 'NaN' as string: {string_nan_count}")

Value types in parsed data:
{'str': 1000}
Records with 'NaN' as string: 0


In [5]:
# cell 3
from metrics_processor import MetricsProcessor
import pandas as pd

# keep only your “CRITICAL_METRICS”
critical = MetricsProcessor.filter_critical_data(parsed)

# turn into list of dicts with {vm, metric, value, …}
structured = MetricsProcessor.structure_metrics(critical)

# DataFrame for downstream
df = pd.DataFrame(structured)
display(df.head())


Unnamed: 0,vm,metric,value,state,service,timestamp
0,Ubuntu,node_systemd_unit_state,1.0,inactive,initrd-switch-root.service,2025-04-24 14:17:18+00:00
1,Ubuntu,node_systemd_unit_state,0.0,activating,initrd-switch-root.target,2025-04-24 14:17:18+00:00
2,Ubuntu,node_systemd_unit_state,0.0,active,initrd-switch-root.target,2025-04-24 14:17:18+00:00
3,Ubuntu,node_systemd_unit_state,0.0,deactivating,initrd-switch-root.target,2025-04-24 14:17:18+00:00
4,Ubuntu,node_systemd_unit_state,0.0,failed,initrd-switch-root.target,2025-04-24 14:17:18+00:00


In [11]:
# cell 4
from snapshot_generator import SnapshotGenerator

snap_gen = SnapshotGenerator()
# group into 5-minute windows, per VM
snapshots, per_vm_prompts, multi_vm = snap_gen.generate_prompts_from_df(df, window_minutes=5)

print(f"→ {len(snapshots)} snapshots")
print("Example prompt:\n", per_vm_prompts[0])


→ 1 snapshots


IndexError: list index out of range

In [7]:
# cell 5
from feature_extractor import FeatureExtractor
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset

# extract feature vectors
features = np.vstack([FeatureExtractor.extract_features(s) for s in snapshots])

# normalize
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(features)
fn = scaler.transform(features)

# torch DataLoader
tensor = torch.tensor(fn, dtype=torch.float32)
dl = DataLoader(TensorDataset(tensor), batch_size=2, shuffle=True)


array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [8]:
# cell 6
from anomaly_detector import AnomalyDetector
import torch

# build and train
model = AnomalyDetector(input_dim=tensor.shape[1], latent_dim=15, dropout_rate=0.2)
model.train_autoencoder(dl, num_epochs=300, lr=1e-3, weight_decay=1e-5)

# compute threshold on the same normalized dataset
model.compute_threshold(tensor)
print("Threshold:", model.threshold.item())


Epoch 1: Loss = 0.0759
Epoch 2: Loss = 0.0633
Epoch 3: Loss = 0.0621
Epoch 4: Loss = 0.0619
Epoch 5: Loss = 0.0649
Epoch 6: Loss = 0.0587
Epoch 7: Loss = 0.0596
Epoch 8: Loss = 0.0373
Epoch 9: Loss = 0.0413
Epoch 10: Loss = 0.0573
Epoch 11: Loss = 0.0451
Epoch 12: Loss = 0.0511
Epoch 13: Loss = 0.0396
Epoch 14: Loss = 0.0508
Epoch 15: Loss = 0.0441
Epoch 16: Loss = 0.0354
Epoch 17: Loss = 0.0408
Epoch 18: Loss = 0.0308
Epoch 19: Loss = 0.0385
Epoch 20: Loss = 0.0357
Epoch 21: Loss = 0.0354
Epoch 22: Loss = 0.0364
Epoch 23: Loss = 0.0326
Epoch 24: Loss = 0.0326
Epoch 25: Loss = 0.0320
Epoch 26: Loss = 0.0269
Epoch 27: Loss = 0.0260
Epoch 28: Loss = 0.0251
Epoch 29: Loss = 0.0209
Epoch 30: Loss = 0.0264
Epoch 31: Loss = 0.0261
Epoch 32: Loss = 0.0241
Epoch 33: Loss = 0.0218
Epoch 34: Loss = 0.0201
Epoch 35: Loss = 0.0179
Epoch 36: Loss = 0.0227
Epoch 37: Loss = 0.0208
Epoch 38: Loss = 0.0172
Epoch 39: Loss = 0.0201
Epoch 40: Loss = 0.0195
Epoch 41: Loss = 0.0158
Epoch 42: Loss = 0.0138
E

In [5]:

# 1. First, inspect your raw parsed data more thoroughly
def inspect_parsed_data(parsed_data):
    print(f"Total records: {len(parsed_data)}")

    # Check for string 'NaN' vs actual np.nan values
    string_nan_count = sum(1 for record in parsed_data if record.get('value') == 'NaN')

    # Check data types of values
    value_types = {}
    for record in parsed_data:
        value_type = type(record.get('value')).__name__
        value_types[value_type] = value_types.get(value_type, 0) + 1

    print(f"Records with 'NaN' as string: {string_nan_count}")
    print("Value types distribution:", value_types)

    # Sample of each value type
    for val_type in value_types:
        sample = next((rec for rec in parsed_data if type(rec.get('value')).__name__ == val_type), None)
        if sample:
            print(f"\nSample of {val_type}:", sample)

# 2. Modify MetricsProcessor to handle NaN values properly
def patch_structure_metrics():
    from metrics_processor import MetricsProcessor
    import numpy as np

    # Store the original method
    original_structure_metrics = MetricsProcessor.structure_metrics

    # Create a wrapper that handles NaN values
    def structure_metrics_fixed(data):
        for item in data:
            # Convert string 'NaN' to actual np.nan
            if item.get('value') == 'NaN':
                item['value'] = np.nan
            # Try converting string numeric values to float
            elif isinstance(item.get('value'), str):
                try:
                    item['value'] = float(item['value'])
                except ValueError:
                    # If conversion fails, set to np.nan
                    item['value'] = np.nan

        # Now call the original method
        return original_structure_metrics(data)

    # Replace the method
    MetricsProcessor.structure_metrics = structure_metrics_fixed
    print("Patched MetricsProcessor.structure_metrics to handle NaN values")

# 3. Add NaN handling to the feature extraction process
def inspect_features(features):
    import numpy as np

    print(f"Features shape: {features.shape}")

    # Check for NaN values
    nan_count = np.isnan(features).sum()
    nan_rows = np.isnan(features).any(axis=1).sum()

    print(f"Total NaN values: {nan_count}")
    print(f"Rows with at least one NaN: {nan_rows} out of {features.shape[0]}")

    # Show distribution of NaN by column
    if nan_count > 0:
        nan_by_col = np.isnan(features).sum(axis=0)
        print("NaN distribution by column:")
        for i, count in enumerate(nan_by_col):
            if count > 0:
                print(f"  Column {i}: {count} NaN values")

# 4. Fix feature extraction to handle NaN values
def patch_feature_extractor():
    from feature_extractor import FeatureExtractor
    import numpy as np

    original_extract_features = FeatureExtractor.extract_features

    def extract_features_fixed(snapshot):
        # First get features using the original method
        features = original_extract_features(snapshot)

        # Replace any NaN values with appropriate defaults (0 or means)
        features = np.nan_to_num(features, nan=0.0)

        return features

    # Replace the method
    FeatureExtractor.extract_features = extract_features_fixed
    print("Patched FeatureExtractor.extract_features to handle NaN values")

# 5. Modify the model to handle potential NaN inputs
def patch_anomaly_detector():
    from anomaly_detector import AnomalyDetector
    import torch

    # Store original forward method
    original_forward = AnomalyDetector.forward

    # Create a wrapper that validates inputs
    def forward_fixed(self, x):
        # Check for NaN in input tensor
        if torch.isnan(x).any():
            print("WARNING: NaN values detected in model input")
            # Replace NaN with zeros
            x = torch.nan_to_num(x, nan=0.0)

        return original_forward(self, x)

    # Replace the method
    AnomalyDetector.forward = forward_fixed
    print("Patched AnomalyDetector.forward to handle NaN inputs")

# 6. Create a function to validate tensors before training
def validate_tensor(tensor, name="tensor"):
    import torch

    nan_count = torch.isnan(tensor).sum().item()
    inf_count = torch.isinf(tensor).sum().item()

    print(f"Validation of {name}:")
    print(f"  Shape: {tensor.shape}")
    print(f"  NaN values: {nan_count}")
    print(f"  Inf values: {inf_count}")
    print(f"  Min value: {tensor.min().item()}")
    print(f"  Max value: {tensor.max().item()}")

    return nan_count == 0 and inf_count == 0

# 7. Main debugging workflow
def main_debug_workflow(raw, parsed=None, df=None):
    import pandas as pd
    import numpy as np

    # If parsed data not provided, parse it
    if parsed is None:
        from data_parser import DataParser
        parsed = DataParser.parse_minio_data(raw)

    # Inspect parsed data
    print("\n=== INSPECTING PARSED DATA ===")
    inspect_parsed_data(parsed)

    # Patch MetricsProcessor
    print("\n=== PATCHING METRICS PROCESSOR ===")
    patch_structure_metrics()

    # Apply MetricsProcessor if df not provided
    if df is None:
        from metrics_processor import MetricsProcessor
        critical = MetricsProcessor.filter_critical_data(parsed)
        structured = MetricsProcessor.structure_metrics(critical)
        df = pd.DataFrame(structured)

    # Check DataFrame for NaN values
    print("\n=== INSPECTING DATAFRAME ===")
    print(f"DataFrame shape: {df.shape}")
    print(f"NaN values per column:")
    print(df.isna().sum())

    # Fill NaN values in the DataFrame
    print("\n=== FIXING DATAFRAME ===")
    # For numeric columns, replace NaN with 0
    numeric_cols = df.select_dtypes(include=['number']).columns
    for col in numeric_cols:
        nan_count = df[col].isna().sum()
        if nan_count > 0:
            print(f"Replacing {nan_count} NaN values in column '{col}' with 0")
            df[col] = df[col].fillna(0)

    # For non-numeric columns, replace NaN with appropriate defaults
    cat_cols = df.select_dtypes(exclude=['number']).columns
    for col in cat_cols:
        nan_count = df[col].isna().sum()
        if nan_count > 0:
            default = "unknown" if col != 'timestamp' else pd.Timestamp.now()
            print(f"Replacing {nan_count} NaN values in column '{col}' with '{default}'")
            df[col] = df[col].fillna(default)

    # Patch FeatureExtractor
    print("\n=== PATCHING FEATURE EXTRACTOR ===")
    patch_feature_extractor()

    # Generate snapshots and extract features
    print("\n=== GENERATING SNAPSHOTS AND FEATURES ===")
    from snapshot_generator import SnapshotGenerator
    snap_gen = SnapshotGenerator()
    snapshots, per_vm_prompts, multi_vm = snap_gen.generate_prompts_from_df(df, window_minutes=5)
    print(f"Generated {len(snapshots)} snapshots")

    # Extract and inspect features
    from feature_extractor import FeatureExtractor
    features = np.vstack([FeatureExtractor.extract_features(s) for s in snapshots])
    print("\n=== INSPECTING FEATURES ===")
    inspect_features(features)

    # Normalize features
    from sklearn.preprocessing import StandardScaler
    print("\n=== NORMALIZING FEATURES ===")
    # Handle potential NaN values before scaling
    features = np.nan_to_num(features, nan=0.0)
    scaler = StandardScaler().fit(features)
    fn = scaler.transform(features)
    print(f"Normalized features shape: {fn.shape}")
    # Check for NaN after normalization
    nan_after_norm = np.isnan(fn).sum()
    print(f"NaN values after normalization: {nan_after_norm}")

    # Convert to tensor
    import torch
    tensor = torch.tensor(fn, dtype=torch.float32)
    print("\n=== VALIDATING TENSOR ===")
    is_valid = validate_tensor(tensor, "features tensor")

    # Patch AnomalyDetector
    print("\n=== PATCHING ANOMALY DETECTOR ===")
    patch_anomaly_detector()

    # Create DataLoader
    from torch.utils.data import DataLoader, TensorDataset
    dl = DataLoader(TensorDataset(tensor), batch_size=2, shuffle=True)

    # Train model if tensor is valid
    if is_valid:
        print("\n=== TRAINING MODEL ===")
        from anomaly_detector import AnomalyDetector
        model = AnomalyDetector(input_dim=tensor.shape[1], latent_dim=15, dropout_rate=0.2)
        model.train_autoencoder(dl, num_epochs=10, lr=1e-3, weight_decay=1e-5)  # Reduced epochs for testing

        # Compute threshold
        print("\n=== COMPUTING THRESHOLD ===")
        model.compute_threshold(tensor)
        print("Threshold:", model.threshold.item())
    else:
        print("\nSkipping model training due to invalid tensor data")

    return df, features, tensor


df_fixed, features_fixed, tensor_fixed = main_debug_workflow(raw, parsed)


=== INSPECTING PARSED DATA ===
Total records: 1000
Records with 'NaN' as string: 40
Value types distribution: {'str': 1000}

Sample of str: {'name': 'node_systemd_unit_state', 'value': 'NaN', 'labels': {'instance': '10.71.0.59:9103', '__name__': 'node_systemd_unit_state', 'os_type': 'linux', 'vm': 'Lubuntu V2', 'name': 'sysstat-rotate.service', 'state': 'inactive', 'job': 'node', 'type': 'oneshot'}, 'timestamp': datetime.datetime(2025, 5, 14, 13, 36, 28, tzinfo=<UTC>)}

=== PATCHING METRICS PROCESSOR ===
Patched MetricsProcessor.structure_metrics to handle NaN values

=== INSPECTING DATAFRAME ===
DataFrame shape: (793, 6)
NaN values per column:
vm            0
metric        0
value        37
state        61
service      61
timestamp     0
dtype: int64

=== FIXING DATAFRAME ===
Replacing 37 NaN values in column 'value' with 0
Replacing 61 NaN values in column 'state' with 'unknown'
Replacing 61 NaN values in column 'service' with 'unknown'

=== PATCHING FEATURE EXTRACTOR ===
Patched Fe

AttributeError: type object 'AnomalyDetector' has no attribute 'forward'