In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 📁 Ensure folders exist
os.makedirs("output", exist_ok=True)
os.makedirs("output/plots", exist_ok=True)

# 📥 Load order files
normal_orders = pd.read_csv("data/normal_orders.csv")
drifted_orders = pd.read_csv("data/drifted_orders.csv")

# 🧪 Detect anomalies: out-of-order order_ids
# Both ID columns are materialised as plain lists. Only the drifted stream is
# scanned in this cell; normal_sequence is not read here and is kept only in
# case other cells rely on it.
normal_sequence = list(normal_orders["order_id"])
drifted_sequence = list(drifted_orders["order_id"])

# A violation is a row whose order_id is strictly smaller than the order_id of
# the row immediately before it. Each ID is paired with its real predecessor,
# so the first row (which has none) is never compared. The previous version
# compared the first row against a sentinel of -1, which logged a bogus
# "previous_order_id" for a first ID below -1 and raised TypeError for
# non-numeric IDs.
anomalies = [
    {
        # Zero-based index of the offending row within the drifted stream.
        "position_in_stream": position,
        "current_order_id": current_id,
        "previous_order_id": previous_id,
    }
    for position, (previous_id, current_id) in enumerate(
        zip(drifted_sequence, drifted_sequence[1:]), start=1
    )
    if current_id < previous_id
]

# ✅ Always include columns, even if anomalies is empty
# Passing the column list explicitly means an empty result still writes a
# header row to the CSV below.
anomaly_df = pd.DataFrame(anomalies, columns=["position_in_stream", "current_order_id", "previous_order_id"])

# 📤 Save anomaly log
anomaly_df.to_csv("output/anomaly_log.csv", index=False)

print(f"[OK] Anomaly detection completed. {len(anomalies)} out-of-order violations detected.")
print("Anomaly log saved to: output/anomaly_log.csv")

# 🛑 Skip heatmap if no anomalies
if not anomaly_df.empty:
    # 📊 Heatmap Visualization
    # Transposed so the grid has two rows (stream position, order id) and one
    # column per logged violation.
    heatmap_data = anomaly_df[["position_in_stream", "current_order_id"]].T

    fig, ax = plt.subplots(figsize=(12, 5))
    sns.heatmap(heatmap_data, cmap="Reds", cbar=True, annot=True, fmt=".0f", ax=ax)
    ax.set_title("Anomaly Heatmap (Order ID Violations due to Clock Drift)")

    # Heatmap cells are one unit tall, so 0.5 and 1.5 are the row centres.
    ax.set_yticks([0.5, 1.5])
    ax.set_yticklabels(['Position in Stream', 'Order ID'], rotation=0)

    fig.tight_layout()
    fig.savefig("output/plots/anomaly_heatmap.png")
    # The figure is written to disk and then closed rather than shown inline.
    plt.close(fig)
    print("Anomaly heatmap saved to: output/plots/anomaly_heatmap.png")
else:
    print("[WARN] No anomalies found. Skipping heatmap generation.")

[ERROR] Required CSV files not found.
[Errno 2] No such file or directory: 'data/normal_orders.csv'


NameError: name 'normal_orders' is not defined

: 

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# 📁 Ensure folders exist
os.makedirs("output", exist_ok=True)
os.makedirs("output/plots", exist_ok=True)

# 📥 Load order files
try:
    normal_orders = pd.read_csv("data/normal_orders.csv")
    drifted_orders = pd.read_csv("data/drifted_orders.csv")
except FileNotFoundError as e:
    print("[ERROR] Required CSV files not found.")
    print(e)
    # Re-raise instead of calling exit(): inside a Jupyter kernel exit() does
    # not stop the running cell, so the statements after this block still ran
    # and failed with "NameError: name 'normal_orders' is not defined".
    # Propagating the exception halts the cell at the real cause.
    raise

# 🧪 Detect anomalies: out-of-order order_ids
# Both ID columns are materialised as plain lists. Only the drifted stream is
# scanned in this cell; normal_sequence is not read here and is kept only in
# case other cells rely on it.
normal_sequence = list(normal_orders["order_id"])
drifted_sequence = list(drifted_orders["order_id"])

# A violation is a row whose order_id is strictly smaller than the order_id of
# the row immediately before it. Each ID is paired with its real predecessor,
# so the first row (which has none) is never compared. The previous version
# compared the first row against a sentinel of -1, which logged a bogus
# "previous_order_id" for a first ID below -1 and raised TypeError for
# non-numeric IDs.
anomalies = [
    {
        # Zero-based index of the offending row within the drifted stream.
        "position_in_stream": position,
        "current_order_id": current_id,
        "previous_order_id": previous_id,
    }
    for position, (previous_id, current_id) in enumerate(
        zip(drifted_sequence, drifted_sequence[1:]), start=1
    )
    if current_id < previous_id
]

# ✅ Always include columns, even if anomalies is empty
# Passing the column list explicitly means an empty result still writes a
# header row to the CSV below.
anomaly_df = pd.DataFrame(anomalies, columns=["position_in_stream", "current_order_id", "previous_order_id"])

# 📤 Save anomaly log
anomaly_df.to_csv("output/anomaly_log.csv", index=False)

print(f"[OK] Anomaly detection completed. {len(anomalies)} out-of-order violations detected.")
print("Anomaly log saved to: output/anomaly_log.csv")

# 🛑 Skip heatmap if no anomalies
if not anomaly_df.empty:
    # 📊 Heatmap Visualization
    # Transposed so the grid has two rows (stream position, order id) and one
    # column per logged violation.
    heatmap_data = anomaly_df[["position_in_stream", "current_order_id"]].T

    fig, ax = plt.subplots(figsize=(12, 5))
    sns.heatmap(heatmap_data, cmap="Reds", cbar=True, annot=True, fmt=".0f", ax=ax)
    ax.set_title("Anomaly Heatmap (Order ID Violations due to Clock Drift)")

    # Heatmap cells are one unit tall, so 0.5 and 1.5 are the row centres.
    ax.set_yticks([0.5, 1.5])
    ax.set_yticklabels(['Position in Stream', 'Order ID'], rotation=0)

    fig.tight_layout()
    fig.savefig("output/plots/anomaly_heatmap.png")
    # The figure is written to disk and then closed rather than shown inline.
    plt.close(fig)
    print("Anomaly heatmap saved to: output/plots/anomaly_heatmap.png")
else:
    print("[WARN] No anomalies found. Skipping heatmap generation.")


[ERROR] Required CSV files not found.
[Errno 2] No such file or directory: 'data/normal_orders.csv'


NameError: name 'normal_orders' is not defined

: 