In [None]:
# --- SmartZip Dashboard ---
import pandas as pd
import json
import matplotlib.pyplot as plt
import os

# ----------------------------
# 1. Load thresholds & logs
# ----------------------------
threshold_file = "smartzip_thresholds.json"
log_file = "adaptive_log.jsonl"

# Fallback values; also used to fill in any key a partial thresholds file omits,
# so later lookups such as thresholds["entropy_threshold"] cannot raise KeyError.
DEFAULT_THRESHOLDS = {"entropy_threshold": 3.5, "size_threshold": 5_000_000}

# Load thresholds
if os.path.exists(threshold_file):
    with open(threshold_file, encoding="utf-8") as f:
        thresholds = json.load(f)
    for key, value in DEFAULT_THRESHOLDS.items():
        thresholds.setdefault(key, value)
else:
    thresholds = dict(DEFAULT_THRESHOLDS)

print("📌 Current thresholds:", thresholds)

# Load logs (one JSON object per line)
if os.path.exists(log_file):
    with open(log_file, encoding="utf-8") as f:
        # Blank lines (e.g. a trailing newline) are skipped: json.loads rejects them
        records = [json.loads(line) for line in f if line.strip()]
    df = pd.DataFrame(records)
else:
    df = pd.DataFrame()
    print("⚠️ No log data found!")

# Notebook-style preview of the first rows (only rendered as a cell's last expression)
df.head()
# ----------------------------
# 2. Summary Stats
# ----------------------------
if not df.empty:
    # How often each algorithm appears in the log
    print("\nAlgorithm choices:")
    algo_counts = df["algorithm"].value_counts()
    print(algo_counts)

    # Mean of the numeric metrics, grouped by the algorithm that was used
    print("\nAverage metrics per algorithm:")
    metric_columns = ["compression_ratio", "comp_time_sec", "entropy"]
    per_algo_means = df.groupby("algorithm")[metric_columns].mean()
    print(per_algo_means)
# ----------------------------
# 3. Algo Choices Over Time
# ----------------------------
if not df.empty:
    # One marker per log entry, with the logged algorithm on the y-axis
    fig, ax = plt.subplots(figsize=(10, 4))
    ax.plot(df.index, df["algorithm"], marker="o", linestyle="--")
    ax.set_title("Algorithm Decisions Over Time")
    ax.set_xlabel("Log Entry")
    ax.set_ylabel("Algorithm")
    plt.xticks(rotation=45)
    plt.show()
# ----------------------------
# 4. Entropy vs Ratio Scatter
# ----------------------------
if not df.empty:
    fig, ax = plt.subplots(figsize=(8, 6))
    # Integer code per distinct algorithm; the tab10 colormap turns it into a color
    colors = df["algorithm"].astype("category").cat.codes
    ax.scatter(df["entropy"], df["compression_ratio"], c=colors, cmap="tab10")
    # Vertical marker at the currently configured entropy threshold
    ax.axvline(
        thresholds["entropy_threshold"],
        color="red",
        linestyle="--",
        label="Entropy cutoff",
    )
    ax.set_xlabel("Entropy (bits per byte)")
    ax.set_ylabel("Compression Ratio")
    ax.set_title("Entropy vs Compression Ratio (colored by algo)")
    ax.legend()
    plt.show()
# ----------------------------
# 5. Compression Time Trend
# ----------------------------
if not df.empty:
    fig, ax = plt.subplots(figsize=(10, 4))
    # (column, legend label, marker) for each timing series, drawn in this order
    timing_series = (
        ("comp_time_sec", "Compression Time (s)", "x"),
        ("decomp_time_sec", "Decompression Time (s)", "o"),
    )
    for column, series_label, series_marker in timing_series:
        ax.plot(df.index, df[column], label=series_label, marker=series_marker)
    ax.set_title("Compression/Decompression Times Over Time")
    ax.set_xlabel("Log Entry")
    ax.set_ylabel("Seconds")
    ax.legend()
    plt.show()
# ----------------------------
# 6. Accuracy Check (Rules vs Logs)
# ----------------------------

def smartzip_decision(file_type, entropy, size, thresholds, skip_entropy=7.5):
    """
    Re-derive the algorithm the rule set would pick for one file.

    file_type: type label of the file; it is substring-matched against
        "json", "text", "audio", "video" and "image". Anything that is not a
        string (None, or the NaN pandas inserts for a missing log field) is
        treated as an unknown type instead of raising TypeError.
    entropy: entropy value of the file (the dashboard plots it as bits per byte)
    size: original file size, compared against thresholds["size_threshold"]
    thresholds: mapping with "entropy_threshold" and "size_threshold";
        a missing key falls back to 3.5 / 5_000_000 respectively
    skip_entropy: entropy above which the file is not compressed at all

    Returns one of "SKIP", "brotli", "lz4" or "zstd".
    """
    # Entropy above the skip cutoff short-circuits every other rule
    if entropy > skip_entropy:
        return "SKIP"

    kind = file_type if isinstance(file_type, str) else ""

    if "json" in kind or "text" in kind:
        entropy_cutoff = thresholds.get("entropy_threshold", 3.5)
        return "brotli" if entropy < entropy_cutoff else "zstd"

    if any(media in kind for media in ("audio", "video", "image")):
        size_cutoff = thresholds.get("size_threshold", 5_000_000)
        return "lz4" if size > size_cutoff else "zstd"

    # Unknown or unlisted types use the general-purpose default
    return "zstd"

if not df.empty:
    # Re-run the current rules on every logged entry, in log order
    df["predicted_algo"] = [
        smartzip_decision(logged_type, logged_entropy, logged_size, thresholds)
        for logged_type, logged_entropy, logged_size in zip(
            df["type"], df["entropy"], df["original_size"]
        )
    ]
    # True where the logged algorithm equals what the rules predict now
    df["match"] = df["algorithm"].eq(df["predicted_algo"])

    acc = df["match"].mean() * 100
    print(f"\n✅ Current rule accuracy vs logs: {acc:.2f}%")

    # Rows: algorithm found in the log; columns: algorithm the rules predict
    confusion = pd.crosstab(
        df["algorithm"],
        df["predicted_algo"],
        rownames=["Actual"],
        colnames=["Predicted"],
    )
    print("\nConfusion Matrix:")
    print(confusion)
# ----------------------------
# 7. Live Auto-Refresh Dashboard
# ----------------------------
import time
from IPython.display import clear_output, display

def live_dashboard(refresh_interval=5, iterations=10):
    """
    Auto-refresh dashboard that updates every N seconds.

    Every pass clears the previous output, re-reads the thresholds file and
    the JSONL log from disk, prints a short summary and redraws two plots
    (algorithm per log entry, entropy vs compression ratio).

    refresh_interval: how many seconds to wait between updates
    iterations: how many times to refresh (set to None for infinite loop;
        0 performs no refresh at all)
    """
    fallback_thresholds = {"entropy_threshold": 3.5, "size_threshold": 5_000_000}

    i = 0
    # `iterations is None` really is unbounded; 0 is honored as "do nothing"
    while iterations is None or i < iterations:
        clear_output(wait=True)

        # Reload thresholds; defaults fill any key the file leaves out
        if os.path.exists(threshold_file):
            with open(threshold_file, encoding="utf-8") as f:
                thresholds = json.load(f)
        else:
            thresholds = {}
        for key, value in fallback_thresholds.items():
            thresholds.setdefault(key, value)

        # Reload logs. The log may be appended to while it is being read, so
        # blank lines and lines that do not parse are skipped rather than
        # aborting the refresh.
        records = []
        skipped = 0
        if os.path.exists(log_file):
            with open(log_file, encoding="utf-8") as f:
                for line in f:
                    if not line.strip():
                        continue
                    try:
                        records.append(json.loads(line))
                    except json.JSONDecodeError:
                        skipped += 1
        df = pd.DataFrame(records)

        print(f"📊 SmartZip Live Dashboard (iteration {i+1})")
        print("📌 Current thresholds:", thresholds)
        if skipped:
            print(f"⚠️ Skipped {skipped} unreadable log line(s)")

        if not df.empty:
            # Summary stats
            print("\nAlgorithm counts:", df["algorithm"].value_counts().to_dict())
            print("Latest decision:", df.tail(1).to_dict(orient="records")[0])

            # Algo over time
            fig = plt.figure(figsize=(10, 4))
            plt.plot(df.index, df["algorithm"], marker="o", linestyle="--")
            plt.title("Algorithm Decisions Over Time")
            plt.xlabel("Log Entry")
            plt.ylabel("Algorithm")
            plt.xticks(rotation=45)
            plt.show()
            # Release the figure so repeated refreshes do not accumulate them
            plt.close(fig)

            # Entropy vs Ratio
            fig = plt.figure(figsize=(8, 6))
            colors = df["algorithm"].astype("category").cat.codes
            plt.scatter(df["entropy"], df["compression_ratio"], c=colors, cmap="tab10")
            plt.axvline(thresholds["entropy_threshold"], color="red", linestyle="--", label="Entropy cutoff")
            plt.xlabel("Entropy")
            plt.ylabel("Compression Ratio")
            plt.title("Entropy vs Compression Ratio (colored by algo)")
            plt.legend()
            plt.show()
            plt.close(fig)
        else:
            print("⚠️ No log data available yet.")

        i += 1
        # Wait before next refresh, but not after the final one
        if iterations is None or i < iterations:
            time.sleep(refresh_interval)

# Example run: five refreshes, five seconds apart
live_dashboard(iterations=5, refresh_interval=5)
