In [3]:
import pickle

# Pickled state file whose top-level entries are summarised below.
state_path = "../state/wiki-consumer-advanced-run1_state.pkl"

# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open(state_path, "rb") as f:
    state = pickle.load(f)

print("\n=== CONTENTS OF STATE FILE ===")
# One summary line per entry: the value's type, plus its length when the value
# defines __len__ (anything without it is reported as 'N/A').
for k, v in state.items():
    length = len(v) if hasattr(v, "__len__") else "N/A"
    print(f"{k}: type={type(v)} | length={length}")

print("\nDone.")



=== CONTENTS OF STATE FILE ===
edges: type=<class 'set'> | length=103689
nodes: type=<class 'set'> | length=7115
edges_count: type=<class 'int'> | length=N/A
start_time: type=<class 'float'> | length=N/A
metrics_history: type=<class 'list'> | length=31
timestamp: type=<class 'str'> | length=26

Done.


In [1]:
import pickle
import matplotlib.pyplot as plt
import os

# Pickled state file to visualise. To plot the run-1 snapshot instead, point
# this at "../state/wiki-consumer-advanced-run1_state.pkl".
state_path = "../state/wiki-consumer-advanced_state.pkl"
os.makedirs("plots", exist_ok=True)

# -----------------------------
# Load state file
# -----------------------------
# NOTE(review): unpickling can execute arbitrary code -- only load trusted files.
with open(state_path, "rb") as f:
    state = pickle.load(f)

metrics = state["metrics_history"]
print(f"[✓] Loaded {len(metrics)} metric snapshots")

# Build one list per metric field; a snapshot that lacks a field contributes 0.
edges, nodes, rate, window_rate, progress = (
    [snapshot.get(field, 0) for snapshot in metrics]
    for field in ("edges", "nodes", "rate", "window_rate", "progress")
)

# X axis is the snapshot index (0..N-1), not wall-clock time.
timestamps = [*range(len(metrics))]


def save_plot(title, out_dir="plots"):
    """Save the current matplotlib figure as a PNG, then close it.

    The file name is built from ``title`` by replacing spaces with
    underscores, lower-casing the result and appending ``.png``.

    Args:
        title: Label used to derive the output file name.
        out_dir: Directory that receives the image. It is created when
            missing, so the function works even if no earlier cell has
            prepared the folder.
    """
    os.makedirs(out_dir, exist_ok=True)
    # Keep a forward slash so the reported path reads the same on every OS.
    file = f"{out_dir}/" + title.replace(" ", "_").lower() + ".png"
    plt.savefig(file, dpi=200, bbox_inches="tight")
    plt.close()
    print(f"[✓] Saved {file}")


# -----------------------------
# Shared plot finishing
# -----------------------------
def _finish_plot(ax, title, xlabel, ylabel, name):
    """Label the axes, enable the grid and hand the figure to ``save_plot``.

    Args:
        ax: Axes of the figure being finished; its figure must be the current
            pyplot figure, because ``save_plot`` saves the current figure.
        title: Text shown above the plot.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        name: Name passed to ``save_plot`` to derive the output file name.
    """
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid(True)
    save_plot(name)


# -----------------------------
# 1-5. Line plots against checkpoint index
#   (processing rate, sliding window rate, node growth, edge growth, progress)
# -----------------------------
# Each entry: (series, figure title, y-axis label, output name).
line_plots = [
    (rate, "Processing Rate Over Time", "Edges/sec", "processing_rate"),
    (window_rate, "Sliding Window Rate (10s)", "Edges/sec", "sliding_window_rate"),
    (nodes, "Node Growth Over Time", "Node Count", "node_growth"),
    (edges, "Edge Growth Over Time", "Edge Count", "edge_growth"),
    (progress, "Progress Over Time", "Progress (%)", "progress"),
]

for series, plot_title, y_label, plot_name in line_plots:
    # A fresh figure per plot; plt.subplots makes it the current figure.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(timestamps, series)
    _finish_plot(ax, plot_title, "Checkpoint", y_label, plot_name)


# -----------------------------
# 6. Rate Histogram
# -----------------------------
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(rate, bins=10)
_finish_plot(ax, "Rate Distribution", "Edges/sec", "Frequency", "rate_histogram")


# NOTE(review): the images are written to the relative "plots/" directory,
# not to an absolute "/plots" path as the message below might suggest.
print("\n✅ ALL GRAPHS GENERATED SUCCESSFULLY in /plots folder!\n")


[✓] Loaded 35 metric snapshots
[✓] Saved plots/processing_rate.png
[✓] Saved plots/sliding_window_rate.png
[✓] Saved plots/node_growth.png
[✓] Saved plots/edge_growth.png
[✓] Saved plots/progress.png
[✓] Saved plots/rate_histogram.png

✅ ALL GRAPHS GENERATED SUCCESSFULLY in /plots folder!

