In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def load_recall_matrix(base_output_folder):
    """
    Loads the recall values from each run folder under base_output_folder
    and returns them as a NumPy array with one row per run.

    Every immediate subdirectory whose name starts with "run_" is visited in
    lexicographically sorted name order (so "run_10" sorts before "run_2"),
    and its "recall_values.txt" file is read. A line that splits into exactly
    two whitespace-separated fields contributes its second field, parsed as a
    float; every other line (blank, header, ...) is skipped.

    Parameters:
    - base_output_folder (str): Path to the folder containing run_* subfolders.

    Returns:
    - np.ndarray: A float array with shape (n_runs, n_snapshots). If there is
      no run_* subfolder at all, an empty array of shape (0,) is returned.

    Raises:
    - OSError: If base_output_folder cannot be listed or a run folder's
      recall_values.txt cannot be opened (e.g. FileNotFoundError).
    - ValueError: If the second field of a two-field line is not a valid float,
      or if the runs do not all contain the same number of recall values.
    """
    run_folders = sorted(
        name for name in os.listdir(base_output_folder)
        if name.startswith("run_")
        and os.path.isdir(os.path.join(base_output_folder, name))
    )

    recall_matrix = []
    for run_folder in run_folders:
        path = os.path.join(base_output_folder, run_folder, "recall_values.txt")
        with open(path, "r") as f:
            # Keep only "<first field> <recall>" lines; the first field is ignored.
            recalls = [
                float(fields[1])
                for fields in (line.split() for line in f)
                if len(fields) == 2
            ]
        recall_matrix.append(recalls)

    # A ragged list of rows cannot form a rectangular (n_runs, n_snapshots)
    # array; fail here with the offending run names instead of letting NumPy
    # build an object array or raise an opaque shape error.
    counts = {name: len(row) for name, row in zip(run_folders, recall_matrix)}
    if len(set(counts.values())) > 1:
        raise ValueError(
            "Runs under {!r} contain differing numbers of recall values: {}".format(
                base_output_folder, counts
            )
        )

    return np.array(recall_matrix, dtype=float)

In [None]:
def run_analysis_over_c(base_dir, c_values, analysis_fn):
    """
    Applies one analysis function to the recall matrix of every requested c.

    For each value c, in the order given, a header line is printed, the recall
    matrix is loaded from the subfolder named 'c{c}' inside base_dir, and
    analysis_fn is called with that matrix. Whatever analysis_fn returns is
    discarded; this function itself returns None.

    Parameters:
    - base_dir (str): Path to the folder containing 'c{value}' subfolders.
    - c_values (iterable): Values of c to iterate over (e.g. a list of ints).
    - analysis_fn (callable): Called with the recall matrix for each c; it is
      expected to report its own results (e.g. by printing).
    """
    for c in c_values:
        header = f"\n=== Results for c = {c} ==="
        print(header)
        # The header is printed before loading so a load failure is attributable to c.
        analysis_fn(load_recall_matrix(os.path.join(base_dir, f"c{c}")))


In [None]:
def analyze_recall_matrix(recall_matrix):
    """
    Summarizes recall at run granularity and prints the result.

    Each run (row) is first reduced to its mean recall over all snapshots.
    The mean, sample standard deviation, and sample variance of those per-run
    means are then printed with six decimal places. Nothing is returned.

    Parameters:
    - recall_matrix (np.ndarray): A 2D array of shape (n_runs, n_snapshots)

    Prints:
    - Mean of the per-run average recalls
    - Sample standard deviation of the per-run average recalls
    - Sample variance of the per-run average recalls
    """
    # One value per run: that run's recall averaged over its snapshots.
    run_means = recall_matrix.mean(axis=1)

    grand_mean = run_means.mean()
    # ddof=1 divides by (n_runs - 1) rather than n_runs (sample estimators).
    # NOTE(review): the original author left open whether n-1 or n is the
    # right normalization for this experiment -- confirm.
    spread_std = run_means.std(ddof=1)
    spread_var = run_means.var(ddof=1)

    print(f"Mean of per-run average recalls: {grand_mean:.6f}")
    print(f"Sample standard deviation:            {spread_std:.6f}")
    print(f"Sample variance:                      {spread_var:.6f}")

In [None]:
def analyze_recall_matrix_by_snapshot(recall_matrix):
    """
    Summarizes recall at snapshot granularity and prints the result.

    For every snapshot (column) the mean, sample standard deviation (ddof=1),
    and sample variance (ddof=1) are computed across runs. Each of those three
    per-snapshot vectors is then averaged over all snapshots and printed with
    six decimal places. Nothing is returned.

    Parameters:
    - recall_matrix (np.ndarray): Shape (n_runs, n_snapshots)

    Prints:
    - Average over snapshots of the across-run mean recall
    - Average over snapshots of the across-run sample std
    - Average over snapshots of the across-run sample variance
    """
    # Statistics across runs (axis 0), one entry per snapshot.
    across_runs_mean = recall_matrix.mean(axis=0)
    across_runs_std = recall_matrix.std(axis=0, ddof=1)
    across_runs_var = recall_matrix.var(axis=0, ddof=1)

    # Collapse each per-snapshot vector to a single number for reporting.
    print(f"Mean of snapshot means:             {across_runs_mean.mean():.6f}")
    print(f"Mean of snapshot sample stds:       {across_runs_std.mean():.6f}")
    print(f"Mean of snapshot sample variances:  {across_runs_var.mean():.6f}")


In [None]:
def plot_snapshot_recall_from_folder(folder_path, label=None, color=None, marker='o', ax=None):
    """
    Loads recall matrix from a given folder and plots mean ± std with markers at each snapshot.

    The per-snapshot mean across runs is drawn as a line with markers, and the
    band mean ± sample standard deviation (ddof=1) is shaded behind it. Axis
    labels, title, and grid are (re)applied on every call, so the function can
    be called repeatedly to overlay several series on the same axes. Returns None.

    Parameters:
    - folder_path (str): Path to folder (e.g., .../c1)
    - label (str): Label for plot legend
    - color (str): Optional matplotlib color. If None, the line takes the next
      colour from the axes' cycle and the shaded band reuses that same colour.
    - marker (str): Marker style for data points (e.g., 'o', '^', 's', 'x')
    - ax (matplotlib.axes.Axes): Optional axes to draw on. Defaults to the
      current pyplot axes (plt.gca()), matching the previous behaviour.
    """
    recall_matrix = load_recall_matrix(folder_path)

    snapshot_means = recall_matrix.mean(axis=0)
    snapshot_stds = recall_matrix.std(axis=0, ddof=1)
    snapshots = np.arange(len(snapshot_means))

    if ax is None:
        ax = plt.gca()

    # Line + markers
    (line,) = ax.plot(snapshots, snapshot_means, label=label, color=color,
                      marker=marker, markersize=4, linewidth=1.5)

    # Shaded region = mean ± std. Take the colour from the line that was just
    # drawn so the band matches its curve even when no explicit colour is
    # given, instead of depending on matplotlib's default fill colour.
    ax.fill_between(snapshots,
                    snapshot_means - snapshot_stds,
                    snapshot_means + snapshot_stds,
                    alpha=0.3,
                    color=line.get_color())

    ax.set_xlabel("Snapshot Index")
    ax.set_ylabel("recall @ n̄")
    ax.set_title("Per-Snapshot recall with Std Dev")
    ax.grid(True)

# Per-run averages first (average each run over its snapshots, then take statistics across runs)


In [None]:
# Run-level summary for c = 1, 2, 3 on the as_caida_122 outputs.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_caida_122_uss_50_trials_cpp"
run_analysis_over_c(
    base_dir=base_dir,
    c_values=[1, 2, 3],
    analysis_fn=analyze_recall_matrix,
)

In [None]:
# Run-level summary for c = 1, 2, 3 on the as_733 outputs.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_733_uss_50_trials_cpp"
run_analysis_over_c(
    base_dir=base_dir,
    c_values=[1, 2, 3],
    analysis_fn=analyze_recall_matrix,
)

# Per-snapshot statistics first (fine-grained: compute statistics across runs for each snapshot, then average over snapshots)

In [None]:
# Snapshot-level summary for c = 1, 2, 3 on the as_caida_122 outputs.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_caida_122_uss_50_trials_cpp"
run_analysis_over_c(
    base_dir=base_dir,
    c_values=[1, 2, 3],
    analysis_fn=analyze_recall_matrix_by_snapshot,
)

In [None]:
# Snapshot-level summary for c = 1, 2, 3 on the as_733 outputs.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_733_uss_50_trials_cpp"
run_analysis_over_c(
    base_dir=base_dir,
    c_values=[1, 2, 3],
    analysis_fn=analyze_recall_matrix_by_snapshot,
)

# Plotting the fine-grained analysis

In [None]:
# Overlay the per-snapshot mean ± std recall curves for c = 1, 2, 3
# (as_caida_122 outputs) on a single figure.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_caida_122_uss_50_trials_cpp"
colors = ['blue', 'green', 'orange']
markers = ['o', '^', 's']

plt.figure(figsize=(10, 5))
# c runs 1, 2, 3 in step with the colour/marker pairs.
for c, (color, marker) in enumerate(zip(colors, markers), start=1):
    folder = os.path.join(base_dir, f"c{c}")
    plot_snapshot_recall_from_folder(folder, label=f"c = {c}", color=color, marker=marker)

plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Overlay the per-snapshot mean ± std recall curves for c = 1, 2, 3
# (as_733 outputs) on a single figure.
# NOTE(review): absolute, machine-specific path -- adjust when running elsewhere.
base_dir = "/home/nikolabulat/Snapshot_Update/Tonic/preliminary_analysis/outputs/as_733_uss_50_trials_cpp"
colors = ['blue', 'green', 'orange']
markers = ['o', '^', 's']

plt.figure(figsize=(10, 5))
# c runs 1, 2, 3 in step with the colour/marker pairs.
for c, (color, marker) in enumerate(zip(colors, markers), start=1):
    folder = os.path.join(base_dir, f"c{c}")
    plot_snapshot_recall_from_folder(folder, label=f"c = {c}", color=color, marker=marker)

plt.legend()
plt.tight_layout()
plt.show()