In [None]:
import subprocess
import pandas as pd
import matplotlib.pyplot as plt

# Function to run experiment and extract accuracy & epsilon
def run_experiment(dataset, feature_aware=False, dp_ppr=False, dp_sgd=False,
                   gravity=False, heat=False, topk=2, sigma_values=0):
    """Run ``main.py`` once in a subprocess and scrape its reported metrics.

    Every argument is forwarded to ``main.py`` as a ``--flag=value`` command
    line option (``sigma_values`` is passed as ``--sigma_ista``).

    Args:
        dataset: Value for ``--data_file``.
        feature_aware: Value for ``--feature_aware``.
        dp_ppr: Value for ``--dp_ppr``.
        dp_sgd: Value for ``--dp_sgd``.
        gravity: Value for ``--gravity``.
        heat: Value for ``--heat``.
        topk: Value for ``--topk``.
        sigma_values: Value for ``--sigma_ista``.

    Returns:
        A tuple ``(accuracy, epsilon)``. Each element is a float parsed from
        the last 10 lines of stdout (the text after the first colon on a line
        containing "Testing accuracy" / "Epsilon"), or ``None`` when no such
        line is present or its value cannot be parsed. ``(None, None)`` is
        returned when the subprocess exits with a non-zero status.
    """
    print(f"Running experiment with data_file={dataset}")

    # One list element per argument: each must end with a comma, otherwise
    # adjacent string literals are silently concatenated into a single argument.
    command = [
        "python", "main.py",
        f"--data_file={dataset}",
        f"--feature_aware={feature_aware}",
        f"--dp_ppr={dp_ppr}",
        f"--dp_sgd={dp_sgd}",
        f"--sigma_ista={sigma_values}",
        f"--gravity={gravity}",
        f"--heat={heat}",
        f"--topk={topk}",
    ]

    try:
        result = subprocess.run(
            command, capture_output=True, text=True, encoding='utf-8', check=True
        )
    except subprocess.CalledProcessError as e:
        print(f"Error running the command: {e}")
        print(f"Standard Output:\n{e.stdout}")
        print(f"Standard Error:\n{e.stderr}")
        return None, None

    output = result.stdout
    print(output)

    accuracy, epsilon = None, None
    # Only the tail of the output is scanned; if a label appears more than
    # once there, the last occurrence wins.
    for line in output.splitlines()[-10:]:
        if "Testing accuracy" in line:
            accuracy = _parse_metric(line)
        if "Epsilon" in line:
            epsilon = _parse_metric(line)

    return accuracy, epsilon


def _parse_metric(line):
    """Return the float between the first and second colon of ``line``, or None if unparsable."""
    try:
        return float(line.split(":")[1].strip())
    except (IndexError, ValueError):
        # IndexError: the line has no colon; ValueError: the text is not a number.
        return None

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Experiment grid
datasets = ["data/cora_ml"]

# The three flag lists below are indexed together by the collection loop:
# position i in each list describes one (DP-PPR, DP-SGD, feature-aware)
# configuration, giving all 8 on/off combinations.
dp_ppr_setting = [False, True] * 4
dp_sgd_setting = [False, False, True, True] * 2
feature_aware_setting = [False] * 4 + [True] * 4

# Sigma values swept for the configurations that have DP-PPR enabled.
sigma_values = [0.15082, 0.08104, 0.05619, 0.04326, 0.03526, 0.02983, 0.02590]

# Number of repetitions of every configuration.
num_runs = 5

# topk values to sweep: 2 through 8 inclusive.
topk_values = range(2, 9)

# Collect results
def _mean_and_std(values):
    """Return (mean, population std dev) of the non-None entries of ``values``.

    ``run_experiment`` yields None for a metric when the run fails or the
    metric is absent from the output; those entries are skipped so a single
    missing value does not abort the whole sweep. If nothing is left, both
    statistics are NaN.
    """
    valid = [v for v in values if v is not None]
    if not valid:
        return float("nan"), float("nan")
    mean = sum(valid) / len(valid)
    std = (sum((v - mean) ** 2 for v in valid) / len(valid)) ** 0.5
    return mean, std


results = []
for dataset in datasets:
    for i in range(len(dp_ppr_setting)):
        for topk in topk_values:  # Loop over topk values
            # DP-PPR configurations are run once per sigma value; the others
            # are run once with run_experiment's default sigma and recorded
            # with Sigma = None.
            sigma_grid = sigma_values if dp_ppr_setting[i] else [None]
            for sigma in sigma_grid:
                sigma_kwargs = {} if sigma is None else {"sigma_values": sigma}

                acc_vals, eps_vals = [], []
                for _ in range(num_runs):
                    accuracy, epsilon = run_experiment(
                        dataset, dp_ppr=dp_ppr_setting[i], dp_sgd=dp_sgd_setting[i],
                        feature_aware=feature_aware_setting[i], topk=topk, **sigma_kwargs
                    )
                    acc_vals.append(accuracy)
                    eps_vals.append(epsilon)

                acc_mean, acc_std = _mean_and_std(acc_vals)
                eps_mean, eps_std = _mean_and_std(eps_vals)

                # Tuple order: dataset, DP-PPR, DP-SGD, feature-aware, sigma,
                # topk, mean accuracy, mean epsilon, accuracy std, epsilon std.
                results.append((dataset, dp_ppr_setting[i], dp_sgd_setting[i], feature_aware_setting[i],
                                sigma, topk,
                                acc_mean, eps_mean,
                                acc_std, eps_std))

# Tabulate the aggregated runs: one row per entry of `results`.
df = pd.DataFrame(
    results,
    columns=[
        "Dataset", "DP_PPR", "DP_SGD", "Feature_Aware", "Sigma", "TopK",
        "Accuracy", "Epsilon", "Accuracy_Std", "Epsilon_Std",
    ],
)

# Text dump of the table
print(df)

# One error-bar series per dataset: mean accuracy against the row index,
# with the accuracy standard deviation as the bar height.
fig, ax = plt.subplots(figsize=(10, 6))
for dataset, grp in df.groupby("Dataset"):
    ax.errorbar(
        grp.index,
        grp["Accuracy"],
        yerr=grp["Accuracy_Std"],
        fmt="o-",
        label=dataset,
        capsize=5,
    )

ax.set_xlabel("Experiment Index")
ax.set_ylabel("Accuracy")
ax.set_title("Experiment Results Across Datasets with Standard Deviation")
ax.legend(title="Dataset")
ax.grid(True)
plt.show()


In [None]:
# Write the aggregated results to CSV, then show the table. The bare `df`
# has to be the cell's last expression for the notebook to render it.
df.to_csv("experiment_results_feature_aware_20ite_r.csv", index=False)
df

In [None]:
# Keep only the rows whose Dataset column is the Cora-ML data file, then display them.
cora_df = df.loc[df["Dataset"].eq("data/cora_ml")]
cora_df

In [None]:
# Compare mean accuracy with and without feature awareness on the Cora rows.
plt.figure(figsize=(10, 6))
for flag, label in ((False, 'No Feature Aware'), (True, 'Feature Aware')):
    subset = cora_df[cora_df["Feature_Aware"] == flag]
    # The number of rows per subset depends on the experiment grid, so the
    # x positions are derived from the subset's length instead of being
    # hard-coded; a fixed range raises a shape-mismatch error in plt.plot
    # whenever the grid changes.
    plt.plot(range(len(subset)), subset["Accuracy"], marker="o", linestyle="-", label=label)
plt.xlabel("Experiment Index")
plt.ylabel("Accuracy")
plt.title("Experiment Results Across Cora")
# NOTE(review): the legend entries are feature-aware settings, not datasets;
# the title string is kept as-is here — consider renaming it.
plt.legend(title="Dataset")
plt.grid(True)
plt.show()

In [None]:
# Show the aggregated results table as this cell's output.
df